mirror of
https://github.com/jorenchik/mdemory.git
synced 2026-03-22 00:26:21 +00:00
restructured, removing the Go source
This commit is contained in:
284
src/transpiler/lexer.cpp
Normal file
284
src/transpiler/lexer.cpp
Normal file
@@ -0,0 +1,284 @@
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <format>
|
||||
#include <regex>
|
||||
|
||||
#include "lexer.h"
|
||||
#include "config.h"
|
||||
#include "result.h"
|
||||
|
||||
// File-scope lexer state. tokenizeMdem() resets all of it on entry, so the
// lexer is re-runnable but not thread-safe.
std::vector<Token> tokens;       // tokens emitted so far
std::vector<char> buffer;        // raw characters accumulated since the last token
int32_t row{};                   // current 1-based line of the scan cursor
int32_t column{};                // current 1-based column of the scan cursor
int32_t previousRow{};           // position where the pending text fragment began
int32_t previousColumn{};
bool textStarted = false;        // true once a non-whitespace char was seen
bool identifierStarted = false;  // true between '[' and ']'
bool sof{};                      // NOTE(review): never read in this file — confirm use elsewhere
|
||||
|
||||
// Removes every leading and trailing character of `str` that appears in
// `trimChars`, in place. Interior occurrences are untouched. If `str`
// consists solely of trim characters it becomes empty.
//
// Replaces the original hand-rolled double-nested scan loops with the
// standard find_first_not_of / find_last_not_of idiom — same behavior,
// far less code.
void trimString(std::string &str, std::string trimChars) {
    const size_t first = str.find_first_not_of(trimChars);
    if (first == std::string::npos) {
        // Nothing but trim characters (or empty input).
        str.clear();
        return;
    }
    const size_t last = str.find_last_not_of(trimChars);
    // Trim the tail first so `first` stays a valid index.
    str.erase(last + 1);
    str.erase(0, first);
}
|
||||
|
||||
// Emits up to two tokens from the global character buffer:
//   1. If anything precedes the last `tokenLen` characters, that prefix is
//      emitted as a token of `textType` (whitespace-trimmed; skipped when
//      the trimmed text is empty), positioned at previousRow/previousColumn.
//   2. The last `tokenLen` characters become a token of `ttype` at the
//      current row/column (tokenLen == 0 yields an empty-content token,
//      e.g. EndOfFile).
// Afterwards the buffer is empty and previousRow/previousColumn are moved
// up to the current position.
//
// Fix: the original cleared the buffer twice (before and after the main
// token push); nothing touches the buffer in between, so the second clear
// was redundant and has been removed.
void makeTokenWithTokenBuffer(
    TokenType ttype,
    size_t tokenLen,
    TokenType textType
) {
    // The token's own text is the tail of the buffer.
    std::string token(buffer.end() - tokenLen, buffer.end());
    if (buffer.size() > tokenLen) {
        // Everything before the tail is accumulated free text.
        std::string prevFragment(buffer.begin(), buffer.end() - tokenLen);
        trimString(prevFragment, " \n\t");
        if (prevFragment.length() > 0) {
            tokens.push_back(Token{
                textType,
                prevFragment,
                previousRow,
                previousColumn
            });
        }
    }
    buffer.clear();

    tokens.push_back(Token{
        ttype,
        token,
        row,
        column
    });

    previousRow = row;
    previousColumn = column;
}
|
||||
|
||||
// Tokenizes an mdem source string into the global `tokens` vector and
// returns it wrapped in a Result. All lexer state is reset on entry so the
// function may be called repeatedly. A purely-whitespace input returns an
// empty token list (no EndOfFile token).
//
// Fix: the "Cannot end identifier" error path indexed `tokens[i]` with the
// *character* index `i` — an out-of-bounds read whenever the input has more
// characters than emitted tokens. The error now reports the current lexer
// row/column instead.
Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
    row = 1;
    column = 1;
    previousRow = 1;
    previousColumn = 1;
    textStarted = false;
    tokens.clear();
    buffer.clear();

    if (fileRunes.find_first_not_of(" \n\t") == std::string::npos) {
        return {tokens, ""};
    }

    for (size_t i = 0; i < fileRunes.size(); ++i) {
        char c = fileRunes[i];

        // AdvancePointer
        if (c == '\n') {
            row += 1;
            column = 0; // bumped to 1 by the `column += 1` at loop bottom
        }

        // A backslash escapes the next character: it is buffered verbatim
        // and never triggers token emission.
        if (c == '\\') {
            i += 1;
            if (i < fileRunes.size()) {
                buffer.push_back(fileRunes[i]);
            }
            continue;
        } else {
            buffer.push_back(c);
        }

        // SkipWhitetext: while only whitespace has been seen, keep moving
        // the recorded start position of the next text fragment forward.
        if (!textStarted) {
            if (c == '\n') {
                previousRow += 1;
                previousColumn = 1;
            } else if (c == ' ') {
                previousColumn += 1;
            } else if (c == '\t') {
                previousColumn += 4; // a tab is counted as four columns
            } else {
                textStarted = true;
            }
        }

        // EmitTokens
        switch (c) {
        case '[':
            makeTokenWithTokenBuffer(
                TokenType::CooldownStart,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            identifierStarted = true;
            break;
        case ']':
            if (!identifierStarted) {
                // Report the current lexer position (was: tokens[i],
                // which indexed the token list by character index).
                return {
                    tokens,
                    "Cannot end identifier if it is not started",
                    row,
                    column
                };
            }
            makeTokenWithTokenBuffer(
                TokenType::CooldownEnd,
                1,
                TokenType::Cooldown
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            identifierStarted = false;
            break;
        case '-':
            makeTokenWithTokenBuffer(
                TokenType::ElementDashStart,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        case '^':
            makeTokenWithTokenBuffer(
                TokenType::ElementOrderModifier,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        case ':':
            makeTokenWithTokenBuffer(
                TokenType::MatchGroupEnd,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        case '>':
            makeTokenWithTokenBuffer(
                TokenType::QuestionEnd,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            // NOTE(review): unlike the other cases, '>' does not reset
            // textStarted — confirm whether this asymmetry is intended.
            break;
        case '+':
            makeTokenWithTokenBuffer(
                TokenType::ElementPlusStart,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        }

        column += 1;
    }

    // Flush any remaining buffered text and terminate the stream.
    makeTokenWithTokenBuffer(
        TokenType::EndOfFile,
        0,
        TokenType::TextFragment
    );

    if (debug) {
        std::cout << "SECTION: Lexer output:\n";
        std::cout << std::format("Token count: {}", tokens.size()) << std::endl;
        for (const Token& token : tokens) {
            std::cout << token.ToString();
        }
        std::cout << "SECTION END: Lexer output\n";
    }

    return {tokens, ""};
}
|
||||
|
||||
// Matches a single newline character. Used by Token::ToString() to strip
// line breaks from text-fragment content before printing.
std::regex nextLineExp(
    "\n",
    std::regex_constants::ECMAScript
);

// Matches two or more consecutive whitespace characters. Used by
// Token::ToString() to collapse whitespace runs into a single space.
std::regex doubleSpaceExp(
    "\\s\\s+",
    std::regex_constants::ECMAScript
);
|
||||
|
||||
std::string Token::ToString(const TokenType* ttype) {
|
||||
switch (*ttype) {
|
||||
case TokenType::TextFragment: return "text fragment";
|
||||
case TokenType::QuestionEnd: return "question end symbol";
|
||||
case TokenType::MatchGroupEnd: return "match group end";
|
||||
case TokenType::ElementDashStart: return "dash element start";
|
||||
case TokenType::ElementOrderModifier: return "order element modifier";
|
||||
case TokenType::ElementPlusStart: return "plus element start";
|
||||
case TokenType::Cooldown: return "cooldown";
|
||||
case TokenType::CooldownStart: return "start of cooldown";
|
||||
case TokenType::CooldownEnd: return "end of cooldown";
|
||||
case TokenType::StartOfFile: return "start of the file";
|
||||
case TokenType::EndOfFile: return "end of file";
|
||||
default: return "unrecognized token";
|
||||
}
|
||||
}
|
||||
|
||||
// Renders this token as one debug line: `<type name>: "<content>" (row:col)\n`.
// Text fragments have newlines removed and whitespace runs collapsed so the
// line stays readable.
std::string Token::ToString() const {
    std::string printable{content};
    if (tokenType == TokenType::TextFragment) {
        printable = std::regex_replace(printable, nextLineExp, "");
        printable = std::regex_replace(printable, doubleSpaceExp, " ");
    }
    const std::string typeName = ToString(&tokenType);
    return std::format(
        "{}: \"{}\" ({}:{})\n",
        typeName,
        printable,
        row,
        column
    );
}
|
||||
Reference in New Issue
Block a user