transpiler comments and refactoring

This commit is contained in:
jorenchik
2024-10-27 15:05:46 +02:00
parent 0585530a1e
commit 39839f95e6
4 changed files with 142 additions and 130 deletions

View File

@@ -18,9 +18,14 @@ int32_t previousRow;
int32_t previousColumn;
bool textStarted = false;
bool identifierStarted = false;
bool sof;
bool sof;
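/*
* Trims str in place: strips padding characters from the left end, then from
* the right end.
* TODO: document how trimChars is matched.
*/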
void trimString(std::string &str, std::string trimChars) {
// Noņem kreisās puses simbolus.
int padSize = 0;
bool pad = false;
for (size_t i = 0; i < str.size(); ++i) {
@@ -39,6 +44,8 @@ void trimString(std::string &str, std::string trimChars) {
if (padSize > 0) {
str.erase(0, padSize);
}
// Noņem labās puses simbolus.
padSize = 0;
pad = false;
for (size_t i = str.size(); i-- > 0;) {
@@ -59,7 +66,12 @@ void trimString(std::string &str, std::string trimChars) {
}
}
void makeTokenWithTokenBuffer(
/*
* Izveido tekstvienību, iegūstot to no bufera beigām.
* Ja buferī ir teksta vienība pirms tekstvienības, pievieno to pirms beigu
* tekstvienības.
*/
void tokenWithBuffer(
TokenType ttype,
size_t tokenLen,
TokenType textType
@@ -91,7 +103,10 @@ void makeTokenWithTokenBuffer(
buffer.clear();
}
Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
/*
* Pārveido simbolu virkni tekstvienību sarakstā.
*/
Result<std::vector<Token>> tokenizeMdem(const std::string& content) {
row = 1;
column = 1;
previousRow = 1;
@@ -100,31 +115,28 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
tokens.clear();
buffer.clear();
if (fileRunes.find_first_not_of(" \n\t") == std::string::npos) {
// Beidz, ja satur tikai tukšumus vai neko.
if (content.find_first_not_of(" \n\t") == std::string::npos) {
return {tokens, ""};
}
for (size_t i = 0; i < fileRunes.size(); ++i) {
char c = fileRunes[i];
for (size_t i = 0; i < content.size(); ++i) {
char c = content[i];
// AdvancePointer
// Apstrādā īpašos simbolus un tekstu.
if (c == '\n') {
row += 1;
column = 0;
}
// Add escape char
if (c == '\\') {
i += 1;
if (i < fileRunes.size()) {
buffer.push_back(fileRunes[i]);
if (i < content.size()) {
buffer.push_back(content[i]);
}
continue;
} else {
buffer.push_back(c);
}
// SkipWhitetext
if (!textStarted) {
if (c == '\n') {
previousRow += 1;
@@ -138,10 +150,10 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
}
}
// EmitTokens
// Emitē tekstvienības.
switch (c) {
case '[': {
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::CooldownStart,
1,
TokenType::TextFragment
@@ -160,7 +172,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
tokens[i].column
};
}
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::CooldownEnd,
1,
TokenType::Cooldown
@@ -171,7 +183,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
identifierStarted = false;
} break;
case '-': {
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::ElementDashStart,
1,
TokenType::TextFragment
@@ -181,7 +193,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false;
} break;
case '^': {
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::ElementOrderModifier,
1,
TokenType::TextFragment
@@ -191,7 +203,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false;
} break;
case ':': {
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::MatchGroupEnd,
1,
TokenType::TextFragment
@@ -201,7 +213,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false;
} break;
case '>': {
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::QuestionEnd,
1,
TokenType::TextFragment
@@ -211,7 +223,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false;
} break;
case '+': {
makeTokenWithTokenBuffer(
tokenWithBuffer(
TokenType::ElementPlusStart,
1,
TokenType::TextFragment
@@ -225,7 +237,8 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
column += 1;
}
makeTokenWithTokenBuffer(
// Pievieno beigu simbolu, lai atvieglotu parsēšanu.
tokenWithBuffer(
TokenType::EndOfFile,
0,
TokenType::TextFragment
@@ -235,7 +248,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
std::cout << "SECTION: Lexer output:\n";
std::cout << std::format("Token count: {}", tokens.size()) << std::endl;
for (const Token& token : tokens) {
std::cout << token.ToString();
std::cout << token.toString();
}
std::cout << "SECTION END: Lexer output\n";
}
@@ -243,17 +256,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
return {tokens, ""};
}
std::regex nextLineExp(
"\n",
std::regex_constants::ECMAScript
);
std::regex doubleSpaceExp(
"\\s\\s+",
std::regex_constants::ECMAScript
);
std::string Token::ToString(const TokenType* ttype) {
std::string Token::toString(const TokenType* ttype) {
switch (*ttype) {
case TokenType::TextFragment: return "text fragment";
case TokenType::QuestionEnd: return "question end symbol";
@@ -270,15 +273,17 @@ std::string Token::ToString(const TokenType* ttype) {
}
}
std::string Token::ToString() const {
std::string Token::toString() const {
std::string contentStr = content;
static const std::regex nextLineExp("\n", std::regex_constants::ECMAScript);
static const std::regex doubleSpaceExp("\\s\\s+", std::regex_constants::ECMAScript);
if (tokenType == TokenType::TextFragment) {
contentStr = std::regex_replace(contentStr, nextLineExp, "");
contentStr = std::regex_replace(contentStr, doubleSpaceExp, " ");
}
return std::format(
"{}: \"{}\" ({}:{})\n",
ToString(&tokenType),
toString(&tokenType),
contentStr,
row,
column