restructured, removing the Go source

This commit is contained in:
jorenchik
2024-10-14 21:13:44 +03:00
parent ff4beecb5b
commit 2cef7007a2
138 changed files with 9911 additions and 1209 deletions

284
src/transpiler/lexer.cpp Normal file
View File

@@ -0,0 +1,284 @@
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <vector>
#include <string>
#include <format>
#include <regex>
#include "lexer.h"
#include "config.h"
#include "result.h"
// Module-level lexer state, reset at the start of every tokenizeMdem() call.
std::vector<Token> tokens;        // tokens emitted so far
std::vector<char> buffer;         // raw characters accumulated since the last emitted token
int32_t row;                      // current 1-based line in the input
int32_t column;                   // current 1-based column in the input
int32_t previousRow;              // position where the pending text fragment began
int32_t previousColumn;
bool textStarted = false;         // true once a non-whitespace character has been seen
bool identifierStarted = false;   // true between a '[' and its matching ']'
bool sof;                         // NOTE(review): never read or written in this file — confirm it can be removed
// Strips all leading and trailing characters contained in `trimChars`
// from `str`, in place.  If `str` consists entirely of trim characters
// it becomes empty.  Replaces the original hand-rolled scan loops with
// std::string::find_first_not_of / find_last_not_of.
void trimString(std::string &str, std::string trimChars) {
    const size_t first = str.find_first_not_of(trimChars);
    if (first == std::string::npos) {
        // Nothing but trim characters (or empty input): clear everything.
        str.clear();
        return;
    }
    const size_t last = str.find_last_not_of(trimChars);
    str.erase(last + 1);
    str.erase(0, first);
}
// Emits a token of type `ttype` built from the trailing `tokenLen`
// characters of the shared `buffer`.  Any characters that accumulated
// before the token are first flushed as a single `textType` token,
// trimmed of surrounding whitespace and stamped with the remembered
// (previousRow, previousColumn) start position.  Clears the buffer and
// records the current position as the next fragment's start.
// NOTE(review): assumes buffer.size() >= tokenLen — every call site in
// this file pushes the token character before calling; confirm if reused.
void makeTokenWithTokenBuffer(
    TokenType ttype,
    size_t tokenLen,
    TokenType textType
) {
    std::string token(buffer.end() - tokenLen, buffer.end());
    if (buffer.size() > tokenLen) {
        // Flush the text that preceded this token as its own fragment.
        std::string prevFragment(buffer.begin(), buffer.end() - tokenLen);
        trimString(prevFragment, " \n\t");
        if (prevFragment.length() > 0) {
            tokens.push_back(Token{
                textType,
                prevFragment,
                previousRow,
                previousColumn
            });
        }
    }
    // `token` was already copied out above, so one clear suffices
    // (the original cleared the buffer a second, redundant time).
    buffer.clear();
    tokens.push_back(Token{
        ttype,
        token,
        row,
        column
    });
    previousRow = row;
    previousColumn = column;
}
// Tokenizes the mdem source text in `fileRunes`.
// Resets all module-level lexer state, then walks the input character by
// character: a backslash escapes the following character into the text
// buffer, whitespace before any visible text only advances the remembered
// fragment start position, and the structural characters [ ] - ^ : > +
// each emit their token (flushing any preceding text fragment first).
// Returns the token vector with an empty error string on success, or the
// partial token vector plus an error message and source position on failure.
Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
    row = 1;
    column = 1;
    previousRow = 1;
    previousColumn = 1;
    textStarted = false;
    tokens.clear();
    buffer.clear();
    // Entirely-whitespace (or empty) input yields no tokens at all.
    if (fileRunes.find_first_not_of(" \n\t") == std::string::npos) {
        return {tokens, ""};
    }
    for (size_t i = 0; i < fileRunes.size(); ++i) {
        char c = fileRunes[i];
        // AdvancePointer
        if (c == '\n') {
            row += 1;
            column = 0; // bumped back to 1 at the bottom of the loop
        }
        if (c == '\\') {
            // Escape: buffer the next character verbatim so it is never
            // treated as a structural token.
            // NOTE(review): `column` advances only once for the two
            // consumed characters — confirm whether reported positions
            // after an escape are expected to drift.
            i += 1;
            if (i < fileRunes.size()) {
                buffer.push_back(fileRunes[i]);
            }
            continue;
        } else {
            buffer.push_back(c);
        }
        // SkipWhitetext: before any visible text, whitespace only moves
        // the remembered start-of-fragment position.
        if (!textStarted) {
            if (c == '\n') {
                previousRow += 1;
                previousColumn = 1;
            } else if (c == ' ') {
                previousColumn += 1;
            } else if (c == '\t') {
                previousColumn += 4; // a tab counts as four columns
            } else {
                textStarted = true;
            }
        }
        // EmitTokens
        switch (c) {
        case '[':
            makeTokenWithTokenBuffer(
                TokenType::CooldownStart,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            identifierStarted = true;
            break;
        case ']':
            if (!identifierStarted) {
                // BUG FIX: the original reported tokens[i].row/column,
                // indexing the token vector with the *input string*
                // position `i` — an out-of-bounds read.  Report the
                // current lexer position instead.
                return {
                    tokens,
                    "Cannot end identifier if it is not started",
                    row,
                    column
                };
            }
            makeTokenWithTokenBuffer(
                TokenType::CooldownEnd,
                1,
                TokenType::Cooldown
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            identifierStarted = false;
            break;
        case '-':
            makeTokenWithTokenBuffer(
                TokenType::ElementDashStart,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        case '^':
            makeTokenWithTokenBuffer(
                TokenType::ElementOrderModifier,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        case ':':
            makeTokenWithTokenBuffer(
                TokenType::MatchGroupEnd,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        case '>':
            makeTokenWithTokenBuffer(
                TokenType::QuestionEnd,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            // NOTE(review): unlike every other case, '>' does not reset
            // textStarted — confirm whether that is intentional.
            break;
        case '+':
            makeTokenWithTokenBuffer(
                TokenType::ElementPlusStart,
                1,
                TokenType::TextFragment
            );
            previousRow = row;
            previousColumn = column;
            textStarted = false;
            break;
        }
        column += 1;
    }
    // Flush any trailing text and terminate the stream.
    makeTokenWithTokenBuffer(
        TokenType::EndOfFile,
        0,
        TokenType::TextFragment
    );
    if (debug) {
        std::cout << "SECTION: Lexer output:\n";
        std::cout << std::format("Token count: {}", tokens.size()) << std::endl;
        for (const Token& token : tokens) {
            std::cout << token.ToString();
        }
        std::cout << "SECTION END: Lexer output\n";
    }
    return {tokens, ""};
}
// Regexes used by Token::ToString() to make TextFragment contents printable
// on a single line: newlines are removed and whitespace runs collapsed.
std::regex nextLineExp(
    "\n",
    std::regex_constants::ECMAScript
);
std::regex doubleSpaceExp(
    "\\s\\s+",
    std::regex_constants::ECMAScript
);
std::string Token::ToString(const TokenType* ttype) {
switch (*ttype) {
case TokenType::TextFragment: return "text fragment";
case TokenType::QuestionEnd: return "question end symbol";
case TokenType::MatchGroupEnd: return "match group end";
case TokenType::ElementDashStart: return "dash element start";
case TokenType::ElementOrderModifier: return "order element modifier";
case TokenType::ElementPlusStart: return "plus element start";
case TokenType::Cooldown: return "cooldown";
case TokenType::CooldownStart: return "start of cooldown";
case TokenType::CooldownEnd: return "end of cooldown";
case TokenType::StartOfFile: return "start of the file";
case TokenType::EndOfFile: return "end of file";
default: return "unrecognized token";
}
}
// Renders this token as a one-line debug string:
//   <type name>: "<content>" (<row>:<column>)\n
// Text fragments have newlines stripped and whitespace runs collapsed
// so each token prints on a single line.
std::string Token::ToString() const {
    std::string shown = content;
    if (tokenType == TokenType::TextFragment) {
        shown = std::regex_replace(shown, nextLineExp, "");
        shown = std::regex_replace(shown, doubleSpaceExp, " ");
    }
    return std::format(
        "{}: \"{}\" ({}:{})\n",
        ToString(&tokenType), shown, row, column
    );
}