transpiler comments and refactoring

This commit is contained in:
jorenchik
2024-10-27 15:05:46 +02:00
parent 0585530a1e
commit 39839f95e6
4 changed files with 142 additions and 130 deletions

View File

@@ -25,8 +25,8 @@ struct Token {
int32_t row; int32_t row;
int32_t column; int32_t column;
std::string ToString() const; std::string toString() const;
static std::string ToString(const TokenType* ttype); static std::string toString(const TokenType* ttype);
}; };
Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes); Result<std::vector<Token>> tokenizeMdem(const std::string& content);

View File

@@ -6,7 +6,6 @@
#include "lexer.h" #include "lexer.h"
#include "result.h" #include "result.h"
struct Question { struct Question {
double cooldown; double cooldown;
std::string questionText; std::string questionText;
@@ -16,6 +15,13 @@ struct Question {
virtual ~Question() = default; virtual ~Question() = default;
}; };
/*
 * One element gathered while a question is being parsed.
 * NOTE(review): the meaning of each flag below is inferred from its name —
 * confirm against the parser code that fills this struct in.
 */
struct QuestionElement {
    // Presumably true when the element was introduced by a dash marker.
    // Defaulted so a default-constructed element never holds an
    // indeterminate value.
    bool isDash = false;
    // Presumably true when the element names a match group.
    bool isGroup = false;
    // Text carried by the element.
    std::string content;
};
struct Choice { struct Choice {
std::string answer; std::string answer;
bool isCorrect; bool isCorrect;

View File

@@ -20,7 +20,12 @@ bool textStarted = false;
bool identifierStarted = false; bool identifierStarted = false;
bool sof; bool sof;
/*
* TODO
*/
void trimString(std::string &str, std::string trimChars) { void trimString(std::string &str, std::string trimChars) {
// Noņem kreisās puses simbolus.
int padSize = 0; int padSize = 0;
bool pad = false; bool pad = false;
for (size_t i = 0; i < str.size(); ++i) { for (size_t i = 0; i < str.size(); ++i) {
@@ -39,6 +44,8 @@ void trimString(std::string &str, std::string trimChars) {
if (padSize > 0) { if (padSize > 0) {
str.erase(0, padSize); str.erase(0, padSize);
} }
// Noņem labās puses simbolus.
padSize = 0; padSize = 0;
pad = false; pad = false;
for (size_t i = str.size(); i-- > 0;) { for (size_t i = str.size(); i-- > 0;) {
@@ -59,7 +66,12 @@ void trimString(std::string &str, std::string trimChars) {
} }
} }
void makeTokenWithTokenBuffer( /*
* Izveido tekstvienību, iegūstot to no bufera beigām.
* Ja buferī ir teksta vienība pirms tekstvienības, pievieno to pirms beigu
* tekstvienības.
*/
void tokenWithBuffer(
TokenType ttype, TokenType ttype,
size_t tokenLen, size_t tokenLen,
TokenType textType TokenType textType
@@ -91,7 +103,10 @@ void makeTokenWithTokenBuffer(
buffer.clear(); buffer.clear();
} }
Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) { /*
* Pārveido simbolu virkni tekstvienību sarakstā.
* */
Result<std::vector<Token>> tokenizeMdem(const std::string& content) {
row = 1; row = 1;
column = 1; column = 1;
previousRow = 1; previousRow = 1;
@@ -100,31 +115,28 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
tokens.clear(); tokens.clear();
buffer.clear(); buffer.clear();
if (fileRunes.find_first_not_of(" \n\t") == std::string::npos) { // Beidz, ja satur tikai tukšumus vai neko.
if (content.find_first_not_of(" \n\t") == std::string::npos) {
return {tokens, ""}; return {tokens, ""};
} }
for (size_t i = 0; i < fileRunes.size(); ++i) { for (size_t i = 0; i < content.size(); ++i) {
char c = fileRunes[i]; char c = content[i];
// AdvancePointer // Apstrādā īpašos simbolus un tekstu.
if (c == '\n') { if (c == '\n') {
row += 1; row += 1;
column = 0; column = 0;
} }
// Add escape char
if (c == '\\') { if (c == '\\') {
i += 1; i += 1;
if (i < fileRunes.size()) { if (i < content.size()) {
buffer.push_back(fileRunes[i]); buffer.push_back(content[i]);
} }
continue; continue;
} else { } else {
buffer.push_back(c); buffer.push_back(c);
} }
// SkipWhitetext
if (!textStarted) { if (!textStarted) {
if (c == '\n') { if (c == '\n') {
previousRow += 1; previousRow += 1;
@@ -138,10 +150,10 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
} }
} }
// EmitTokens // Emitē tekstvienības.
switch (c) { switch (c) {
case '[': { case '[': {
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::CooldownStart, TokenType::CooldownStart,
1, 1,
TokenType::TextFragment TokenType::TextFragment
@@ -160,7 +172,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
tokens[i].column tokens[i].column
}; };
} }
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::CooldownEnd, TokenType::CooldownEnd,
1, 1,
TokenType::Cooldown TokenType::Cooldown
@@ -171,7 +183,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
identifierStarted = false; identifierStarted = false;
} break; } break;
case '-': { case '-': {
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::ElementDashStart, TokenType::ElementDashStart,
1, 1,
TokenType::TextFragment TokenType::TextFragment
@@ -181,7 +193,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false; textStarted = false;
} break; } break;
case '^': { case '^': {
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::ElementOrderModifier, TokenType::ElementOrderModifier,
1, 1,
TokenType::TextFragment TokenType::TextFragment
@@ -191,7 +203,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false; textStarted = false;
} break; } break;
case ':': { case ':': {
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::MatchGroupEnd, TokenType::MatchGroupEnd,
1, 1,
TokenType::TextFragment TokenType::TextFragment
@@ -201,7 +213,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false; textStarted = false;
} break; } break;
case '>': { case '>': {
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::QuestionEnd, TokenType::QuestionEnd,
1, 1,
TokenType::TextFragment TokenType::TextFragment
@@ -211,7 +223,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
textStarted = false; textStarted = false;
} break; } break;
case '+': { case '+': {
makeTokenWithTokenBuffer( tokenWithBuffer(
TokenType::ElementPlusStart, TokenType::ElementPlusStart,
1, 1,
TokenType::TextFragment TokenType::TextFragment
@@ -225,7 +237,8 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
column += 1; column += 1;
} }
makeTokenWithTokenBuffer( // Pievieno beigu simbolu, lai atvieglotu parsēšanu.
tokenWithBuffer(
TokenType::EndOfFile, TokenType::EndOfFile,
0, 0,
TokenType::TextFragment TokenType::TextFragment
@@ -235,7 +248,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
std::cout << "SECTION: Lexer output:\n"; std::cout << "SECTION: Lexer output:\n";
std::cout << std::format("Token count: {}", tokens.size()) << std::endl; std::cout << std::format("Token count: {}", tokens.size()) << std::endl;
for (const Token& token : tokens) { for (const Token& token : tokens) {
std::cout << token.ToString(); std::cout << token.toString();
} }
std::cout << "SECTION END: Lexer output\n"; std::cout << "SECTION END: Lexer output\n";
} }
@@ -243,17 +256,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
return {tokens, ""}; return {tokens, ""};
} }
std::regex nextLineExp( std::string Token::toString(const TokenType* ttype) {
"\n",
std::regex_constants::ECMAScript
);
std::regex doubleSpaceExp(
"\\s\\s+",
std::regex_constants::ECMAScript
);
std::string Token::ToString(const TokenType* ttype) {
switch (*ttype) { switch (*ttype) {
case TokenType::TextFragment: return "text fragment"; case TokenType::TextFragment: return "text fragment";
case TokenType::QuestionEnd: return "question end symbol"; case TokenType::QuestionEnd: return "question end symbol";
@@ -270,15 +273,17 @@ std::string Token::ToString(const TokenType* ttype) {
} }
} }
std::string Token::ToString() const { std::string Token::toString() const {
std::string contentStr = content; std::string contentStr = content;
static const std::regex nextLineExp("\n", std::regex_constants::ECMAScript);
static const std::regex doubleSpaceExp("\\s\\s+", std::regex_constants::ECMAScript);
if (tokenType == TokenType::TextFragment) { if (tokenType == TokenType::TextFragment) {
contentStr = std::regex_replace(contentStr, nextLineExp, ""); contentStr = std::regex_replace(contentStr, nextLineExp, "");
contentStr = std::regex_replace(contentStr, doubleSpaceExp, " "); contentStr = std::regex_replace(contentStr, doubleSpaceExp, " ");
} }
return std::format( return std::format(
"{}: \"{}\" ({}:{})\n", "{}: \"{}\" ({}:{})\n",
ToString(&tokenType), toString(&tokenType),
contentStr, contentStr,
row, row,
column column

View File

@@ -5,7 +5,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <map> #include <map>
#include <algorithm>
#include <sstream> #include <sstream>
#include <format> #include <format>
@@ -15,123 +14,85 @@
#include "parser.h" #include "parser.h"
#include "stringUtils.h" #include "stringUtils.h"
struct QuestionElement { typedef std::map<TokenType, std::vector<TokenType>> TokenAutomata;
bool isDash;
bool isGroup;
std::string content;
};
std::string MultiElementQuestion::toString() const { TokenAutomata *automata = nullptr;
std::stringstream ss; /*
for (const auto& choice : choices) { * Galīgs automāts, kas nosaka, kādā secībā var būt tekstvienības.
char opener; * */
if (type == MultiElementType::Order) { void initParserAutomata() {
opener = '^'; automata = new TokenAutomata;
} else if (choice.isCorrect) { (*automata)[TokenType::TextFragment] = {
opener = '+';
} else {
opener = '-';
}
ss << opener << " " << choice.answer << "; ";
}
return std::format(
"<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
section,
cooldown,
questionText,
ss.str()
);
}
/*
 * Builds a printable dump of the group question: a header containing the
 * section, cooldown and question text, followed by every group rendered as
 * "<name>: <element>, <element>, ; ".
 * NOTE(review): the "id:" label in the header is filled with `cooldown` —
 * confirm that this is intended.
 */
std::string GroupQuestion::toString() const {
    std::stringstream ss;
    // Iterate by const reference: groups and their elements are only read,
    // so copying each of them on every iteration is unnecessary.
    for (const auto& group : groups) {
        ss << group.name << ": ";
        for (const auto& el : group.elements) {
            ss << el << ", ";
        }
        ss << "; ";
    }
    return std::format(
        "<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
        section,
        cooldown,
        questionText,
        ss.str()
    );
}
// Automaton for validating token transitions
std::map<TokenType, std::vector<TokenType>> automata;
bool contains(const std::vector<TokenType>& vec, TokenType element) {
return std::find(vec.begin(), vec.end(), element) != vec.end();
}
// Automata for validating the parser state
std::map<TokenType, std::vector<TokenType>> parserAutomata() {
std::map<TokenType, std::vector<TokenType>> automata;
automata[TokenType::TextFragment] = {
TokenType::QuestionEnd, TokenType::QuestionEnd,
TokenType::ElementDashStart, TokenType::ElementDashStart,
TokenType::ElementPlusStart, TokenType::ElementPlusStart,
TokenType::MatchGroupEnd, TokenType::MatchGroupEnd,
TokenType::EndOfFile, TokenType::EndOfFile,
}; };
automata[TokenType::MatchGroupEnd] = { (*automata)[TokenType::MatchGroupEnd] = {
TokenType::ElementDashStart TokenType::ElementDashStart
}; };
automata[TokenType::QuestionEnd] = { (*automata)[TokenType::QuestionEnd] = {
TokenType::ElementDashStart, TokenType::ElementDashStart,
TokenType::ElementPlusStart TokenType::ElementPlusStart
}; };
automata[TokenType::ElementDashStart] = { (*automata)[TokenType::ElementDashStart] = {
TokenType::CooldownStart, TokenType::CooldownStart,
TokenType::TextFragment, TokenType::TextFragment,
TokenType::ElementOrderModifier TokenType::ElementOrderModifier
}; };
automata[TokenType::ElementOrderModifier] = { (*automata)[TokenType::ElementOrderModifier] = {
TokenType::TextFragment TokenType::TextFragment
}; };
automata[TokenType::ElementPlusStart] = { (*automata)[TokenType::ElementPlusStart] = {
TokenType::TextFragment TokenType::TextFragment
}; };
automata[TokenType::Cooldown] = { (*automata)[TokenType::Cooldown] = {
TokenType::CooldownEnd, TokenType::CooldownEnd,
}; };
automata[TokenType::CooldownStart] = { (*automata)[TokenType::CooldownStart] = {
TokenType::Cooldown TokenType::Cooldown
}; };
automata[TokenType::CooldownEnd] = { (*automata)[TokenType::CooldownEnd] = {
TokenType::TextFragment TokenType::TextFragment
}; };
automata[TokenType::StartOfFile] = { (*automata)[TokenType::StartOfFile] = {
TokenType::TextFragment, TokenType::TextFragment,
TokenType::ElementDashStart, TokenType::ElementDashStart,
TokenType::EndOfFile TokenType::EndOfFile
}; };
automata[TokenType::EndOfFile] = {}; (*automata)[TokenType::EndOfFile] = {};
return automata;
} }
std::string capitalize(const std::string& str) { /*
* Pārbauda, vai tekstvienību sarakstu akceptē valodas
* automāts.
* */
Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
if (!automata) {
initParserAutomata();
}
for (size_t i = 0; i < tokens.size() - 1; ++i) {
Token token = tokens[i];
Token nextToken = tokens[i + 1];
if (
std::find(
(*automata)[token.tokenType].begin(),
(*automata)[token.tokenType].end(),
nextToken.tokenType
) == (*automata)[token.tokenType].end()) {
auto capitalize = [](const std::string& str) {
if (str.empty()) return str; if (str.empty()) return str;
std::string result = str; std::string result = str;
result[0] = std::towupper(result[0]); result[0] = std::towupper(result[0]);
return result; return result;
} };
Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
automata = parserAutomata();
for (size_t i = 0; i < tokens.size() - 1; ++i) {
Token token = tokens[i];
Token nextToken = tokens[i + 1];
if (!contains(automata[token.tokenType], nextToken.tokenType)) {
return { return {
.error=std::format( .error=std::format(
"Invalid token sequence: {} cannot precede {}", "Invalid token sequence: {} cannot precede {}",
std::string(capitalize(Token::ToString(&token.tokenType))), std::string(capitalize(Token::toString(&token.tokenType))),
std::string(capitalize(Token::ToString(&nextToken.tokenType))) std::string(capitalize(Token::toString(&nextToken.tokenType)))
), ),
.row=token.row, .row=token.row,
.column=token.column .column=token.column
@@ -141,17 +102,6 @@ Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
return {}; return {};
} }
/*
 * Parses `datetime` using the std::get_time pattern given in `format` and
 * converts the resulting broken-down time to a time_t via timegm().
 * Throws std::runtime_error when the text cannot be extracted with the
 * given pattern.
 */
time_t parseToUTCTime(const std::string datetime, std::string format) {
    std::tm parsed{};
    std::istringstream input{datetime};
    // A failed extraction leaves the stream in a failed state.
    if (!(input >> std::get_time(&parsed, format.c_str()))) {
        throw std::runtime_error("Failed to parse datetime string");
    }
    return timegm(&parsed);
}
// @Fix: Prevent duplicate group names and questions in ordered question (to // @Fix: Prevent duplicate group names and questions in ordered question (to
// simplify checking in practice). // simplify checking in practice).
Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) { Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
@@ -192,6 +142,17 @@ Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) { if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) {
try { try {
auto parseToUTCTime = [](const std::string datetime, std::string format) {
std::tm tm = {};
std::istringstream ss(datetime);
ss >> std::get_time(&tm, format.c_str());
if (ss.fail()) {
throw std::runtime_error("Failed to parse datetime string");
}
std::time_t time = timegm(&tm);
return time;
};
time = parseToUTCTime(tokens[i].content.c_str(), "%d.%m.%Y %H:%M"); time = parseToUTCTime(tokens[i].content.c_str(), "%d.%m.%Y %H:%M");
} catch (std::exception e) { } catch (std::exception e) {
return makeResult( return makeResult(
@@ -396,3 +357,43 @@ Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
Token() Token()
); );
} }
/*
 * Builds a printable dump of the multi-element question: a header with the
 * section, cooldown and question text, followed by every choice rendered as
 * "<marker> <answer>; ". The marker is '^' for order-type questions,
 * otherwise '+' for a correct choice and '-' for any other choice.
 */
std::string MultiElementQuestion::toString() const {
    std::stringstream rendered;
    // The question type is the same for every choice, so test it once.
    const bool ordered = (type == MultiElementType::Order);
    for (const auto& choice : choices) {
        const char marker = ordered ? '^' : (choice.isCorrect ? '+' : '-');
        rendered << marker << " " << choice.answer << "; ";
    }
    return std::format(
        "<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
        section,
        cooldown,
        questionText,
        rendered.str()
    );
}
/*
 * Builds a printable dump of the group question: a header containing the
 * section, cooldown and question text, followed by every group rendered as
 * "<name>: <element>, <element>, ; ".
 * NOTE(review): the "id:" label in the header is filled with `cooldown` —
 * confirm that this is intended.
 */
std::string GroupQuestion::toString() const {
    std::stringstream ss;
    // Iterate by const reference: groups and their elements are only read,
    // so copying each of them on every iteration is unnecessary.
    for (const auto& group : groups) {
        ss << group.name << ": ";
        for (const auto& el : group.elements) {
            ss << el << ", ";
        }
        ss << "; ";
    }
    return std::format(
        "<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
        section,
        cooldown,
        questionText,
        ss.str()
    );
}