diff --git a/src/include/lexer.h b/src/include/lexer.h index 5ee7811..f2c2407 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -25,8 +25,8 @@ struct Token { int32_t row; int32_t column; - std::string ToString() const; - static std::string ToString(const TokenType* ttype); + std::string toString() const; + static std::string toString(const TokenType* ttype); }; -Result> tokenizeMdem(const std::string& fileRunes); +Result> tokenizeMdem(const std::string& content); diff --git a/src/include/parser.h b/src/include/parser.h index f8560da..251619e 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -6,7 +6,6 @@ #include "lexer.h" #include "result.h" - struct Question { double cooldown; std::string questionText; @@ -16,6 +15,13 @@ struct Question { virtual ~Question() = default; }; +struct QuestionElement { + bool isDash; + bool isGroup; + std::string content; +}; + + struct Choice { std::string answer; bool isCorrect; diff --git a/src/transpiler/lexer.cpp b/src/transpiler/lexer.cpp index 2df6326..2ef48c5 100644 --- a/src/transpiler/lexer.cpp +++ b/src/transpiler/lexer.cpp @@ -18,9 +18,14 @@ int32_t previousRow; int32_t previousColumn; bool textStarted = false; bool identifierStarted = false; -bool sof; +bool sof; +/* + * TODO + */ void trimString(std::string &str, std::string trimChars) { + + // Noņem kreisās puses simbolus. int padSize = 0; bool pad = false; for (size_t i = 0; i < str.size(); ++i) { @@ -39,6 +44,8 @@ void trimString(std::string &str, std::string trimChars) { if (padSize > 0) { str.erase(0, padSize); } + + // Noņem labās puses simbolus. padSize = 0; pad = false; for (size_t i = str.size(); i-- > 0;) { @@ -59,7 +66,12 @@ void trimString(std::string &str, std::string trimChars) { } } -void makeTokenWithTokenBuffer( +/* + * Izveido tekstvienību, iegūstot to no bufera beigām. + * Ja buferī ir teksta vienība pirms tekstvienības, pievieno to pirms beigu + * tekstvienības. + */ +void tokenWithBuffer( TokenType ttype, size_t tokenLen, TokenType textType @@ -91,7 +103,10 @@ void makeTokenWithTokenBuffer( buffer.clear(); } -Result> tokenizeMdem(const std::string& fileRunes) { +/* + * Pārveido simbolu virkni tekstvienību sarakstā. + * */ +Result> tokenizeMdem(const std::string& content) { row = 1; column = 1; previousRow = 1; @@ -100,31 +115,28 @@ Result> tokenizeMdem(const std::string& fileRunes) { tokens.clear(); buffer.clear(); - if (fileRunes.find_first_not_of(" \n\t") == std::string::npos) { + // Beidz, ja satur tikai tukšumus vai neko. + if (content.find_first_not_of(" \n\t") == std::string::npos) { return {tokens, ""}; } - for (size_t i = 0; i < fileRunes.size(); ++i) { - char c = fileRunes[i]; + for (size_t i = 0; i < content.size(); ++i) { + char c = content[i]; - // AdvancePointer + // Apstrādā īpašos simbolus un tekstu. if (c == '\n') { row += 1; column = 0; } - - // Add escape char if (c == '\\') { i += 1; - if (i < fileRunes.size()) { - buffer.push_back(fileRunes[i]); + if (i < content.size()) { + buffer.push_back(content[i]); } continue; } else { buffer.push_back(c); } - - // SkipWhitetext if (!textStarted) { if (c == '\n') { previousRow += 1; @@ -138,10 +150,10 @@ Result> tokenizeMdem(const std::string& fileRunes) { } } - // EmitTokens + // Emitē tekstvienības. switch (c) { case '[': { - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::CooldownStart, 1, TokenType::TextFragment @@ -160,7 +172,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { tokens[i].column }; } - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::CooldownEnd, 1, TokenType::Cooldown @@ -171,7 +183,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { identifierStarted = false; } break; case '-': { - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::ElementDashStart, 1, TokenType::TextFragment @@ -181,7 +193,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { textStarted = false; } break; case '^': { - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::ElementOrderModifier, 1, TokenType::TextFragment @@ -191,7 +203,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { textStarted = false; } break; case ':': { - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::MatchGroupEnd, 1, TokenType::TextFragment @@ -201,7 +213,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { textStarted = false; } break; case '>': { - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::QuestionEnd, 1, TokenType::TextFragment @@ -211,7 +223,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { textStarted = false; } break; case '+': { - makeTokenWithTokenBuffer( + tokenWithBuffer( TokenType::ElementPlusStart, 1, TokenType::TextFragment @@ -225,7 +237,8 @@ Result> tokenizeMdem(const std::string& fileRunes) { column += 1; } - makeTokenWithTokenBuffer( + // Pievieno beigu simbolu, lai atvieglotu parsēšanu. + tokenWithBuffer( TokenType::EndOfFile, 0, TokenType::TextFragment @@ -235,7 +248,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { std::cout << "SECTION: Lexer output:\n"; std::cout << std::format("Token count: {}", tokens.size()) << std::endl; for (const Token& token : tokens) { - std::cout << token.ToString(); + std::cout << token.toString(); } std::cout << "SECTION END: Lexer output\n"; } @@ -243,17 +256,7 @@ Result> tokenizeMdem(const std::string& fileRunes) { return {tokens, ""}; } -std::regex nextLineExp( - "\n", - std::regex_constants::ECMAScript -); - -std::regex doubleSpaceExp( - "\\s\\s+", - std::regex_constants::ECMAScript -); - -std::string Token::ToString(const TokenType* ttype) { +std::string Token::toString(const TokenType* ttype) { switch (*ttype) { case TokenType::TextFragment: return "text fragment"; case TokenType::QuestionEnd: return "question end symbol"; @@ -270,15 +273,17 @@ std::string Token::ToString(const TokenType* ttype) { } } -std::string Token::ToString() const { +std::string Token::toString() const { std::string contentStr = content; + static const std::regex nextLineExp("\n", std::regex_constants::ECMAScript); + static const std::regex doubleSpaceExp("\\s\\s+", std::regex_constants::ECMAScript); if (tokenType == TokenType::TextFragment) { contentStr = std::regex_replace(contentStr, nextLineExp, ""); contentStr = std::regex_replace(contentStr, doubleSpaceExp, " "); } return std::format( "{}: \"{}\" ({}:{})\n", - ToString(&tokenType), + toString(&tokenType), contentStr, row, column diff --git a/src/transpiler/parser.cpp b/src/transpiler/parser.cpp index ec46f43..3fd48ae 100644 --- a/src/transpiler/parser.cpp +++ b/src/transpiler/parser.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -15,123 +14,85 @@ #include "parser.h" #include "stringUtils.h" -struct QuestionElement { - bool isDash; - bool isGroup; - std::string content; -}; +typedef std::map> TokenAutomata; -std::string MultiElementQuestion::toString() const { - std::stringstream ss; - for (const auto& choice : choices) { - char opener; - if (type == MultiElementType::Order) { - opener = '^'; - } else if (choice.isCorrect) { - opener = '+'; - } else { - opener = '-'; - } - ss << opener << " " << choice.answer << "; "; - } - return std::format( - "\nsection:{}\nid:{}\n{}\n{}", - section, - cooldown, - questionText, - ss.str() - ); -} - -std::string GroupQuestion::toString() const { - std::stringstream ss; - for (auto group: groups) { - ss << group.name << ": "; - for (auto el: group.elements) { - ss << el << ", "; - } - ss << "; "; - } - return std::format( - "\nsection:{}\nid:{}\n{}\n{}", - section, - cooldown, - questionText, - ss.str() - ); -} - -// Automaton for validating token transitions -std::map> automata; - -bool contains(const std::vector& vec, TokenType element) { - return std::find(vec.begin(), vec.end(), element) != vec.end(); -} - -// Automata for validating the parser state -std::map> parserAutomata() { - std::map> automata; - automata[TokenType::TextFragment] = { +TokenAutomata *automata = nullptr; +/* + * Galīgs automāts, kas nosaka, kādā secībā ir var būt tekstvienības. + * */ +void initParserAutomata() { + automata = new TokenAutomata; + (*automata)[TokenType::TextFragment] = { TokenType::QuestionEnd, TokenType::ElementDashStart, TokenType::ElementPlusStart, TokenType::MatchGroupEnd, TokenType::EndOfFile, }; - automata[TokenType::MatchGroupEnd] = { + (*automata)[TokenType::MatchGroupEnd] = { TokenType::ElementDashStart }; - automata[TokenType::QuestionEnd] = { + (*automata)[TokenType::QuestionEnd] = { TokenType::ElementDashStart, TokenType::ElementPlusStart }; - automata[TokenType::ElementDashStart] = { + (*automata)[TokenType::ElementDashStart] = { TokenType::CooldownStart, TokenType::TextFragment, TokenType::ElementOrderModifier }; - automata[TokenType::ElementOrderModifier] = { + (*automata)[TokenType::ElementOrderModifier] = { TokenType::TextFragment }; - automata[TokenType::ElementPlusStart] = { + (*automata)[TokenType::ElementPlusStart] = { TokenType::TextFragment }; - automata[TokenType::Cooldown] = { + (*automata)[TokenType::Cooldown] = { TokenType::CooldownEnd, }; - automata[TokenType::CooldownStart] = { + (*automata)[TokenType::CooldownStart] = { TokenType::Cooldown }; - automata[TokenType::CooldownEnd] = { + (*automata)[TokenType::CooldownEnd] = { TokenType::TextFragment }; - automata[TokenType::StartOfFile] = { + (*automata)[TokenType::StartOfFile] = { TokenType::TextFragment, TokenType::ElementDashStart, TokenType::EndOfFile }; - automata[TokenType::EndOfFile] = {}; - return automata; -} - -std::string capitalize(const std::string& str) { - if (str.empty()) return str; - std::string result = str; - result[0] = std::towupper(result[0]); - return result; + (*automata)[TokenType::EndOfFile] = {}; } +/* + * Pārbauda, vai vai tekstvienību sarakstu akceptē atbilst atbilst valodas + * automāts. + * */ Result ValidateGrammar(const std::vector& tokens) { - automata = parserAutomata(); + if (!automata) { + initParserAutomata(); + } for (size_t i = 0; i < tokens.size() - 1; ++i) { Token token = tokens[i]; Token nextToken = tokens[i + 1]; - if (!contains(automata[token.tokenType], nextToken.tokenType)) { + if ( + std::find( + (*automata)[token.tokenType].begin(), + (*automata)[token.tokenType].end(), + nextToken.tokenType + ) == (*automata)[token.tokenType].end()) { + + auto capitalize = [](const std::string& str) { + if (str.empty()) return str; + std::string result = str; + result[0] = std::towupper(result[0]); + return result; + }; return { .error=std::format( "Invalid token sequence: {} cannot precede {}", - std::string(capitalize(Token::ToString(&token.tokenType))), - std::string(capitalize(Token::ToString(&nextToken.tokenType))) + std::string(capitalize(Token::toString(&token.tokenType))), + std::string(capitalize(Token::toString(&nextToken.tokenType))) ), .row=token.row, .column=token.column @@ -141,17 +102,6 @@ Result ValidateGrammar(const std::vector& tokens) { return {}; } -time_t parseToUTCTime(const std::string datetime, std::string format) { - std::tm tm = {}; - std::istringstream ss(datetime); - ss >> std::get_time(&tm, format.c_str()); - if (ss.fail()) { - throw std::runtime_error("Failed to parse datetime string"); - } - std::time_t time = timegm(&tm); - return time; -} - // @Fix: Prevent duplicate group names and questions in ordered question (to // simplify checking in practice). Result parseQuestions(const std::vector& tokens) { @@ -192,6 +142,17 @@ Result parseQuestions(const std::vector& tokens) { if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) { try { + auto parseToUTCTime = [](const std::string datetime, std::string format) { + std::tm tm = {}; + std::istringstream ss(datetime); + ss >> std::get_time(&tm, format.c_str()); + if (ss.fail()) { + throw std::runtime_error("Failed to parse datetime string"); + } + std::time_t time = timegm(&tm); + return time; + }; + time = parseToUTCTime(tokens[i].content.c_str(), "%d.%m.%Y %H:%M"); } catch (std::exception e) { return makeResult( @@ -396,3 +357,43 @@ Result parseQuestions(const std::vector& tokens) { Token() ); } + +std::string MultiElementQuestion::toString() const { + std::stringstream ss; + for (const auto& choice : choices) { + char opener; + if (type == MultiElementType::Order) { + opener = '^'; + } else if (choice.isCorrect) { + opener = '+'; + } else { + opener = '-'; + } + ss << opener << " " << choice.answer << "; "; + } + return std::format( + "\nsection:{}\nid:{}\n{}\n{}", + section, + cooldown, + questionText, + ss.str() + ); +} + +std::string GroupQuestion::toString() const { + std::stringstream ss; + for (auto group: groups) { + ss << group.name << ": "; + for (auto el: group.elements) { + ss << el << ", "; + } + ss << "; "; + } + return std::format( + "\nsection:{}\nid:{}\n{}\n{}", + section, + cooldown, + questionText, + ss.str() + ); +}