mirror of https://github.com/jorenchik/mdemory.git (synced 2026-03-22 00:26:21 +00:00)

transpiler comments and refactoring
@@ -25,8 +25,8 @@ struct Token {
     int32_t row;
     int32_t column;

-    std::string ToString() const;
-    static std::string ToString(const TokenType* ttype);
+    std::string toString() const;
+    static std::string toString(const TokenType* ttype);
 };

-Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes);
+Result<std::vector<Token>> tokenizeMdem(const std::string& content);
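
Note: to illustrate the renamed lexer API above, here is a minimal usage sketch. Only the tokenizeMdem and Token::toString signatures come from the diff; the Result field names (value, error) and the input snippet are assumptions for illustration.

#include <iostream>
#include "lexer.h"

int main() {
    // Tokenize a small, illustrative mdemory snippet.
    Result<std::vector<Token>> res = tokenizeMdem("Capital of France >\n+ Paris\n- Rome\n");
    if (!res.error.empty()) {                  // assumed error field
        std::cerr << res.error << "\n";
        return 1;
    }
    for (const Token& token : res.value) {     // assumed value field
        std::cout << token.toString();         // "<type>: \"<content>\" (row:column)"
    }
    return 0;
}
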
@@ -6,7 +6,6 @@
 #include "lexer.h"
 #include "result.h"

-
 struct Question {
     double cooldown;
     std::string questionText;
@@ -16,6 +15,13 @@ struct Question {
     virtual ~Question() = default;
 };

+struct QuestionElement {
+    bool isDash;
+    bool isGroup;
+    std::string content;
+};
+
+
 struct Choice {
     std::string answer;
     bool isCorrect;
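
Note: the structs moved into the header above are plain aggregates; a brief sketch of brace-initializing them (the header name is taken from the include in parser.cpp below, the values are made up for illustration):

#include <vector>
#include "parser.h"

int main() {
    // A dash element and two answer choices, mirroring the fields declared above.
    QuestionElement element{ /*isDash=*/true, /*isGroup=*/false, /*content=*/"Paris" };
    std::vector<Choice> choices{
        { /*answer=*/"Paris", /*isCorrect=*/true },
        { /*answer=*/"Rome",  /*isCorrect=*/false },
    };
    return element.isDash && choices.front().isCorrect ? 0 : 1;
}
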
@@ -20,7 +20,12 @@ bool textStarted = false;
 bool identifierStarted = false;
 bool sof;

+/*
+ * TODO
+ */
 void trimString(std::string &str, std::string trimChars) {
+
+    // Remove the characters on the left side.
     int padSize = 0;
     bool pad = false;
     for (size_t i = 0; i < str.size(); ++i) {
@@ -39,6 +44,8 @@ void trimString(std::string &str, std::string trimChars) {
     if (padSize > 0) {
         str.erase(0, padSize);
     }
+
+    // Remove the characters on the right side.
     padSize = 0;
     pad = false;
     for (size_t i = str.size(); i-- > 0;) {
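
Note: trimString above strips the given characters from both ends of the string in place; a minimal usage sketch (the call site and input are illustrative, not from this commit):

#include <cassert>
#include <string>

void trimString(std::string &str, std::string trimChars);  // defined above in the lexer

void trimExample() {
    std::string s = " \t question text \n";
    trimString(s, " \n\t");   // strips spaces, tabs and newlines from both ends, in place
    assert(s == "question text");
}
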
@@ -59,7 +66,12 @@ void trimString(std::string &str, std::string trimChars) {
     }
 }

-void makeTokenWithTokenBuffer(
+/*
+ * Builds a token by taking it from the end of the buffer.
+ * If the buffer contains a text fragment before the token, it is added before
+ * the final token.
+ */
+void tokenWithBuffer(
     TokenType ttype,
     size_t tokenLen,
     TokenType textType
@@ -91,7 +103,10 @@ void makeTokenWithTokenBuffer(
     buffer.clear();
 }

-Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
+/*
+ * Converts a character string into a list of tokens.
+ * */
+Result<std::vector<Token>> tokenizeMdem(const std::string& content) {
     row = 1;
     column = 1;
     previousRow = 1;
@@ -100,31 +115,28 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
     tokens.clear();
     buffer.clear();

-    if (fileRunes.find_first_not_of(" \n\t") == std::string::npos) {
+    // Stop if the input contains only whitespace or nothing at all.
+    if (content.find_first_not_of(" \n\t") == std::string::npos) {
         return {tokens, ""};
     }

-    for (size_t i = 0; i < fileRunes.size(); ++i) {
-        char c = fileRunes[i];
+    for (size_t i = 0; i < content.size(); ++i) {
+        char c = content[i];

-        // AdvancePointer
+        // Handle the special characters and the text.
         if (c == '\n') {
             row += 1;
             column = 0;
         }

-        // Add escape char
         if (c == '\\') {
             i += 1;
-            if (i < fileRunes.size()) {
-                buffer.push_back(fileRunes[i]);
+            if (i < content.size()) {
+                buffer.push_back(content[i]);
             }
             continue;
         } else {
             buffer.push_back(c);
         }

-        // SkipWhitetext
         if (!textStarted) {
             if (c == '\n') {
                 previousRow += 1;
@@ -138,10 +150,10 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
             }
         }

-        // EmitTokens
+        // Emit the tokens.
         switch (c) {
         case '[': {
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::CooldownStart,
                 1,
                 TokenType::TextFragment
@@ -160,7 +172,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
                     tokens[i].column
                 };
             }
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::CooldownEnd,
                 1,
                 TokenType::Cooldown
@@ -171,7 +183,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
             identifierStarted = false;
         } break;
         case '-': {
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::ElementDashStart,
                 1,
                 TokenType::TextFragment
@@ -181,7 +193,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
             textStarted = false;
         } break;
         case '^': {
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::ElementOrderModifier,
                 1,
                 TokenType::TextFragment
@@ -191,7 +203,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
             textStarted = false;
         } break;
         case ':': {
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::MatchGroupEnd,
                 1,
                 TokenType::TextFragment
@@ -201,7 +213,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
             textStarted = false;
         } break;
         case '>': {
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::QuestionEnd,
                 1,
                 TokenType::TextFragment
@@ -211,7 +223,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
             textStarted = false;
         } break;
         case '+': {
-            makeTokenWithTokenBuffer(
+            tokenWithBuffer(
                 TokenType::ElementPlusStart,
                 1,
                 TokenType::TextFragment
@@ -225,7 +237,8 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
         column += 1;
     }

-    makeTokenWithTokenBuffer(
+    // Append an end symbol to make parsing easier.
+    tokenWithBuffer(
         TokenType::EndOfFile,
         0,
         TokenType::TextFragment
@@ -235,7 +248,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
         std::cout << "SECTION: Lexer output:\n";
         std::cout << std::format("Token count: {}", tokens.size()) << std::endl;
         for (const Token& token : tokens) {
-            std::cout << token.ToString();
+            std::cout << token.toString();
         }
         std::cout << "SECTION END: Lexer output\n";
     }
@@ -243,17 +256,7 @@ Result<std::vector<Token>> tokenizeMdem(const std::string& fileRunes) {
     return {tokens, ""};
 }

-std::regex nextLineExp(
-    "\n",
-    std::regex_constants::ECMAScript
-);
-
-std::regex doubleSpaceExp(
-    "\\s\\s+",
-    std::regex_constants::ECMAScript
-);
-
-std::string Token::ToString(const TokenType* ttype) {
+std::string Token::toString(const TokenType* ttype) {
     switch (*ttype) {
     case TokenType::TextFragment: return "text fragment";
     case TokenType::QuestionEnd: return "question end symbol";
@@ -270,15 +273,17 @@ std::string Token::ToString(const TokenType* ttype) {
     }
 }

-std::string Token::ToString() const {
+std::string Token::toString() const {
     std::string contentStr = content;
+    static const std::regex nextLineExp("\n", std::regex_constants::ECMAScript);
+    static const std::regex doubleSpaceExp("\\s\\s+", std::regex_constants::ECMAScript);
     if (tokenType == TokenType::TextFragment) {
         contentStr = std::regex_replace(contentStr, nextLineExp, "");
         contentStr = std::regex_replace(contentStr, doubleSpaceExp, " ");
     }
     return std::format(
         "{}: \"{}\" ({}:{})\n",
-        ToString(&tokenType),
+        toString(&tokenType),
         contentStr,
         row,
         column
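
Note: Token::toString now keeps its regexes as function-local statics; the whitespace clean-up it performs can be sketched in isolation like this (standalone, the surrounding main and variable names are illustrative):

#include <iostream>
#include <regex>
#include <string>

int main() {
    // Same patterns as in Token::toString above.
    static const std::regex nextLineExp("\n", std::regex_constants::ECMAScript);
    static const std::regex doubleSpaceExp("\\s\\s+", std::regex_constants::ECMAScript);

    std::string contentStr = "first line\nsecond   line";
    contentStr = std::regex_replace(contentStr, nextLineExp, "");      // drop newlines
    contentStr = std::regex_replace(contentStr, doubleSpaceExp, " ");  // collapse whitespace runs
    std::cout << contentStr << "\n";  // prints: first linesecond line
}
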
@@ -5,7 +5,6 @@
 #include <string>
 #include <vector>
 #include <map>
 #include <algorithm>
 #include <sstream>
 #include <format>

@@ -15,123 +14,85 @@
 #include "parser.h"
 #include "stringUtils.h"

-struct QuestionElement {
-    bool isDash;
-    bool isGroup;
-    std::string content;
-};
+typedef std::map<TokenType, std::vector<TokenType>> TokenAutomata;

-std::string MultiElementQuestion::toString() const {
-    std::stringstream ss;
-    for (const auto& choice : choices) {
-        char opener;
-        if (type == MultiElementType::Order) {
-            opener = '^';
-        } else if (choice.isCorrect) {
-            opener = '+';
-        } else {
-            opener = '-';
-        }
-        ss << opener << " " << choice.answer << "; ";
-    }
-    return std::format(
-        "<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
-        section,
-        cooldown,
-        questionText,
-        ss.str()
-    );
-}
-
-std::string GroupQuestion::toString() const {
-    std::stringstream ss;
-    for (auto group: groups) {
-        ss << group.name << ": ";
-        for (auto el: group.elements) {
-            ss << el << ", ";
-        }
-        ss << "; ";
-    }
-    return std::format(
-        "<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
-        section,
-        cooldown,
-        questionText,
-        ss.str()
-    );
-}
-
-// Automaton for validating token transitions
-std::map<TokenType, std::vector<TokenType>> automata;
-
-bool contains(const std::vector<TokenType>& vec, TokenType element) {
-    return std::find(vec.begin(), vec.end(), element) != vec.end();
-}
-
-// Automata for validating the parser state
-std::map<TokenType, std::vector<TokenType>> parserAutomata() {
-    std::map<TokenType, std::vector<TokenType>> automata;
-    automata[TokenType::TextFragment] = {
+TokenAutomata *automata = nullptr;
+/*
+ * A finite automaton that defines the order in which the tokens may appear.
+ * */
+void initParserAutomata() {
+    automata = new TokenAutomata;
+    (*automata)[TokenType::TextFragment] = {
         TokenType::QuestionEnd,
         TokenType::ElementDashStart,
         TokenType::ElementPlusStart,
         TokenType::MatchGroupEnd,
         TokenType::EndOfFile,
     };
-    automata[TokenType::MatchGroupEnd] = {
+    (*automata)[TokenType::MatchGroupEnd] = {
         TokenType::ElementDashStart
     };
-    automata[TokenType::QuestionEnd] = {
+    (*automata)[TokenType::QuestionEnd] = {
         TokenType::ElementDashStart,
         TokenType::ElementPlusStart
     };
-    automata[TokenType::ElementDashStart] = {
+    (*automata)[TokenType::ElementDashStart] = {
         TokenType::CooldownStart,
         TokenType::TextFragment,
         TokenType::ElementOrderModifier
     };
-    automata[TokenType::ElementOrderModifier] = {
+    (*automata)[TokenType::ElementOrderModifier] = {
         TokenType::TextFragment
     };
-    automata[TokenType::ElementPlusStart] = {
+    (*automata)[TokenType::ElementPlusStart] = {
         TokenType::TextFragment
     };
-    automata[TokenType::Cooldown] = {
+    (*automata)[TokenType::Cooldown] = {
         TokenType::CooldownEnd,
     };
-    automata[TokenType::CooldownStart] = {
+    (*automata)[TokenType::CooldownStart] = {
         TokenType::Cooldown
     };
-    automata[TokenType::CooldownEnd] = {
+    (*automata)[TokenType::CooldownEnd] = {
         TokenType::TextFragment
     };
-    automata[TokenType::StartOfFile] = {
+    (*automata)[TokenType::StartOfFile] = {
         TokenType::TextFragment,
         TokenType::ElementDashStart,
         TokenType::EndOfFile
     };
-    automata[TokenType::EndOfFile] = {};
-    return automata;
+    (*automata)[TokenType::EndOfFile] = {};
 }

-std::string capitalize(const std::string& str) {
+/*
+ * Checks whether the token list is accepted by the language's automaton.
+ * */
+Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
+    if (!automata) {
+        initParserAutomata();
+    }
+    for (size_t i = 0; i < tokens.size() - 1; ++i) {
+        Token token = tokens[i];
+        Token nextToken = tokens[i + 1];
+        if (
+            std::find(
+                (*automata)[token.tokenType].begin(),
+                (*automata)[token.tokenType].end(),
+                nextToken.tokenType
+            ) == (*automata)[token.tokenType].end()) {
+
+            auto capitalize = [](const std::string& str) {
     if (str.empty()) return str;
     std::string result = str;
     result[0] = std::towupper(result[0]);
     return result;
 }

-Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
-    automata = parserAutomata();
-    for (size_t i = 0; i < tokens.size() - 1; ++i) {
-        Token token = tokens[i];
-        Token nextToken = tokens[i + 1];
-        if (!contains(automata[token.tokenType], nextToken.tokenType)) {
+            };
             return {
                 .error=std::format(
                     "Invalid token sequence: {} cannot precede {}",
-                    std::string(capitalize(Token::ToString(&token.tokenType))),
-                    std::string(capitalize(Token::ToString(&nextToken.tokenType)))
+                    std::string(capitalize(Token::toString(&token.tokenType))),
+                    std::string(capitalize(Token::toString(&nextToken.tokenType)))
                 ),
                 .row=token.row,
                 .column=token.column
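
Note: the grammar check above is a table-driven automaton: for each token type it stores the set of token types allowed to follow it, and ValidateGrammar walks adjacent pairs. A self-contained sketch of the same technique with a reduced token set (names and the simplified transitions are illustrative, not the project's API):

#include <algorithm>
#include <iostream>
#include <map>
#include <vector>

enum class Tok { StartOfFile, TextFragment, QuestionEnd, ElementPlusStart, EndOfFile };

// Allowed successors per token type, mirroring the table built in initParserAutomata.
static const std::map<Tok, std::vector<Tok>> transitions{
    {Tok::StartOfFile,      {Tok::TextFragment, Tok::EndOfFile}},
    {Tok::TextFragment,     {Tok::QuestionEnd, Tok::ElementPlusStart, Tok::EndOfFile}},
    {Tok::QuestionEnd,      {Tok::ElementPlusStart}},
    {Tok::ElementPlusStart, {Tok::TextFragment}},
    {Tok::EndOfFile,        {}},
};

bool accepts(const std::vector<Tok>& toks) {
    for (size_t i = 0; i + 1 < toks.size(); ++i) {
        const auto& allowed = transitions.at(toks[i]);
        if (std::find(allowed.begin(), allowed.end(), toks[i + 1]) == allowed.end()) {
            return false;  // toks[i] may not precede toks[i + 1]
        }
    }
    return true;
}

int main() {
    std::vector<Tok> ok{Tok::StartOfFile, Tok::TextFragment, Tok::QuestionEnd,
                        Tok::ElementPlusStart, Tok::TextFragment, Tok::EndOfFile};
    std::cout << std::boolalpha << accepts(ok) << "\n";  // true
}
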
@@ -141,17 +102,6 @@ Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
     return {};
 }

-time_t parseToUTCTime(const std::string datetime, std::string format) {
-    std::tm tm = {};
-    std::istringstream ss(datetime);
-    ss >> std::get_time(&tm, format.c_str());
-    if (ss.fail()) {
-        throw std::runtime_error("Failed to parse datetime string");
-    }
-    std::time_t time = timegm(&tm);
-    return time;
-}
-
 // @Fix: Prevent duplicate group names and questions in ordered question (to
 // simplify checking in practice).
 Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
@@ -192,6 +142,17 @@ Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {

         if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) {
             try {
+                auto parseToUTCTime = [](const std::string datetime, std::string format) {
+                    std::tm tm = {};
+                    std::istringstream ss(datetime);
+                    ss >> std::get_time(&tm, format.c_str());
+                    if (ss.fail()) {
+                        throw std::runtime_error("Failed to parse datetime string");
+                    }
+                    std::time_t time = timegm(&tm);
+                    return time;
+                };
+
                 time = parseToUTCTime(tokens[i].content.c_str(), "%d.%m.%Y %H:%M");
             } catch (std::exception e) {
                 return makeResult(
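
Note: the parseToUTCTime lambda above parses "%d.%m.%Y %H:%M" with std::get_time and converts the result with timegm; a standalone sketch of the same parsing (the main function is illustrative; timegm is a POSIX/glibc extension, not standard C++):

#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

std::time_t parseToUTCTime(const std::string& datetime, const std::string& format) {
    std::tm tm = {};
    std::istringstream ss(datetime);
    ss >> std::get_time(&tm, format.c_str());
    if (ss.fail()) {
        throw std::runtime_error("Failed to parse datetime string");
    }
    return timegm(&tm);  // interpret the parsed fields as UTC (POSIX extension)
}

int main() {
    std::time_t t = parseToUTCTime("22.03.2026 00:26", "%d.%m.%Y %H:%M");
    std::cout << t << "\n";  // seconds since the Unix epoch (UTC)
}
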
@@ -396,3 +357,43 @@ Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
         Token()
     );
 }
+
+std::string MultiElementQuestion::toString() const {
+    std::stringstream ss;
+    for (const auto& choice : choices) {
+        char opener;
+        if (type == MultiElementType::Order) {
+            opener = '^';
+        } else if (choice.isCorrect) {
+            opener = '+';
+        } else {
+            opener = '-';
+        }
+        ss << opener << " " << choice.answer << "; ";
+    }
+    return std::format(
+        "<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
+        section,
+        cooldown,
+        questionText,
+        ss.str()
+    );
+}
+
+std::string GroupQuestion::toString() const {
+    std::stringstream ss;
+    for (auto group: groups) {
+        ss << group.name << ": ";
+        for (auto el: group.elements) {
+            ss << el << ", ";
+        }
+        ss << "; ";
+    }
+    return std::format(
+        "<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
+        section,
+        cooldown,
+        questionText,
+        ss.str()
+    );
+}