mirror of
https://github.com/jorenchik/mdemory.git
synced 2026-03-22 00:26:21 +00:00
Restructured, removing the Go source
This commit is contained in:
387
src/transpiler/parser.cpp
Normal file
387
src/transpiler/parser.cpp
Normal file
@@ -0,0 +1,387 @@
|
||||
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <ctime>
#include <format>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "config.h"
#include "lexer.h"
#include "result.h"
#include "parser.h"
#include "stringUtils.h"
|
||||
|
||||
// One element line of a question body, collected by parseQuestions() before
// the final question object is assembled.
struct QuestionElement {
    // True when the element was opened by an ElementDashStart token;
    // parseQuestions() treats every other opener as a "plus" element.
    bool isDash{false};
    // True when the element is followed by a MatchGroupEnd token, i.e. it
    // names a group rather than a plain element.
    bool isGroup{false};
    // Text of the element, copied from the token content (not yet cleaned).
    std::string content;
};
|
||||
|
||||
// Renders the question as a multi-line string: a header line, the section,
// the cooldown (printed under the "id" label), the question text and finally
// every choice prefixed with its marker character.
std::string MultiElementQuestion::ToString() const {
    const bool ordered = (type == MultiElementType::Order);
    std::ostringstream choicesText;
    for (const auto& choice : Choices) {
        // '^' for every choice of an order question; otherwise '+' when the
        // choice is flagged correct and '-' when it is not.
        const char opener = ordered ? '^' : (choice.IsCorrect ? '+' : '-');
        choicesText << opener << " " << choice.Answer << "; ";
    }
    return std::format(
        "<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
        Section,
        Cooldown,
        QuestionText,
        choicesText.str()
    );
}
|
||||
|
||||
// Renders the question as a multi-line string: a header line, the section,
// the cooldown (printed under the "id" label), the question text and finally
// each group as "name: el, el, ; ".
std::string GroupQuestion::ToString() const {
    std::ostringstream groupsText;
    for (const auto& group : Groups) {
        groupsText << group.name << ": ";
        for (const auto& element : group.elements) {
            groupsText << element << ", ";
        }
        groupsText << "; ";
    }
    return std::format(
        "<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
        Section,
        Cooldown,
        QuestionText,
        groupsText.str()
    );
}
|
||||
|
||||
// Automaton for validating token transitions
|
||||
// Maps a token type to the token types that may directly follow it.
// ValidateGrammar() overwrites this table with parserAutomata() on every call.
std::map<TokenType, std::vector<TokenType>> automata;
|
||||
|
||||
bool contains(const std::vector<TokenType>& vec, TokenType element) {
|
||||
return std::find(vec.begin(), vec.end(), element) != vec.end();
|
||||
}
|
||||
|
||||
// Automata for validating the parser state
|
||||
std::map<TokenType, std::vector<TokenType>> parserAutomata() {
|
||||
std::map<TokenType, std::vector<TokenType>> automata;
|
||||
automata[TokenType::TextFragment] = {
|
||||
TokenType::QuestionEnd,
|
||||
TokenType::ElementDashStart,
|
||||
TokenType::ElementPlusStart,
|
||||
TokenType::MatchGroupEnd,
|
||||
TokenType::EndOfFile,
|
||||
};
|
||||
automata[TokenType::MatchGroupEnd] = {
|
||||
TokenType::ElementDashStart
|
||||
};
|
||||
automata[TokenType::QuestionEnd] = {
|
||||
TokenType::ElementDashStart,
|
||||
TokenType::ElementPlusStart
|
||||
};
|
||||
automata[TokenType::ElementDashStart] = {
|
||||
TokenType::CooldownStart,
|
||||
TokenType::TextFragment,
|
||||
TokenType::ElementOrderModifier
|
||||
};
|
||||
automata[TokenType::ElementOrderModifier] = {
|
||||
TokenType::TextFragment
|
||||
};
|
||||
automata[TokenType::ElementPlusStart] = {
|
||||
TokenType::TextFragment
|
||||
};
|
||||
automata[TokenType::Cooldown] = {
|
||||
TokenType::CooldownEnd,
|
||||
};
|
||||
automata[TokenType::CooldownStart] = {
|
||||
TokenType::Cooldown
|
||||
};
|
||||
automata[TokenType::CooldownEnd] = {
|
||||
TokenType::TextFragment
|
||||
};
|
||||
automata[TokenType::StartOfFile] = {
|
||||
TokenType::TextFragment,
|
||||
TokenType::ElementDashStart,
|
||||
TokenType::EndOfFile
|
||||
};
|
||||
automata[TokenType::EndOfFile] = {};
|
||||
return automata;
|
||||
}
|
||||
|
||||
// Returns a copy of `str` with its first character converted to upper case.
// An empty string is returned unchanged; only the first byte is touched.
std::string capitalize(const std::string& str) {
    std::string result = str;
    if (!result.empty()) {
        // std::toupper expects a value representable as unsigned char, so the
        // byte is cast first; a plain (possibly negative) char must not be
        // passed directly.
        result.front() = static_cast<char>(
            std::toupper(static_cast<unsigned char>(result.front()))
        );
    }
    return result;
}
|
||||
|
||||
// Checks that every pair of adjacent tokens is an allowed transition.
//
// The global `automata` table is refreshed from parserAutomata() on each call.
// Returns a default Result when the sequence is valid (including an empty or
// single-token sequence). Otherwise returns a Result whose `error` describes
// the first offending pair and whose `row`/`column` point at the first token
// of that pair.
Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
    automata = parserAutomata();
    // `i + 1 < size()` rather than `i < size() - 1`: the subtraction wraps
    // around for an empty vector and would index far out of bounds.
    for (size_t i = 0; i + 1 < tokens.size(); ++i) {
        // Copy only the token types; the names must stay non-const lvalues
        // because Token::ToString takes their address.
        TokenType currentType = tokens[i].tokenType;
        TokenType nextType = tokens[i + 1].tokenType;

        // find() instead of operator[] so a lookup never inserts into the table;
        // a token type without an entry has no valid followers.
        const auto allowed = automata.find(currentType);
        const bool isValid =
            allowed != automata.end() && contains(allowed->second, nextType);
        if (!isValid) {
            return {
                .error=std::format(
                    "Invalid token sequence: {} cannot precede {}",
                    capitalize(Token::ToString(&currentType)),
                    capitalize(Token::ToString(&nextType))
                ),
                .row=tokens[i].row,
                .column=tokens[i].column
            };
        }
    }
    return {};
}
|
||||
|
||||
// Builds question objects from a lexed token stream.
//
// Steps:
//   1. An empty token list yields an empty, error-free result.
//   2. The token order is validated with ValidateGrammar(); a grammar error is
//      forwarded together with its row/column.
//   3. If the first token is a TextFragment it is parsed as a timestamp in
//      the "%d.%m.%Y %H:%M" format and stored in the returned ParseInfo.
//   4. Every question starts with an ElementDashStart token, optionally
//      followed by a cooldown, and is followed by its elements. Depending on
//      the elements, a GroupQuestion or a MultiElementQuestion is created.
//
// The returned Result always carries the questions parsed so far and the
// parsed time; a non-empty `error` (with `row`/`column`) signals failure.
// Question objects are allocated with `new` and are not freed here.
// NOTE(review): presumably the caller takes ownership — confirm.
Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
    auto questions = std::vector<Question*>();
    time_t time = 0;

    // Packs the current parse state plus an error message and its position.
    auto makeResult = [&questions, &time](const std::string& error, const Token& token) -> Result<ParseInfo> {
        return {
            { questions, time },
            error,
            token.row,
            token.column
        };
    };

    if (tokens.size() == 0) {
        return makeResult("", Token());
    }

    auto result = ValidateGrammar(tokens);
    if (result.error.length() > 0) {
        return makeResult(
            result.error,
            Token{.row=result.row, .column=result.column}
        );
    }

    // Never assigned below, so every question currently gets an empty section.
    std::string section;
    size_t i = 0;

    if (debug) {
        std::cout << "SECTION: Parser output:\n";
    }

    // True when `index` addresses an existing token that is not EndOfFile.
    // Captures by reference so the token vector is not copied.
    auto isInBounds = [&tokens](size_t index) {
        return index < tokens.size() && tokens[index].tokenType != TokenType::EndOfFile;
    };

    // Optional leading text fragment holding the timestamp.
    if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) {
        std::tm tm = {};
        // strptime() signals failure by returning a null pointer; it never
        // throws, so the result has to be checked explicitly.
        if (strptime(tokens[i].content.c_str(), "%d.%m.%Y %H:%M", &tm) == nullptr) {
            return makeResult(
                "cannot parse the time - expected the format DD.MM.YYYY HH:MM",
                tokens[i]
            );
        }
        time = mktime(&tm);
        i++;
    }

    while (i < tokens.size()) {
        if (tokens[i].tokenType == TokenType::ElementDashStart) {
            std::string questionText;
            std::vector<QuestionElement> questionElements;
            double cooldown = 0;
            bool isOrderQuestion = false;
            bool isGroupQuestion = false;
            bool isPlusQuestion = false;

            // Start element parsing & add to the offset.
            if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
                return makeResult(
                    "cannot have order modifier ('^') in the question definition",
                    tokens[i + 1]
                );
            }
            if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::CooldownStart) {
                // Layout: dash, cooldown start, cooldown, cooldown end, text,
                // question end -> the text sits at i + 4.
                if (i + 4 >= tokens.size()) {
                    return makeResult(
                        "unexpected end of input in the question definition",
                        tokens[i]
                    );
                }
                try {
                    cooldown = std::stod(tokens[i + 2].content);
                } catch (const std::exception&) {
                    return makeResult(
                        "error parsing cooldown",
                        tokens[i + 1]
                    );
                }
                questionText = tokens[i + 4].content;
                i += 6;
            } else {
                // Layout: dash, text, question end -> the text sits at i + 1.
                if (i + 1 >= tokens.size()) {
                    return makeResult(
                        "unexpected end of input in the question definition",
                        tokens[i]
                    );
                }
                cooldown = 0;
                questionText = tokens[i + 1].content;
                i += 3;
            }

            // Parse elements of a question.
            while (isInBounds(i)) {

                // Check question end: a dash that is itself followed by a
                // QuestionEnd token starts the next question.
                if (isInBounds(i + 3) && tokens[i].tokenType == TokenType::ElementDashStart) {
                    // Distance to the possible question end.
                    size_t offset;
                    if (tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
                        offset = tokens[i + 2].tokenType == TokenType::CooldownStart ? 6 : 3;
                    } else {
                        offset = tokens[i + 1].tokenType == TokenType::CooldownStart ? 5 : 2;
                    }
                    if (isInBounds(i + offset) && tokens[i + offset].tokenType == TokenType::QuestionEnd) {
                        break;
                    }
                    // Reaching this point with offset == 5 means the token at
                    // i + 5 is missing, EndOfFile or not a QuestionEnd, so the
                    // cooldown was placed on an ordinary element.
                    if (offset == 5) {
                        // Cannot place the identifier on the ordinary element.
                        return makeResult(
                            "Invalid identifier placement",
                            tokens[i]
                        );
                    }
                }

                // Determine element type.
                const bool isDash = tokens[i].tokenType == TokenType::ElementDashStart;
                bool isGroup = false;
                bool isOrder = false;
                if (!isDash) {
                    isPlusQuestion = true;
                }
                if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
                    isOrder = true;
                    isOrderQuestion = true;
                    if (!isDash) {
                        return makeResult(
                            "order questions can only be used with dashes ('-')",
                            tokens[i]
                        );
                    }
                    if (isGroupQuestion) {
                        return makeResult(
                            "question with groups cannot be ordered ('-^' and ':')",
                            tokens[i]
                        );
                    }
                    if (isInBounds(i + 3) && tokens[i + 3].tokenType == TokenType::MatchGroupEnd) {
                        return makeResult(
                            "cannot have groups in order question('-^' and ':')",
                            tokens[i]
                        );
                    }
                }
                if (isInBounds(i + 2) && tokens[i + 2].tokenType == TokenType::MatchGroupEnd) {
                    isGroup = true;
                    isGroupQuestion = true;
                    if (!isDash) {
                        return makeResult(
                            "group questions can only be used with dashes ('-')",
                            tokens[i]
                        );
                    }
                }

                // The element text follows the opener, or the order modifier
                // when one is present.
                const size_t contentIndex = isOrder ? i + 2 : i + 1;
                if (contentIndex >= tokens.size()) {
                    return makeResult(
                        "unexpected end of input in the question element",
                        tokens[i]
                    );
                }

                QuestionElement questionElement;
                questionElement.isDash = isDash;
                questionElement.isGroup = isGroup;
                questionElement.content = tokens[contentIndex].content;
                questionElements.push_back(questionElement);

                // Opener + text, plus one token each for the order modifier
                // and the group terminator when present.
                size_t offset = 2;
                if (isOrder) {
                    offset += 1;
                }
                if (isGroup) {
                    offset += 1;
                }

                i += offset;
            }

            if (questionElements.size() > 0) {
                if (isGroupQuestion) {
                    auto *question = new GroupQuestion();
                    question->Cooldown = cooldown;
                    // NOTE(review): unlike the multi-element branch below, the
                    // text is stored without cleanContent() — confirm this is
                    // intended.
                    question->QuestionText = questionText;
                    question->Section = section;
                    for (const auto& element : questionElements) {
                        if (element.isGroup) {
                            auto group = Group();
                            group.name = cleanContent(element.content);
                            question->Groups.push_back(group);
                        } else if (!question->Groups.empty()) {
                            // Plain elements belong to the most recent group;
                            // elements before the first group are dropped.
                            question->Groups.back().elements.push_back(
                                cleanContent(element.content)
                            );
                        }
                    }
                    questions.push_back(question);
                    if (debug) {
                        std::cout << question->ToString() << "\n";
                    }
                } else {
                    auto *question = new MultiElementQuestion();
                    question->Cooldown = cooldown;
                    question->QuestionText = cleanContent(questionText);
                    question->Section = section;
                    for (const auto& elem : questionElements) {
                        Choice choice;
                        choice.Answer = cleanContent(elem.content);
                        // Non-dash (plus) elements are the correct choices.
                        choice.IsCorrect = !elem.isDash;
                        question->Choices.push_back(choice);
                    }
                    questions.push_back(question);
                    // A plus element takes precedence over an order modifier
                    // when deciding the question type.
                    if (isPlusQuestion) {
                        question->type = MultiElementType::MultiChoice;
                    } else if (isOrderQuestion) {
                        question->type = MultiElementType::Order;
                    } else {
                        question->type = MultiElementType::Regular;
                    }
                    if (debug) {
                        std::cout << question->ToString() << "\n";
                    }
                }
            }
        } else if (tokens[i].tokenType == TokenType::EndOfFile) {
            if (debug) {
                std::cout << "File terminated: EndOfFile\n";
            }
            break;
        } else {
            return makeResult(
                "Unexpected token encountered",
                tokens[i]
            );
        }
    }

    if (debug) {
        std::cout << "SECTION END: Parser output:\n";
    }
    return makeResult(
        "",
        Token()
    );
}
|
||||
Reference in New Issue
Block a user