Files
mdemory/src/transpiler/parser.cpp
2024-10-27 11:18:06 +02:00

399 lines
11 KiB
C++

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <ctime>
#include <format>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "config.h"
#include "lexer.h"
#include "result.h"
#include "parser.h"
#include "stringUtils.h"
// One element line of a question as collected by parseQuestions before the
// final question object is built.
struct QuestionElement {
    // True when the element was opened with an ElementDashStart token.
    // Default-initialized so a freshly declared element never holds an
    // indeterminate value.
    bool isDash = false;
    // True when the element text is followed by a MatchGroupEnd token,
    // i.e. the element names a group.
    bool isGroup = false;
    // Raw token text of the element (not yet passed through cleanContent).
    std::string content;
};
// Renders the question as a multi-line string: a "<Multiple element>" header,
// the section, the cooldown, the question text and finally every choice on
// one line, each written as "<marker> <answer>; ".
// NOTE(review): the second field is labelled "id:" but the value printed is
// `cooldown` - confirm whether the label is intentional.
std::string MultiElementQuestion::toString() const {
    std::stringstream renderedChoices;
    for (const auto& choice : choices) {
        // Order questions mark every item with '^'; otherwise '+' marks a
        // correct choice and '-' an incorrect one.
        const char marker = (type == MultiElementType::Order)
            ? '^'
            : (choice.isCorrect ? '+' : '-');
        renderedChoices << marker << " " << choice.answer << "; ";
    }
    const std::string choiceList = renderedChoices.str();
    return std::format(
        "<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
        section,
        cooldown,
        questionText,
        choiceList
    );
}
// Renders the question as a multi-line string: a "<GroupQuestion>" header,
// the section, the cooldown, the question text and finally every group on one
// line, written as "<name>: <element>, <element>, ; ".
// NOTE(review): the second field is labelled "id:" but the value printed is
// `cooldown` - confirm whether the label is intentional.
std::string GroupQuestion::toString() const {
    std::stringstream ss;
    // Iterate by const reference: taking `auto` by value copied every group
    // (including its element list) and every element string just to print it.
    for (const auto& group : groups) {
        ss << group.name << ": ";
        for (const auto& el : group.elements) {
            ss << el << ", ";
        }
        ss << "; ";
    }
    return std::format(
        "<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
        section,
        cooldown,
        questionText,
        ss.str()
    );
}
// Transition table used for validating token sequences: maps a token type to
// the token types that are allowed to follow it. It starts out empty and is
// overwritten with the result of parserAutomata() at the start of every
// ValidateGrammar() call.
std::map<TokenType, std::vector<TokenType>> automata;
bool contains(const std::vector<TokenType>& vec, TokenType element) {
return std::find(vec.begin(), vec.end(), element) != vec.end();
}
// Automata for validating the parser state
std::map<TokenType, std::vector<TokenType>> parserAutomata() {
std::map<TokenType, std::vector<TokenType>> automata;
automata[TokenType::TextFragment] = {
TokenType::QuestionEnd,
TokenType::ElementDashStart,
TokenType::ElementPlusStart,
TokenType::MatchGroupEnd,
TokenType::EndOfFile,
};
automata[TokenType::MatchGroupEnd] = {
TokenType::ElementDashStart
};
automata[TokenType::QuestionEnd] = {
TokenType::ElementDashStart,
TokenType::ElementPlusStart
};
automata[TokenType::ElementDashStart] = {
TokenType::CooldownStart,
TokenType::TextFragment,
TokenType::ElementOrderModifier
};
automata[TokenType::ElementOrderModifier] = {
TokenType::TextFragment
};
automata[TokenType::ElementPlusStart] = {
TokenType::TextFragment
};
automata[TokenType::Cooldown] = {
TokenType::CooldownEnd,
};
automata[TokenType::CooldownStart] = {
TokenType::Cooldown
};
automata[TokenType::CooldownEnd] = {
TokenType::TextFragment
};
automata[TokenType::StartOfFile] = {
TokenType::TextFragment,
TokenType::ElementDashStart,
TokenType::EndOfFile
};
automata[TokenType::EndOfFile] = {};
return automata;
}
// Returns a copy of `str` whose first byte has been upper-cased according to
// the current C locale; the remaining bytes are copied unchanged. An empty
// string is returned as-is.
std::string capitalize(const std::string& str) {
    if (str.empty()) return str;
    std::string result = str;
    // Use the narrow-character std::toupper (the original called the
    // wide-character std::towupper on a char). The argument must be
    // representable as unsigned char, hence the cast: a plain char may be
    // negative for bytes >= 0x80.
    result[0] = static_cast<char>(
        std::toupper(static_cast<unsigned char>(result[0]))
    );
    return result;
}
// Checks that every pair of adjacent tokens is an allowed transition of the
// table produced by parserAutomata().
//
// Returns a default-constructed result when all pairs are allowed (including
// for an empty or single-token vector). On the first disallowed pair it
// returns a result whose `error` names both token types and whose
// `row`/`column` are those of the first token of the pair.
//
// Side effect: overwrites the file-scope `automata` table on every call.
Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
    automata = parserAutomata();
    // `i + 1 < size()` instead of `i < size() - 1`: the subtraction wraps
    // around for an empty vector (size_t is unsigned) and would make the loop
    // read far out of bounds.
    for (size_t i = 0; i + 1 < tokens.size(); ++i) {
        // References instead of copies: a Token does not need to be
        // duplicated just to inspect its type and position.
        const Token& token = tokens[i];
        const Token& nextToken = tokens[i + 1];
        if (!contains(automata[token.tokenType], nextToken.tokenType)) {
            // Token::ToString takes a pointer; hand it mutable local copies of
            // the two types so the argument type is the same as before.
            TokenType currentType = token.tokenType;
            TokenType followingType = nextToken.tokenType;
            return {
                .error=std::format(
                    "Invalid token sequence: {} cannot precede {}",
                    std::string(capitalize(Token::ToString(&currentType))),
                    std::string(capitalize(Token::ToString(&followingType)))
                ),
                .row=token.row,
                .column=token.column
            };
        }
    }
    return {};
}
// Parses `datetime` with std::get_time using the pattern `format` and converts
// the resulting broken-down time to a time_t via timegm.
// Throws std::runtime_error when the text cannot be extracted with the
// given pattern.
time_t parseToUTCTime(const std::string datetime, std::string format) {
    std::tm parsed = {};
    std::istringstream input(datetime);
    // The stream converts to false exactly when the extraction set a failure bit.
    if (!(input >> std::get_time(&parsed, format.c_str()))) {
        throw std::runtime_error("Failed to parse datetime string");
    }
    return timegm(&parsed);
}
// @Fix: Prevent duplicate group names and questions in ordered question (to
// simplify checking in practice).
//
// Turns a token stream into question objects.
//
// Steps:
//   1. An empty stream yields an empty, error-free result.
//   2. The stream is checked with ValidateGrammar; its error (if any) is
//      returned unchanged together with its row/column.
//   3. If the first token is a TextFragment it is parsed as a timestamp with
//      the pattern "%d.%m.%Y %H:%M".
//   4. Every following ElementDashStart opens a question: an optional cooldown,
//      the question text, then its elements up to the next question header.
//      Questions containing a group element become GroupQuestion, all others
//      MultiElementQuestion.
//
// Returns a Result<ParseInfo> holding the questions built so far and the
// parsed time; `error` is empty on success, otherwise `row`/`column` are taken
// from the offending token. The questions are allocated with `new` and
// returned as raw pointers; this function never deletes them.
//
// The index arithmetic below follows the token order enforced by
// ValidateGrammar. Reads that would fall past the end of the vector (possible
// only for a stream that is not terminated by an EndOfFile token) are reported
// as errors instead of being performed.
Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
    auto questions = std::vector<Question*>();
    time_t time = 0;
    // Every return path goes through this helper so the caller always receives
    // the questions and time collected up to that point.
    auto makeResult = [&questions, &time](std::string error, Token token) -> Result<ParseInfo> {
        return {
            { questions, time },
            error,
            token.row,
            token.column
        };
    };

    if (tokens.size() == 0) {
        return makeResult("", Token());
    }
    auto result = ValidateGrammar(tokens);
    if (result.error.length() > 0) {
        return makeResult(
            result.error,
            Token{.row=result.row, .column=result.column}
        );
    }

    // Never assigned in this function, so every question gets an empty section.
    std::string section;
    size_t i = 0;
    if (debug) {
        std::cout << "SECTION: Parser output:\n";
    }

    // True when `index` addresses a token that is not EndOfFile. Captures the
    // vector by reference; capturing by value copied every token.
    auto isInBounds = [&tokens](size_t index) {
        return index < tokens.size() && tokens[index].tokenType != TokenType::EndOfFile;
    };

    // A leading text fragment is the timestamp.
    if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) {
        try {
            time = parseToUTCTime(tokens[i].content.c_str(), "%d.%m.%Y %H:%M");
        } catch (const std::exception& e) {
            // Caught by reference: catching by value slices the exception and
            // what() would no longer return the thrown message.
            return makeResult(
                std::format("cannot parse the time - {}", e.what()),
                tokens[i]
            );
        }
        i++;
    }

    while (i < tokens.size()) {
        if (tokens[i].tokenType == TokenType::ElementDashStart) {
            std::string questionText;
            std::vector<QuestionElement> questionElements;
            double cooldown;
            bool isOrderQuestion = false;
            bool isGroupQuestion = false;
            bool isPlusQuestion = false;

            // Start element parsing & add to the offset.
            if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
                return makeResult(
                    "cannot have order modifier ('^') in the question definition",
                    tokens[i + 1]
                );
            }
            const bool hasCooldown =
                isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::CooldownStart;
            // The question text is the furthest header token read below.
            const size_t questionTextIndex = hasCooldown ? i + 4 : i + 1;
            if (questionTextIndex >= tokens.size()) {
                return makeResult(
                    "unexpected end of input in question definition",
                    tokens[i]
                );
            }
            if (hasCooldown) {
                // Header layout: dash, cooldown start, cooldown value,
                // cooldown end, question text, then one more token.
                try {
                    cooldown = std::stod(tokens[i + 2].content);
                } catch (const std::exception&) {
                    return makeResult(
                        "error parsing cooldown",
                        tokens[i + 1]
                    );
                }
                questionText = tokens[questionTextIndex].content;
                i += 6;
            } else {
                // Header layout: dash, question text, then one more token.
                cooldown = 0;
                questionText = tokens[questionTextIndex].content;
                i += 3;
            }

            // Parse elements of a question.
            while (isInBounds(i)) {
                // Check question end: a dash whose text is followed by a
                // QuestionEnd token is the header of the next question.
                if (isInBounds(i + 3) && tokens[i].tokenType == TokenType::ElementDashStart) {
                    // Distance to the possible question end.
                    size_t questionEndOffset;
                    if (tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
                        questionEndOffset = tokens[i + 2].tokenType == TokenType::CooldownStart ? 6 : 3;
                    } else {
                        questionEndOffset = tokens[i + 1].tokenType == TokenType::CooldownStart ? 5 : 2;
                    }
                    if (isInBounds(i + questionEndOffset)
                        && tokens[i + questionEndOffset].tokenType == TokenType::QuestionEnd) {
                        break;
                    }
                    // Reaching this point with a cooldown after the dash means
                    // the token five positions ahead is missing or is not a
                    // QuestionEnd, so no further (possibly out-of-range) read
                    // of that token is needed.
                    if (questionEndOffset == 5) {
                        // Cannot place the identifier on the ordinary element.
                        return makeResult(
                            "Invalid identifier placement",
                            tokens[i]
                        );
                    }
                }

                // Determine element type. Anything that is not a dash is
                // treated as a plus element.
                bool isDash;
                bool isGroup = false;
                bool isOrder = false;
                if (tokens[i].tokenType == TokenType::ElementDashStart) {
                    isDash = true;
                } else {
                    isDash = false;
                    isPlusQuestion = true;
                }
                if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
                    isOrder = true;
                    isOrderQuestion = true;
                    if (!isDash) {
                        return makeResult(
                            "order questions can only be used with dashes ('-')",
                            tokens[i]
                        );
                    }
                    if (isGroupQuestion) {
                        return makeResult(
                            "question with groups cannot be ordered ('-^' and ':')",
                            tokens[i]
                        );
                    }
                    if (isInBounds(i + 3) && tokens[i + 3].tokenType == TokenType::MatchGroupEnd) {
                        return makeResult(
                            "cannot have groups in order question('-^' and ':')",
                            tokens[i]
                        );
                    }
                }
                if (isInBounds(i + 2) && tokens[i + 2].tokenType == TokenType::MatchGroupEnd) {
                    isGroup = true;
                    isGroupQuestion = true;
                    if (!isDash) {
                        return makeResult(
                            "group questions can only be used with dashes ('-')",
                            tokens[i]
                        );
                    }
                }

                // The element text sits one token further when an order
                // modifier precedes it.
                const size_t contentIndex = isOrder ? i + 2 : i + 1;
                if (contentIndex >= tokens.size()) {
                    return makeResult(
                        "unexpected end of input in question element",
                        tokens[i]
                    );
                }
                QuestionElement questionElement;
                questionElement.isDash = isDash;
                questionElement.isGroup = isGroup;
                questionElement.content = tokens[contentIndex].content;
                questionElements.push_back(questionElement);

                // Skip the opener and the text, plus the order modifier and
                // the group terminator when present.
                size_t offset = 2;
                if (isOrder) {
                    offset += 1;
                }
                if (isGroup) {
                    offset += 1;
                }
                i += offset;
            }

            // A question without elements is not added to the result.
            if (questionElements.size() > 0) {
                if (isGroupQuestion) {
                    auto *question = new GroupQuestion();
                    question->cooldown = cooldown;
                    // NOTE(review): unlike the multi-element branch below, the
                    // question text is stored without cleanContent - confirm
                    // whether that is intended.
                    question->questionText = questionText;
                    question->section = section;
                    // Index of the group currently being filled; elements seen
                    // before the first group header are dropped.
                    int32_t k = -1;
                    for (const auto& element : questionElements) {
                        if (element.isGroup) {
                            ++k;
                            auto group = Group();
                            group.name = cleanContent(element.content);
                            question->groups.push_back(group);
                        } else if (k >= 0) {
                            question->groups[k].elements.push_back(
                                cleanContent(element.content)
                            );
                        }
                    }
                    questions.push_back(question);
                    if (debug) {
                        std::cout << question->toString() << "\n";
                    }
                } else {
                    auto *question = new MultiElementQuestion();
                    question->cooldown = cooldown;
                    question->questionText = cleanContent(questionText);
                    question->section = section;
                    for (const auto& elem : questionElements) {
                        Choice choice;
                        choice.answer = cleanContent(elem.content);
                        choice.isCorrect = !elem.isDash;
                        question->choices.push_back(choice);
                    }
                    questions.push_back(question);
                    // A plus element takes precedence over an order modifier
                    // when deciding the question type.
                    if (isPlusQuestion) {
                        question->type = MultiElementType::MultiChoice;
                    } else if (isOrderQuestion) {
                        question->type = MultiElementType::Order;
                    } else {
                        question->type = MultiElementType::Regular;
                    }
                    if (debug) {
                        std::cout << question->toString() << "\n";
                    }
                }
            }
        } else if (tokens[i].tokenType == TokenType::EndOfFile) {
            if (debug) {
                std::cout << "File terminated: EndOfFile\n";
            }
            break;
        } else {
            return makeResult(
                "Unexpected token encountered",
                tokens[i]
            );
        }
    }

    if (debug) {
        std::cout << "SECTION END: Parser output:\n";
    }
    return makeResult(
        "",
        Token()
    );
}