// mdemory/src/transpiler/parser.cpp

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <ctime>
#include <format>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "config.h"
#include "lexer.h"
#include "result.h"
#include "parser.h"
#include "stringUtils.h"

// One answer/element line collected while parsing a question.
// The flags default to false so that a default-constructed element never
// carries indeterminate values; the struct remains an aggregate.
struct QuestionElement {
	// True when the element was introduced by an ElementDashStart token.
	bool isDash = false;
	// True when the element's text is followed by a MatchGroupEnd token,
	// i.e. the element names a group.
	bool isGroup = false;
	// Raw content of the element's text token (not yet cleaned).
	std::string content;
};

// Renders the question as text: a header with section, cooldown and question
// text, followed by every choice as "<marker> <answer>; ". The marker is '^'
// for every choice of an Order question, otherwise '+' for a correct choice
// and '-' for an incorrect one.
// NOTE(review): the "id:" label is filled with `cooldown` - confirm whether
// the label is stale.
std::string MultiElementQuestion::toString() const {
	const bool ordered = (type == MultiElementType::Order);

	std::stringstream choiceList;
	for (const auto& choice : choices) {
		const char marker = ordered ? '^' : (choice.isCorrect ? '+' : '-');
		choiceList << marker << " " << choice.answer << "; ";
	}

	return std::format(
		"<Multiple element>\nsection:{}\nid:{}\n{}\n{}",
		section,
		cooldown,
		questionText,
		choiceList.str()
	);
}

// Renders the question as text: a header with section, cooldown and question
// text, followed by every group as "<name>: <el>, <el>, ; ".
// Groups and their elements are visited by const reference so that printing
// does not deep-copy each group.
// NOTE(review): the "id:" label is filled with `cooldown` - confirm whether
// the label is stale.
std::string GroupQuestion::toString() const {
	std::stringstream ss;
	for (const auto& group : groups) {
		ss << group.name << ": ";
		for (const auto& el : group.elements) {
			ss << el << ", ";
		}
		ss << "; ";
	}
	return std::format(
		"<GroupQuestion>\nsection:{}\nid:{}\n{}\n{}",
		section,
		cooldown,
		questionText,
		ss.str()
	);
}

// Token-transition table consulted by ValidateGrammar: maps a token type to
// the token types allowed to come directly after it. ValidateGrammar
// reassigns it from parserAutomata() on each call.
std::map<TokenType, std::vector<TokenType>> automata;

bool contains(const std::vector<TokenType>& vec, TokenType element) {
    return std::find(vec.begin(), vec.end(), element) != vec.end();
}

// Automata for validating the parser state
std::map<TokenType, std::vector<TokenType>> parserAutomata() {
    std::map<TokenType, std::vector<TokenType>> automata;
    automata[TokenType::TextFragment] = {
        TokenType::QuestionEnd,
        TokenType::ElementDashStart,
        TokenType::ElementPlusStart,
        TokenType::MatchGroupEnd,
        TokenType::EndOfFile,
    };
    automata[TokenType::MatchGroupEnd] = {
		TokenType::ElementDashStart
	};
    automata[TokenType::QuestionEnd] = {
        TokenType::ElementDashStart,
        TokenType::ElementPlusStart
    };
    automata[TokenType::ElementDashStart] = {
        TokenType::CooldownStart,
        TokenType::TextFragment,
        TokenType::ElementOrderModifier
    };
    automata[TokenType::ElementOrderModifier] = {
		TokenType::TextFragment
    };
    automata[TokenType::ElementPlusStart] = {
        TokenType::TextFragment
    };
    automata[TokenType::Cooldown] = {
        TokenType::CooldownEnd,
    };
    automata[TokenType::CooldownStart] = {
        TokenType::Cooldown
    };
    automata[TokenType::CooldownEnd] = {
        TokenType::TextFragment
    };
    automata[TokenType::StartOfFile] = {
		TokenType::TextFragment,
        TokenType::ElementDashStart,
        TokenType::EndOfFile
    };
    automata[TokenType::EndOfFile] = {};
    return automata;
}

// Returns a copy of `str` whose first byte is upper-cased; an empty string is
// returned unchanged.
//
// Uses the narrow-character std::toupper (the original called the
// wide-character std::towupper on a `char`). The byte is converted to
// unsigned char first because passing a negative value other than EOF to
// std::toupper is undefined behaviour.
std::string capitalize(const std::string& str) {
	std::string result = str;
	if (!result.empty()) {
		const auto first = static_cast<unsigned char>(result[0]);
		result[0] = static_cast<char>(std::toupper(first));
	}
	return result;
}

// Checks that every pair of adjacent tokens is an allowed transition in the
// table produced by parserAutomata().
//
// Returns a default-constructed Result when all pairs are allowed (this
// includes empty and single-token input). Otherwise returns a Result whose
// `error` names the offending pair and whose row/column are taken from the
// first token of that pair.
//
// Side effect: the file-scope `automata` table is reassigned on every call.
Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
	automata = parserAutomata();

	// `i + 1 < size()` rather than `i < size() - 1`: the latter wraps around
	// for an empty vector and would index out of bounds.
	for (size_t i = 0; i + 1 < tokens.size(); ++i) {
		const Token& token = tokens[i];
		const Token& nextToken = tokens[i + 1];

		// find() instead of operator[] so a lookup never inserts new keys.
		const auto allowed = automata.find(token.tokenType);
		if (allowed != automata.end() && contains(allowed->second, nextToken.tokenType)) {
			continue;
		}

		// Token::ToString takes a pointer; mutable local copies are passed,
		// as the original did, so this does not depend on its constness.
		TokenType currentType = token.tokenType;
		TokenType followingType = nextToken.tokenType;
		return {
			.error=std::format(
				"Invalid token sequence: {} cannot precede {}",
				capitalize(Token::ToString(&currentType)),
				capitalize(Token::ToString(&followingType))
			),
			.row=token.row,
			.column=token.column
		};
	}
	return {};
}

// Parses `datetime` with the std::get_time pattern `format` and converts the
// resulting broken-down time to a time_t via timegm().
// Throws std::runtime_error when the text cannot be extracted with the
// given pattern.
time_t parseToUTCTime(const std::string datetime, std::string format) {
	std::istringstream input(datetime);
	std::tm parsed = {};

	if (!(input >> std::get_time(&parsed, format.c_str()))) {
		throw std::runtime_error("Failed to parse datetime string");
	}
	return timegm(&parsed);
}

// Builds a GroupQuestion from the elements collected for one question.
// An element flagged isGroup opens a new group named after its cleaned
// content; every other element is appended (cleaned) to the most recently
// opened group. Elements that precede the first group are dropped.
// The object is allocated with `new`; the caller receives the raw pointer.
static GroupQuestion* buildGroupQuestion(
	const std::string& questionText,
	const std::string& section,
	double cooldown,
	const std::vector<QuestionElement>& elements
) {
	auto* question = new GroupQuestion();
	question->cooldown     = cooldown;
	// NOTE(review): unlike the multi-element path, the question text is stored
	// without cleanContent(); kept as in the original - confirm intent.
	question->questionText = questionText;
	question->section      = section;

	size_t openedGroups = 0;
	for (const auto& element : elements) {
		if (element.isGroup) {
			auto group = Group();
			group.name = cleanContent(element.content);
			question->groups.push_back(group);
			++openedGroups;
		} else if (openedGroups > 0) {
			question->groups[openedGroups - 1].elements.push_back(
				cleanContent(element.content)
			);
		}
	}
	return question;
}

// Builds a MultiElementQuestion from the elements collected for one question.
// Each element becomes a Choice that is correct when it was NOT introduced by
// a dash. The type is MultiChoice when any non-dash element was seen,
// otherwise Order when any order modifier was seen, otherwise Regular.
// The object is allocated with `new`; the caller receives the raw pointer.
static MultiElementQuestion* buildMultiElementQuestion(
	const std::string& questionText,
	const std::string& section,
	double cooldown,
	const std::vector<QuestionElement>& elements,
	bool hasPlusElement,
	bool hasOrderElement
) {
	auto* question = new MultiElementQuestion();
	question->cooldown     = cooldown;
	question->questionText = cleanContent(questionText);
	question->section      = section;

	for (const auto& element : elements) {
		Choice choice;
		choice.answer    = cleanContent(element.content);
		choice.isCorrect = !element.isDash;
		question->choices.push_back(choice);
	}

	if (hasPlusElement) {
		question->type = MultiElementType::MultiChoice;
	} else if (hasOrderElement) {
		question->type = MultiElementType::Order;
	} else {
		question->type = MultiElementType::Regular;
	}
	return question;
}

// @Fix: Prevent duplicate group names and questions in ordered question (to
// simplify checking in practice).
//
// Turns a token stream into a list of questions.
//
// Steps:
//   1. Empty input yields an empty, error-free result.
//   2. The stream is checked with ValidateGrammar(); a grammar error is
//      returned with its row/column.
//   3. If the first token is a TextFragment it is parsed as a
//      "%d.%m.%Y %H:%M" timestamp.
//   4. Every ElementDashStart at the top level starts a question header
//      (optionally carrying a cooldown), followed by its elements up to the
//      next header or EndOfFile.
//
// The returned Result always carries the questions created so far and the
// parsed time; `error` is empty on success. Question objects are allocated
// with `new` and are not freed here - the raw pointers are handed to the
// caller inside ParseInfo.
Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
	auto questions = std::vector<Question*>();
	time_t time = 0;

	auto makeResult = [&questions, &time](const std::string& error, const Token& token) -> Result<ParseInfo> {
		return {
			{ questions, time },
			error,
			token.row,
			token.column
		};
	};

	if (tokens.empty()) {
		return makeResult("", Token());
	}

	auto result = ValidateGrammar(tokens);
	if (!result.error.empty()) {
		return makeResult(
			result.error,
			Token{.row=result.row, .column=result.column}
		);
	}

	// Never assigned in this function, so every question gets an empty section.
	std::string section;
	size_t i = 0;

	if (debug) {
		std::cout << "SECTION: Parser output:\n";
	}

	// True when `index` addresses an existing token.
	auto hasToken = [&tokens](size_t index) {
		return index < tokens.size();
	};
	// True when `index` addresses an existing token other than EndOfFile.
	// Captures by reference: the original copied the whole vector.
	auto isInBounds = [&tokens](size_t index) {
		return index < tokens.size() && tokens[index].tokenType != TokenType::EndOfFile;
	};

	// Optional leading timestamp.
	if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) {
		try {
			time = parseToUTCTime(tokens[i].content, "%d.%m.%Y %H:%M");
		} catch (const std::exception& e) {
			// Caught by reference: catching by value slices the exception
			// and what() would no longer return the real message.
			return makeResult(
				std::format("cannot parse the time - {}", e.what()),
				tokens[i]
			);
		}
		i++;
	}

	while (i < tokens.size()) {
		if (tokens[i].tokenType == TokenType::EndOfFile) {
			if (debug) {
				std::cout << "File terminated: EndOfFile\n";
			}
			break;
		}
		if (tokens[i].tokenType != TokenType::ElementDashStart) {
			return makeResult(
				"Unexpected token encountered",
				tokens[i]
			);
		}

		std::string questionText;
		std::vector<QuestionElement> questionElements;
		double cooldown = 0;
		bool isOrderQuestion = false;
		bool isGroupQuestion = false;
		bool isPlusQuestion = false;

		// Question header: "- text >" or "- [cooldown] text >".
		if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
			return makeResult(
				"cannot have order modifier ('^') in the question definition",
				tokens[i + 1]
			);
		}
		if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::CooldownStart) {
			// The header text sits four tokens after the dash; refuse to
			// read past the end of a truncated stream.
			if (!hasToken(i + 4)) {
				return makeResult(
					"unexpected end of input in the question definition",
					tokens[i]
				);
			}
			try {
				cooldown = std::stod(tokens[i + 2].content);
			} catch (const std::exception&) {
				return makeResult(
					"error parsing cooldown",
					tokens[i + 1]
				);
			}
			questionText = tokens[i + 4].content;
			i += 6;
		} else {
			if (!hasToken(i + 1)) {
				return makeResult(
					"unexpected end of input in the question definition",
					tokens[i]
				);
			}
			questionText = tokens[i + 1].content;
			i += 3;
		}

		// Parse elements of a question.
		while (isInBounds(i)) {

			// A dash whose text is followed by QuestionEnd starts the next
			// question, so the current one is complete.
			if (isInBounds(i + 3) && tokens[i].tokenType == TokenType::ElementDashStart) {
				// Distance to the possible question end.
				size_t offset;
				if (tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
					offset = tokens[i + 2].tokenType == TokenType::CooldownStart ? 6 : 3;
				} else {
					offset = tokens[i + 1].tokenType == TokenType::CooldownStart ? 5 : 2;
				}
				if (isInBounds(i + offset) && tokens[i + offset].tokenType == TokenType::QuestionEnd) {
					break;
				}
				// Reaching this point with offset 5 means the token at
				// i + 5 is missing or is not QuestionEnd: a cooldown was
				// placed on an ordinary element. (The original re-read
				// tokens[i + 5] here without a bounds check.)
				if (offset == 5) {
					return makeResult(
						"Invalid identifier placement",
						tokens[i]
					);
				}
			}

			// Determine element type.
			const bool isDash = tokens[i].tokenType == TokenType::ElementDashStart;
			bool isGroup = false;
			bool isOrder = false;
			if (!isDash) {
				isPlusQuestion = true;
			}
			if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
				isOrder = true;
				isOrderQuestion = true;
				if (!isDash) {
					return makeResult(
						"order questions can only be used with dashes ('-')",
						tokens[i]
					);
				}
				if (isGroupQuestion) {
					return makeResult(
						"question with groups cannot be ordered ('-^' and ':')",
						tokens[i]
					);
				}
				if (isInBounds(i + 3) && tokens[i + 3].tokenType == TokenType::MatchGroupEnd) {
					return makeResult(
						"cannot have groups in order question('-^' and ':')",
						tokens[i]
					);
				}
			}
			if (isInBounds(i + 2) && tokens[i + 2].tokenType == TokenType::MatchGroupEnd) {
				isGroup         = true;
				isGroupQuestion = true;
				if (!isDash) {
					return makeResult(
						"group questions can only be used with dashes ('-')",
						tokens[i]
					);
				}
			}

			// The element text follows the start token, or the order
			// modifier when one is present.
			const size_t contentIndex = isOrder ? i + 2 : i + 1;
			if (!hasToken(contentIndex)) {
				return makeResult(
					"unexpected end of input in the question element",
					tokens[i]
				);
			}
			questionElements.push_back({isDash, isGroup, tokens[contentIndex].content});

			// Start token + text, plus one each for the order modifier
			// and the group terminator.
			size_t consumed = 2;
			if (isOrder) {
				consumed += 1;
			}
			if (isGroup) {
				consumed += 1;
			}
			i += consumed;
		}

		// A header without elements produces no question.
		if (questionElements.empty()) {
			continue;
		}

		if (isGroupQuestion) {
			auto* question = buildGroupQuestion(
				questionText, section, cooldown, questionElements
			);
			questions.push_back(question);
			if (debug) {
				std::cout << question->toString() << "\n";
			}
		} else {
			auto* question = buildMultiElementQuestion(
				questionText, section, cooldown, questionElements,
				isPlusQuestion, isOrderQuestion
			);
			questions.push_back(question);
			if (debug) {
				std::cout << question->toString() << "\n";
			}
		}
	}

	if (debug) {
		std::cout << "SECTION END: Parser output:\n";
	}
	return makeResult(
		"",
		Token()
	);
}