mdemory/src/transpiler/parser.cpp

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <exception>
#include <format>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <regex>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "config.h"
#include "lexer.h"
#include "result.h"
#include "parser.h"
#include "stringUtils.h"

// Uz Windows sistēmās timegm funkcijai ir atšķirīgs nosaukums.
#ifdef _WIN32
#define timegm _mkgmtime
#endif

typedef std::map<TokenType, std::vector<TokenType>> TokenTransitions;

TokenTransitions *transitions = nullptr;
/*
 * Builds the token-order transition table, which defines the order in which
 * tokens may appear. A transition from token A to token B is allowed only if
 * B is present in the list stored under key A.
 *
 * Allocates a fresh table and stores it in the global `transitions` pointer.
 */
void initTransitions() {
	transitions = new TokenTransitions{
		{TokenType::TextFragment, {
			TokenType::QuestionEnd,
			TokenType::ElementDashStart,
			TokenType::ElementPlusStart,
			TokenType::MatchGroupEnd,
			TokenType::EndOfFile,
		}},
		{TokenType::MatchGroupEnd, {
			TokenType::ElementDashStart,
			TokenType::EndOfFile,
		}},
		{TokenType::QuestionEnd, {
			TokenType::ElementDashStart,
			TokenType::ElementPlusStart,
		}},
		{TokenType::ElementDashStart, {
			TokenType::CooldownStart,
			TokenType::TextFragment,
			TokenType::ElementOrderModifier,
		}},
		{TokenType::ElementOrderModifier, {
			TokenType::TextFragment,
		}},
		{TokenType::ElementPlusStart, {
			TokenType::TextFragment,
		}},
		{TokenType::Cooldown, {
			TokenType::CooldownEnd,
		}},
		{TokenType::CooldownStart, {
			TokenType::Cooldown,
		}},
		{TokenType::CooldownEnd, {
			TokenType::TextFragment,
		}},
		// Nothing is allowed to follow the end of the file.
		{TokenType::EndOfFile, {}},
	};
}

/*
 * Checks that the token list follows the transitions permitted by the
 * language grammar.
 *
 * Every adjacent pair (A, B) in `tokens` must be present in the transition
 * table, i.e. B has to be listed under key A. The table is created on first
 * use if it does not exist yet.
 *
 * Returns an empty result when the sequence is valid; a list with fewer than
 * two tokens has no pairs and is therefore valid. Otherwise returns a result
 * whose `error` names the first offending pair and whose `row`/`column` are
 * those of the first token of that pair.
 */
Result<NoneType> ValidateGrammar(const std::vector<Token>& tokens) {
	if (!transitions) {
		initTransitions();
	}

	// Upper-cases the first byte of a token name for the error message.
	auto capitalize = [](const std::string& str) {
		if (str.empty()) return str;
		std::string result = str;
		// std::toupper needs a value representable as unsigned char; a plain
		// (possibly negative) char for a non-ASCII byte is undefined behaviour.
		result[0] = static_cast<char>(
			std::toupper(static_cast<unsigned char>(result[0]))
		);
		return result;
	};

	// `i + 1 < size()` instead of `i < size() - 1`: size() is unsigned, so the
	// subtraction would wrap around for an empty list.
	for (size_t i = 0; i + 1 < tokens.size(); ++i) {
		// Only the types are copied (not the whole tokens); they are kept as
		// non-const locals because Token::toString receives their address.
		TokenType currentType = tokens[i].tokenType;
		TokenType nextType = tokens[i + 1].tokenType;

		// Look the key up with find(): operator[] would insert an empty entry
		// into the shared table for a token type that has none.
		const auto entry = transitions->find(currentType);
		const bool isAllowed =
			entry != transitions->end() &&
			std::find(
				entry->second.begin(),
				entry->second.end(),
				nextType
			) != entry->second.end();
		if (isAllowed) {
			continue;
		}

		return {
			.error=std::format(
				"Nekorekta tekstvienību secība: \"{}\" nevar būt pirms \"{}\"",
				std::string(capitalize(Token::toString(&currentType))),
				std::string(capitalize(Token::toString(&nextType)))
			),
			.row=tokens[i].row,
			.column=tokens[i].column
		};
	}
	return {};
}

/*
 * Processes the tokens, extracting an optional leading date/time and one or
 * more questions of various kinds.
 *
 * On success returns the question pointers and the date/time (0 when the
 * list does not start with a text fragment) with an empty error. On failure
 * returns the questions parsed so far together with an error message and its
 * location in the file; the location is taken from the position information
 * stored in the offending token.
 *
 * Questions are allocated on the heap and returned as raw pointers.
 * NOTE(review): ownership presumably passes to the caller - confirm.
 *
 * NOTE(review): several fixed-offset accesses (tokens[i + 1], tokens[i + 2],
 * tokens[i + 4]) rely on the grammar validation done first and presumably on
 * the lexer always ending the list with EndOfFile - confirm.
 */
Result<ParseInfo> parseQuestions(const std::vector<Token>& tokens) {
	auto questions = std::vector<Question*>();
	time_t time = 0;

	// Helper: bundles the questions and time gathered so far with the given
	// error text and the position of the given token.
	auto makeResult = [&questions, &time](const std::string& error, const Token& token)
			-> Result<ParseInfo> {
		return {
			{ questions, time },
			error,
			token.row,
			token.column
		};
	};

	// Initial validation.
	if (tokens.size() == 0) {
		return makeResult("", Token());
	}
	auto result = ValidateGrammar(tokens);
	if (result.error.length() > 0) {
		return makeResult(
			result.error,
			Token{.row=result.row, .column=result.column}
		);
	}

	size_t i = 0;

	if (debug) {
		std::cout << "SECTION: PARSER:\n";
	}

	// Helper: checks that the given index holds a token that is not the end
	// of the file. The list is captured by reference; capturing it by value
	// would copy every token.
	auto isInBounds = [&tokens](size_t index) {
		return index < tokens.size() && tokens[index].tokenType != TokenType::EndOfFile;
	};

	// A leading text fragment is read as the start date and time.
	if (isInBounds(i) && tokens[i].tokenType == TokenType::TextFragment) {

		// Check the date and time format.
		auto datetimeContent = cleanContent(tokens[i].content);
		static const std::regex datetimeExp(
			"^\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d \\d\\d?:\\d\\d?$",
			std::regex_constants::ECMAScript | std::regex_constants::icase
		);
		if (!std::regex_match(datetimeContent, datetimeExp)) {
			return makeResult(
				"Nekorekts datuma un laika formāts",
				tokens[i]
			);
		}

		// Parse the date and time.
		const std::string format = "%d.%m.%Y %H:%M";
		const std::string datetime = datetimeContent.c_str();
		std::tm tm = {};
		std::istringstream ss(datetime);
		ss >> std::get_time(&tm, format.c_str());
		if (ss.fail()) {
			return makeResult(
				"Neizdevās nolasīt datuma un laiku",
				tokens[i]
			);
		}
		time = timegm(&tm);
		i++;
	}

	// Main parsing loop: one iteration per question.
	while (i < tokens.size()) {
		if (tokens[i].tokenType == TokenType::ElementDashStart) {
			std::string questionText;
			std::vector<QuestionElement> questionElements;
			double cooldown = 0;
			bool isOrderQuestion     = false;
			bool isGroupQuestion     = false;
			bool isPlusQuestion      = false;
			bool hasGroupEncountered = false;

			// A question start is expected here - it cannot carry an order
			// modifier.
			if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
				return makeResult(
					"Nevar izmantot secības modifikatoru ('^') jautājuma sākumā",
					tokens[i + 1]);
			}

			// Remember the start so that later errors can point at the
			// beginning of the question.
			const Token questionStartToken = tokens[i];

			// Handle the cooldown, if there is one.
			bool hasCooldown;
			if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::CooldownStart) {
				try {
					auto cooldownContent = tokens[i + 2].content;
					// Check that the cooldown is a single number that is:
					// - positive;
					// - either an integer or a dot-separated decimal, with or
					//   without the integer part.
					static const std::regex decimalNumExp(
						"^\\d*(\\.\\d+)?$",
						std::regex_constants::ECMAScript | std::regex_constants::icase
					);
					if (!std::regex_match(cooldownContent, decimalNumExp)) {
						throw std::invalid_argument("Nekorekts pārtraukuma formāts");
					}
					// std::stod throws for an empty string, which the pattern
					// above lets through; that is reported below as well.
					cooldown = std::stod(cooldownContent);
				} catch (const std::exception&) {
					return makeResult(
						"Nekorekts pārtraukums. Pārtraukums ir viens pozitīvs "
						"decimāls skaitlis ar punktu vai bez punkta",
						tokens[i + 1]
					);
				}
				// Layout: dash, cooldown start, cooldown, cooldown end, text.
				questionText = tokens[i + 4].content;
				hasCooldown = true;
			} else {
				cooldown = 0;
				questionText = tokens[i + 1].content;
				hasCooldown = false;
			}

			// Check that the question text is closed by the correct symbol.
			// The index has to be relative to the current question (i + ...),
			// otherwise the bounds check only works for a question at index 0.
			const size_t questionEndIndex = i + (hasCooldown ? 5 : 2);
			if (isInBounds(questionEndIndex) &&
				tokens[questionEndIndex].tokenType != TokenType::QuestionEnd) {
				return makeResult(
					"Jautājumu var iesākt tikai ar \">\"",
					tokens[questionEndIndex]
				);
			}
			i += hasCooldown ? 6 : 3;

			// Parse the elements of the question.
			while (isInBounds(i)) {

				// Check whether another question starts here and finish the
				// current one if so.
				if (isInBounds(i + 3) && tokens[i].tokenType == TokenType::ElementDashStart) {

					// A question may or may not have a cooldown - work out
					// where its end symbol would be.
					size_t offset;
					if (tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
						offset = tokens[i + 2].tokenType == TokenType::CooldownStart ? 6 : 3;
					} else {
						offset = tokens[i + 1].tokenType == TokenType::CooldownStart ? 5 : 2;
					}
					if (isInBounds(i + offset) && tokens[i + offset].tokenType == TokenType::QuestionEnd) {
						break;
					}

					// A cooldown was found but no question end follows it, so
					// this is an element with a cooldown. Not breaking above
					// already implies tokens[i + 5] is not a question end, so
					// that token is not read again (it may not exist).
					if (offset == 5) {
						return makeResult(
							"Jautājuma elementam nevar būt pārtraukums",
							tokens[i]
						);
					}
				}

				// Determine the element kind and report related errors.
				bool isDash;
				bool isGroup = false;
				bool isOrder = false;

				// Element start.
				if (tokens[i].tokenType == TokenType::ElementDashStart) {
					isDash = true;
				} else {
					isDash = false;
					isPlusQuestion = true;
					if (isGroupQuestion) {
						return makeResult(
							"Jautājumos ar grupām nevar būt secības elementu ('+' and ':')",
							tokens[i]
						);
					}
					if (isOrderQuestion) {
						return makeResult(
							"Secības jautājumos nevar būt izvēles  elementu ('-^' and '+')",
							tokens[i]
						);
					}
				}

				// Element order modifier.
				if (isInBounds(i + 1) && tokens[i + 1].tokenType == TokenType::ElementOrderModifier) {
					isOrder = true;
					isOrderQuestion = true;
					if (!isDash) {
						return makeResult(
							"Secības jautājumus var izmantot tikai ar svītrām ('-')",
							tokens[i]
						);
					}
					if (isGroupQuestion) {
						return makeResult(
							"Jautājumos ar grupām nevar būt secības elementu ('-^' and ':')",
							tokens[i]
						);
					}
					if (isPlusQuestion) {
						return makeResult(
							"Izvēles jautājumos nevar būt secības elementu ('+' and '-^')",
							tokens[i]
						);
					}
					if (isInBounds(i + 3) && tokens[i + 3].tokenType == TokenType::MatchGroupEnd) {
						return makeResult(
							"Secības jautājumā nevar būt grupas ('-^' and ':')",
							tokens[i]
						);
					}
				}

				// Element group modifier.
				if (isInBounds(i + 2) && tokens[i + 2].tokenType == TokenType::MatchGroupEnd) {
					isGroup         = true;
					isGroupQuestion = true;
					if (!isDash) {
						return makeResult(
							"Grupas jautājumus var definēt tikai ar svītru elementiem ('-')",
							tokens[i]
						);
					}
					// The first group must come before any plain element.
					if (!hasGroupEncountered && questionElements.size() > 0) {
						return makeResult(
							"Elementi grupas jautājumā nevar eksistēt bez grupas",
							tokens[i]
						);
					}
					hasGroupEncountered = true;
				}

				// Create the question element of the matching kind.
				QuestionElement questionElement;
				questionElement.isDash  = isDash;
				questionElement.isGroup = isGroup;
				if (isOrder) {
					questionElement.content = tokens[i + 2].content;
				} else {
					questionElement.content = tokens[i + 1].content;
				}
				questionElements.push_back(questionElement);

				// The position of the next element depends on the kind of
				// element that was just read.
				size_t offset = 2;
				if (isOrder) {
					offset += 1;
				}
				if (isGroup) {
					offset += 1;
				}
				i += offset;
			}

			// A question without any elements is an error.
			if (questionElements.size() == 0) {
				return makeResult(
					"Jautājums nevar būt bez atbildes elementiem",
					questionStartToken
				);
			}

			// Create the question object. It is held by a unique_ptr until it
			// is handed over to `questions`, so an early error return cannot
			// leak it.
			if (isGroupQuestion) {
				auto question = std::make_unique<GroupQuestion>();
				question->cooldown     = cooldown;
				question->questionText = cleanContent(questionText);

				// Build the groups: a group element opens a new group, every
				// other element is added to the most recently opened group.
				int32_t groupIndex = -1;
				for (const auto& questionElement : questionElements) {
					if (questionElement.isGroup) {
						++groupIndex;
						auto group = Group();
						group.name = cleanContent(questionElement.content);
						question->groups.push_back(group);
					} else if (groupIndex >= 0) {
						question->groups[groupIndex].elements.push_back(
							cleanContent(questionElement.content)
						);
					}
				}
				if (debug) {
					std::cout << question->toString() << "\n";
				}
				questions.push_back(question.release());
			} else {
				auto question = std::make_unique<MultiElementQuestion>();
				question->cooldown = cooldown;
				question->questionText = cleanContent(questionText);

				// Build the multi-element question.
				auto existingElements = std::set<std::string>();
				for (const auto& elem : questionElements) {
					Choice choice;
					choice.answer = cleanContent(elem.content);
					choice.isCorrect = !elem.isDash;

					// Order questions must not contain equal elements.
					if (isOrderQuestion) {
						if (existingElements.contains(choice.answer)) {
							return makeResult(
								"Secības jautājumi atbildes nedrīkst atkārtoties",
								questionStartToken
							);
						}
						existingElements.insert(choice.answer);
					}
					question->choices.push_back(choice);
				}

				// Set the specialised kind of the multi-element question.
				if (isPlusQuestion) {
					question->type = MultiElementType::MultiChoice;
				} else if (isOrderQuestion) {
					question->type = MultiElementType::Order;
				} else {
					question->type = MultiElementType::Regular;
				}
				if (debug) {
					std::cout << question->toString() << "\n";
				}
				questions.push_back(question.release());
			}
		} else if (tokens[i].tokenType == TokenType::EndOfFile) {
			if (debug) {
				std::cout << "Fails beidzās: EndOfFile\n";
			}
			break;
		} else {
			return makeResult(
				"Negaidīta tekstvienība",
				tokens[i]
			);
		}
	}

	if (debug) {
		std::cout << "SECTION END: PARSER:\n";
	}
	return makeResult(
		"",
		Token()
	);
}

/*
 * Renders the question as a string for debugging.
 */
std::string MultiElementQuestion::toString() const {
	const bool isOrdered = (type == MultiElementType::Order);

	std::stringstream rendered;
	for (const auto& choice : choices) {
		// Order questions mark every element with '^'; otherwise the marker
		// shows whether the choice is a correct one.
		const char marker = isOrdered ? '^' : (choice.isCorrect ? '+' : '-');
		rendered << marker << " " << choice.answer << "; ";
	}

	return std::format(
		"<Vairāku elementu jautājums>\npārtraukums:{}\n{}\n{}",
		cooldown,
		questionText,
		rendered.str()
	);
}

/*
 * Renders the question as a string for debugging.
 *
 * Each group is written as its name followed by its elements; the cooldown
 * and the question text are placed in front of the group list.
 */
std::string GroupQuestion::toString() const {
	std::stringstream ss;
	// Iterate by const reference: iterating by value copied every group
	// (including its element list) and every element just to print it.
	for (const auto& group : groups) {
		ss << group.name << ": ";
		for (const auto& el : group.elements) {
			ss << el << ", ";
		}
		ss << "; ";
	}
	return std::format(
		"<Grupas jautājums>\npārtraukums:{}\n{}\n{}",
		cooldown,
		questionText,
		ss.str()
	);
}