mirror of
https://github.com/jorenchik/mdemory.git
synced 2026-03-22 00:26:21 +00:00
307 lines
8.2 KiB
C++
307 lines
8.2 KiB
C++
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <format>
|
|
#include <regex>
|
|
|
|
#include "lexer.h"
|
|
#include "config.h"
|
|
#include "result.h"
|
|
|
|
// File-scope lexer state. tokenizeMdem() resets most of it on entry, so only
// one tokenization can be in progress at a time.

// Tokens produced so far by the current tokenizeMdem() run.
std::vector<Token> tokens;
// Characters read since the last emitted token (pending text plus, at the
// end, the characters of the token that is about to be emitted).
std::vector<char> buffer;
// Position of the character currently being processed.
int32_t row = 0;
int32_t column = 0;
// Position that tokenWithBuffer() assigns to the text fragment preceding the
// next token.
int32_t previousRow = 0;
int32_t previousColumn = 0;
// True once a character other than '\n', ' ' or '\t' has been seen since the
// last token; stops the previousRow/previousColumn bookkeeping.
bool textStarted = false;
// True between a '[' token and its matching ']' token.
bool cooldownStarted = false;
// NOTE(review): not referenced anywhere in the visible part of this file.
bool sof = false;
|
|
|
|
/*
 * Strips the given characters from both ends of a string, in place.
 *
 * Characters are removed from the left and from the right until a character
 * that is not listed in trimChars is met; characters in the middle of the
 * string are never touched.
 *
 * str       - string to modify; a null pointer is ignored.
 * trimChars - set of characters to strip (order is irrelevant). An empty set
 *             leaves the string unchanged.
 */
void trimString(std::string *str, std::string trimChars) {
    if (str == nullptr) {
        return;
    }

    const std::string::size_type first = str->find_first_not_of(trimChars);
    if (first == std::string::npos) {
        // Empty, or made up solely of characters that must be stripped.
        str->clear();
        return;
    }

    // A kept character exists, so find_last_not_of cannot return npos here.
    const std::string::size_type last = str->find_last_not_of(trimChars);

    // Cut the tail first so that `first` stays a valid offset.
    str->erase(last + 1);
    str->erase(0, first);
}
|
|
|
|
/*
|
|
* Izveido tekstvienību, iegūstot to no bufera beigām.
|
|
* Ja buferī ir teksta vienība pirms tekstvienības, pievieno to pirms beigu
|
|
* tekstvienības.
|
|
*/
|
|
void tokenWithBuffer(
|
|
TokenType ttype,
|
|
size_t tokenLen,
|
|
TokenType textType
|
|
) {
|
|
std::string token(buffer.end() - tokenLen, buffer.end());
|
|
if (buffer.size() > tokenLen) {
|
|
std::string prevFragment(buffer.begin(), buffer.end() - tokenLen);
|
|
trimString(&prevFragment, " \n\t");
|
|
if (prevFragment.length() > 0) {
|
|
tokens.push_back(Token{
|
|
textType,
|
|
prevFragment,
|
|
previousRow,
|
|
previousColumn
|
|
});
|
|
}
|
|
}
|
|
buffer.clear();
|
|
|
|
tokens.push_back(Token{
|
|
ttype,
|
|
token,
|
|
row,
|
|
column
|
|
});
|
|
|
|
previousRow = row;
|
|
previousColumn = column;
|
|
buffer.clear();
|
|
}
|
|
|
|
/*
 * Converts a character string into a list of tokens.
 *
 * Single-character tokens are '[', ']', '-', '^', ':', '>' and '+'. Text
 * between them becomes text tokens (or a cooldown token when it sits between
 * '[' and ']'). A character preceded by '\' is always taken as plain text.
 * An end-of-file token is appended to simplify parsing.
 *
 * content - text to tokenize.
 *
 * Returns the token list with an empty error string on success. Input that is
 * empty or consists only of ' ', '\n' and '\t' yields an empty token list.
 * A ']' without a preceding '[' yields an error message together with the
 * row and column of that ']'.
 *
 * Uses and resets the file-scope lexer state.
 */
Result<std::vector<Token>> tokenizeMdem(const std::string& content) {
    row = 1;
    column = 1;
    previousRow = 1;
    previousColumn = 1;
    textStarted = false;
    // Must not leak from an earlier run that ended inside an unclosed '['.
    cooldownStarted = false;
    tokens.clear();
    buffer.clear();

    // Stop early when there is nothing but whitespace (or nothing at all).
    if (content.find_first_not_of(" \n\t") == std::string::npos) {
        return {tokens, ""};
    }

    // Emits a one-character token from the end of the buffer. tokenWithBuffer
    // itself records row/column as the new previousRow/previousColumn.
    const auto emitToken = [](TokenType ttype, TokenType textType) {
        tokenWithBuffer(ttype, 1, textType);
        textStarted = false;
    };

    for (size_t i = 0; i < content.size(); ++i) {
        const char c = content[i];

        if (c == '\\') {
            // Characters that would otherwise form a token can be entered as
            // text by putting '\' in front of them. The escaped character
            // goes straight into the buffer and is not examined any further.
            i += 1;
            if (i < content.size()) {
                const char escaped = content[i];
                buffer.push_back(escaped);
                // Two input characters were consumed; keep the position in
                // step with the input.
                if (escaped == '\n') {
                    row += 1;
                    column = 1;
                } else {
                    column += 2;
                }
            }
            continue;
        }

        // Advance the position and extend the buffer.
        if (c == '\n') {
            row += 1;
            // The increment at the end of the iteration makes this 1 for the
            // first character of the new line.
            column = 0;
        }
        buffer.push_back(c);

        // Maintain the position of the upcoming text token while only
        // whitespace has been seen since the last token.
        if (!textStarted) {
            if (c == '\n') {
                previousRow += 1;
                previousColumn = 1;
            } else if (c == ' ') {
                previousColumn += 1;
            } else if (c == '\t') {
                previousColumn += 4;
            } else {
                textStarted = true;
            }
        }

        // Emit a single-character token if one has been met.
        switch (c) {
            case '[': {
                emitToken(TokenType::CooldownStart, TokenType::TextFragment);
                // Remember that the text up to the next ']' is a cooldown.
                cooldownStarted = true;
            } break;
            case ']': {
                if (!cooldownStarted) {
                    return {
                        tokens,
                        "Nevar beigt pārtraukuma norādīšanu, ja tas nav iesākts",
                        row,
                        column
                    };
                }
                emitToken(TokenType::CooldownEnd, TokenType::Cooldown);
                cooldownStarted = false;
            } break;
            case '-': {
                emitToken(TokenType::ElementDashStart, TokenType::TextFragment);
            } break;
            case '^': {
                emitToken(TokenType::ElementOrderModifier, TokenType::TextFragment);
            } break;
            case ':': {
                emitToken(TokenType::MatchGroupEnd, TokenType::TextFragment);
            } break;
            case '>': {
                emitToken(TokenType::QuestionEnd, TokenType::TextFragment);
            } break;
            case '+': {
                emitToken(TokenType::ElementPlusStart, TokenType::TextFragment);
            } break;
            default: {
            } break;
        }

        column += 1;
    }

    // Append the end-of-file token to simplify parsing; any remaining
    // buffered text is emitted before it.
    tokenWithBuffer(
        TokenType::EndOfFile,
        0,
        TokenType::TextFragment
    );

    if (debug) {
        std::cout << "SECTION: LEXER:\n";
        std::cout << std::format("Tekstvienību daudzums: {}", tokens.size()) << std::endl;
        for (const Token& token : tokens) {
            std::cout << token.toString();
        }
        std::cout << "SECTION END: LEXER\n";
    }

    // Lexical analysis succeeded - no error is returned.
    return {tokens, ""};
}
|
|
|
|
/*
|
|
* Tekstvienības nosaukums latviešu valodā.
|
|
*/
|
|
std::string Token::toString(const TokenType* ttype) {
|
|
switch (*ttype) {
|
|
case TokenType::TextFragment: return "teksta fragments";
|
|
case TokenType::QuestionEnd: return "jautājuma beigas";
|
|
case TokenType::MatchGroupEnd: return "grupas beigas";
|
|
case TokenType::ElementDashStart: return "svītras elementa sākums";
|
|
case TokenType::ElementOrderModifier: return "secības elementa sākums";
|
|
case TokenType::ElementPlusStart: return "plusa elementa sākums";
|
|
case TokenType::Cooldown: return "pārtraukums";
|
|
case TokenType::CooldownStart: return "pātraukuma sākums";
|
|
case TokenType::CooldownEnd: return "pātraukuma beigas";
|
|
case TokenType::EndOfFile: return "faila beigas";
|
|
default: return "neatpazīta tekstvienība";
|
|
}
|
|
}
|
|
|
|
/*
 * Debug representation of the token: its type name, content and position on
 * one line. For text fragments the content is shown with newlines removed
 * and every run of two or more whitespace characters replaced by one space.
 */
std::string Token::toString() const {
    static const std::regex newlinePattern("\n", std::regex_constants::ECMAScript);
    static const std::regex whitespaceRunPattern("\\s\\s+", std::regex_constants::ECMAScript);

    std::string shown = content;
    if (tokenType == TokenType::TextFragment) {
        const std::string withoutNewlines =
            std::regex_replace(shown, newlinePattern, "");
        shown = std::regex_replace(withoutNewlines, whitespaceRunPattern, " ");
    }

    return std::format(
        "{}: \"{}\" ({}:{})\n",
        toString(&tokenType),
        shown,
        row,
        column
    );
}
|