Text content cleaning now happens during parsing; adjusted the cleaning approach

This commit is contained in:
jorenchik
2024-09-29 11:32:11 +03:00
parent 031803cbad
commit 586ad72e70
5 changed files with 37 additions and 28 deletions

View File

@@ -0,0 +1,2 @@
#pragma once

// Needed for std::string in the declaration below, so this header can be
// included first in a translation unit without relying on the includer.
#include <string>

/// @brief Normalizes the whitespace of a piece of parsed text.
///
/// The returned string contains no '\n' or '\t' characters, and every run of
/// two or more whitespace characters is reduced to a single space. The exact
/// rules live next to the definition in stringUtils.cpp.
///
/// @param answer Text to clean; taken by value, the caller's string is untouched.
/// @return The cleaned copy.
std::string cleanContent(std::string answer);

View File

@@ -10,7 +10,6 @@
#include <qtoolbutton.h>
#include <qwindow.h>
#include <qwindowdefs.h>
#include <regex>
#include <string>
#include <QApplication>
@@ -37,6 +36,7 @@
#include "api.h"
#include "parser.h"
#include "trainWindow.h"
#include "stringUtils.h"
struct Page {
int start;
@@ -158,21 +158,6 @@ QToolButton *load;
QToolButton *practice;
const std::regex doubleSpaceExp(
" ",
std::regex_constants::ECMAScript | std::regex_constants::icase
);
const std::regex tabExp(
"\t",
std::regex_constants::ECMAScript | std::regex_constants::icase
);
const std::regex newLineExp(
"\n",
std::regex_constants::ECMAScript | std::regex_constants::icase
);
void CreateMdems(std::vector<Question*>& questions) {
hMdemScroll->removeItem(mdemSpacer);
@@ -191,12 +176,6 @@ void CreateMdems(std::vector<Question*>& questions) {
}
}
auto transformAnswer = [](std::string answer) -> std::string {
answer = std::regex_replace(answer, doubleSpaceExp, " ");
answer = std::regex_replace(answer, tabExp, "");
answer = std::regex_replace(answer, newLineExp, "");
return answer;
};
for (size_t i = 0; i < questions.size(); ++i) {
if (MultiElementQuestion* mw = dynamic_cast<MultiElementQuestion*>(questions[i])) {
@@ -206,7 +185,6 @@ void CreateMdems(std::vector<Question*>& questions) {
auto choices = mw->Choices;
for (size_t k = 0; k < choices.size(); ++k) {
auto answer = choices[k].Answer;
answer = transformAnswer(answer);
answer = std::format("- {}", answer);
if (k < mdems[i]->backLabels.size()) {
mdems[i]->backLabels[k]->setText(QString::fromStdString(answer));

View File

@@ -10,6 +10,7 @@ add_library(
parser.cpp
time.cpp
api.cpp
stringUtils.cpp
)
add_executable(transpiler ${SOURCES})

View File

@@ -11,6 +11,7 @@
#include "lexer.h"
#include "result.h"
#include "parser.h"
#include "stringUtils.h"
struct QuestionElement {
bool isDash;
@@ -324,12 +325,14 @@ Result<std::vector<Question*>> ParseQuestions(const std::vector<Token>& tokens)
if (questionElement.isGroup) {
++k;
auto group = Group();
group.name = questionElement.content;
group.name = cleanContent(questionElement.content);
question->Groups.push_back(group);
} else {
if (k >= 0) {
question->Groups[k].elements.push_back(
cleanContent(
questionElement.content
)
);
}
}
@@ -341,11 +344,11 @@ Result<std::vector<Question*>> ParseQuestions(const std::vector<Token>& tokens)
} else {
auto *question = new MultiElementQuestion();
question->ID = id;
question->QuestionText = questionText;
question->QuestionText = cleanContent(questionText);
question->Section = section;
for (const auto& elem : questionElements) {
Choice choice;
choice.Answer = elem.content;
choice.Answer = cleanContent(elem.content);
choice.IsCorrect = !elem.isDash;
question->Choices.push_back(choice);
}

View File

@@ -0,0 +1,25 @@
#include <regex>
#include "stringUtils.h"
// The patterns below are compiled once, when this translation unit is
// initialised, instead of on every cleanContent() call. They match whitespace
// only, so no case-insensitivity flag is needed; ECMAScript is the default
// grammar of std::regex.

// One or more line feeds sitting directly between two non-whitespace
// characters. The preceding character is captured so it can be written back;
// the following one is only peeked at (lookahead), which lets consecutive
// breaks such as "a\nb\nc" all match.
const std::regex wordBreakingNewLineExp("(\\S)\\n+(?=\\S)");

// A run of two or more whitespace characters of any kind.
const std::regex doubleOrMoreSpaceExp("\\s\\s+");

// A run of one or more tabs.
const std::regex tabExp("\\t+");

// A run of one or more line feeds.
const std::regex newLineExp("\\n+");

/// @brief Normalizes the whitespace of a piece of parsed text.
///
/// Steps, in order:
///  1. A line break directly between two non-whitespace characters becomes a
///     single space, so the words on both sides stay separated.
///  2. Every remaining line feed (leading, trailing, or next to other
///     whitespace) is removed.
///  3. Each run of tabs becomes a single space.
///  4. Each run of two or more whitespace characters becomes a single space.
///
/// A lone whitespace character other than '\n' and '\t' (for example a single
/// '\r') is left as it is. Leading and trailing spaces are not trimmed.
///
/// @param answer Text to clean; taken by value, the caller's string is untouched.
/// @return The cleaned copy.
std::string cleanContent(std::string answer) {
    // Has to run before the blanket newline removal below; otherwise
    // "foo\nbar" would be glued together into "foobar".
    answer = std::regex_replace(answer, wordBreakingNewLineExp, "$1 ");
    answer = std::regex_replace(answer, newLineExp, "");
    answer = std::regex_replace(answer, tabExp, " ");
    answer = std::regex_replace(answer, doubleOrMoreSpaceExp, " ");
    return answer;
}