Separate parser package

This commit is contained in:
jorenchik
2024-08-04 22:38:01 +03:00
parent 1164a10846
commit e824115fae
2 changed files with 29 additions and 25 deletions

View File

@@ -0,0 +1,245 @@
package parser
import (
"fmt"
"strings"
"log"
"github.com/jorenchik/mdemory/src/compiler/lexer"
)
// Question is implemented by every parsed question kind and provides a
// human-readable rendering of it.
type Question interface {
	ToString() string
}
// SingleAnswerQuestion is a question that expects exactly one answer.
type SingleAnswerQuestion struct {
	id       string // identifier parsed from the source
	question string // question text
	answer   string // the single expected answer
	section  string // enclosing section name ("" when none)
}
// Choice is one selectable option of a multiple choice question.
type Choice struct {
	answer    string // option text
	isCorrect bool   // true when this option is a correct answer
}
// MultipleChoiceQuestion is a question with several options, any of which
// may be marked correct.
type MultipleChoiceQuestion struct {
	id       string   // identifier parsed from the source
	question string   // question text
	choices  []Choice // options in source order
	section  string   // enclosing section name ("" when none)
}
// ToString renders the question as a single line of the form
// "<Single choice> (section) question: answer\n", with surrounding
// whitespace trimmed from the question and answer texts.
func (q SingleAnswerQuestion) ToString() string {
	const cutset = "\t\n "
	text := strings.Trim(q.question, cutset)
	answer := strings.Trim(q.answer, cutset)
	return fmt.Sprintf("<Single choice> (%s) %s: %s\n", q.section, text, answer)
}
// ToString renders the question header followed by one indented line per
// choice; correct choices are prefixed with '+', incorrect ones with '-'.
func (q MultipleChoiceQuestion) ToString() string {
	// strings.Builder avoids the quadratic cost of the previous
	// string += concatenation inside the loop.
	var b strings.Builder
	fmt.Fprintf(&b, "<Multi choice> (%s) %s\n", q.section, q.question)
	for _, c := range q.choices {
		opener := '-'
		if c.isCorrect {
			opener = '+'
		}
		fmt.Fprintf(&b, "\t%c %s\n", opener, strings.Trim(c.answer, "\t\n "))
	}
	return b.String()
}
// QuestionElement is an intermediate parse product: one "-" or "+" element
// that later becomes either a single answer or one choice.
type QuestionElement struct {
	isDash  bool   // true for '-' elements, false for '+' elements
	content string // raw element text
}
// automata maps each token type to the token types allowed to follow it.
// It is (re)assigned by ValidateGrammar on every call via parserAutomata.
// NOTE(review): mutable package-level state; a local inside ValidateGrammar
// would suffice unless unseen code also reads it — TODO confirm.
var automata map[lexer.TokenType][]lexer.TokenType
// CompilerErr is a parse/validation error carrying the source position
// at which it occurred.
type CompilerErr struct {
	message string // human-readable description
	row     int32  // source row reported by the lexer
	column  int32  // source column reported by the lexer
}

// Error formats the error as "row:column - message".
func (e CompilerErr) Error() string {
	pos := fmt.Sprintf("%d:%d", e.row, e.column)
	return pos + " - " + e.message
}
// contains reports whether e occurs in s.
func contains(s []lexer.TokenType, e lexer.TokenType) bool {
	for i := range s {
		if s[i] == e {
			return true
		}
	}
	return false
}
// parserAutomata builds the token-succession table: for each token type,
// the set of token types that may legally follow it. EOF maps to an empty
// (non-nil) list because nothing may follow it.
func parserAutomata() map[lexer.TokenType][]lexer.TokenType {
	return map[lexer.TokenType][]lexer.TokenType{
		lexer.TextFragment: {
			lexer.QuestionEnd,
			lexer.ElementDashStart,
			lexer.ElementPlusStart,
			lexer.SectionIdentifierStart,
			lexer.SectionStart,
			lexer.EOF,
			lexer.SectionEnd,
		},
		lexer.QuestionEnd:            {lexer.ElementDashStart, lexer.ElementPlusStart},
		lexer.ElementDashStart:       {lexer.IdentifierStart, lexer.TextFragment},
		lexer.ElementPlusStart:       {lexer.TextFragment},
		lexer.Identifier:             {lexer.IdentifierEnd, lexer.SectionStart},
		lexer.IdentifierStart:        {lexer.Identifier},
		lexer.IdentifierEnd:          {lexer.TextFragment},
		lexer.SectionIdentifierStart: {lexer.Identifier},
		lexer.SectionStart:           {lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF},
		lexer.SectionEnd:             {lexer.SectionIdentifierStart, lexer.ElementDashStart, lexer.EOF},
		lexer.SOF:                    {lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF},
		lexer.EOF:                    {},
	}
}
// ValidateGrammar checks every adjacent token pair against the succession
// table and returns a CompilerErr (positioned at the first offending token)
// for the first illegal pair, or nil when the whole stream is valid.
func ValidateGrammar(tokens []lexer.Token) error {
	automata = parserAutomata()
	for i := 1; i < len(tokens); i++ {
		prev, cur := tokens[i-1], tokens[i]
		if contains(automata[prev.TokenType], cur.TokenType) {
			continue
		}
		return CompilerErr{
			message: fmt.Sprintf(
				"Token %s cannot precede %s\n",
				lexer.ToString(&prev.TokenType),
				lexer.ToString(&cur.TokenType),
			),
			row:    prev.Row,
			column: prev.Column,
		}
	}
	return nil
}
// ParseQuestions tokenizes fileContents, validates the token grammar and
// builds the list of questions. An element list of length one yields a
// SingleAnswerQuestion; a longer list yields a MultipleChoiceQuestion.
// It returns the parsed questions, or an error from the lexer, the grammar
// validation, or an unhandled token.
func ParseQuestions(fileContents string) ([]Question, error) {
	tokens, err := lexer.TokenizeMdem([]rune(fileContents))
	if err != nil {
		return nil, err
	}
	// NOTE(review): debug dump of the lexer output (was guarded by
	// "if (true)"); consider removing or gating behind a flag.
	log.Println("Lexer output:")
	for _, tok := range tokens {
		fmt.Print(tok.ToString())
	}
	// BUG FIX: grammar errors previously called log.Fatal, terminating the
	// whole process; propagate the error to the caller instead.
	if err = ValidateGrammar(tokens); err != nil {
		return nil, err
	}
	questions := []Question{}
	section := ""
	for i := 0; i < len(tokens); {
		switch tokens[i].TokenType {
		case lexer.ElementDashStart:
			// Token layout (guaranteed by ValidateGrammar):
			// "-" IdentifierStart Identifier IdentifierEnd TextFragment ...
			id := tokens[i+2].Content
			questionText := tokens[i+4].Content
			questionElements := []QuestionElement{}
			i += 6
			// Collect "-"/"+" answer elements until the stream ends, a
			// non-element token appears, or a new question id starts.
			for i+1 < len(tokens) &&
				(tokens[i].TokenType == lexer.ElementDashStart ||
					tokens[i].TokenType == lexer.ElementPlusStart) &&
				tokens[i+1].TokenType != lexer.IdentifierStart {
				questionElements = append(questionElements, QuestionElement{
					isDash:  tokens[i].TokenType == lexer.ElementDashStart,
					content: tokens[i+1].Content,
				})
				i += 2
			}
			if len(questionElements) > 1 {
				q := MultipleChoiceQuestion{id: id, question: questionText}
				choices := make([]Choice, 0, len(questionElements))
				for _, el := range questionElements {
					// "+" elements mark correct choices.
					choices = append(choices, Choice{
						answer:    el.content,
						isCorrect: !el.isDash,
					})
				}
				if section != "" {
					q.section = section
				}
				q.choices = choices
				questions = append(questions, q)
			} else if len(questionElements) == 1 {
				q := SingleAnswerQuestion{
					id:       id,
					question: questionText,
					answer:   questionElements[0].content,
				}
				if section != "" {
					q.section = section
				}
				questions = append(questions, q)
			}
		case lexer.SectionIdentifierStart:
			section = tokens[i+1].Content
			i += 3
		case lexer.SectionEnd:
			section = ""
			i++
		case lexer.EOF:
			return questions, nil
		default:
			// BUG FIX: previously log.Fatalf exited here, making the error
			// return unreachable and leaving the message empty. Return a
			// descriptive, positioned error instead.
			return nil, CompilerErr{
				message: fmt.Sprintf(
					"Not handled: %s",
					lexer.ToString(&tokens[i].TokenType),
				),
				row:    tokens[i].Row,
				column: tokens[i].Column,
			}
		}
	}
	return questions, nil
}