Parsing

2026-03-22 00:26:21 +00:00 · 2024-08-04 14:41:10 +03:00
parent 26f008c670
commit 64a250bb77
2 changed files with 254 additions and 61 deletions
--- a/src/compiler/compiler.go
+++ b/src/compiler/compiler.go
@@ -7,11 +7,35 @@ import (
 	"strings"
 )

+var automata map[TokenType][]TokenType // maps a token type to the token types allowed to directly follow it; filled in main and consulted by the grammar validation loop
+
+type SingleAnswerQuestion struct { // question that main builds when exactly one answer element follows the question text
+	id string; // content of the token read as the question identifier
+	question string; // content of the token read as the question text
+	answer string; // content of the single answer element
+	section string; // identifier of the enclosing section; stays "" when no section is open
+}
+
+type Choice struct { // one selectable option of a ChoiceQuestion
+	answer string; // content of the element's text token
+	isCorrect bool; // set to !isDash in main: true for a plus element, false for a dash element
+}
+
+type ChoiceQuestion struct { // question that main builds when more than one answer element follows the question text
+	id string; // content of the token read as the question identifier
+	question string; // content of the token read as the question text
+	choices []Choice; // one entry per answer element, in the order they were read
+	section string; // identifier of the enclosing section; stays "" when no section is open
+}
+
+type Question interface {} // empty interface; main stores SingleAnswerQuestion and ChoiceQuestion values in it and tells them apart with a type switch
+
 func main() {
 	log.Println("Compilation started")

-	file, err := os.ReadFile("./input.mdem")
-	if (err != nil) { log.Fatal("Cannot open the input file")
+	file, err := os.ReadFile("/home/jorenchik/Code/mdemory/src/compiler/input.mdem")
+	if (err != nil) { 
+		log.Fatalf("Cannot open the input file: %s", err.Error())
 		return
 	}
 	fileContents := string(file)
@@ -22,13 +46,56 @@ func main() {
 		return
 	}

-	// prettyPrintTokens
-	for i := 0; i < len(tokens); i++ {
+	// defineParserAutomata 
+	automata = make(map[TokenType][]TokenType)
+	automata[TextFragment] = []TokenType{
+		QuestionEnd, ElementDashStart, ElementPlusStart, SectionIdentifierStart, SectionStart, EOF, SectionEnd,
+	}
+	automata[QuestionEnd] = []TokenType{
+		ElementDashStart, ElementPlusStart,
+	}               
+	automata[ElementDashStart] = []TokenType{
+		IdentifierStart, TextFragment,
+	}          
+	automata[ElementPlusStart] = []TokenType{
+		TextFragment,
+	}          
+	automata[Identifier] = []TokenType{
+		IdentifierEnd, SectionStart,
+	}                
+	automata[IdentifierStart] = []TokenType{
+		Identifier,
+	}            
+	automata[IdentifierEnd] = []TokenType{
+		TextFragment,
+	}             
+	automata[SectionIdentifierStart] = []TokenType{
+		Identifier,
+	}
+	automata[SectionStart] = []TokenType{
+		ElementDashStart, SectionIdentifierStart, EOF,
+	}
+	automata[SectionEnd] = []TokenType{
+		SectionIdentifierStart, ElementDashStart, EOF,
+	}
+	automata[SOF] = []TokenType{
+		ElementDashStart, SectionIdentifierStart, EOF,
+	}
+	automata[EOF] = []TokenType{}
+	
+	// validateGrammar
+	for i := 0; i < len(tokens) - 1; i++ {
 		token := tokens[i]
 		content := token.content
 		if (token.tokenType == TextFragment) {
-			content = strings.Replace(strings.Trim(content, " "), "\n", "\\n", -1)
+			content = strings.Replace(
+				strings.Trim(content, " "),
+				"\n",
+				"\\n",
+				-1,
+			)
 		}
+		nextToken := tokens[i + 1]
 		fmt.Printf(
 			"%s: \"%s\" %d:%d\n",
 			ToString(&token.tokenType),
@@ -36,7 +103,148 @@ func main() {
 			token.row,
 			token.column,
 		)
+		if (false) {
+			fmt.Print("Possible next tokens:")
+			for k:=0; k<len(automata[token.tokenType]); k++ {
+				ttype := automata[token.tokenType][k]
+				fmt.Printf(" %s,", ToString(&ttype))
+			}
+			fmt.Print("\n:")
+		}
+		if (!contains(automata[token.tokenType], nextToken.tokenType)) {
+			fmt.Printf(
+				"Token %s cannot precede %s\n",
+				ToString(&token.tokenType),
+				ToString(&nextToken.tokenType),
+			)
+			return
+		}
 	} 

+	// extract questions
+	questions := []Question{}
+	section := ""
+	i := 0
+	for {
+		if (i >= len(tokens)) {
+			break
+		}
+		if (tokens[i].tokenType == ElementDashStart) {
+			id := tokens[i + 2].content
+			question := tokens[i + 4].content
+			quesitonElements := []QuestionElement{}
+			i += 6
+			for {
+				if (i + 1 >= len(tokens) ||
+					!(tokens[i].tokenType == ElementDashStart ||
+						tokens[i].tokenType == ElementPlusStart) ||
+					tokens[i+1].tokenType == IdentifierStart) {
+					break
+				}
+				questionElement := QuestionElement{}
+				if (tokens[i].tokenType == ElementDashStart) {
+					questionElement.isDash = true 
+				} else {
+					questionElement.isDash = false 
+				}
+				questionElement.content = tokens[i+1].content
+				quesitonElements = append(quesitonElements, questionElement)
+				i += 2
+			}
+			if len(quesitonElements) > 1 {
+				question := ChoiceQuestion{
+					id: id,
+					question: question,
+				}
+				choices := []Choice{}
+				for k := 0; k < len(quesitonElements); k++ {
+					choice := Choice{}
+					choice.answer = quesitonElements[k].content
+					choice.isCorrect = !quesitonElements[k].isDash
+					choices = append(choices, choice)
+				}		
+				if (section != "") {
+					question.section = section
+				}
+				question.choices = choices
+				questions = append(questions, question)
+			} else if (len(quesitonElements) == 1) {
+				question := SingleAnswerQuestion{
+					id: id,
+					question: question,
+					answer: quesitonElements[0].content,
+				}
+				if (section != "") {
+					question.section = section
+				}
+				questions = append(questions, question)
+			}
+		} else if (tokens[i].tokenType == SectionIdentifierStart) {
+			section = tokens[i + 1].content
+			i += 3;
+		} else if (tokens[i].tokenType == SectionEnd) {
+			section = ""
+			i += 1
+		} else if (tokens[i].tokenType == EOF) {
+			break
+		} else {
+			log.Fatalf(
+				"Not handled: %s",
+				ToString(&tokens[i].tokenType),
+			)
+			return
+		}
+	}
+
+	for _, element := range questions {
+		switch element.(type) {
+		case SingleAnswerQuestion:
+			fmt.Printf(
+				"<Single choice> (%s) %s: %s\n",
+				element.(SingleAnswerQuestion).section,
+				strings.Trim(element.(SingleAnswerQuestion).question, "\t\n "),
+				strings.Trim(element.(SingleAnswerQuestion).answer, "\t\n "),
+			)
+		case ChoiceQuestion:
+			fmt.Printf(
+				"<Multi choice> (%s) %s\n",
+				element.(ChoiceQuestion).section,
+				element.(ChoiceQuestion).question,
+			)
+			for _, el := range element.(ChoiceQuestion).choices {
+				opener := '-'
+				if (el.isCorrect) {
+					opener = '+'
+				}
+				fmt.Printf("\t%c %s\n", opener, strings.Trim(el.answer, "\t\n "))
+			}
+		}
+	}
+
 	log.Println("Compilation completed")
 }
+
+type QuestionElement struct { // intermediate record for one answer element collected while extracting a question
+	isDash bool; // true when the element was opened by an ElementDashStart token, false for ElementPlusStart
+	content string; // content of the token that follows the opener
+} 
+
+
+type CompilerErr struct { // error value carrying a message plus two numeric fields; see Error for the rendered form
+	message string;	// free-form description, printed last by Error
+	row	     int32; // printed first by Error; presumably the source row of the offending token — TODO confirm against callers
+	column   int32; // printed second by Error; presumably the source column — TODO confirm against callers
+}
+
+func (e CompilerErr) Error() string { // Error renders the value as "<row>:<column> - <message>"
+	return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message)
+}
+
+func contains(s []TokenType, e TokenType) bool { // contains reports whether e equals any element of s
+    for _, a := range s { // linear scan; a nil or empty s falls through to the final return
+        if a == e {
+            return true // stop at the first match
+        }
+    }
+    return false // no element of s equals e
+}
--- a/src/compiler/lexer.go
+++ b/src/compiler/lexer.go
@@ -1,7 +1,6 @@
 package main 

 import (
-	"fmt"
 	"strings"
 )

@@ -14,14 +13,16 @@ var	previousColumn int32 =  -1
 var textStarted bool =   false

 type TokenType int
+
 const (
 	TextFragment TokenType = iota
-	QuestionEnd               
-	ElementDashStart          
-	ElementPlusStart          
-	Identifier                
-	IdentifierStart            
-	IdentifierEnd             
+	QuestionEnd
+	ElementDashStart
+	ElementPlusStart
+	Identifier              
+	IdentifierStart
+	IdentifierEnd
+	SectionIdentifierStart
 	SectionStart
 	SectionEnd
 	SOF
@@ -35,14 +36,8 @@ type Token struct {
 	column          int32;
 } 

-type LexingErr struct {
-	message string;	
-	row	     int32;
-	column   int32;
-}
-
-func makePostTextToken(ttype TokenType, tokenLen int32) {
-	if (len(strings.Trim(string(buffer), " \n")) - 1 > 0) {
+func makePostTextToken(ttype TokenType, tokenLen int32, textType TokenType) {
+	if (len(strings.Trim(string(buffer), " \n\t")) - 1 > 0) {
 		textFragment := []rune{}
 		for i := 0; i < len(buffer) - int(tokenLen); i++ {
 			element := buffer[i]
@@ -51,7 +46,7 @@ func makePostTextToken(ttype TokenType, tokenLen int32) {
 		tokens = append(
 			tokens,
 			Token{
-				tokenType: TextFragment,
+				tokenType: textType,
 				content: string(textFragment),
 				row: int32(previousRow),
 				column: int32(previousColumn),
@@ -95,40 +90,17 @@ func tokenize(fileRunes []rune) ( []Token, error ) {
 			}
 		}

-		trimmedBuffer := strings.Trim(string(buffer), " \n")
-		if (len(trimmedBuffer) > 2) {
-			lastTwo := buffer[len(trimmedBuffer) - 1:]
-			switch string(lastTwo) {
-			case "|>":
-				makePostTextToken(SectionStart, 2)
-				previousRow = row
-				previousColumn = column
-				textStarted = false
-				continue
-			case "<|":
-				makePostTextToken(SectionEnd, 2)
-				previousRow = row
-				previousColumn = column
-				textStarted = false
-				continue
-			}
-		}
-
 		switch c {
+		case '[':
+			makePostTextToken(IdentifierStart, 1, TextFragment)
+			previousRow = row
+			previousColumn = column
+			textStarted = false
 		case ']':
-			tokens = append(
-				tokens,
-				Token{
-					tokenType: ElementDashStart,
-					content: "[",
-					row: int32(row),
-					column: int32(column),
-				},
-			)
 			if (len(buffer) - 1 > 1) {
 				textFragment := []rune{}
-				trimmedStr := strings.Trim(string(buffer), " ")
-				for i := 1; i <  len(trimmedStr) - 1; i++ {
+				trimmedStr := strings.Trim(string(buffer), " \n\t")
+				for i := 0; i <  len(trimmedStr) - 1; i++ {
 					element := trimmedStr[i]
 					textFragment = append(textFragment, rune(element))
 				}
@@ -145,7 +117,7 @@ func tokenize(fileRunes []rune) ( []Token, error ) {
 			tokens = append(
 				tokens,
 				Token{
-					tokenType: ElementDashStart,
+					tokenType: IdentifierEnd,
 					content: "]",
 					row: int32(row),
 					column: int32(column),
@@ -155,24 +127,39 @@ func tokenize(fileRunes []rune) ( []Token, error ) {
 			previousColumn = column
 			textStarted = false
 			buffer = []rune{}
-		case '+':
-			makePostTextToken(ElementPlusStart, 1)
+		case '#':
+			makePostTextToken(SectionIdentifierStart, 1, TextFragment)
+			previousRow = row
+			previousColumn = column
+			textStarted = false
+		case '{':
+			makePostTextToken(SectionStart, 1, Identifier)
+			previousRow = row
+			previousColumn = column
+			textStarted = false
+		case '}':
+			makePostTextToken(SectionEnd, 1, TextFragment)
 			previousRow = row
 			previousColumn = column
 			textStarted = false
 		case '-':
-			makePostTextToken(ElementDashStart, 1)
+			makePostTextToken(ElementDashStart, 1, TextFragment)
 			previousRow = row
 			previousColumn = column
 			textStarted = false
 		case '>':
-			makePostTextToken(QuestionEnd, 1)
+			makePostTextToken(QuestionEnd, 1, TextFragment)
 			previousRow = row
 			previousColumn = column
+		case '+':
+			makePostTextToken(ElementPlusStart, 1, TextFragment)
+			previousRow = row
+			previousColumn = column
+			textStarted = false
 		}
 		column += 1
 	}
-	makePostTextToken(EOF, 0)
+	makePostTextToken(EOF, 0, TextFragment)
 	return tokens, nil
 }

@@ -192,6 +179,8 @@ func ToString (ttype *TokenType) string {
 		return "IdentifierStart"            
 	case IdentifierEnd:
 		return "IdentifierEnd"             
+	case SectionIdentifierStart:
+		return "SectionIdentifierStart"             
 	case SectionStart:
 		return "SectionStart"             
 	case SectionEnd:
@@ -202,7 +191,3 @@ func ToString (ttype *TokenType) string {
 		return "NOT_DEFINED"
 	}
 }
-
-func (e LexingErr) Error() string {
-	return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message)
-}