Files
mdemory/src/compiler/parser/parser.go

279 lines
6.8 KiB
Go

package parser
import (
"fmt"
"github.com/jorenchik/mdemory/src/compiler/lexer"
"log"
"strings"
)
// Question is the common interface for every parsed question kind
// (SingleAnswerQuestion and MultipleChoiceQuestion both satisfy it).
type Question interface {
	// ToString renders the question as a human-readable string,
	// terminated by a newline.
	ToString() string
}
// SingleAnswerQuestion is a question with exactly one answer element.
type SingleAnswerQuestion struct {
	ID       string // optional identifier; "" when the source had none
	Question string // raw question text (whitespace-trimmed only on render)
	Answer   string // raw answer text
	Section  string // enclosing section name; "" when outside any section
}
// Choice is a single option of a MultipleChoiceQuestion.
type Choice struct {
	answer    string // raw option text
	isCorrect bool   // true when the option was introduced with '+'
}
// MultipleChoiceQuestion is a question with two or more choices,
// any of which may be marked correct.
type MultipleChoiceQuestion struct {
	id       string   // optional identifier; "" when the source had none
	question string   // raw question text
	choices  []Choice // parsed options in source order
	section  string   // enclosing section name; "" when outside any section
}
// ToString renders the question as one aligned, newline-terminated
// summary line. Question and answer text are whitespace-trimmed so
// surrounding newlines from the source do not break the layout.
func (question SingleAnswerQuestion) ToString() string {
	return fmt.Sprintf(
		"%20s: section: %-10s id: %-10s %-30s: %-30s\n",
		"<Single choice>",
		question.Section,
		question.ID,
		// TrimSpace also strips '\r' (and other Unicode spaces), so
		// CRLF source files render cleanly; the old cutset "\t\n "
		// left a stray carriage return behind.
		strings.TrimSpace(question.Question),
		strings.TrimSpace(question.Answer),
	)
}
// ToString renders the question header on one line followed by one
// indented line per choice; correct choices are prefixed with '+',
// incorrect ones with '-'.
func (question MultipleChoiceQuestion) ToString() string {
	// Accumulate with strings.Builder instead of `acc += ...`, which
	// reallocates the whole string on every iteration (quadratic).
	var b strings.Builder
	fmt.Fprintf(
		&b,
		"%20s: section: %-10s id: %-10s %-30s\n",
		"<Multi choice>",
		question.section,
		question.id,
		question.question,
	)
	for _, choice := range question.choices {
		opener := '-'
		if choice.isCorrect {
			opener = '+'
		}
		// TrimSpace also strips '\r', unlike the old cutset "\t\n ",
		// so CRLF source files render cleanly.
		fmt.Fprintf(&b, "\t%c %s\n", opener, strings.TrimSpace(choice.answer))
	}
	return b.String()
}
// QuestionElement is an intermediate parse node: one "- text" or
// "+ text" element collected while scanning a question's body.
type QuestionElement struct {
	isDash  bool   // true for '-' elements, false for '+' elements
	content string // raw text following the marker
}
var automata map[lexer.TokenType][]lexer.TokenType
// CompilerErr is a positioned compilation error: a message plus the
// row and column in the source it refers to.
type CompilerErr struct {
	message string
	row     int32
	column  int32
}

// Error satisfies the built-in error interface, formatting the error
// as "row:column - message".
func (ce CompilerErr) Error() string {
	position := fmt.Sprintf("%d:%d", ce.row, ce.column)
	return position + " - " + ce.message
}
// contains reports whether e occurs in s.
func contains(s []lexer.TokenType, e lexer.TokenType) bool {
	for i := range s {
		if s[i] == e {
			return true
		}
	}
	return false
}
// parserAutomata builds the grammar's adjacency table: for each token
// type, the set of token types permitted to appear directly after it.
// An empty slice (EOF) means nothing may follow.
func parserAutomata() map[lexer.TokenType][]lexer.TokenType {
	return map[lexer.TokenType][]lexer.TokenType{
		lexer.TextFragment: {
			lexer.QuestionEnd,
			lexer.ElementDashStart,
			lexer.ElementPlusStart,
			lexer.SectionIdentifierStart,
			lexer.SectionStart,
			lexer.EOF,
			lexer.SectionEnd,
		},
		lexer.QuestionEnd: {
			lexer.ElementDashStart, lexer.ElementPlusStart,
		},
		lexer.ElementDashStart: {
			lexer.IdentifierStart, lexer.TextFragment,
		},
		lexer.ElementPlusStart: {
			lexer.TextFragment,
		},
		lexer.Identifier: {
			lexer.IdentifierEnd, lexer.SectionStart,
		},
		lexer.IdentifierStart: {
			lexer.Identifier,
		},
		lexer.IdentifierEnd: {
			lexer.TextFragment,
		},
		lexer.SectionIdentifierStart: {
			lexer.Identifier,
		},
		lexer.SectionStart: {
			lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF,
		},
		lexer.SectionEnd: {
			lexer.SectionIdentifierStart, lexer.ElementDashStart, lexer.EOF,
		},
		lexer.SOF: {
			lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF,
		},
		lexer.EOF: {},
	}
}
// ValidateGrammar checks that every adjacent pair of tokens is
// permitted by the parser automaton. It returns a CompilerErr located
// at the first offending token, or nil when the sequence is valid.
func ValidateGrammar(tokens []lexer.Token) error {
	// Use a local table rather than writing to the package-level
	// `automata` variable: avoids shared mutable state between calls.
	table := parserAutomata()
	for i := 0; i < len(tokens)-1; i++ {
		token := tokens[i]
		nextToken := tokens[i+1]
		if contains(table[token.TokenType], nextToken.TokenType) {
			continue
		}
		return CompilerErr{
			// No trailing "\n": error strings should not end with
			// punctuation or newlines (callers decide layout).
			message: fmt.Sprintf(
				"Token %s cannot precede %s",
				lexer.ToString(&token.TokenType),
				lexer.ToString(&nextToken.TokenType),
			),
			row:    token.Row,
			column: token.Column,
		}
	}
	return nil
}
// ParseQuestions tokenizes fileContents, validates the token stream
// against the grammar, and builds the list of parsed questions.
//
// A question whose body holds exactly one "- answer" element becomes
// a SingleAnswerQuestion; two or more elements become a
// MultipleChoiceQuestion whose '+' elements are marked correct.
// Section headers set the Section/section field of every question
// parsed until the section is closed.
func ParseQuestions(fileContents string) ([]Question, error) {
	tokens, err := lexer.TokenizeMdem([]rune(fileContents))
	if err != nil {
		return nil, err
	}
	// Debug-only dump of the lexer output. It used to be guarded by
	// `if true` (always on); keep the code but disable it by default.
	const debugLexer = false
	if debugLexer {
		log.Println("Lexer output:")
		for _, el := range tokens {
			fmt.Print(el.ToString())
		}
	}
	// Propagate the grammar error to the caller instead of calling
	// log.Fatal, which terminated the whole process and made the
	// declared error return unreachable.
	if err = ValidateGrammar(tokens); err != nil {
		return nil, err
	}
	questions := []Question{}
	section := "" // name of the currently open section, "" if none
	i := 0
	for i < len(tokens) {
		// - [identifier] question_token >
		if tokens[i].TokenType == lexer.ElementDashStart {
			var id string
			var questionText string
			var questionElements []QuestionElement
			if tokens[i+1].TokenType == lexer.IdentifierStart {
				// "- [id] question >": id at i+2, question text at i+4.
				id = tokens[i+2].Content
				questionText = tokens[i+4].Content
				questionElements = []QuestionElement{}
				i += 6
			} else {
				// "- question >": question text at i+1.
				id = ""
				questionText = tokens[i+1].Content
				questionElements = []QuestionElement{}
				i += 3
			}
			for {
				// Pointer is on the start of an element:
				//   - a_question >
				//   - [identifier] a_question >
				//   - an_element
				// Terminate if we encounter the next question.
				if i+3 < len(tokens) &&
					tokens[i+3].TokenType != lexer.EOF {
					offset := 0
					if tokens[i+1].TokenType == lexer.IdentifierStart {
						offset = 5
					} else {
						offset = 2
					}
					if i+offset < len(tokens) &&
						tokens[i+offset].TokenType == lexer.QuestionEnd {
						break
					}
				}
				if i+2 >= len(tokens) {
					break
				}
				questionElements = append(questionElements, QuestionElement{
					// '-' marks a plain element, '+' a correct choice.
					isDash:  tokens[i].TokenType == lexer.ElementDashStart,
					content: tokens[i+1].Content,
				})
				i += 2
			}
			if len(questionElements) > 1 {
				// Two or more elements: multiple-choice question.
				choices := make([]Choice, 0, len(questionElements))
				for _, el := range questionElements {
					choices = append(choices, Choice{
						answer:    el.content,
						isCorrect: !el.isDash,
					})
				}
				// Assigning section unconditionally is equivalent to the
				// old `if section != ""` guard: the zero value is "".
				questions = append(questions, MultipleChoiceQuestion{
					id:       id,
					question: questionText,
					choices:  choices,
					section:  section,
				})
			} else if len(questionElements) == 1 {
				// Exactly one element: single-answer question.
				questions = append(questions, SingleAnswerQuestion{
					ID:       id,
					Question: questionText,
					Answer:   questionElements[0].content,
					Section:  section,
				})
			}
			// len(questionElements) == 0: empty body, nothing emitted
			// (same as before).
		} else if tokens[i].TokenType == lexer.SectionIdentifierStart {
			// Section header: identifier at i+1, then skip past its close.
			section = tokens[i+1].Content
			i += 3
		} else if tokens[i].TokenType == lexer.SectionEnd {
			section = ""
			i++
		} else if tokens[i].TokenType == lexer.EOF {
			break
		} else {
			// Previously log.Fatalf (killing the process) followed by an
			// unreachable return with an empty message; report the
			// unhandled token to the caller instead.
			return nil, CompilerErr{
				message: fmt.Sprintf(
					"Not handled: %s",
					lexer.ToString(&tokens[i].TokenType),
				),
				row:    tokens[i].Row,
				column: tokens[i].Column,
			}
		}
	}
	return questions, nil
}