Files
mdemory/src/compiler/parser/parser.go

279 lines
6.8 KiB
Go

package parser
import (
"fmt"
"github.com/jorenchik/mdemory/src/compiler/lexer"
"log"
"strings"
)
// Question is the common interface for every parsed question kind
// (SingleAnswerQuestion and MultipleChoiceQuestion both satisfy it).
type Question interface {
	// ToString renders the question as a human-readable string,
	// terminated by a newline.
	ToString() string
}
// SingleAnswerQuestion is a question with exactly one answer element.
type SingleAnswerQuestion struct {
	ID       string // optional identifier; "" when the source had none
	Question string // raw question text (whitespace-trimmed only on render)
	Answer   string // raw answer text
	Section  string // enclosing section name; "" when outside any section
}
// Choice is a single option of a MultipleChoiceQuestion.
type Choice struct {
	answer    string // raw option text
	isCorrect bool   // true when the option was introduced with '+'
}
// MultipleChoiceQuestion is a question with two or more choices,
// any of which may be marked correct.
type MultipleChoiceQuestion struct {
	id       string   // optional identifier; "" when the source had none
	question string   // raw question text
	choices  []Choice // parsed options in source order
	section  string   // enclosing section name; "" when outside any section
}
// ToString renders the question as one aligned, newline-terminated
// summary line. Question and answer text are whitespace-trimmed so
// surrounding newlines from the source do not break the layout.
func (question SingleAnswerQuestion) ToString() string {
	return fmt.Sprintf(
		"%20s: section: %-10s id: %-10s %-30s: %-30s\n",
		"<Single choice>",
		question.Section,
		question.ID,
		// TrimSpace also strips '\r' (and other Unicode spaces), so
		// CRLF source files render cleanly; the old cutset "\t\n "
		// left a stray carriage return behind.
		strings.TrimSpace(question.Question),
		strings.TrimSpace(question.Answer),
	)
}
// ToString renders the question header on one line followed by one
// indented line per choice; correct choices are prefixed with '+',
// incorrect ones with '-'.
func (question MultipleChoiceQuestion) ToString() string {
	// Accumulate with strings.Builder instead of `acc += ...`, which
	// reallocates the whole string on every iteration (quadratic).
	var b strings.Builder
	fmt.Fprintf(
		&b,
		"%20s: section: %-10s id: %-10s %-30s\n",
		"<Multi choice>",
		question.section,
		question.id,
		question.question,
	)
	for _, choice := range question.choices {
		opener := '-'
		if choice.isCorrect {
			opener = '+'
		}
		// TrimSpace also strips '\r', unlike the old cutset "\t\n ",
		// so CRLF source files render cleanly.
		fmt.Fprintf(&b, "\t%c %s\n", opener, strings.TrimSpace(choice.answer))
	}
	return b.String()
}
// QuestionElement is an intermediate parse node: one "- text" or
// "+ text" element collected while scanning a question's body.
type QuestionElement struct {
	isDash  bool   // true for '-' elements, false for '+' elements
	content string // raw text following the marker
}
var automata map[lexer.TokenType][]lexer.TokenType
// CompilerErr is a positioned compilation error: a message plus the
// row and column in the source it refers to.
type CompilerErr struct {
	message string
	row     int32
	column  int32
}

// Error satisfies the built-in error interface, formatting the error
// as "row:column - message".
func (ce CompilerErr) Error() string {
	position := fmt.Sprintf("%d:%d", ce.row, ce.column)
	return position + " - " + ce.message
}
// contains reports whether e occurs in s.
func contains(s []lexer.TokenType, e lexer.TokenType) bool {
	for i := range s {
		if s[i] == e {
			return true
		}
	}
	return false
}
// parserAutomata builds the grammar's adjacency table: for each token
// type, the set of token types permitted to appear directly after it.
// An empty slice (EOF) means nothing may follow.
func parserAutomata() map[lexer.TokenType][]lexer.TokenType {
	return map[lexer.TokenType][]lexer.TokenType{
		lexer.TextFragment: {
			lexer.QuestionEnd,
			lexer.ElementDashStart,
			lexer.ElementPlusStart,
			lexer.SectionIdentifierStart,
			lexer.SectionStart,
			lexer.EOF,
			lexer.SectionEnd,
		},
		lexer.QuestionEnd: {
			lexer.ElementDashStart, lexer.ElementPlusStart,
		},
		lexer.ElementDashStart: {
			lexer.IdentifierStart, lexer.TextFragment,
		},
		lexer.ElementPlusStart: {
			lexer.TextFragment,
		},
		lexer.Identifier: {
			lexer.IdentifierEnd, lexer.SectionStart,
		},
		lexer.IdentifierStart: {
			lexer.Identifier,
		},
		lexer.IdentifierEnd: {
			lexer.TextFragment,
		},
		lexer.SectionIdentifierStart: {
			lexer.Identifier,
		},
		lexer.SectionStart: {
			lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF,
		},
		lexer.SectionEnd: {
			lexer.SectionIdentifierStart, lexer.ElementDashStart, lexer.EOF,
		},
		lexer.SOF: {
			lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF,
		},
		lexer.EOF: {},
	}
}
// ValidateGrammar checks that every adjacent pair of tokens is
// permitted by the parser automaton. It returns a CompilerErr located
// at the first offending token, or nil when the sequence is valid.
func ValidateGrammar(tokens []lexer.Token) error {
	// Use a local table rather than writing to the package-level
	// `automata` variable: avoids shared mutable state between calls.
	table := parserAutomata()
	for i := 0; i < len(tokens)-1; i++ {
		token := tokens[i]
		nextToken := tokens[i+1]
		if contains(table[token.TokenType], nextToken.TokenType) {
			continue
		}
		return CompilerErr{
			// No trailing "\n": error strings should not end with
			// punctuation or newlines (callers decide layout).
			message: fmt.Sprintf(
				"Token %s cannot precede %s",
				lexer.ToString(&token.TokenType),
				lexer.ToString(&nextToken.TokenType),
			),
			row:    token.Row,
			column: token.Column,
		}
	}
	return nil
}
// ParseQuestions tokenizes fileContents, validates the token stream
// against the grammar, and builds the list of parsed questions.
//
// A question whose body holds exactly one "- answer" element becomes
// a SingleAnswerQuestion; two or more elements become a
// MultipleChoiceQuestion whose '+' elements are marked correct.
// Section headers set the Section/section field of every question
// parsed until the section is closed.
func ParseQuestions(fileContents string) ([]Question, error) {
	tokens, err := lexer.TokenizeMdem([]rune(fileContents))
	if err != nil {
		return nil, err
	}
	// Debug-only dump of the lexer output. It used to be guarded by
	// `if true` (always on); keep the code but disable it by default.
	const debugLexer = false
	if debugLexer {
		log.Println("Lexer output:")
		for _, el := range tokens {
			fmt.Print(el.ToString())
		}
	}
	// Propagate the grammar error to the caller instead of calling
	// log.Fatal, which terminated the whole process and made the
	// declared error return unreachable.
	if err = ValidateGrammar(tokens); err != nil {
		return nil, err
	}
	questions := []Question{}
	section := "" // name of the currently open section, "" if none
	i := 0
	for i < len(tokens) {
		// - [identifier] question_token >
		if tokens[i].TokenType == lexer.ElementDashStart {
			var id string
			var questionText string
			var questionElements []QuestionElement
			if tokens[i+1].TokenType == lexer.IdentifierStart {
				// "- [id] question >": id at i+2, question text at i+4.
				id = tokens[i+2].Content
				questionText = tokens[i+4].Content
				questionElements = []QuestionElement{}
				i += 6
			} else {
				// "- question >": question text at i+1.
				id = ""
				questionText = tokens[i+1].Content
				questionElements = []QuestionElement{}
				i += 3
			}
			for {
				// Pointer is on the start of an element:
				//   - a_question >
				//   - [identifier] a_question >
				//   - an_element
				// Terminate if we encounter the next question.
				if i+3 < len(tokens) &&
					tokens[i+3].TokenType != lexer.EOF {
					offset := 0
					if tokens[i+1].TokenType == lexer.IdentifierStart {
						offset = 5
					} else {
						offset = 2
					}
					if i+offset < len(tokens) &&
						tokens[i+offset].TokenType == lexer.QuestionEnd {
						break
					}
				}
				if i+2 >= len(tokens) {
					break
				}
				questionElements = append(questionElements, QuestionElement{
					// '-' marks a plain element, '+' a correct choice.
					isDash:  tokens[i].TokenType == lexer.ElementDashStart,
					content: tokens[i+1].Content,
				})
				i += 2
			}
			if len(questionElements) > 1 {
				// Two or more elements: multiple-choice question.
				choices := make([]Choice, 0, len(questionElements))
				for _, el := range questionElements {
					choices = append(choices, Choice{
						answer:    el.content,
						isCorrect: !el.isDash,
					})
				}
				// Assigning section unconditionally is equivalent to the
				// old `if section != ""` guard: the zero value is "".
				questions = append(questions, MultipleChoiceQuestion{
					id:       id,
					question: questionText,
					choices:  choices,
					section:  section,
				})
			} else if len(questionElements) == 1 {
				// Exactly one element: single-answer question.
				questions = append(questions, SingleAnswerQuestion{
					ID:       id,
					Question: questionText,
					Answer:   questionElements[0].content,
					Section:  section,
				})
			}
			// len(questionElements) == 0: empty body, nothing emitted
			// (same as before).
		} else if tokens[i].TokenType == lexer.SectionIdentifierStart {
			// Section header: identifier at i+1, then skip past its close.
			section = tokens[i+1].Content
			i += 3
		} else if tokens[i].TokenType == lexer.SectionEnd {
			section = ""
			i++
		} else if tokens[i].TokenType == lexer.EOF {
			break
		} else {
			// Previously log.Fatalf (killing the process) followed by an
			// unreachable return with an empty message; report the
			// unhandled token to the caller instead.
			return nil, CompilerErr{
				message: fmt.Sprintf(
					"Not handled: %s",
					lexer.ToString(&tokens[i].TokenType),
				),
				row:    tokens[i].Row,
				column: tokens[i].Column,
			}
		}
	}
	return questions, nil
}