Lexer as a package

This commit is contained in:
jorenchik
2024-08-04 21:23:24 +03:00
parent eeb42b6a32
commit 1164a10846
4 changed files with 89 additions and 81 deletions

View File

@@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"log" "log"
"os" "os"
"github.com/jorenchik/mdemory/src/compiler/lexer"
) )
func main() { func main() {
@@ -16,7 +17,7 @@ func main() {
} }
fileContents := string(file) fileContents := string(file)
tokens, err := tokenizeMdem([]rune(fileContents)) tokens, err := lexer.TokenizeMdem([]rune(fileContents))
if (err != nil) { if (err != nil) {
fmt.Printf("%s\n", err.Error()) fmt.Printf("%s\n", err.Error())
return return
@@ -29,7 +30,7 @@ func main() {
} }
automata = parserAutomata() automata = parserAutomata()
err = validateGrammar() err = validateGrammar(tokens)
if (err != nil) { if (err != nil) {
log.Fatal(err.Error()) log.Fatal(err.Error())
} }

View File

@@ -1,4 +1,4 @@
package main package lexer
import ( import (
"fmt" "fmt"
@@ -30,15 +30,15 @@ const (
) )
type Token struct { type Token struct {
tokenType TokenType; TokenType TokenType;
content string; Content string;
row int32; Row int32;
column int32; Column int32;
} }
func (token Token)ToString() string { func (token Token)ToString() string {
content := token.content content := token.Content
if (token.tokenType == TextFragment) { if (token.TokenType == TextFragment) {
content = strings.Replace( content = strings.Replace(
strings.Trim(content, " "), strings.Trim(content, " "),
"\n", "\n",
@@ -48,10 +48,10 @@ func (token Token)ToString() string {
} }
return fmt.Sprintf( return fmt.Sprintf(
"%s: \"%s\" %d:%d\n", "%s: \"%s\" %d:%d\n",
ToString(&token.tokenType), ToString(&token.TokenType),
content, content,
token.row, token.Row,
token.column, token.Column,
) )
} }
@@ -67,20 +67,20 @@ func makePostTextToken(ttype TokenType, tokenLen int32, textType TokenType) {
tokens = append( tokens = append(
tokens, tokens,
Token{ Token{
tokenType: textType, TokenType: textType,
content: string(textFragment), Content: string(textFragment),
row: int32(previousRow), Row: int32(previousRow),
column: int32(previousColumn), Column: int32(previousColumn),
}, },
) )
} }
tokens = append( tokens = append(
tokens, tokens,
Token{ Token{
tokenType: ttype, TokenType: ttype,
content: string(buffer[len(buffer)-int(tokenLen):]), Content: string(buffer[len(buffer)-int(tokenLen):]),
row: int32(row), Row: int32(row),
column: int32(column), Column: int32(column),
}, },
) )
previousRow = row previousRow = row
@@ -88,7 +88,7 @@ func makePostTextToken(ttype TokenType, tokenLen int32, textType TokenType) {
buffer = []rune{} buffer = []rune{}
} }
func tokenizeMdem(fileRunes []rune) ( []Token, error ) { func TokenizeMdem(fileRunes []rune) ( []Token, error ) {
tokens = []Token{} tokens = []Token{}
buffer = []rune{} buffer = []rune{}
@@ -128,20 +128,20 @@ func tokenizeMdem(fileRunes []rune) ( []Token, error ) {
tokens = append( tokens = append(
tokens, tokens,
Token{ Token{
tokenType: Identifier, TokenType: Identifier,
content: string(textFragment), Content: string(textFragment),
row: int32(previousRow), Row: int32(previousRow),
column: int32(previousColumn), Column: int32(previousColumn),
}, },
) )
} }
tokens = append( tokens = append(
tokens, tokens,
Token{ Token{
tokenType: IdentifierEnd, TokenType: IdentifierEnd,
content: "]", Content: "]",
row: int32(row), Row: int32(row),
column: int32(column), Column: int32(column),
}, },
) )
previousRow = row previousRow = row

View File

@@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"strings" "strings"
"log" "log"
"github.com/jorenchik/mdemory/src/compiler/lexer"
) )
type Question interface { type Question interface {
@@ -60,7 +61,7 @@ type QuestionElement struct {
content string; content string;
} }
var automata map[TokenType][]TokenType var automata map[lexer.TokenType][]lexer.TokenType
type CompilerErr struct { type CompilerErr struct {
message string; message string;
@@ -72,7 +73,7 @@ func (e CompilerErr) Error() string {
return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message) return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message)
} }
func contains(s []TokenType, e TokenType) bool { func contains(s []lexer.TokenType, e lexer.TokenType) bool {
for _, a := range s { for _, a := range s {
if a == e { if a == e {
return true return true
@@ -81,65 +82,71 @@ func contains(s []TokenType, e TokenType) bool {
return false return false
} }
func parserAutomata() map[TokenType][]TokenType { func parserAutomata() map[lexer.TokenType][]lexer.TokenType {
automata := make(map[TokenType][]TokenType) automata := make(map[lexer.TokenType][]lexer.TokenType)
automata[TextFragment] = []TokenType{ automata[lexer.TextFragment] = []lexer.TokenType{
QuestionEnd, ElementDashStart, ElementPlusStart, SectionIdentifierStart, SectionStart, EOF, SectionEnd, lexer.QuestionEnd,
lexer.ElementDashStart,
lexer.ElementPlusStart,
lexer.SectionIdentifierStart,
lexer.SectionStart,
lexer.EOF,
lexer.SectionEnd,
} }
automata[QuestionEnd] = []TokenType{ automata[lexer.QuestionEnd] = []lexer.TokenType{
ElementDashStart, ElementPlusStart, lexer.ElementDashStart, lexer.ElementPlusStart,
} }
automata[ElementDashStart] = []TokenType{ automata[lexer.ElementDashStart] = []lexer.TokenType{
IdentifierStart, TextFragment, lexer.IdentifierStart, lexer.TextFragment,
} }
automata[ElementPlusStart] = []TokenType{ automata[lexer.ElementPlusStart] = []lexer.TokenType{
TextFragment, lexer.TextFragment,
} }
automata[Identifier] = []TokenType{ automata[lexer.Identifier] = []lexer.TokenType{
IdentifierEnd, SectionStart, lexer.IdentifierEnd, lexer.SectionStart,
} }
automata[IdentifierStart] = []TokenType{ automata[lexer.IdentifierStart] = []lexer.TokenType{
Identifier, lexer.Identifier,
} }
automata[IdentifierEnd] = []TokenType{ automata[lexer.IdentifierEnd] = []lexer.TokenType{
TextFragment, lexer.TextFragment,
} }
automata[SectionIdentifierStart] = []TokenType{ automata[lexer.SectionIdentifierStart] = []lexer.TokenType{
Identifier, lexer.Identifier,
} }
automata[SectionStart] = []TokenType{ automata[lexer.SectionStart] = []lexer.TokenType{
ElementDashStart, SectionIdentifierStart, EOF, lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF,
} }
automata[SectionEnd] = []TokenType{ automata[lexer.SectionEnd] = []lexer.TokenType{
SectionIdentifierStart, ElementDashStart, EOF, lexer.SectionIdentifierStart, lexer.ElementDashStart, lexer.EOF,
} }
automata[SOF] = []TokenType{ automata[lexer.SOF] = []lexer.TokenType{
ElementDashStart, SectionIdentifierStart, EOF, lexer.ElementDashStart, lexer.SectionIdentifierStart, lexer.EOF,
} }
automata[EOF] = []TokenType{} automata[lexer.EOF] = []lexer.TokenType{}
return automata return automata
} }
func validateGrammar() error { func validateGrammar(tokens []lexer.Token) error {
for i := 0; i < len(tokens) - 1; i++ { for i := 0; i < len(tokens) - 1; i++ {
token := tokens[i] token := tokens[i]
nextToken := tokens[i + 1] nextToken := tokens[i + 1]
if (!contains(automata[token.tokenType], nextToken.tokenType)) { if (!contains(automata[token.TokenType], nextToken.TokenType)) {
return CompilerErr{ return CompilerErr{
message: fmt.Sprintf( message: fmt.Sprintf(
"Token %s cannot precede %s\n", "Token %s cannot precede %s\n",
ToString(&token.tokenType), lexer.ToString(&token.TokenType),
ToString(&nextToken.tokenType), lexer.ToString(&nextToken.TokenType),
), ),
row: token.row, row: token.Row,
column: token.column, column: token.Column,
} }
} }
} }
return nil return nil
} }
func ParseQuestions(tokens []Token) ([]Question, error) { func ParseQuestions(tokens []lexer.Token) ([]Question, error) {
questions := []Question{} questions := []Question{}
section := "" section := ""
i := 0 i := 0
@@ -147,25 +154,25 @@ func ParseQuestions(tokens []Token) ([]Question, error) {
if (i >= len(tokens)) { if (i >= len(tokens)) {
break break
} }
if (tokens[i].tokenType == ElementDashStart) { if (tokens[i].TokenType == lexer.ElementDashStart) {
id := tokens[i + 2].content id := tokens[i + 2].Content
question := tokens[i + 4].content question := tokens[i + 4].Content
quesitonElements := []QuestionElement{} quesitonElements := []QuestionElement{}
i += 6 i += 6
for { for {
if (i + 1 >= len(tokens) || if (i + 1 >= len(tokens) ||
!(tokens[i].tokenType == ElementDashStart || !(tokens[i].TokenType == lexer.ElementDashStart ||
tokens[i].tokenType == ElementPlusStart) || tokens[i].TokenType == lexer.ElementPlusStart) ||
tokens[i+1].tokenType == IdentifierStart) { tokens[i+1].TokenType == lexer.IdentifierStart) {
break break
} }
questionElement := QuestionElement{} questionElement := QuestionElement{}
if (tokens[i].tokenType == ElementDashStart) { if (tokens[i].TokenType == lexer.ElementDashStart) {
questionElement.isDash = true questionElement.isDash = true
} else { } else {
questionElement.isDash = false questionElement.isDash = false
} }
questionElement.content = tokens[i+1].content questionElement.content = tokens[i+1].Content
quesitonElements = append(quesitonElements, questionElement) quesitonElements = append(quesitonElements, questionElement)
i += 2 i += 2
} }
@@ -197,23 +204,23 @@ func ParseQuestions(tokens []Token) ([]Question, error) {
} }
questions = append(questions, question) questions = append(questions, question)
} }
} else if (tokens[i].tokenType == SectionIdentifierStart) { } else if (tokens[i].TokenType == lexer.SectionIdentifierStart) {
section = tokens[i + 1].content section = tokens[i + 1].Content
i += 3; i += 3;
} else if (tokens[i].tokenType == SectionEnd) { } else if (tokens[i].TokenType == lexer.SectionEnd) {
section = "" section = ""
i += 1 i += 1
} else if (tokens[i].tokenType == EOF) { } else if (tokens[i].TokenType == lexer.EOF) {
break break
} else { } else {
log.Fatalf( log.Fatalf(
"Not handled: %s", "Not handled: %s",
ToString(&tokens[i].tokenType), lexer.ToString(&tokens[i].TokenType),
) )
return nil, CompilerErr{ return nil, CompilerErr{
message: "", message: "",
row: tokens[i].row, row: tokens[i].Row,
column: tokens[i].column, column: tokens[i].Column,
} }
} }
} }

View File

@@ -1,8 +1,6 @@
module mdemory-app module mdemory-app
go 1.21 go 1.22.5
toolchain go1.22.5
require github.com/wailsapp/wails/v2 v2.9.1 require github.com/wailsapp/wails/v2 v2.9.1
@@ -36,4 +34,6 @@ require (
golang.org/x/text v0.15.0 // indirect golang.org/x/text v0.15.0 // indirect
) )
replace github.com/jorenchik/mdemory/src/compiler => ../compiler
// replace github.com/wailsapp/wails/v2 v2.9.1 => /home/jorenchik/go/pkg/mod // replace github.com/wailsapp/wails/v2 v2.9.1 => /home/jorenchik/go/pkg/mod