From 1a2c961807f0847bcd665ecb873f496b18a52444 Mon Sep 17 00:00:00 2001 From: jorenchik Date: Fri, 6 Sep 2024 19:56:52 +0300 Subject: [PATCH] lexer refactoring --- src/compiler/lexer/lexer.go | 103 +++++++++++++++--------------------- 1 file changed, 43 insertions(+), 60 deletions(-) diff --git a/src/compiler/lexer/lexer.go b/src/compiler/lexer/lexer.go index a4f5070..23cd950 100644 --- a/src/compiler/lexer/lexer.go +++ b/src/compiler/lexer/lexer.go @@ -5,15 +5,14 @@ import ( "strings" ) -var buffer []rune -var row int32 = 1 -var column int32 = 1 -var previousRow int32 = -1 -var previousColumn int32 = -1 -var textStarted bool = false +var buffer []rune +var row int32 = 1 +var column int32 = 1 +var previousRow int32 = -1 +var previousColumn int32 = -1 +var textStarted bool = false type TokenType int - const ( TextFragment TokenType = iota QuestionEnd @@ -30,13 +29,13 @@ const ( ) type Token struct { - TokenType TokenType; - Content string; - Row int32; - Column int32; + TokenType TokenType; + Content string; + Row int32; + Column int32; } -func (token Token)ToString() string { +func (token Token) ToString() string { content := token.Content if (token.TokenType == TextFragment) { content = strings.Replace( @@ -57,7 +56,11 @@ func (token Token)ToString() string { var tokens []Token -func makePostTextToken(ttype TokenType, tokenLen int32, textType TokenType) { +func makeTokenWithTokenBuffer( + ttype TokenType, + tokenLen int32, + textType TokenType, +) { if (len(strings.Trim(string(buffer), " \n\t")) - 1 > 0) { textFragment := []rune{} for i := 0; i < len(buffer) - int(tokenLen); i++ { @@ -95,11 +98,14 @@ func TokenizeMdem(fileRunes []rune) ( []Token, error ) { for i := 0; i < len(fileRunes); i++ { c := fileRunes[i] + // AdvancePointer if (c == '\n') { row += 1 column = 1 } buffer = append(buffer, c) + + // SkipWhitetext if !textStarted { if c == '\n' { previousRow += 1 @@ -111,92 +117,69 @@ func TokenizeMdem(fileRunes []rune) ( []Token, error ) { } } + // EmitTokens switch c { case '[': - makePostTextToken(IdentifierStart, 1, TextFragment) + makeTokenWithTokenBuffer(IdentifierStart, 1, TextFragment) previousRow = row previousColumn = column textStarted = false case ']': - if (len(buffer) - 1 > 1) { - textFragment := []rune{} - trimmedStr := strings.Trim(string(buffer), " \n\t") - for i := 0; i < len(trimmedStr) - 1; i++ { - element := trimmedStr[i] - textFragment = append(textFragment, rune(element)) - } - tokens = append( - tokens, - Token{ - TokenType: Identifier, - Content: string(textFragment), - Row: int32(previousRow), - Column: int32(previousColumn), - }, - ) - } - tokens = append( - tokens, - Token{ - TokenType: IdentifierEnd, - Content: "]", - Row: int32(row), - Column: int32(column), - }, - ) + makeTokenWithTokenBuffer(IdentifierEnd, 1, Identifier) previousRow = row previousColumn = column textStarted = false - buffer = []rune{} case '#': - makePostTextToken(SectionIdentifierStart, 1, TextFragment) + makeTokenWithTokenBuffer(SectionIdentifierStart, 1, TextFragment) previousRow = row previousColumn = column textStarted = false case '{': - makePostTextToken(SectionStart, 1, Identifier) + makeTokenWithTokenBuffer(SectionStart, 1, Identifier) previousRow = row previousColumn = column textStarted = false case '}': - makePostTextToken(SectionEnd, 1, TextFragment) + makeTokenWithTokenBuffer(SectionEnd, 1, TextFragment) previousRow = row previousColumn = column textStarted = false case '-': - makePostTextToken(ElementDashStart, 1, TextFragment) + makeTokenWithTokenBuffer(ElementDashStart, 1, TextFragment) previousRow = row previousColumn = column textStarted = false case '>': - makePostTextToken(QuestionEnd, 1, TextFragment) + makeTokenWithTokenBuffer(QuestionEnd, 1, TextFragment) previousRow = row previousColumn = column case '+': - makePostTextToken(ElementPlusStart, 1, TextFragment) + makeTokenWithTokenBuffer(ElementPlusStart, 1, TextFragment) previousRow = row previousColumn = column textStarted = false } column += 1 } - makePostTextToken(EOF, 0, TextFragment) + + // EmitEOF + makeTokenWithTokenBuffer(EOF, 0, TextFragment) return tokens, nil } func ToString (ttype *TokenType) string { switch *ttype { - case TextFragment: return "TextFragment" - case QuestionEnd: return "QuestionEnd" - case ElementDashStart: return "ElementDashStart" - case ElementPlusStart: return "ElementPlusStart" - case Identifier: return "Identifier" - case IdentifierStart: return "IdentifierStart" - case IdentifierEnd: return "IdentifierEnd" - case SectionIdentifierStart: return "SectionIdentifierStart" - case SectionStart: return "SectionStart" - case SectionEnd: return "SectionEnd" - case EOF: return "EndOfFile" - default: return "NOT_DEFINED" + case TextFragment: return "TextFragment" + case QuestionEnd: return "QuestionEnd" + case ElementDashStart: return "ElementDashStart" + case ElementPlusStart: return "ElementPlusStart" + case Identifier: return "Identifier" + case IdentifierStart: return "IdentifierStart" + case IdentifierEnd: return "IdentifierEnd" + case SectionIdentifierStart: return "SectionIdentifierStart" + case SectionStart: return "SectionStart" + case SectionEnd: return "SectionEnd" + case EOF: return "EndOfFile" + default: return "NOT_RECOGNIZED" } }