From 26f008c670771cdd6758d8219ef30bfef4aa8fbb Mon Sep 17 00:00:00 2001 From: jorenchik Date: Sun, 4 Aug 2024 09:44:47 +0300 Subject: [PATCH] Separated tokenizer into a separate file --- src/compiler/compiler.go | 206 +------------------------------------- src/compiler/lexer.go | 208 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+), 204 deletions(-) create mode 100644 src/compiler/lexer.go diff --git a/src/compiler/compiler.go b/src/compiler/compiler.go index 7e8a79e..5ebabe9 100644 --- a/src/compiler/compiler.go +++ b/src/compiler/compiler.go @@ -7,177 +7,6 @@ import ( "strings" ) -var tokens []Token -var buffer []rune -var row int32 = 1 -var column int32 = 1 -var previousRow int32 = -1 -var previousColumn int32 = -1 -var textStarted bool = false - -type TokenType int -const ( - TextFragment TokenType = iota - QuestionEnd - ElementDashStart - ElementPlusStart - Identifier - IdentifierStart - IdentifierEnd - SectionStart - SectionEnd - SOF - EOF -) - -type Token struct { - tokenType TokenType; - content string; - row int32; - column int32; -} - -type LexingErr struct { - message string; - row int32; - column int32; -} - -func makePostTextToken(ttype TokenType, tokenLen int32) { - if (len(strings.Trim(string(buffer), " \n")) - 1 > 0) { - textFragment := []rune{} - for i := 0; i < len(buffer) - int(tokenLen); i++ { - element := buffer[i] - textFragment = append(textFragment, element) - } - tokens = append( - tokens, - Token{ - tokenType: TextFragment, - content: string(textFragment), - row: int32(previousRow), - column: int32(previousColumn), - }, - ) - } - tokens = append( - tokens, - Token{ - tokenType: ttype, - content: string(buffer[len(buffer)-int(tokenLen):]), - row: int32(row), - column: int32(column), - }, - ) - previousRow = row - previousColumn = column - buffer = []rune{} -} - -func tokenize(runes []rune) error { - tokens = []Token{} - buffer = []rune{} - - for i := 0; i < len(runes); i++ { - c := runes[i] - - if (c == '\n') { - row += 1 - column = 1 - } - buffer = append(buffer, c) - if !textStarted { - if c == '\n' { - previousRow += 1 - previousColumn = 1 - } else if (c == ' ') { - previousColumn += 1 - } else { - textStarted = true - } - } - - trimmedBuffer := strings.Trim(string(buffer), " \n") - if (len(trimmedBuffer) > 2) { - lastTwo := buffer[len(trimmedBuffer) - 1:] - switch string(lastTwo) { - case "|>": - makePostTextToken(SectionStart, 2) - previousRow = row - previousColumn = column - textStarted = false - continue - case "<|": - makePostTextToken(SectionEnd, 2) - previousRow = row - previousColumn = column - textStarted = false - continue - } - } - - switch c { - case ']': - tokens = append( - tokens, - Token{ - tokenType: ElementDashStart, - content: "[", - row: int32(row), - column: int32(column), - }, - ) - if (len(buffer) - 1 > 1) { - textFragment := []rune{} - trimmedStr := strings.Trim(string(buffer), " ") - for i := 1; i < len(trimmedStr) - 1; i++ { - element := trimmedStr[i] - textFragment = append(textFragment, rune(element)) - } - tokens = append( - tokens, - Token{ - tokenType: Identifier, - content: string(textFragment), - row: int32(previousRow), - column: int32(previousColumn), - }, - ) - } - tokens = append( - tokens, - Token{ - tokenType: ElementDashStart, - content: "]", - row: int32(row), - column: int32(column), - }, - ) - previousRow = row - previousColumn = column - textStarted = false - buffer = []rune{} - case '+': - makePostTextToken(ElementPlusStart, 1) - previousRow = row - previousColumn = column - textStarted = false - case '-': - makePostTextToken(ElementDashStart, 1) - previousRow = row - previousColumn = column - textStarted = false - case '>': - makePostTextToken(QuestionEnd, 1) - previousRow = row - previousColumn = column - } - column += 1 - } - makePostTextToken(EOF, 0) - return nil -} - func main() { log.Println("Compilation started") @@ -187,7 +16,7 @@ func main() { } fileContents := string(file) - err = tokenize([]rune(fileContents)) + tokens, err := tokenize([]rune(fileContents)) if (err != nil) { fmt.Printf("%s\n", err.Error()) return @@ -202,7 +31,7 @@ func main() { } fmt.Printf( "%s: \"%s\" %d:%d\n", - toString(&token.tokenType), + ToString(&token.tokenType), content, token.row, token.column, @@ -211,34 +40,3 @@ func main() { log.Println("Compilation completed") } - -func toString (ttype *TokenType) string { - switch *ttype { - case TextFragment: - return "TextFragment" - case QuestionEnd: - return "QuestionEnd" - case ElementDashStart: - return "ElementDashStart" - case ElementPlusStart: - return "ElementPlusStart" - case Identifier: - return "Identifier" - case IdentifierStart: - return "IdentifierStart" - case IdentifierEnd: - return "IdentifierEnd" - case SectionStart: - return "SectionStart" - case SectionEnd: - return "SectionEnd" - case EOF: - return "EndOfFile" - default: - return "NOT_DEFINED" - } -} - -func (e LexingErr) Error() string { - return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message) -} diff --git a/src/compiler/lexer.go b/src/compiler/lexer.go new file mode 100644 index 0000000..dda1a03 --- /dev/null +++ b/src/compiler/lexer.go @@ -0,0 +1,208 @@ +package main + +import ( + "fmt" + "strings" +) + +var tokens []Token +var buffer []rune +var row int32 = 1 +var column int32 = 1 +var previousRow int32 = -1 +var previousColumn int32 = -1 +var textStarted bool = false + +type TokenType int +const ( + TextFragment TokenType = iota + QuestionEnd + ElementDashStart + ElementPlusStart + Identifier + IdentifierStart + IdentifierEnd + SectionStart + SectionEnd + SOF + EOF +) + +type Token struct { + tokenType TokenType; + content string; + row int32; + column int32; +} + +type LexingErr struct { + message string; + row int32; + column int32; +} + +func makePostTextToken(ttype TokenType, tokenLen int32) { + if (len(strings.Trim(string(buffer), " \n")) - 1 > 0) { + textFragment := []rune{} + for i := 0; i < len(buffer) - int(tokenLen); i++ { + element := buffer[i] + textFragment = append(textFragment, element) + } + tokens = append( + tokens, + Token{ + tokenType: TextFragment, + content: string(textFragment), + row: int32(previousRow), + column: int32(previousColumn), + }, + ) + } + tokens = append( + tokens, + Token{ + tokenType: ttype, + content: string(buffer[len(buffer)-int(tokenLen):]), + row: int32(row), + column: int32(column), + }, + ) + previousRow = row + previousColumn = column + buffer = []rune{} +} + +func tokenize(fileRunes []rune) ( []Token, error ) { + tokens = []Token{} + buffer = []rune{} + + for i := 0; i < len(fileRunes); i++ { + c := fileRunes[i] + + if (c == '\n') { + row += 1 + column = 1 + } + buffer = append(buffer, c) + if !textStarted { + if c == '\n' { + previousRow += 1 + previousColumn = 1 + } else if (c == ' ') { + previousColumn += 1 + } else { + textStarted = true + } + } + + trimmedBuffer := strings.Trim(string(buffer), " \n") + if (len(trimmedBuffer) > 2) { + lastTwo := buffer[len(trimmedBuffer) - 1:] + switch string(lastTwo) { + case "|>": + makePostTextToken(SectionStart, 2) + previousRow = row + previousColumn = column + textStarted = false + continue + case "<|": + makePostTextToken(SectionEnd, 2) + previousRow = row + previousColumn = column + textStarted = false + continue + } + } + + switch c { + case ']': + tokens = append( + tokens, + Token{ + tokenType: ElementDashStart, + content: "[", + row: int32(row), + column: int32(column), + }, + ) + if (len(buffer) - 1 > 1) { + textFragment := []rune{} + trimmedStr := strings.Trim(string(buffer), " ") + for i := 1; i < len(trimmedStr) - 1; i++ { + element := trimmedStr[i] + textFragment = append(textFragment, rune(element)) + } + tokens = append( + tokens, + Token{ + tokenType: Identifier, + content: string(textFragment), + row: int32(previousRow), + column: int32(previousColumn), + }, + ) + } + tokens = append( + tokens, + Token{ + tokenType: ElementDashStart, + content: "]", + row: int32(row), + column: int32(column), + }, + ) + previousRow = row + previousColumn = column + textStarted = false + buffer = []rune{} + case '+': + makePostTextToken(ElementPlusStart, 1) + previousRow = row + previousColumn = column + textStarted = false + case '-': + makePostTextToken(ElementDashStart, 1) + previousRow = row + previousColumn = column + textStarted = false + case '>': + makePostTextToken(QuestionEnd, 1) + previousRow = row + previousColumn = column + } + column += 1 + } + makePostTextToken(EOF, 0) + return tokens, nil +} + +func ToString (ttype *TokenType) string { + switch *ttype { + case TextFragment: + return "TextFragment" + case QuestionEnd: + return "QuestionEnd" + case ElementDashStart: + return "ElementDashStart" + case ElementPlusStart: + return "ElementPlusStart" + case Identifier: + return "Identifier" + case IdentifierStart: + return "IdentifierStart" + case IdentifierEnd: + return "IdentifierEnd" + case SectionStart: + return "SectionStart" + case SectionEnd: + return "SectionEnd" + case EOF: + return "EndOfFile" + default: + return "NOT_DEFINED" + } +} + +func (e LexingErr) Error() string { + return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message) +}