Split the tokenizer out into its own file

This commit is contained in:
jorenchik
2024-08-04 09:44:47 +03:00
parent e726489619
commit 26f008c670
2 changed files with 210 additions and 204 deletions

View File

@@ -7,177 +7,6 @@ import (
"strings"
)
// Shared lexer state used by tokenize and makePostTextToken.
var tokens []Token // tokens emitted so far
var buffer []rune // runes gathered since the last emitted token
var row int32 = 1 // current line in the input (1-based)
var column int32 = 1 // current column in the input (1-based)
// Start position of the pending text fragment.
// NOTE(review): initialised to -1, so very early TextFragments can
// report row/column -1 or 0 — confirm whether 1 was intended.
var previousRow int32 = -1
var previousColumn int32 = -1
var textStarted bool = false // a non-blank rune has been buffered
// TokenType identifies the kind of a lexed token.
type TokenType int
const (
TextFragment TokenType = iota // plain text between markers
QuestionEnd // '>'
ElementDashStart // '-' (also used for '[' and ']' — see tokenize)
ElementPlusStart // '+'
Identifier // text between '[' and ']'
IdentifierStart // NOTE(review): never emitted by tokenize
IdentifierEnd // NOTE(review): never emitted by tokenize
SectionStart // "|>"
SectionEnd // "<|"
SOF // start of file; NOTE(review): never emitted, missing from toString
EOF // end of file
)
// Token is one lexical unit together with the source position recorded
// at the time it was emitted.
type Token struct {
tokenType TokenType; // kind of token
content string; // literal text captured for this token
row int32; // line recorded at emission time (1-based)
column int32; // column recorded at emission time (1-based)
}
// LexingErr describes a lexing failure at a source position; it
// satisfies the error interface via the Error method below.
type LexingErr struct {
message string; // human-readable description
row int32; // line of the failure (1-based)
column int32; // column of the failure (1-based)
}
// makePostTextToken flushes the buffer as up to two tokens: any text
// preceding the marker becomes a TextFragment positioned at
// (previousRow, previousColumn), then the trailing tokenLen runes
// become a token of type ttype at the current (row, column). The
// buffer is cleared and the fragment start advances to the current
// position.
func makePostTextToken(ttype TokenType, tokenLen int32) {
// Emit a TextFragment only when the trimmed buffer holds more than a
// single rune, i.e. more than just the marker itself.
if (len(strings.Trim(string(buffer), " \n")) - 1 > 0) {
textFragment := []rune{}
// Copy everything that precedes the marker's own runes.
for i := 0; i < len(buffer) - int(tokenLen); i++ {
element := buffer[i]
textFragment = append(textFragment, element)
}
tokens = append(
tokens,
Token{
tokenType: TextFragment,
content: string(textFragment),
row: int32(previousRow),
column: int32(previousColumn),
},
)
}
// The marker token itself, taken from the buffer's tail (empty
// content when tokenLen is 0, e.g. for EOF).
tokens = append(
tokens,
Token{
tokenType: ttype,
content: string(buffer[len(buffer)-int(tokenLen):]),
row: int32(row),
column: int32(column),
},
)
previousRow = row
previousColumn = column
buffer = []rune{}
}
// tokenize scans runes into the global token stream. Markers: "|>"
// (SectionStart), "<|" (SectionEnd), '+', '-', '>' and "[...]"
// identifier groups; buffered text before a marker becomes a
// TextFragment. NOTE(review): row/column/previousRow/previousColumn/
// textStarted are not reset here, so a second call starts from stale
// state — confirm tokenize is only ever called once per process.
func tokenize(runes []rune) error {
tokens = []Token{}
buffer = []rune{}
for i := 0; i < len(runes); i++ {
c := runes[i]
// Track the current line. NOTE(review): column is set to 1 here and
// then incremented again at the bottom of the loop, so the rune
// after a newline reports column 2 — confirm whether 1 was intended.
if (c == '\n') {
row += 1
column = 1
}
buffer = append(buffer, c)
// Skip leading blanks so the pending fragment's start position
// points at its first visible rune.
if !textStarted {
if c == '\n' {
previousRow += 1
previousColumn = 1
} else if (c == ' ') {
previousColumn += 1
} else {
textStarted = true
}
}
trimmedBuffer := strings.Trim(string(buffer), " \n")
if (len(trimmedBuffer) > 2) {
// NOTE(review): this indexes `buffer` with `trimmedBuffer`'s
// length, which yields two runes only when exactly one blank was
// trimmed; `trimmedBuffer[len(trimmedBuffer)-2:]` looks intended.
lastTwo := buffer[len(trimmedBuffer) - 1:]
switch string(lastTwo) {
case "|>":
makePostTextToken(SectionStart, 2)
// Redundant: makePostTextToken already advanced these.
previousRow = row
previousColumn = column
textStarted = false
// NOTE(review): `continue` skips the column increment below,
// so position tracking drifts after a section marker.
continue
case "<|":
makePostTextToken(SectionEnd, 2)
previousRow = row
previousColumn = column
textStarted = false
continue
}
}
switch c {
case ']':
// NOTE(review): both bracket tokens use ElementDashStart while
// IdentifierStart/IdentifierEnd are never emitted — confirm with
// the parser whether those constants were meant here.
tokens = append(
tokens,
Token{
tokenType: ElementDashStart,
content: "[",
row: int32(row),
column: int32(column),
},
)
if (len(buffer) - 1 > 1) {
textFragment := []rune{}
trimmedStr := strings.Trim(string(buffer), " ")
// Copies bytes between the brackets one at a time; multibyte
// UTF-8 runes would be mangled — presumably input is ASCII.
for i := 1; i < len(trimmedStr) - 1; i++ {
element := trimmedStr[i]
textFragment = append(textFragment, rune(element))
}
tokens = append(
tokens,
Token{
tokenType: Identifier,
content: string(textFragment),
row: int32(previousRow),
column: int32(previousColumn),
},
)
}
tokens = append(
tokens,
Token{
tokenType: ElementDashStart,
content: "]",
row: int32(row),
column: int32(column),
},
)
previousRow = row
previousColumn = column
textStarted = false
buffer = []rune{}
case '+':
makePostTextToken(ElementPlusStart, 1)
previousRow = row
previousColumn = column
textStarted = false
case '-':
makePostTextToken(ElementDashStart, 1)
previousRow = row
previousColumn = column
textStarted = false
case '>':
makePostTextToken(QuestionEnd, 1)
previousRow = row
previousColumn = column
}
column += 1
}
// Flush any trailing text and terminate the stream with EOF.
makePostTextToken(EOF, 0)
return nil
}
func main() {
log.Println("Compilation started")
@@ -187,7 +16,7 @@ func main() {
}
fileContents := string(file)
err = tokenize([]rune(fileContents))
tokens, err := tokenize([]rune(fileContents))
if (err != nil) {
fmt.Printf("%s\n", err.Error())
return
@@ -202,7 +31,7 @@ func main() {
}
fmt.Printf(
"%s: \"%s\" %d:%d\n",
toString(&token.tokenType),
ToString(&token.tokenType),
content,
token.row,
token.column,
@@ -211,34 +40,3 @@ func main() {
log.Println("Compilation completed")
}
// toString returns a human-readable name for a token type.
// NOTE(review): SOF has no case and falls through to "NOT_DEFINED" —
// likely an omission, given EOF maps to "EndOfFile".
func toString (ttype *TokenType) string {
switch *ttype {
case TextFragment:
return "TextFragment"
case QuestionEnd:
return "QuestionEnd"
case ElementDashStart:
return "ElementDashStart"
case ElementPlusStart:
return "ElementPlusStart"
case Identifier:
return "Identifier"
case IdentifierStart:
return "IdentifierStart"
case IdentifierEnd:
return "IdentifierEnd"
case SectionStart:
return "SectionStart"
case SectionEnd:
return "SectionEnd"
case EOF:
return "EndOfFile"
default:
return "NOT_DEFINED"
}
}
// Error renders the lexing error as "row:column - message".
func (e LexingErr) Error() string {
return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message)
}

208
src/compiler/lexer.go Normal file
View File

@@ -0,0 +1,208 @@
package main
import (
"fmt"
"strings"
)
// Shared lexer state used by tokenize and makePostTextToken.
var tokens []Token // tokens emitted so far
var buffer []rune // runes gathered since the last emitted token
var row int32 = 1 // current line in the input (1-based)
var column int32 = 1 // current column in the input (1-based)
// Start position of the pending text fragment.
// NOTE(review): initialised to -1, so very early TextFragments can
// report row/column -1 or 0 — confirm whether 1 was intended.
var previousRow int32 = -1
var previousColumn int32 = -1
var textStarted bool = false // a non-blank rune has been buffered
// TokenType identifies the kind of a lexed token.
type TokenType int
const (
TextFragment TokenType = iota // plain text between markers
QuestionEnd // '>'
ElementDashStart // '-' (also used for '[' and ']' — see tokenize)
ElementPlusStart // '+'
Identifier // text between '[' and ']'
IdentifierStart // NOTE(review): never emitted by tokenize
IdentifierEnd // NOTE(review): never emitted by tokenize
SectionStart // "|>"
SectionEnd // "<|"
SOF // start of file; NOTE(review): never emitted by tokenize
EOF // end of file
)
// Token is one lexical unit together with the source position recorded
// at the time it was emitted.
type Token struct {
	tokenType TokenType // kind of token
	content   string    // literal text captured for this token
	row       int32     // line recorded at emission time (1-based)
	column    int32     // column recorded at emission time (1-based)
}
// LexingErr describes a lexing failure at a source position; it
// satisfies the error interface via the Error method below.
type LexingErr struct {
	message string // human-readable description
	row     int32  // line of the failure (1-based)
	column  int32  // column of the failure (1-based)
}
// makePostTextToken flushes the buffer as up to two tokens: any text
// preceding the marker becomes a TextFragment positioned at
// (previousRow, previousColumn), then the trailing tokenLen runes
// become a token of type ttype at the current (row, column). The
// buffer is cleared and the fragment start advances to the current
// position.
func makePostTextToken(ttype TokenType, tokenLen int32) {
	// Emit a TextFragment only when the trimmed buffer holds more than
	// a single rune, i.e. more than just the marker itself.
	if len(strings.Trim(string(buffer), " \n")) > 1 {
		// string() copies the runes, so clearing buffer below is safe.
		// (Replaces the original rune-by-rune append loop.)
		text := string(buffer[:len(buffer)-int(tokenLen)])
		tokens = append(tokens, Token{
			tokenType: TextFragment,
			content:   text,
			row:       previousRow,
			column:    previousColumn,
		})
	}
	// The marker token itself, taken from the buffer's tail (empty
	// content when tokenLen is 0, e.g. for EOF).
	tokens = append(tokens, Token{
		tokenType: ttype,
		content:   string(buffer[len(buffer)-int(tokenLen):]),
		row:       row,
		column:    column,
	})
	previousRow = row
	previousColumn = column
	buffer = nil
}
// tokenize scans fileRunes and returns the token stream for the input.
// Recognised markers: "|>" (SectionStart), "<|" (SectionEnd), '+'
// (ElementPlusStart), '-' (ElementDashStart), '>' (QuestionEnd) and
// "[...]" identifier groups. Text gathered before a marker is emitted
// as a TextFragment token. All shared lexer state is reset on entry,
// so the function is safe to call more than once.
func tokenize(fileRunes []rune) ([]Token, error) {
	// Reset the package-level lexer state for a fresh run (previously
	// only tokens and buffer were reset, so a second call started with
	// stale positions).
	tokens = []Token{}
	buffer = []rune{}
	row, column = 1, 1
	previousRow, previousColumn = -1, -1
	textStarted = false
	for _, c := range fileRunes {
		if c == '\n' {
			row++
			// column is incremented at the bottom of the loop body, so
			// 0 here puts the first rune of the next line at column 1
			// (the old value of 1 shifted every line by one column).
			column = 0
		}
		buffer = append(buffer, c)
		// Advance the fragment start position over leading blanks so a
		// TextFragment token points at its first visible rune.
		if !textStarted {
			switch {
			case c == '\n':
				previousRow++
				previousColumn = 1
			case c == ' ':
				previousColumn++
			default:
				textStarted = true
			}
		}
		trimmedBuffer := strings.Trim(string(buffer), " \n")
		if len(trimmedBuffer) > 2 {
			// Inspect the last two significant runes. The original
			// sliced `buffer` with `trimmedBuffer`'s length, which only
			// produced a two-rune slice when exactly one blank had been
			// trimmed; slicing the trimmed string matches reliably.
			lastTwo := trimmedBuffer[len(trimmedBuffer)-2:]
			switch lastTwo {
			case "|>":
				makePostTextToken(SectionStart, 2)
				textStarted = false
				column++ // keep counting despite the `continue`
				continue
			case "<|":
				makePostTextToken(SectionEnd, 2)
				textStarted = false
				column++
				continue
			}
		}
		switch c {
		case ']':
			// NOTE(review): both bracket tokens carry ElementDashStart
			// while IdentifierStart/IdentifierEnd go unused — confirm
			// with the parser whether those constants were intended.
			tokens = append(tokens, Token{
				tokenType: ElementDashStart,
				content:   "[",
				row:       row,
				column:    column,
			})
			if len(buffer) > 2 {
				trimmed := strings.Trim(string(buffer), " ")
				// Identifier text is whatever sits between the brackets
				// (empty when they are adjacent). A substring keeps
				// UTF-8 intact, unlike the old byte-by-byte rune copy.
				var ident string
				if len(trimmed) > 2 {
					ident = trimmed[1 : len(trimmed)-1]
				}
				tokens = append(tokens, Token{
					tokenType: Identifier,
					content:   ident,
					row:       previousRow,
					column:    previousColumn,
				})
			}
			tokens = append(tokens, Token{
				tokenType: ElementDashStart,
				content:   "]",
				row:       row,
				column:    column,
			})
			previousRow, previousColumn = row, column
			textStarted = false
			buffer = nil
		case '+':
			makePostTextToken(ElementPlusStart, 1)
			textStarted = false
		case '-':
			makePostTextToken(ElementDashStart, 1)
			textStarted = false
		case '>':
			// NOTE(review): textStarted is left untouched here in the
			// original — confirm that is intended for QuestionEnd.
			makePostTextToken(QuestionEnd, 1)
		}
		column++
	}
	// Flush any trailing text and terminate the stream with EOF.
	makePostTextToken(EOF, 0)
	return tokens, nil
}
// ToString returns a human-readable name for a token type. Unknown
// values yield "NOT_DEFINED".
func ToString(ttype *TokenType) string {
	switch *ttype {
	case TextFragment:
		return "TextFragment"
	case QuestionEnd:
		return "QuestionEnd"
	case ElementDashStart:
		return "ElementDashStart"
	case ElementPlusStart:
		return "ElementPlusStart"
	case Identifier:
		return "Identifier"
	case IdentifierStart:
		return "IdentifierStart"
	case IdentifierEnd:
		return "IdentifierEnd"
	case SectionStart:
		return "SectionStart"
	case SectionEnd:
		return "SectionEnd"
	case SOF:
		// Previously missing: SOF fell through to "NOT_DEFINED".
		return "StartOfFile"
	case EOF:
		return "EndOfFile"
	default:
		return "NOT_DEFINED"
	}
}
// Error renders the lexing error as "row:column - message".
func (e LexingErr) Error() string {
	position := fmt.Sprintf("%d:%d", e.row, e.column)
	return position + " - " + e.message
}