lexer refactoring

This commit is contained in:
jorenchik
2024-09-06 19:56:52 +03:00
parent 1fdbff42f2
commit 1a2c961807

View File

@@ -5,15 +5,14 @@ import (
"strings"
)
// Package-level lexer state. Each variable is declared exactly once; a second
// declaration of the same name in this scope does not compile.
var (
	// buffer holds the input runes appended by the tokenizer loop.
	buffer []rune

	// row and column are the current position in the input. row is bumped
	// and column reset to 1 on every newline; column is bumped per rune.
	row    int32 = 1
	column int32 = 1

	// previousRow and previousColumn remember a position recorded by the
	// tokenizer (set to row/column after a token is emitted); -1 until set.
	previousRow    int32 = -1
	previousColumn int32 = -1

	// textStarted gates the whitespace-skipping step of the tokenizer loop
	// and is cleared again after most tokens are emitted.
	textStarted bool = false
)
// TokenType identifies the kind of a Token (see the constants declared below).
type TokenType int
const (
TextFragment TokenType = iota
QuestionEnd
@@ -30,13 +29,13 @@ const (
)
// Token is a single lexical unit produced by the tokenizer.
type Token struct {
	// TokenType is the kind of this token.
	TokenType TokenType
	// Content is the text carried by the token.
	Content string
	// Row and Column are the input position recorded for the token.
	Row    int32
	Column int32
}
func (token Token)ToString() string {
func (token Token) ToString() string {
content := token.Content
if (token.TokenType == TextFragment) {
content = strings.Replace(
@@ -57,7 +56,11 @@ func (token Token)ToString() string {
// tokens accumulates the tokens emitted so far; TokenizeMdem appends to it
// and returns it.
var tokens []Token
func makePostTextToken(ttype TokenType, tokenLen int32, textType TokenType) {
func makeTokenWithTokenBuffer(
ttype TokenType,
tokenLen int32,
textType TokenType,
) {
if (len(strings.Trim(string(buffer), " \n\t")) - 1 > 0) {
textFragment := []rune{}
for i := 0; i < len(buffer) - int(tokenLen); i++ {
@@ -95,11 +98,14 @@ func TokenizeMdem(fileRunes []rune) ( []Token, error ) {
for i := 0; i < len(fileRunes); i++ {
c := fileRunes[i]
// AdvancePointer
if (c == '\n') {
row += 1
column = 1
}
buffer = append(buffer, c)
// SkipWhitetext
if !textStarted {
if c == '\n' {
previousRow += 1
@@ -111,92 +117,69 @@ func TokenizeMdem(fileRunes []rune) ( []Token, error ) {
}
}
// EmitTokens
switch c {
case '[':
makePostTextToken(IdentifierStart, 1, TextFragment)
makeTokenWithTokenBuffer(IdentifierStart, 1, TextFragment)
previousRow = row
previousColumn = column
textStarted = false
case ']':
if (len(buffer) - 1 > 1) {
textFragment := []rune{}
trimmedStr := strings.Trim(string(buffer), " \n\t")
for i := 0; i < len(trimmedStr) - 1; i++ {
element := trimmedStr[i]
textFragment = append(textFragment, rune(element))
}
tokens = append(
tokens,
Token{
TokenType: Identifier,
Content: string(textFragment),
Row: int32(previousRow),
Column: int32(previousColumn),
},
)
}
tokens = append(
tokens,
Token{
TokenType: IdentifierEnd,
Content: "]",
Row: int32(row),
Column: int32(column),
},
)
makeTokenWithTokenBuffer(IdentifierEnd, 1, Identifier)
previousRow = row
previousColumn = column
textStarted = false
buffer = []rune{}
case '#':
makePostTextToken(SectionIdentifierStart, 1, TextFragment)
makeTokenWithTokenBuffer(SectionIdentifierStart, 1, TextFragment)
previousRow = row
previousColumn = column
textStarted = false
case '{':
makePostTextToken(SectionStart, 1, Identifier)
makeTokenWithTokenBuffer(SectionStart, 1, Identifier)
previousRow = row
previousColumn = column
textStarted = false
case '}':
makePostTextToken(SectionEnd, 1, TextFragment)
makeTokenWithTokenBuffer(SectionEnd, 1, TextFragment)
previousRow = row
previousColumn = column
textStarted = false
case '-':
makePostTextToken(ElementDashStart, 1, TextFragment)
makeTokenWithTokenBuffer(ElementDashStart, 1, TextFragment)
previousRow = row
previousColumn = column
textStarted = false
case '>':
makePostTextToken(QuestionEnd, 1, TextFragment)
makeTokenWithTokenBuffer(QuestionEnd, 1, TextFragment)
previousRow = row
previousColumn = column
case '+':
makePostTextToken(ElementPlusStart, 1, TextFragment)
makeTokenWithTokenBuffer(ElementPlusStart, 1, TextFragment)
previousRow = row
previousColumn = column
textStarted = false
}
column += 1
}
makePostTextToken(EOF, 0, TextFragment)
// EmitEOF
makeTokenWithTokenBuffer(EOF, 0, TextFragment)
return tokens, nil
}
// ToString returns the display name of the token type that ttype points to.
//
// ttype must not be nil: it is dereferenced unconditionally. EOF is reported
// as "EndOfFile"; any value without a case below yields "NOT_RECOGNIZED".
func ToString(ttype *TokenType) string {
	// Each token type is listed exactly once: Go rejects duplicate constant
	// case labels and more than one default clause in a switch.
	switch *ttype {
	case TextFragment:
		return "TextFragment"
	case QuestionEnd:
		return "QuestionEnd"
	case ElementDashStart:
		return "ElementDashStart"
	case ElementPlusStart:
		return "ElementPlusStart"
	case Identifier:
		return "Identifier"
	case IdentifierStart:
		return "IdentifierStart"
	case IdentifierEnd:
		return "IdentifierEnd"
	case SectionIdentifierStart:
		return "SectionIdentifierStart"
	case SectionStart:
		return "SectionStart"
	case SectionEnd:
		return "SectionEnd"
	case EOF:
		return "EndOfFile"
	default:
		return "NOT_RECOGNIZED"
	}
}