From 26f008c670771cdd6758d8219ef30bfef4aa8fbb Mon Sep 17 00:00:00 2001
From: jorenchik <jorens.stekels@gmail.com>
Date: Sun, 4 Aug 2024 09:44:47 +0300
Subject: [PATCH] Move tokenizer into a separate file

---
 src/compiler/compiler.go | 206 +-------------------------------------
 src/compiler/lexer.go    | 208 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 210 insertions(+), 204 deletions(-)
 create mode 100644 src/compiler/lexer.go

diff --git a/src/compiler/compiler.go b/src/compiler/compiler.go
index 7e8a79e..5ebabe9 100644
--- a/src/compiler/compiler.go
+++ b/src/compiler/compiler.go
@@ -7,177 +7,6 @@ import (
 	"strings"
 )
 
-var tokens []Token
-var buffer []rune
-var	row int32          =     1
-var	column int32       =     1
-var	previousRow int32 =     -1
-var	previousColumn int32 =  -1
-var textStarted bool =   false
-
-type TokenType int
-const (
-	TextFragment TokenType = iota
-	QuestionEnd               
-	ElementDashStart          
-	ElementPlusStart          
-	Identifier                
-	IdentifierStart            
-	IdentifierEnd             
-	SectionStart
-	SectionEnd
-	SOF
-	EOF
-)
-
-type Token struct {
-	tokenType   TokenType;
-	content        string;
-	row		        int32;
-	column          int32;
-} 
-
-type LexingErr struct {
-	message string;	
-	row	     int32;
-	column   int32;
-}
-
-func makePostTextToken(ttype TokenType, tokenLen int32) {
-	if (len(strings.Trim(string(buffer), " \n")) - 1 > 0) {
-		textFragment := []rune{}
-		for i := 0; i < len(buffer) - int(tokenLen); i++ {
-			element := buffer[i]
-			textFragment = append(textFragment, element)
-		}
-		tokens = append(
-			tokens,
-			Token{
-				tokenType: TextFragment,
-				content: string(textFragment),
-				row: int32(previousRow),
-				column: int32(previousColumn),
-			},
-		)
-	}
-	tokens = append(
-		tokens,
-		Token{
-			tokenType: ttype,
-			content: string(buffer[len(buffer)-int(tokenLen):]),
-			row: int32(row),
-			column: int32(column),
-		},
-	)
-	previousRow = row
-	previousColumn = column
-	buffer = []rune{}
-}
-
-func tokenize(runes []rune) error {
-	tokens = []Token{} 
-	buffer = []rune{} 
-
-	for i := 0; i < len(runes); i++ {
-		c := runes[i]
-
-		if (c == '\n') {
-			row += 1
-			column = 1
-		}
-		buffer = append(buffer, c)
-		if !textStarted {
-			if c == '\n' {
-				previousRow += 1
-				previousColumn = 1
-			} else if (c == ' ') {
-				previousColumn += 1
-			} else {
-				textStarted = true 
-			}
-		}
-
-		trimmedBuffer := strings.Trim(string(buffer), " \n")
-		if (len(trimmedBuffer) > 2) {
-			lastTwo := buffer[len(trimmedBuffer) - 1:]
-			switch string(lastTwo) {
-			case "|>":
-				makePostTextToken(SectionStart, 2)
-				previousRow = row
-				previousColumn = column
-				textStarted = false
-				continue
-			case "<|":
-				makePostTextToken(SectionEnd, 2)
-				previousRow = row
-				previousColumn = column
-				textStarted = false
-				continue
-			}
-		}
-
-		switch c {
-		case ']':
-			tokens = append(
-				tokens,
-				Token{
-					tokenType: ElementDashStart,
-					content: "[",
-					row: int32(row),
-					column: int32(column),
-				},
-			)
-			if (len(buffer) - 1 > 1) {
-				textFragment := []rune{}
-				trimmedStr := strings.Trim(string(buffer), " ")
-				for i := 1; i <  len(trimmedStr) - 1; i++ {
-					element := trimmedStr[i]
-					textFragment = append(textFragment, rune(element))
-				}
-				tokens = append(
-					tokens,
-					Token{
-						tokenType: Identifier,
-						content: string(textFragment),
-						row: int32(previousRow),
-						column: int32(previousColumn),
-					},
-				)
-			}
-			tokens = append(
-				tokens,
-				Token{
-					tokenType: ElementDashStart,
-					content: "]",
-					row: int32(row),
-					column: int32(column),
-				},
-			)
-			previousRow = row
-			previousColumn = column
-			textStarted = false
-			buffer = []rune{}
-		case '+':
-			makePostTextToken(ElementPlusStart, 1)
-			previousRow = row
-			previousColumn = column
-			textStarted = false
-		case '-':
-			makePostTextToken(ElementDashStart, 1)
-			previousRow = row
-			previousColumn = column
-			textStarted = false
-		case '>':
-			makePostTextToken(QuestionEnd, 1)
-			previousRow = row
-			previousColumn = column
-		}
-		column += 1
-	}
-	makePostTextToken(EOF, 0)
-	return nil
-}
-
 func main() {
 	log.Println("Compilation started")
 
@@ -187,7 +16,7 @@ func main() {
 	}
 	fileContents := string(file)
 
-	err = tokenize([]rune(fileContents))
+	tokens, err := tokenize([]rune(fileContents))
 	if (err != nil) {
 		fmt.Printf("%s\n", err.Error())	
 		return
@@ -202,7 +31,7 @@ func main() {
 		}
 		fmt.Printf(
 			"%s: \"%s\" %d:%d\n",
-			toString(&token.tokenType),
+			ToString(&token.tokenType),
 			content,
 			token.row,
 			token.column,
@@ -211,34 +40,3 @@ func main() {
 
 	log.Println("Compilation completed")
 }
-
-func toString (ttype *TokenType) string {
-	switch *ttype {
-	case TextFragment:
-		return "TextFragment"
-	case QuestionEnd:
-		return "QuestionEnd"               
-	case ElementDashStart:
-		return "ElementDashStart"          
-	case ElementPlusStart:
-		return "ElementPlusStart"          
-	case Identifier:
-		return "Identifier"                
-	case IdentifierStart:
-		return "IdentifierStart"            
-	case IdentifierEnd:
-		return "IdentifierEnd"             
-	case SectionStart:
-		return "SectionStart"             
-	case SectionEnd:
-		return "SectionEnd"             
-	case EOF:
-		return "EndOfFile"             
-	default:
-		return "NOT_DEFINED"
-	}
-}
-
-func (e LexingErr) Error() string {
-	return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message)
-}
diff --git a/src/compiler/lexer.go b/src/compiler/lexer.go
new file mode 100644
index 0000000..dda1a03
--- /dev/null
+++ b/src/compiler/lexer.go
@@ -0,0 +1,208 @@
+package main 
+
+import (
+	"fmt"
+	"strings"
+)
+
+var tokens []Token            // tokens collected by tokenize
+var buffer []rune             // runes consumed since the last emitted token
+var row int32 = 1             // current row; tokenize adds 1 on every '\n'
+var column int32 = 1          // current column; tokenize resets it to 1 on '\n'
+var previousRow int32 = -1    // row stamped on TextFragment/Identifier tokens
+var previousColumn int32 = -1 // column stamped on TextFragment/Identifier tokens
+var textStarted bool          // true once a rune other than ' ' or '\n' is buffered
+
+type TokenType int // TokenType identifies the kind of a Token.
+const (
+	TextFragment     TokenType = iota // text buffered ahead of another token
+	QuestionEnd                       // emitted for '>'
+	ElementDashStart                  // emitted for '-'
+	ElementPlusStart                  // emitted for '+'
+	Identifier                        // text between the brackets, emitted on ']'
+	IdentifierStart                   // presumably the '[' of an identifier - TODO confirm
+	IdentifierEnd                     // presumably the ']' of an identifier - TODO confirm
+	SectionStart                      // emitted for "|>"
+	SectionEnd                        // emitted for "<|"
+	SOF                               // presumably start of file - TODO confirm
+	EOF                               // appended after the last rune of the input
+)
+
+type Token struct { // Token is one lexed unit together with its position.
+	tokenType TokenType // kind of the token
+	content   string    // text carried by the token
+	row       int32     // row recorded for the token
+	column    int32     // column recorded for the token
+}
+
+type LexingErr struct { // LexingErr is a lexing failure tied to a row and column.
+	message string // description printed by Error
+	row     int32  // row printed by Error
+	column  int32  // column printed by Error
+}
+
+// makePostTextToken flushes the shared buffer into the token stream. The runes
+// ahead of the trailing tokenLen runes become a TextFragment when they contain
+// anything besides spaces and newlines; the trailing tokenLen runes become a
+// token of type ttype. The TextFragment is stamped with
+// previousRow/previousColumn and the trailing token with row/column. Afterwards
+// the previous position is moved to the current one and the buffer is emptied.
+//
+// The caller must guarantee len(buffer) >= tokenLen.
+func makePostTextToken(ttype TokenType, tokenLen int32) {
+	split := len(buffer) - int(tokenLen)
+	text := string(buffer[:split])
+	// Test the text alone instead of assuming a one-rune token, so a
+	// single-character fragment ahead of the zero-length EOF token is kept.
+	if strings.Trim(text, " \n") != "" {
+		tokens = append(tokens, Token{
+			tokenType: TextFragment,
+			content:   text,
+			row:       previousRow,
+			column:    previousColumn,
+		})
+	}
+	tokens = append(tokens, Token{
+		tokenType: ttype,
+		content:   string(buffer[split:]),
+		row:       row,
+		column:    column,
+	})
+	previousRow, previousColumn = row, column
+	buffer = []rune{}
+}
+
+// tokenize scans fileRunes once and returns the resulting tokens, ending with
+// an EOF token. It works through the package-level lexer state (tokens, buffer,
+// row, column, previousRow, previousColumn, textStarted), all of which is
+// reset on entry so that every call starts from the same position.
+//
+// Recognised input:
+//   - "|>" and "<|" as the last two buffered runes: SectionStart / SectionEnd
+//   - ']': IdentifierStart, Identifier (the buffered text between the first
+//     and last rune after trimming spaces) and IdentifierEnd
+//   - '+', '-', '>': ElementPlusStart, ElementDashStart, QuestionEnd
+//
+// Text buffered ahead of a section or single-rune token is emitted as a
+// TextFragment by makePostTextToken. The returned error is always nil.
+func tokenize(fileRunes []rune) ([]Token, error) {
+	tokens = []Token{}
+	buffer = []rune{}
+	// Restore the declared starting position; without this a second call would
+	// continue counting from wherever the previous input ended.
+	// NOTE(review): the previous position starts at -1:-1, so a fragment at the
+	// very start of the input is stamped -1:-1 - confirm that is intended.
+	row, column = 1, 1
+	previousRow, previousColumn = -1, -1
+	textStarted = false
+
+	for _, c := range fileRunes {
+		if c == '\n' {
+			row++
+			column = 1
+		}
+		buffer = append(buffer, c)
+		// Until text starts, move the fragment start past leading blanks.
+		if !textStarted {
+			switch c {
+			case '\n':
+				previousRow++
+				previousColumn = 1
+			case ' ':
+				previousColumn++
+			default:
+				textStarted = true
+			}
+		}
+
+		// Index the rune buffer by its own length: the byte length of the
+		// trimmed string is unrelated and overruns it on multi-byte text.
+		marker := ""
+		if len(buffer) >= 2 {
+			marker = string(buffer[len(buffer)-2:])
+		}
+
+		switch {
+		case marker == "|>":
+			makePostTextToken(SectionStart, 2)
+			textStarted = false
+		case marker == "<|":
+			makePostTextToken(SectionEnd, 2)
+			textStarted = false
+		case c == ']':
+			tokens = append(tokens, Token{
+				tokenType: IdentifierStart,
+				content:   "[",
+				row:       row,
+				column:    column,
+			})
+			if len(buffer) > 2 {
+				// Drop the first and last rune of the space-trimmed buffer
+				// (the brackets, presumably); runes keep non-ASCII text intact.
+				name := []rune(strings.Trim(string(buffer), " "))
+				content := ""
+				if len(name) >= 2 {
+					content = string(name[1 : len(name)-1])
+				}
+				tokens = append(tokens, Token{
+					tokenType: Identifier,
+					content:   content,
+					row:       previousRow,
+					column:    previousColumn,
+				})
+			}
+			tokens = append(tokens, Token{
+				tokenType: IdentifierEnd,
+				content:   "]",
+				row:       row,
+				column:    column,
+			})
+			previousRow, previousColumn = row, column
+			textStarted = false
+			buffer = []rune{}
+		case c == '+':
+			makePostTextToken(ElementPlusStart, 1)
+			textStarted = false
+		case c == '-':
+			makePostTextToken(ElementDashStart, 1)
+			textStarted = false
+		case c == '>':
+			makePostTextToken(QuestionEnd, 1)
+		}
+		// Every rune advances the column, section markers included.
+		column++
+	}
+	makePostTextToken(EOF, 0)
+	return tokens, nil
+}
+
+// ToString returns the display name of the token type ttype points to. Every
+// declared type has a name (SOF is "StartOfFile", EOF is "EndOfFile"); a nil
+// pointer or a value outside the declared set yields "NOT_DEFINED".
+func ToString(ttype *TokenType) string {
+	if ttype == nil {
+		return "NOT_DEFINED"
+	}
+	// Indexed by token type, so adding a constant means adding one entry here.
+	names := [...]string{
+		TextFragment:     "TextFragment",
+		QuestionEnd:      "QuestionEnd",
+		ElementDashStart: "ElementDashStart",
+		ElementPlusStart: "ElementPlusStart",
+		Identifier:       "Identifier",
+		IdentifierStart:  "IdentifierStart",
+		IdentifierEnd:    "IdentifierEnd",
+		SectionStart:     "SectionStart",
+		SectionEnd:       "SectionEnd",
+		SOF:              "StartOfFile",
+		EOF:              "EndOfFile",
+	}
+	if i := int(*ttype); i >= 0 && i < len(names) {
+		return names[i]
+	}
+	return "NOT_DEFINED"
+}
+
+func (e LexingErr) Error() string { // Error renders "row:column - message".
+	return fmt.Sprint(e.row, ":", e.column, " - ", e.message)
+}