diff --git a/src/compiler/compiler.go b/src/compiler/compiler.go
index 35c7da7..660e461 100644
--- a/src/compiler/compiler.go
+++ b/src/compiler/compiler.go
@@ -1,7 +1,259 @@
-package main
+package main
 
-import "fmt"
+import (
+    "bufio"
+    "fmt"
+    "io"
+    "log"
+    "os"
+    "strings"
+)
+
+// Lexer output and scratch state, shared by tokenize and its helpers.
+var tokens []Token
+var buffer []rune
+
+// TokenType enumerates the token kinds the lexer can emit.
+type TokenType int
+
+const (
+    TextFragment TokenType = iota
+    QuestionEnd
+    ElementDashStart
+    ElementPlusStart
+    Identifier
+    IdentifierStart
+    IdentifierEnd
+    SectionStart
+    SectionEnd
+)
+
+// Token is one lexed unit together with its source position (1-based).
+type Token struct {
+    tokenType TokenType
+    content   string
+    row       int32
+    column    int32
+}
+
+// Flashcard pairs a question with its answer (not yet produced by the lexer).
+type Flashcard struct {
+    question string
+    answer   string
+}
+
+// toString returns the human-readable name of a TokenType.
+func toString(ttype *TokenType) string {
+    switch *ttype {
+    case TextFragment:
+        return "TextFragment"
+    case QuestionEnd:
+        return "QuestionEnd"
+    case ElementDashStart:
+        return "ElementDashStart"
+    case ElementPlusStart:
+        return "ElementPlusStart"
+    case Identifier:
+        return "Identifier"
+    case IdentifierStart:
+        return "IdentifierStart"
+    case IdentifierEnd:
+        return "IdentifierEnd"
+    case SectionStart:
+        return "SectionStart"
+    case SectionEnd:
+        return "SectionEnd"
+    default:
+        return "NOT_DEFINED"
+    }
+}
+
+// LexingErr reports a lexing failure with its source position.
+type LexingErr struct {
+    message string
+    row     int32
+    column  int32
+}
+
+func (e LexingErr) Error() string {
+    return fmt.Sprintf("%d:%d - %s", e.row, e.column, e.message)
+}
+
+// Cursor and context state for the current tokenize run.
+var row int32 = 1
+var column int32 = 1
+var previous_row int32 = -1
+var previous_col int32 = -1
+var can_have_text bool = false
+
+// makePostTextToken flushes any text accumulated before the current
+// structural token as a TextFragment, then emits a token of type ttype
+// whose content is the last tokenLen runes of the buffer, and resets
+// the buffer.
+func makePostTextToken(ttype TokenType, tokenLen int32) {
+    // FIX(review): was "len(...) - 1 > 0", which miscounts for 2-rune
+    // tokens; there is pending text only when the trimmed buffer is
+    // longer than the token itself.
+    if len(strings.Trim(string(buffer), " \n")) > int(tokenLen) {
+        textFragment := []rune{}
+        for i := 0; i < len(buffer)-int(tokenLen); i++ {
+            textFragment = append(textFragment, buffer[i])
+        }
+        tokens = append(
+            tokens,
+            Token{
+                tokenType: TextFragment,
+                content:   string(textFragment),
+                row:       previous_row,
+                column:    previous_col,
+            },
+        )
+    }
+    tokens = append(
+        tokens,
+        Token{
+            tokenType: ttype,
+            content:   string(buffer[len(buffer)-int(tokenLen):]),
+            row:       row,
+            column:    column,
+        },
+    )
+    previous_row = row
+    previous_col = column
+    buffer = []rune{}
+}
+
+// tokenize scans contents rune by rune into the package-level tokens
+// slice. Returns a LexingErr when text appears where it is not allowed.
+func tokenize(contents string) error {
+    tokens = []Token{}
+    buffer = []rune{}
+    previous_row = -1
+    previous_col = -1
+    reader := bufio.NewReader(strings.NewReader(contents))
+    for {
+        c, sz, err := reader.ReadRune()
+        // TODO previous token start and end
+        if err != nil {
+            if err == io.EOF {
+                break
+            } else {
+                log.Fatal(err)
+            }
+        }
+        _ = sz
+        if c == '\n' {
+            row += 1
+            // FIX(review): reset to 0, not 1 — the shared "column += 1"
+            // at the bottom of the loop then makes the first rune of the
+            // next line column 1 instead of 2.
+            column = 0
+        }
+        buffer = append(buffer, c)
+
+        trimmedBuffer := strings.Trim(string(buffer), " \n")
+        // FIX(review): was "> 2" with lastTwo sliced from a trimmed-length
+        // index into the untrimmed buffer, which produced a misaligned
+        // one-rune slice so "|>"/"<|" never matched reliably.
+        if len(trimmedBuffer) >= 2 && len(buffer) >= 2 {
+            lastTwo := buffer[len(buffer)-2:]
+            switch string(lastTwo) {
+            case "|>":
+                if len(trimmedBuffer)-2 > 0 && !can_have_text {
+                    return LexingErr{"Text cannot be here", previous_row, previous_col}
+                }
+                makePostTextToken(SectionStart, 2)
+                can_have_text = true
+                column += 1 // FIX(review): continue skipped the shared increment
+                continue
+            case "<|":
+                if len(trimmedBuffer)-2 > 0 && !can_have_text {
+                    return LexingErr{"Text cannot be here", previous_row, previous_col}
+                }
+                makePostTextToken(SectionEnd, 2)
+                can_have_text = false
+                column += 1 // FIX(review): continue skipped the shared increment
+                continue
+            }
+        }
+
+        switch c {
+        case ']':
+            if len(trimmedBuffer)-1 > 0 && !can_have_text {
+                return LexingErr{"Text cannot be here", row, column}
+            }
+            // FIX(review): "[" and "]" were both emitted as
+            // ElementDashStart; IdentifierStart/IdentifierEnd were
+            // declared but never used and are clearly the intended types.
+            tokens = append(
+                tokens,
+                Token{
+                    tokenType: IdentifierStart,
+                    content:   "[",
+                    row:       row,
+                    column:    column,
+                },
+            )
+            if len(buffer)-1 > 1 {
+                textFragment := []rune{}
+                trimmedStr := strings.Trim(string(buffer), " ")
+                // NOTE(review): byte-indexes the string — assumes ASCII
+                // identifiers; confirm before allowing multi-byte runes.
+                for i := 1; i < len(trimmedStr)-1; i++ {
+                    textFragment = append(textFragment, rune(trimmedStr[i]))
+                }
+                tokens = append(
+                    tokens,
+                    Token{
+                        tokenType: Identifier,
+                        content:   string(textFragment),
+                        row:       previous_row,
+                        column:    previous_col,
+                    },
+                )
+                can_have_text = true
+            }
+            tokens = append(
+                tokens,
+                Token{
+                    tokenType: IdentifierEnd,
+                    content:   "]",
+                    row:       row,
+                    column:    column,
+                },
+            )
+            previous_row = row
+            previous_col = column
+            buffer = []rune{}
+        case '+':
+            if len(trimmedBuffer)-1 > 0 && !can_have_text {
+                return LexingErr{"Text cannot be here", previous_row, previous_col}
+            }
+            makePostTextToken(ElementPlusStart, 1)
+            can_have_text = true
+        case '-':
+            // fmt.Printf("%s %d\n", trimmedBuffer, len(trimmedBuffer))
+            if len(trimmedBuffer)-1 > 0 && !can_have_text {
+                return LexingErr{"Text cannot be here", previous_row, previous_col}
+            }
+            makePostTextToken(ElementDashStart, 1)
+            can_have_text = true
+        case '>':
+            if len(trimmedBuffer)-1 > 0 && !can_have_text {
+                return LexingErr{"Text cannot be here", previous_row, previous_col}
+            }
+            makePostTextToken(QuestionEnd, 1)
+            can_have_text = false
+        }
+        column += 1
+    }
+    return nil
+}
 
 func main() {
-    fmt.Println("Hello from compiler!")
+    log.Println("Compilation started")
+
+    file, err := os.ReadFile("./input.mdem")
+    if err != nil {
+        // log.Fatal exits; the unreachable "return" that followed was removed.
+        log.Fatal("Cannot open the input file")
+    }
+    fileContents := string(file)
+
+    err = tokenize(fileContents)
+    if err != nil {
+        fmt.Printf("%s\n", err.Error())
+        return
+    }
+
+    for i := 0; i < len(tokens); i++ {
+        token := tokens[i]
+        content := token.content
+        if token.tokenType == TextFragment {
+            content = strings.Replace(strings.Trim(content, " "), "\n", "\\n", -1)
+        }
+        fmt.Printf(
+            "%s: \"%s\" %d:%d\n",
+            toString(&token.tokenType),
+            content,
+            token.row,
+            token.column,
+        )
+    }
+
+    log.Println("Compilation completed")
 }
diff --git a/src/compiler/go.mod b/src/compiler/go.mod
index 58dc732..d9afeb0 100644
--- a/src/compiler/go.mod
+++ b/src/compiler/go.mod
@@ -1,3 +1,5 @@
 module github.com/jorenchik/mdemory/src/compiler
 
 go 1.22.5
+
+require golang.org/x/text v0.16.0
diff --git a/src/compiler/go.sum b/src/compiler/go.sum
new file mode 100644
index 0000000..e85a4c4
--- /dev/null
+++ b/src/compiler/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
+golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=