Kajetan Johannes Hammerle пре 3 година
родитељ
комит
83991f8394
11 измењених фајлова са 474 додато и 430 уклоњено
  1. 15 8
      Compiler.c
  2. 1 1
      Main.c
  3. 14 6
      Test.c
  4. 0 390
      Tokenizer.c
  5. 3 1
      meson.build
  6. 33 0
      tokenizer/File.c
  7. 13 0
      tokenizer/File.h
  8. 105 0
      tokenizer/Token.c
  9. 4 24
      tokenizer/Token.h
  10. 264 0
      tokenizer/Tokenizer.c
  11. 22 0
      tokenizer/Tokenizer.h

+ 15 - 8
Compiler.c

@@ -7,7 +7,7 @@
 #include "FunctionMap.h"
 #include "Operation.h"
 #include "StringIntMap.h"
-#include "Tokenizer.h"
+#include "tokenizer/Tokenizer.h"
 
 #define ERROR_LENGTH 256
 #define RETURN_BUFFER 16
@@ -48,7 +48,7 @@ static int cAddVar(const char* var) {
 }
 
 static void cUnexpectedToken(Token t) {
-    cError("unexpected token on line %d: %s", line, tGetTokenName(t));
+    cError("unexpected token on line %d: %s", line, tGetName(t));
 }
 
 static void cAddOperation(Operation token) {
@@ -100,7 +100,8 @@ static Token cReadTokenAndLine() {
 static void cConsumeToken(Token wanted) {
     Token t = cReadTokenAndLine();
     if(wanted != t) {
-        cError("unexpected token on line %d: expected '%s' got '%s'", line, tGetTokenName(wanted), tGetTokenName(t));
+        cError("unexpected token on line %d: expected '%s' got '%s'", line,
+               tGetName(wanted), tGetName(t));
     }
 }
 
@@ -171,12 +172,14 @@ static void cCallFunction(const char* literal, bool noReturn) {
     Function* f = fmSearch(&functions, literal, arguments);
     cAddOperation(OP_GOSUB);
     if(f == NULL) {
-        fmEnqueue(&functions, literal, arguments, line, cReserveInt(), noReturn);
+        fmEnqueue(&functions, literal, arguments, line, cReserveInt(),
+                  noReturn);
         cAddInt(arguments);
         cAddOperation(OP_NOTHING);
     } else {
         if(!noReturn && !f->returns) {
-            cError("function '%s' needs a return value on line %d", f->name, line);
+            cError("function '%s' needs a return value on line %d", f->name,
+                   line);
         }
         cAddInt(f->address);
         cAddInt(arguments);
@@ -484,7 +487,9 @@ static void cConsumeBody() {
     while(!cConsumeTokenIf(T_CLOSE_CURVED_BRACKET)) {
         Token t = cReadTokenAndLine();
         if(t == T_END) {
-            cError("unexpected end of file: non closed curved bracket on line %d", oldLine);
+            cError(
+                "unexpected end of file: non closed curved bracket on line %d",
+                oldLine);
         }
         cLine(t);
     }
@@ -708,11 +713,13 @@ static void cForEachLine() {
 
 static void cLinkQueuedFunctions() {
     for(int i = 0; i < functions.queueEntries; i++) {
-        Function* f = fmSearch(&functions, functions.queue[i].name, functions.queue[i].arguments);
+        Function* f = fmSearch(&functions, functions.queue[i].name,
+                               functions.queue[i].arguments);
         if(f == NULL) {
             cError("unknown function on line %d", functions.queue[i].line);
         } else if(!functions.queue[i].noReturn && !f->returns) {
-            cError("function '%s' needs a return value on line %d", f->name, functions.queue[i].line);
+            cError("function '%s' needs a return value on line %d", f->name,
+                   functions.queue[i].line);
         }
         cSetInt(functions.queue[i].reserved, f->address);
         if(functions.queue[i].noReturn && f->returns) {

+ 1 - 1
Main.c

@@ -5,7 +5,7 @@
 #include "Compiler.h"
 #include "Script.h"
 #include "Test.h"
-#include "Tokenizer.h"
+#include "tokenizer/Tokenizer.h"
 
 long getNanos() {
     struct timespec time;

+ 14 - 6
Test.c

@@ -8,7 +8,7 @@
 
 #include "Compiler.h"
 #include "Script.h"
-#include "Tokenizer.h"
+#include "tokenizer/Tokenizer.h"
 
 static int doneTests = 0;
 static int allTests = 0;
@@ -23,7 +23,8 @@ static void tsPrintToBuffer(const char* format, ...) {
     va_list args;
     va_start(args, format);
     int leftBytes = TEST_BUFFER_LENGTH - testBufferIndex;
-    testBufferIndex += vsnprintf(testBuffer + testBufferIndex, leftBytes, format, args);
+    testBufferIndex +=
+        vsnprintf(testBuffer + testBufferIndex, leftBytes, format, args);
     if(testBufferIndex > TEST_BUFFER_LENGTH) {
         testBufferIndex = TEST_BUFFER_LENGTH;
     }
@@ -36,10 +37,16 @@ static bool tsPrinter(Object* o) {
     }
     switch(o->type) {
         case OT_INT: tsPrintToBuffer("%d\n", o->data.intValue); return false;
-        case OT_FLOAT: tsPrintToBuffer("%.2f\n", o->data.floatValue); return false;
-        case OT_CONST_STRING: tsPrintToBuffer("%s\n", o->data.stringValue); return false;
+        case OT_FLOAT:
+            tsPrintToBuffer("%.2f\n", o->data.floatValue);
+            return false;
+        case OT_CONST_STRING:
+            tsPrintToBuffer("%s\n", o->data.stringValue);
+            return false;
         case OT_NULL: tsPrintToBuffer("null\n"); return false;
-        case OT_BOOL: tsPrintToBuffer(o->data.intValue ? "true\n" : "false\n"); return false;
+        case OT_BOOL:
+            tsPrintToBuffer(o->data.intValue ? "true\n" : "false\n");
+            return false;
         case OT_ARRAY: tsPrintToBuffer("array\n"); return false;
         default: return true;
     }
@@ -69,7 +76,8 @@ static bool tsCompareResults(FILE* file) {
         char a = fgetc(file);
         char b = testBuffer[i];
         if(a != b) {
-            printf("error in '%s': expected %c, got:\n%s", path, a, testBuffer + i);
+            printf("error in '%s': expected %c, got:\n%s", path, a,
+                   testBuffer + i);
             return true;
         }
     }

+ 0 - 390
Tokenizer.c

@@ -1,390 +0,0 @@
-#include <limits.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "Tokenizer.h"
-#include "Utils.h"
-
-#define TOKEN_BUFFER_LENGTH (1024 * 1024)
-#define ERROR_LENGTH 256
-
-static char tokenBuffer[TOKEN_BUFFER_LENGTH];
-static int writeIndex = 0;
-static int readIndex = 0;
-static int16 line = 1;
-static FILE* file = NULL;
-static char error[ERROR_LENGTH] = {'\0'};
-
-typedef struct Literal {
-    const char* name;
-    Token token;
-} Literal;
-
-Literal LITERALS[] = {{"print", T_PRINT},       {"null", T_NULL},     {"true", T_TRUE},   {"false", T_FALSE},
-                      {"function", T_FUNCTION}, {"return", T_RETURN}, {"if", T_IF},       {"else", T_ELSE},
-                      {"while", T_WHILE},       {"for", T_FOR},       {"break", T_BREAK}, {"continue", T_CONTINUE},
-                      {"array", T_ARRAY}};
-const int LITERAL_AMOUNT = sizeof(LITERALS) / sizeof(Literal);
-
-static void tError(const char* format, ...) {
-    va_list args;
-    va_start(args, format);
-    vsnprintf(error, ERROR_LENGTH, format, args);
-    va_end(args);
-}
-
-static bool tAdd(const void* data, int length) {
-    if(writeIndex + length > TOKEN_BUFFER_LENGTH) {
-        tError("the token buffer is too small");
-        return false;
-    }
-    memcpy(tokenBuffer + writeIndex, data, length);
-    writeIndex += length;
-    return true;
-}
-
-static bool tAddToken(Token token) {
-    unsigned char c = token;
-    return tAdd(&c, 1) && tAdd(&line, sizeof(line));
-}
-
-static bool tReadTokens(void* dest, int length) {
-    if(readIndex + length > writeIndex) {
-        return false;
-    }
-    memcpy(dest, tokenBuffer + readIndex, length);
-    readIndex += length;
-    return true;
-}
-
-static int tRead() {
-    return fgetc(file);
-}
-
-static int tPeek() {
-    int c = tRead();
-    ungetc(c, file);
-    return c;
-}
-
-static int tReadIf(int c) {
-    if(tPeek() == c) {
-        tRead();
-        return true;
-    }
-    return false;
-}
-
-static bool tParseLiteral(int c) {
-    int index = 1;
-    char buffer[64];
-    buffer[0] = c;
-    while(isLetter(tPeek())) {
-        if(index >= 63) {
-            tError("literal is too long");
-            return false;
-        }
-        buffer[index++] = tRead();
-    }
-    buffer[index] = '\0';
-    for(int i = 0; i < LITERAL_AMOUNT; i++) {
-        if(strcmp(buffer, LITERALS[i].name) == 0) {
-            return tAddToken(LITERALS[i].token);
-        }
-    }
-    return tAddToken(T_LITERAL) && tAdd(buffer, index + 1);
-}
-
-static bool tParseNumber(int c) {
-    int index = 1;
-    char buffer[64];
-    buffer[0] = c;
-    bool point = false;
-    while(true) {
-        int c = tPeek();
-        if(c == '.') {
-            point = true;
-        } else if(!isNumber(c)) {
-            break;
-        } else if(index >= 63) {
-            tError("number is too long");
-            return false;
-        }
-        buffer[index++] = tRead();
-    }
-    buffer[index] = '\0';
-    if(point) {
-        char* end = NULL;
-        float f = strtof(buffer, &end);
-        if(end[0] != '\0') {
-            tError("invalid float on line %d", line);
-            return false;
-        }
-        return tAddToken(T_FLOAT) && tAdd(&f, sizeof(float));
-    } else {
-        char* end = NULL;
-        long l = strtol(buffer, &end, 10);
-        if(end[0] != '\0' || l > INT_MAX) {
-            tError("invalid int on line %d", line);
-            return false;
-        }
-        int i = l;
-        return tAddToken(T_INT) && tAdd(&i, sizeof(int));
-    }
-}
-
-static bool tAddString() {
-    if(!tAddToken(T_TEXT)) {
-        return false;
-    }
-    while(true) {
-        int c = tRead();
-        if(c == '"') {
-            break;
-        } else if(c == '\\') {
-            switch(tRead()) {
-                case '"': c = '"'; break;
-                case '\\': c = '\\'; break;
-                default: tError("unknown escaped character at line %d", line); return false;
-            }
-        } else if(c == EOF) {
-            tError("unclosed string starting at line %d", line);
-            return false;
-        }
-        if(!tAdd(&c, 1)) {
-            return false;
-        }
-    }
-    char c = '\0';
-    return tAdd(&c, 1);
-}
-
-static bool tAddTokenChecked(int c, Token tc, Token te, Token t) {
-    if(tReadIf(c)) {
-        return tAddToken(tc);
-    } else if(tReadIf('=')) {
-        return tAddToken(te);
-    }
-    return tAddToken(t);
-}
-
-static bool tAddLongTokenChecked(int c, Token tce, Token tc, Token te, Token t) {
-    if(tReadIf(c)) {
-        if(tReadIf('=')) {
-            return tAddToken(tce);
-        } else {
-            return tAddToken(tc);
-        }
-    } else if(tReadIf('=')) {
-        return tAddToken(te);
-    }
-    return tAddToken(t);
-}
-
-static bool tParseToken() {
-    int c = tRead();
-    if(c == EOF) {
-        return false;
-    } else if(isLetter(c)) {
-        return tParseLiteral(c);
-    } else if(isNumber(c)) {
-        return tParseNumber(c);
-    }
-    switch(c) {
-        case ' ': return true;
-        case '\n': line++; return true;
-        case '+': return tAddTokenChecked('+', T_INCREMENT, T_ADD_SET, T_ADD);
-        case '-': return tAddTokenChecked('-', T_DECREMENT, T_SUB_SET, T_SUB);
-        case '*': return tReadIf('=') ? tAddToken(T_MUL_SET) : tAddToken(T_MUL);
-        case '/': return tReadIf('=') ? tAddToken(T_DIV_SET) : tAddToken(T_DIV);
-        case '%': return tReadIf('=') ? tAddToken(T_MOD_SET) : tAddToken(T_MOD);
-        case '<': return tAddLongTokenChecked('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL, T_LESS);
-        case '>': return tAddLongTokenChecked('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT, T_GREATER_EQUAL, T_GREATER);
-        case '=': return tReadIf('=') ? tAddToken(T_EQUAL) : tAddToken(T_SET);
-        case '!': return tReadIf('=') ? tAddToken(T_NOT_EQUAL) : tAddToken(T_NOT);
-        case '&': return tAddTokenChecked('&', T_AND, T_BIT_AND_SET, T_BIT_AND);
-        case '|': return tAddTokenChecked('|', T_OR, T_BIT_OR_SET, T_BIT_OR);
-        case '~': return tAddToken(T_BIT_NOT);
-        case '^': return tReadIf('=') ? tAddToken(T_BIT_XOR_SET) : tAddToken(T_BIT_XOR);
-        case ',': return tAddToken(T_COMMA);
-        case ';': return tAddToken(T_SEMICOLON);
-        case '(': return tAddToken(T_OPEN_BRACKET);
-        case ')': return tAddToken(T_CLOSE_BRACKET);
-        case '{': return tAddToken(T_OPEN_CURVED_BRACKET);
-        case '}': return tAddToken(T_CLOSE_CURVED_BRACKET);
-        case '"': return tAddString();
-        case '.': return tAddToken(T_POINT);
-        case '[': return tAddToken(T_OPEN_SQUARE_BRACKET);
-        case ']': return tAddToken(T_CLOSE_SQUARE_BRACKET);
-    }
-    tError("unknown character on line %d: %c", line, c);
-    return false;
-}
-
-static void tParseFile() {
-    readIndex = 0;
-    writeIndex = 0;
-    line = 1;
-    error[0] = '\0';
-    while(tParseToken()) {
-    }
-}
-
-bool tTokenize(const char* path) {
-    file = fopen(path, "r");
-    if(file == NULL) {
-        tError("cannot read file '%s'", path);
-        return true;
-    }
-    tParseFile();
-    fclose(file);
-    return error[0] != '\0';
-}
-
-const char* tGetError() {
-    return error;
-}
-
-void tResetReader() {
-    readIndex = 0;
-}
-
-Token tPeekToken() {
-    if(readIndex >= writeIndex) {
-        return T_END;
-    }
-    return tokenBuffer[readIndex];
-}
-
-Token tReadToken() {
-    if(readIndex >= writeIndex) {
-        return T_END;
-    }
-    return tokenBuffer[readIndex++];
-}
-
-bool tReadInt(int* i) {
-    if(tReadTokens(i, sizeof(int))) {
-        return true;
-    }
-    return false;
-}
-
-bool tReadInt16(int16* i) {
-    if(tReadTokens(i, sizeof(int16))) {
-        return true;
-    }
-    return false;
-}
-
-bool tReadFloat(float* f) {
-    if(tReadTokens(f, sizeof(float))) {
-        return true;
-    }
-    return false;
-}
-
-const char* tReadString(int* length) {
-    *length = 0;
-    const char* s = tokenBuffer + readIndex;
-    while(readIndex <= writeIndex) {
-        (*length)++;
-        if(tokenBuffer[readIndex++] == '\0') {
-            return s;
-        }
-    }
-    return NULL;
-}
-
-const char* tGetTokenName(Token token) {
-    switch(token) {
-        case T_INT: return "int";
-        case T_FLOAT: return "float";
-        case T_TEXT: return "text";
-        case T_NULL: return "null";
-        case T_TRUE: return "true";
-        case T_FALSE: return "false";
-        case T_ADD: return "+";
-        case T_SUB: return "-";
-        case T_MUL: return "*";
-        case T_DIV: return "/";
-        case T_MOD: return "%";
-        case T_LESS: return "<";
-        case T_LESS_EQUAL: return "<=";
-        case T_GREATER: return ">";
-        case T_GREATER_EQUAL: return ">=";
-        case T_EQUAL: return "==";
-        case T_NOT_EQUAL: return "!=";
-        case T_NOT: return "!";
-        case T_AND: return "&&";
-        case T_OR: return "||";
-        case T_BIT_NOT: return "~";
-        case T_BIT_AND: return "&";
-        case T_BIT_OR: return "|";
-        case T_BIT_XOR: return "^";
-        case T_LEFT_SHIFT: return "<<";
-        case T_RIGHT_SHIFT: return ">>";
-        case T_SET: return "=";
-        case T_ADD_SET: return "+=";
-        case T_SUB_SET: return "-=";
-        case T_MUL_SET: return "*=";
-        case T_DIV_SET: return "/=";
-        case T_MOD_SET: return "%=";
-        case T_BIT_AND_SET: return "&=";
-        case T_BIT_OR_SET: return "|=";
-        case T_BIT_XOR_SET: return "^=";
-        case T_LEFT_SHIFT_SET: return "<<=";
-        case T_RIGHT_SHIFT_SET: return ">>=";
-        case T_INCREMENT: return "++";
-        case T_DECREMENT: return "--";
-        case T_LITERAL: return "literal";
-        case T_PRINT: return "print";
-        case T_IF: return "if";
-        case T_ELSE: return "else";
-        case T_WHILE: return "while";
-        case T_FOR: return "for";
-        case T_BREAK: return "break";
-        case T_CONTINUE: return "continue";
-        case T_FUNCTION: return "function";
-        case T_RETURN: return "return";
-        case T_COMMA: return ",";
-        case T_SEMICOLON: return ";";
-        case T_OPEN_BRACKET: return "(";
-        case T_CLOSE_BRACKET: return ")";
-        case T_OPEN_CURVED_BRACKET: return "{";
-        case T_CLOSE_CURVED_BRACKET: return "}";
-        case T_ARRAY: return "array";
-        case T_POINT: return ".";
-        case T_OPEN_SQUARE_BRACKET: return "[";
-        case T_CLOSE_SQUARE_BRACKET: return "]";
-        case T_END: return "end";
-    }
-    return "Unknown";
-}
-
-int tGetMarker() {
-    return readIndex;
-}
-
-void tResetToMarker(int marker) {
-    readIndex = marker;
-}
-
-void tPrint() {
-    puts("----------------");
-    while(true) {
-        Token t = tReadToken();
-        if(t == T_END) {
-            break;
-        }
-        int line = 0;
-        tReadInt(&line);
-        printf("%d: %s\n", line, tGetTokenName(t));
-        if(t == T_INT) {
-            tReadInt(&line);
-        }
-    }
-    tResetReader();
-}

+ 3 - 1
meson.build

@@ -2,7 +2,9 @@ project('lonely tiger', 'c')
 
 src = [
     'Main.c', 
-    'Tokenizer.c', 
+    'tokenizer/Tokenizer.c', 
+    'tokenizer/Token.c', 
+    'tokenizer/File.c', 
     'Compiler.c', 
     'Utils.c', 
     'Script.c', 

+ 33 - 0
tokenizer/File.c

@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "tokenizer/File.h"
+
+// Stream shared by all f* functions in this file; assigned by fOpen().
+static FILE* file = NULL;
+
+/**
+ * Opens the file at `path` in text read mode and stores the stream for the
+ * other f* functions.
+ *
+ * @param path path passed unchanged to fopen()
+ * @return true when the file could NOT be opened, false on success
+ */
+bool fOpen(const char* path) {
+    FILE* opened = fopen(path, "r");
+    file = opened;
+    if(opened == NULL) {
+        return true;
+    }
+    return false;
+}
+
+/**
+ * Closes the stream stored by fOpen().
+ *
+ * Does nothing when no stream is stored, so calling it after a failed
+ * fOpen() or calling it twice is harmless; passing NULL to fclose() would be
+ * undefined behavior.
+ */
+void fClose() {
+    if(file == NULL) {
+        return;
+    }
+    fclose(file);
+    // drop the handle so a second fClose() cannot close the stream again
+    file = NULL;
+}
+
+/**
+ * Consumes the next character of the stored stream.
+ *
+ * @return the value returned by fgetc(): the character, or EOF
+ */
+int fRead() {
+    return fgetc(file);
+}
+
+/**
+ * Returns the next character of the stored stream without consuming it.
+ *
+ * The character is read with fRead() and handed back with ungetc().
+ *
+ * @return the upcoming character, or EOF when fRead() reports EOF
+ */
+int fPeek() {
+    int next = fRead();
+    ungetc(next, file);
+    return next;
+}
+
+/**
+ * Consumes the next character only when it equals `c`.
+ *
+ * @param c the character to look for
+ * @return true when the character matched and was consumed, false otherwise
+ */
+bool fReadIf(int c) {
+    if(fPeek() != c) {
+        return false;
+    }
+    fRead();
+    return true;
+}

+ 13 - 0
tokenizer/File.h

@@ -0,0 +1,13 @@
+#ifndef FILE_H
+#define FILE_H
+
+#include <stdbool.h>
+
+// Opens `path` for reading; returns true on FAILURE, false on success.
+bool fOpen(const char* path);
+// Closes the stream opened by fOpen().
+void fClose();
+
+// Consumes and returns the next character (EOF at the end of the input).
+int fRead();
+// Returns the next character without consuming it.
+int fPeek();
+// Consumes the next character only if it equals `c`; returns whether it did.
+bool fReadIf(int c);
+
+#endif

+ 105 - 0
tokenizer/Token.c

@@ -0,0 +1,105 @@
+#include <stdbool.h>
+#include <string.h>
+
+#include "tokenizer/Token.h"
+
+/**
+ * Maps a token to a constant, printable name (operators are spelled as they
+ * appear in source code, e.g. "<<=").
+ *
+ * @param token the token to describe
+ * @return the name, or "unknown" for a value without an entry in the table
+ */
+const char* tGetName(Token token) {
+    // indexed by token value; entries that are not listed stay NULL
+    static const char* const NAMES[] = {
+        [T_INT] = "int",
+        [T_FLOAT] = "float",
+        [T_TEXT] = "text",
+        [T_NULL] = "null",
+        [T_TRUE] = "true",
+        [T_FALSE] = "false",
+        [T_ADD] = "+",
+        [T_SUB] = "-",
+        [T_MUL] = "*",
+        [T_DIV] = "/",
+        [T_MOD] = "%",
+        [T_LESS] = "<",
+        [T_LESS_EQUAL] = "<=",
+        [T_GREATER] = ">",
+        [T_GREATER_EQUAL] = ">=",
+        [T_EQUAL] = "==",
+        [T_NOT_EQUAL] = "!=",
+        [T_NOT] = "!",
+        [T_AND] = "&&",
+        [T_OR] = "||",
+        [T_BIT_NOT] = "~",
+        [T_BIT_AND] = "&",
+        [T_BIT_OR] = "|",
+        [T_BIT_XOR] = "^",
+        [T_LEFT_SHIFT] = "<<",
+        [T_RIGHT_SHIFT] = ">>",
+        [T_SET] = "=",
+        [T_ADD_SET] = "+=",
+        [T_SUB_SET] = "-=",
+        [T_MUL_SET] = "*=",
+        [T_DIV_SET] = "/=",
+        [T_MOD_SET] = "%=",
+        [T_BIT_AND_SET] = "&=",
+        [T_BIT_OR_SET] = "|=",
+        [T_BIT_XOR_SET] = "^=",
+        [T_LEFT_SHIFT_SET] = "<<=",
+        [T_RIGHT_SHIFT_SET] = ">>=",
+        [T_INCREMENT] = "++",
+        [T_DECREMENT] = "--",
+        [T_LITERAL] = "literal",
+        [T_PRINT] = "print",
+        [T_IF] = "if",
+        [T_ELSE] = "else",
+        [T_WHILE] = "while",
+        [T_FOR] = "for",
+        [T_BREAK] = "break",
+        [T_CONTINUE] = "continue",
+        [T_FUNCTION] = "function",
+        [T_RETURN] = "return",
+        [T_COMMA] = ",",
+        [T_SEMICOLON] = ";",
+        [T_OPEN_BRACKET] = "(",
+        [T_CLOSE_BRACKET] = ")",
+        [T_OPEN_CURVED_BRACKET] = "{",
+        [T_CLOSE_CURVED_BRACKET] = "}",
+        [T_ARRAY] = "array",
+        [T_POINT] = ".",
+        [T_OPEN_SQUARE_BRACKET] = "[",
+        [T_CLOSE_SQUARE_BRACKET] = "]",
+        [T_END] = "end",
+    };
+    const size_t count = sizeof(NAMES) / sizeof(NAMES[0]);
+    // the unsigned compare also rejects negative values
+    if((size_t)token < count && NAMES[token] != NULL) {
+        return NAMES[token];
+    }
+    return "unknown";
+}
+
+/**
+ * Tests two NUL-terminated strings for equality.
+ *
+ * @return true when strcmp() reports no difference
+ */
+static bool tEqual(const char* a, const char* b) {
+    int difference = strcmp(a, b);
+    return difference == 0;
+}
+
+/**
+ * Looks up the keyword token for a word.
+ *
+ * @param name NUL-terminated word to look up
+ * @return the keyword token, or T_END when `name` is not a keyword
+ */
+Token tFromName(const char* name) {
+    static const struct {
+        const char* word;
+        Token token;
+    } KEYWORDS[] = {
+        {"print", T_PRINT},       {"null", T_NULL},
+        {"true", T_TRUE},         {"false", T_FALSE},
+        {"function", T_FUNCTION}, {"return", T_RETURN},
+        {"if", T_IF},             {"else", T_ELSE},
+        {"while", T_WHILE},       {"for", T_FOR},
+        {"break", T_BREAK},       {"continue", T_CONTINUE},
+        {"array", T_ARRAY},
+    };
+    const int amount = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);
+    for(int i = 0; i < amount; i++) {
+        if(tEqual(name, KEYWORDS[i].word)) {
+            return KEYWORDS[i].token;
+        }
+    }
+    // T_END doubles as the "no keyword" marker
+    return T_END;
+}

+ 4 - 24
Tokenizer.h → tokenizer/Token.h

@@ -1,8 +1,5 @@
-#ifndef TOKENIZER_H
-#define TOKENIZER_H
-
-#include <stdbool.h>
-#include <stdint.h>
+#ifndef TOKEN_H
+#define TOKEN_H
 
 typedef enum Token {
     T_INT,
@@ -67,24 +64,7 @@ typedef enum Token {
     T_END
 } Token;
 
-typedef int16_t int16;
-
-bool tTokenize(const char* path);
-const char* tGetError();
-
-void tResetReader();
-Token tPeekToken();
-Token tReadToken();
-bool tReadInt(int* i);
-bool tReadInt16(int16* i);
-bool tReadFloat(float* f);
-const char* tReadString(int* length);
-
-int tGetMarker();
-void tResetToMarker(int marker);
-
-const char* tGetTokenName(Token token);
-
-void tPrint();
+const char* tGetName(Token token);
+Token tFromName(const char* name);
 
 #endif

+ 264 - 0
tokenizer/Tokenizer.c

@@ -0,0 +1,264 @@
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Utils.h"
+#include "tokenizer/File.h"
+#include "tokenizer/Tokenizer.h"
+
+// Capacity of the token buffer in bytes.
+#define TOKEN_BUFFER_LENGTH (1024 * 1024)
+// Capacity of the error message buffer, including the terminator.
+#define ERROR_LENGTH 256
+
+// Serialized tokens: tAddToken() writes one token byte followed by the
+// int16 line number; some tokens are followed by a payload (int, float or
+// NUL-terminated text).
+static char tokenBuffer[TOKEN_BUFFER_LENGTH];
+// Index of the next free byte in tokenBuffer.
+static int writeIndex = 0;
+// Index of the next unread byte in tokenBuffer.
+static int readIndex = 0;
+// Source line currently being tokenized, starting at 1.
+static int16 line = 1;
+// Last error message; an empty string means no error was reported.
+static char error[ERROR_LENGTH] = {'\0'};
+
+/**
+ * Formats a printf-style message into the error buffer, replacing any
+ * previous message. Overlong messages are truncated by vsnprintf().
+ *
+ * @param format printf-style format string, followed by its arguments
+ */
+static void tError(const char* format, ...) {
+    va_list arguments;
+    va_start(arguments, format);
+    vsnprintf(error, sizeof(error), format, arguments);
+    va_end(arguments);
+}
+
+/**
+ * Appends `length` raw bytes to the token buffer.
+ *
+ * @param data bytes to copy
+ * @param length number of bytes to copy
+ * @return true on success; false (with an error set) when the bytes do not
+ *         fit into the buffer
+ */
+static bool tAdd(const void* data, int length) {
+    int end = writeIndex + length;
+    if(end <= TOKEN_BUFFER_LENGTH) {
+        memcpy(tokenBuffer + writeIndex, data, length);
+        writeIndex = end;
+        return true;
+    }
+    tError("the token buffer is too small");
+    return false;
+}
+
+/**
+ * Appends a token header: the token as a single byte followed by the
+ * current line number.
+ *
+ * @param token the token to store
+ * @return true when both parts were stored
+ */
+static bool tAddToken(Token token) {
+    unsigned char id = token;
+    if(!tAdd(&id, 1)) {
+        return false;
+    }
+    return tAdd(&line, sizeof(line));
+}
+
+/**
+ * Copies the next `length` bytes of the token buffer into `dest` and
+ * advances the read position.
+ *
+ * @param dest destination with room for `length` bytes
+ * @param length number of bytes to read
+ * @return false (nothing is read) when fewer than `length` bytes are left
+ */
+static bool tReadTokens(void* dest, int length) {
+    int end = readIndex + length;
+    if(end <= writeIndex) {
+        memcpy(dest, tokenBuffer + readIndex, length);
+        readIndex = end;
+        return true;
+    }
+    return false;
+}
+
+/**
+ * Reads a word starting with the already consumed character `c` and stores
+ * it either as its keyword token or as T_LITERAL followed by the
+ * NUL-terminated word.
+ *
+ * @param c first character of the word
+ * @return false when the word exceeds 63 characters or cannot be stored
+ */
+static bool tParseLiteral(int c) {
+    char name[64];
+    int length = 0;
+    name[length++] = c;
+    while(isLetter(fPeek())) {
+        // keep the last slot free for the terminator
+        if(length >= 63) {
+            tError("literal is too long");
+            return false;
+        }
+        name[length++] = fRead();
+    }
+    name[length] = '\0';
+    Token keyword = tFromName(name);
+    if(keyword == T_END) {
+        // not a keyword: store the word itself including its terminator
+        return tAddToken(T_LITERAL) && tAdd(name, length + 1);
+    }
+    return tAddToken(keyword);
+}
+
+/**
+ * Reads a number starting with the already consumed digit `c`.
+ *
+ * Digits and '.' are collected; a number containing a '.' is stored as
+ * T_FLOAT followed by a float, anything else as T_INT followed by an int.
+ *
+ * @param c first digit of the number
+ * @return false (with an error set) when the number is longer than 63
+ *         characters, is malformed, does not fit into an int or cannot be
+ *         stored
+ */
+static bool tParseNumber(int c) {
+    char buffer[64];
+    int index = 0;
+    buffer[index++] = c;
+    bool point = false;
+    while(true) {
+        int next = fPeek();
+        if(next == '.') {
+            point = true;
+        } else if(!isNumber(next)) {
+            break;
+        }
+        // applies to digits AND points: a long run of '.' must not write
+        // past the end of the buffer
+        if(index >= 63) {
+            tError("number is too long");
+            return false;
+        }
+        buffer[index++] = fRead();
+    }
+    buffer[index] = '\0';
+    char* end = NULL;
+    if(point) {
+        float f = strtof(buffer, &end);
+        // leftover characters mean e.g. a second point
+        if(end[0] != '\0') {
+            tError("invalid float on line %d", line);
+            return false;
+        }
+        return tAddToken(T_FLOAT) && tAdd(&f, sizeof(float));
+    }
+    // long long is wider than int on every platform, so the range check
+    // also works where long has the same size as int
+    long long value = strtoll(buffer, &end, 10);
+    if(end[0] != '\0' || value > INT_MAX) {
+        tError("invalid int on line %d", line);
+        return false;
+    }
+    int i = (int)value;
+    return tAddToken(T_INT) && tAdd(&i, sizeof(int));
+}
+
+/**
+ * Reads a string literal whose opening quote was already consumed and
+ * stores it as T_TEXT followed by the NUL-terminated text.
+ *
+ * Supported escapes are \" and \\. Newlines inside the string advance the
+ * line counter so later tokens keep correct line numbers.
+ *
+ * @return false (with an error set) on an unknown escape, a missing closing
+ *         quote or a full token buffer
+ */
+static bool tAddString() {
+    if(!tAddToken(T_TEXT)) {
+        return false;
+    }
+    // remembered for the error message, `line` may advance inside the string
+    int startLine = line;
+    while(true) {
+        int c = fRead();
+        if(c == '"') {
+            break;
+        } else if(c == EOF) {
+            tError("unclosed string starting at line %d", startLine);
+            return false;
+        } else if(c == '\\') {
+            switch(fRead()) {
+                case '"': c = '"'; break;
+                case '\\': c = '\\'; break;
+                default:
+                    tError("unknown escaped character at line %d", line);
+                    return false;
+            }
+        } else if(c == '\n') {
+            line++;
+        }
+        // copy through a char: taking the first byte of the int would
+        // store the wrong byte on big-endian machines
+        char ch = (char)c;
+        if(!tAdd(&ch, 1)) {
+            return false;
+        }
+    }
+    char terminator = '\0';
+    return tAdd(&terminator, 1);
+}
+
+/**
+ * Stores `te` when the next character is '=' (which is consumed), otherwise
+ * stores `t`.
+ */
+static bool tAddToken2(Token te, Token t) {
+    if(fReadIf('=')) {
+        return tAddToken(te);
+    }
+    return tAddToken(t);
+}
+
+/**
+ * Stores `tc` when the next character is `c` (which is consumed); otherwise
+ * stores `te` when it is '=' and `t` in every other case.
+ */
+static bool tAddToken3(int c, Token tc, Token te, Token t) {
+    if(fReadIf(c)) {
+        return tAddToken(tc);
+    }
+    return tAddToken2(te, t);
+}
+
+/**
+ * Handles operators with up to three characters: after a consumed `c` the
+ * choice is between `tce` (followed by '=') and `tc`, without it between
+ * `te` (followed by '=') and `t`.
+ */
+static bool tAddToken4(int c, Token tce, Token tc, Token te, Token t) {
+    if(fReadIf(c)) {
+        return tAddToken2(tce, tc);
+    }
+    return tAddToken2(te, t);
+}
+
+/**
+ * Consumes one lexical element from the input and stores its token.
+ *
+ * Spaces are skipped and newlines advance the line counter without storing
+ * anything. A character that matches no rule sets an error.
+ *
+ * @return true when tokenizing can continue; false at the end of the input
+ *         or after an error
+ */
+static bool tParseToken() {
+    int c = fRead();
+    if(c == EOF) {
+        return false;
+    }
+    if(isLetter(c)) {
+        return tParseLiteral(c);
+    }
+    if(isNumber(c)) {
+        return tParseNumber(c);
+    }
+    switch(c) {
+        case ' ': return true;
+        case '\n': line++; return true;
+        case '+': return tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD);
+        case '-': return tAddToken3('-', T_DECREMENT, T_SUB_SET, T_SUB);
+        case '*': return tAddToken2(T_MUL_SET, T_MUL);
+        case '/': return tAddToken2(T_DIV_SET, T_DIV);
+        case '%': return tAddToken2(T_MOD_SET, T_MOD);
+        case '<':
+            return tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL,
+                              T_LESS);
+        case '>':
+            return tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT,
+                              T_GREATER_EQUAL, T_GREATER);
+        case '=': return tAddToken2(T_EQUAL, T_SET);
+        case '!': return tAddToken2(T_NOT_EQUAL, T_NOT);
+        case '&': return tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND);
+        case '|': return tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR);
+        case '^': return tAddToken2(T_BIT_XOR_SET, T_BIT_XOR);
+        case '~': return tAddToken(T_BIT_NOT);
+        case ',': return tAddToken(T_COMMA);
+        case ';': return tAddToken(T_SEMICOLON);
+        case '.': return tAddToken(T_POINT);
+        case '(': return tAddToken(T_OPEN_BRACKET);
+        case ')': return tAddToken(T_CLOSE_BRACKET);
+        case '{': return tAddToken(T_OPEN_CURVED_BRACKET);
+        case '}': return tAddToken(T_CLOSE_CURVED_BRACKET);
+        case '[': return tAddToken(T_OPEN_SQUARE_BRACKET);
+        case ']': return tAddToken(T_CLOSE_SQUARE_BRACKET);
+        case '"': return tAddString();
+        default: break;
+    }
+    // no rule matched
+    tError("unknown character on line %d: %c", line, c);
+    return false;
+}
+
+/**
+ * Resets the tokenizer state (buffer positions, line counter, error) and
+ * tokenizes the opened file until tParseToken() stops.
+ */
+static void tParseFile() {
+    error[0] = '\0';
+    line = 1;
+    writeIndex = 0;
+    readIndex = 0;
+    bool more;
+    do {
+        more = tParseToken();
+    } while(more);
+}
+
+/**
+ * Tokenizes the file at `path` into the internal token buffer.
+ *
+ * @param path file to tokenize
+ * @return true when an error occurred (see tGetError()), false on success
+ */
+bool tTokenize(const char* path) {
+    bool failedToOpen = fOpen(path);
+    if(failedToOpen) {
+        tError("cannot read file '%s'", path);
+        return true;
+    }
+    tParseFile();
+    fClose();
+    // a non-empty message means one of the parse steps failed
+    bool hasError = error[0] != '\0';
+    return hasError;
+}
+
+/**
+ * Returns the message written by the last tError() call; the string is
+ * empty when tokenizing reported no error.
+ */
+const char* tGetError() {
+    return error;
+}
+
+/**
+ * Moves the read position back to the start of the token buffer.
+ */
+void tResetReader() {
+    readIndex = 0;
+}
+
+/**
+ * Returns the byte at the read position as a token without advancing.
+ *
+ * @return the token, or T_END when everything was read
+ */
+Token tPeekToken() {
+    if(readIndex < writeIndex) {
+        return tokenBuffer[readIndex];
+    }
+    return T_END;
+}
+
+/**
+ * Returns the byte at the read position as a token and advances by one
+ * byte.
+ *
+ * @return the token, or T_END when everything was read
+ */
+Token tReadToken() {
+    if(readIndex < writeIndex) {
+        Token t = tokenBuffer[readIndex];
+        readIndex++;
+        return t;
+    }
+    return T_END;
+}
+
+/**
+ * Reads an int from the token buffer into `*i`.
+ *
+ * @return false when not enough bytes are left; `*i` is untouched then
+ */
+bool tReadInt(int* i) {
+    return tReadTokens(i, sizeof(*i));
+}
+
+/**
+ * Reads an int16 from the token buffer into `*i`.
+ *
+ * @return false when not enough bytes are left; `*i` is untouched then
+ */
+bool tReadInt16(int16* i) {
+    return tReadTokens(i, sizeof(*i));
+}
+
+/**
+ * Reads a float from the token buffer into `*f`.
+ *
+ * @return false when not enough bytes are left; `*f` is untouched then
+ */
+bool tReadFloat(float* f) {
+    return tReadTokens(f, sizeof(*f));
+}
+
+/**
+ * Reads a NUL-terminated string from the token buffer.
+ *
+ * @param length receives the number of consumed bytes, including the
+ *        terminator when one was found
+ * @return pointer into the token buffer at the start of the string, or NULL
+ *         when the written data ends before a terminator is found
+ */
+const char* tReadString(int* length) {
+    const char* s = tokenBuffer + readIndex;
+    *length = 0;
+    // strictly below writeIndex: the byte at writeIndex was never written
+    // and lies outside the buffer when the buffer is completely full
+    while(readIndex < writeIndex) {
+        (*length)++;
+        if(tokenBuffer[readIndex++] == '\0') {
+            return s;
+        }
+    }
+    return NULL;
+}

+ 22 - 0
tokenizer/Tokenizer.h

@@ -0,0 +1,22 @@
+#ifndef TOKENIZER_H
+#define TOKENIZER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "tokenizer/Token.h"
+
+// Type of the line number stored after every token.
+typedef int16_t int16;
+
+// Tokenizes the file at `path`; returns true on ERROR, false on success.
+bool tTokenize(const char* path);
+// Returns the last error message; empty when there was no error.
+const char* tGetError();
+
+// Moves the read position back to the first token.
+void tResetReader();
+// Returns the next token without consuming it; T_END when none is left.
+Token tPeekToken();
+// Consumes and returns the next token; T_END when none is left.
+Token tReadToken();
+// The tRead* functions below consume raw payload bytes and return false
+// when not enough data is left.
+bool tReadInt(int* i);
+bool tReadInt16(int16* i);
+bool tReadFloat(float* f);
+// Consumes a NUL-terminated string; returns NULL when no terminator is found.
+const char* tReadString(int* length);
+
+#endif