Browse Source

update to C only, simple tokenizer / compiler / vm but everything works
together

Kajetan Johannes Hammerle 3 years ago
parent
commit
899e6203aa
26 changed files with 517 additions and 726 deletions
  1. 0 1
      .gitignore
  2. 98 0
      Compiler.c
  3. 9 0
      Compiler.h
  4. 44 0
      Main.c
  5. 0 11
      Main.cpp
  6. 13 0
      Object.h
  7. 6 0
      Operation.h
  8. 123 0
      Script.c
  9. 22 0
      Script.h
  10. 153 0
      Tokenizer.c
  11. 18 0
      Tokenizer.h
  12. 9 0
      Utils.c
  13. 9 0
      Utils.h
  14. 0 23
      code/ISnuviLogger.h
  15. 0 14
      code/Script.cpp
  16. 0 15
      code/Script.h
  17. 3 3
      meson.build
  18. 10 17
      test/Test.cpp
  19. 0 24
      test/Test.h
  20. 0 43
      tokenizer/Token.h
  21. 0 81
      tokenizer/TokenStream.cpp
  22. 0 32
      tokenizer/TokenStream.h
  23. 0 328
      tokenizer/Tokenizer.cpp
  24. 0 10
      tokenizer/Tokenizer.h
  25. 0 94
      utils/String.cpp
  26. 0 30
      utils/String.h

+ 0 - 1
.gitignore

@@ -1 +0,0 @@
-*.java

+ 98 - 0
Compiler.c

@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Compiler.h"
+#include "Operation.h"
+#include "Tokenizer.h"
+
+#define MAX_BYTES (1024 * 1024)
+
+static const char* UNEXPECTED_TOKEN = "unexpected token";
+static const char* FULL_BUFFER = "the compiler buffer is too small";
+static const char* error = NULL;
+
+static unsigned char byteCode[MAX_BYTES];
+static int writeIndex = 0;
+
+// Appends `length` bytes from `data` to the static bytecode buffer.
+// Returns true on success; when the bytes do not fit, sets the compiler
+// error to FULL_BUFFER and returns false without writing anything.
+static bool cAddBytes(const void* data, int length) {
+    int end = writeIndex + length;
+    if(end <= MAX_BYTES) {
+        memcpy(byteCode + writeIndex, data, length);
+        writeIndex = end;
+        return true;
+    }
+    error = FULL_BUFFER;
+    return false;
+}
+
+// Emits one operation as a single byte; the result is that of cAddBytes.
+static bool cAddOperation(Operation token) {
+    const unsigned char opcode = (unsigned char)token;
+    return cAddBytes(&opcode, sizeof(opcode));
+}
+
+// Reads the next token and requires it to be `t`.
+// On a mismatch the compiler error is set to UNEXPECTED_TOKEN and false is
+// returned; the token has been consumed in either case.
+static bool cConsumeToken(Token t) {
+    if(tReadToken() != t) {
+        error = UNEXPECTED_TOKEN;
+        return false;
+    }
+    return true;
+}
+
+// Consumes the next token only when it equals `t`.
+// Returns whether a token was consumed; never sets an error.
+static bool cConsumeTokenIf(Token t) {
+    if(tPeekToken() != t) {
+        return false;
+    }
+    tReadToken();
+    return true;
+}
+
+// Compiles an int constant: expects a T_INT token followed by its value in
+// the token stream and emits OP_PUSH_INT plus the raw int bytes.
+// Returns false with the compiler error set when anything is missing.
+static bool cConstant() {
+    if(!cConsumeToken(T_INT)) {
+        return false;
+    }
+    int value = 0;
+    if(!tReadInt(&value)) {
+        // without an error cCompile would treat this as a regular end of
+        // input and hand out truncated bytecode
+        error = "cannot read int value from token stream";
+        return false;
+    }
+    return cAddOperation(OP_PUSH_INT) && cAddBytes(&value, sizeof(int));
+}
+
+// Compiles `constant (+ constant)*`, emitting OP_ADD after every right-hand
+// operand. Returns false as soon as one of the parts fails.
+static bool cAdd() {
+    bool ok = cConstant();
+    while(ok && cConsumeTokenIf(T_ADD)) {
+        ok = cConstant() && cAddOperation(OP_ADD);
+    }
+    return ok;
+}
+
+// Compiles the rest of a print statement: an addition expression, the
+// terminating semicolon and finally the OP_PRINT operation.
+static bool cPrint() {
+    if(!cAdd()) {
+        return false;
+    }
+    if(!cConsumeToken(T_SEMICOLON)) {
+        return false;
+    }
+    return cAddOperation(OP_PRINT);
+}
+
+// Compiles one statement. Returns false at the end of the token stream
+// (error untouched) or on failure; any token other than T_END and T_PRINT
+// sets the error to UNEXPECTED_TOKEN.
+static bool cLine() {
+    switch(tReadToken()) {
+        case T_END: return false;
+        case T_PRINT: return cPrint();
+        default: break;
+    }
+    error = UNEXPECTED_TOKEN;
+    return false;
+}
+
+// Compiles the current token stream into bytecode.
+// On success returns a malloc'ed copy of the bytecode (the caller frees it)
+// and stores its length in `codeLength`. On failure returns NULL and
+// cGetError() yields the reason.
+unsigned char* cCompile(int* codeLength) {
+    // start from a clean state so repeated calls neither append to old
+    // output nor report a stale error
+    writeIndex = 0;
+    error = NULL;
+    while(cLine()) {
+    }
+    if(error != NULL) {
+        return NULL;
+    }
+    // malloc(0) may return NULL, which callers would take for a failure;
+    // always request at least one byte
+    unsigned char* bytes = malloc(writeIndex > 0 ? writeIndex : 1);
+    if(bytes == NULL) {
+        error = "out of memory";
+        return NULL;
+    }
+    memcpy(bytes, byteCode, writeIndex);
+    *codeLength = writeIndex;
+    return bytes;
+}
+
+const char* cGetError() {
+    return error;
+}

+ 9 - 0
Compiler.h

@@ -0,0 +1,9 @@
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#include <stdbool.h>
+
+unsigned char* cCompile(int* codeLength);
+const char* cGetError();
+
+#endif

+ 44 - 0
Main.c

@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "Compiler.h"
+#include "Script.h"
+#include "Tokenizer.h"
+
+// Entry point: tokenizes "../tests/test", prints every token, compiles the
+// token stream to bytecode and runs it. Returns EXIT_FAILURE when one of
+// the stages fails and 0 otherwise.
+int main() {
+    if(tTokenize("../tests/test")) {
+        puts(tGetError());
+        return EXIT_FAILURE;
+    }
+    // debug dump of the token stream; int tokens are followed by their value
+    while(true) {
+        Token token = tReadToken();
+        if(token == T_END) {
+            break;
+        }
+        if(token == T_INT) {
+            // initialized so a failed read never prints an indeterminate value
+            int value = 0;
+            tReadInt(&value);
+            printf("> %s %d\n", tGetTokenName(token), value);
+        } else {
+            printf("> %s\n", tGetTokenName(token));
+        }
+    }
+    // the dump consumed the stream, rewind it for the compiler
+    tResetReader();
+    int codeLength = 0;
+    unsigned char* code = cCompile(&codeLength);
+    if(code == NULL) {
+        const char* message = cGetError();
+        puts(message != NULL ? message : "error not set as it should");
+        return EXIT_FAILURE;
+    }
+    Script* sc = sInit(code, codeLength);
+    if(sc == NULL) {
+        puts("cannot create script");
+        free(code);
+        return EXIT_FAILURE;
+    }
+    sRun(sc);
+    sDelete(sc);
+    return 0;
+}

+ 0 - 11
Main.cpp

@@ -1,11 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-
-#include "test/Test.h"
-
-int main(int argc, char** argv) {
-    if(argc >= 3 && strcmp(argv[1], "test") == 0) {
-        Test::start(argv[2]);
-    }
-    return 0;
-}

+ 13 - 0
Object.h

@@ -0,0 +1,13 @@
+#ifndef OBJECT_H
+#define OBJECT_H
+
+typedef enum ObjectType { OT_INT } ObjectType;
+
+typedef struct Object {
+    ObjectType type;
+    union Data {
+        int intValue;
+    } data;
+} Object;
+
+#endif

+ 6 - 0
Operation.h

@@ -0,0 +1,6 @@
+#ifndef OPERATION_H
+#define OPERATION_H
+
+typedef enum Operation { OP_NOTHING, OP_PUSH_INT, OP_ADD, OP_PRINT } Operation;
+
+#endif

+ 123 - 0
Script.c

@@ -0,0 +1,123 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Operation.h"
+#include "Script.h"
+
+static const char* MISSING_INT_CONSTANT = "cannot read an int from the bytecode";
+static const char* NOT_AN_INT = "object is not an int";
+static const char* NOT_PRINTABLE = "cannot print given object";
+static const char* STACK_OVERFLOW = "stack overflow";
+static const char* STACK_UNDERFLOW = "stack underflow";
+
+// Copies the next `length` bytecode bytes into `buffer` and advances the
+// read position. Note the inverted convention: returns true when there are
+// not enough bytes left (nothing is copied) and false on success.
+static bool sRead(Script* sc, void* buffer, int length) {
+    int end = sc->readIndex + length;
+    if(end <= sc->byteCodeLength) {
+        memcpy(buffer, sc->byteCode + sc->readIndex, length);
+        sc->readIndex = end;
+        return false;
+    }
+    return true;
+}
+
+// Reads one opcode byte from the bytecode; yields OP_NOTHING when no byte
+// is left.
+static Operation sReadOperation(Script* sc) {
+    unsigned char opcode = OP_NOTHING;
+    if(!sRead(sc, &opcode, sizeof(opcode))) {
+        return opcode;
+    }
+    return OP_NOTHING;
+}
+
+static void sPush(Script* sc, Object* o) {
+    if(sc->stackIndex >= SCRIPT_STACK_SIZE) {
+        sc->error = STACK_OVERFLOW;
+        return;
+    }
+    sc->stack[sc->stackIndex++] = *o;
+}
+
+// Pops the top of the script stack into `o`.
+// Returns true on failure (empty stack, error set to STACK_UNDERFLOW) and
+// false on success.
+static bool sPop(Script* sc, Object* o) {
+    if(sc->stackIndex > 0) {
+        sc->stackIndex--;
+        *o = sc->stack[sc->stackIndex];
+        return false;
+    }
+    sc->error = STACK_UNDERFLOW;
+    return true;
+}
+
+// Handles OP_PUSH_INT: reads an int from the bytecode and pushes it as an
+// OT_INT object. Sets MISSING_INT_CONSTANT when the bytecode ends early.
+static void sPushInt(Script* sc) {
+    int constant = 0;
+    if(!sRead(sc, &constant, sizeof(int))) {
+        Object o;
+        o.type = OT_INT;
+        o.data.intValue = constant;
+        sPush(sc, &o);
+        return;
+    }
+    sc->error = MISSING_INT_CONSTANT;
+}
+
+// Handles OP_ADD: pops two objects, requires both to be ints and pushes
+// their sum. Sets NOT_AN_INT on a type mismatch; stack errors come from
+// sPop / sPush.
+void sAdd(Script* sc) {
+    Object a;
+    Object b;
+    if(sPop(sc, &a) || sPop(sc, &b)) {
+        return;
+    }
+    if(a.type != OT_INT || b.type != OT_INT) {
+        sc->error = NOT_AN_INT;
+        return;
+    }
+    // add as unsigned: signed overflow is undefined behaviour in C while
+    // unsigned arithmetic wraps around
+    unsigned int sum = (unsigned int)a.data.intValue + (unsigned int)b.data.intValue;
+    Object o = {.type = OT_INT, .data.intValue = (int)sum};
+    sPush(sc, &o);
+}
+
+// Handles OP_PRINT: pops one object and prints it on its own line.
+// Only ints are printable, everything else sets NOT_PRINTABLE.
+void sPrint(Script* sc) {
+    Object top;
+    if(sPop(sc, &top)) {
+        return;
+    }
+    if(top.type != OT_INT) {
+        sc->error = NOT_PRINTABLE;
+        return;
+    }
+    printf("%d\n", top.data.intValue);
+}
+
+// Reads the next opcode and executes it. Handlers report problems through
+// sc->error.
+static void sConsumeInstruction(Script* sc) {
+    switch(sReadOperation(sc)) {
+        case OP_NOTHING: break;
+        case OP_PUSH_INT: sPushInt(sc); break;
+        case OP_ADD: sAdd(sc); break;
+        case OP_PRINT: sPrint(sc); break;
+        // the opcode is a raw byte, so it can hold values outside of the
+        // Operation enum; report them instead of skipping them silently
+        default: sc->error = "unknown operation"; break;
+    }
+}
+
+static bool sHasData(Script* sc) {
+    return sc->readIndex < sc->byteCodeLength;
+}
+
+// Creates a script that runs `codeLength` bytes of `byteCode`.
+// The script stores the given pointer and sDelete frees it.
+// Returns NULL when the script cannot be allocated; in that case the
+// bytecode is left untouched.
+Script* sInit(unsigned char* byteCode, int codeLength) {
+    Script* sc = malloc(sizeof(*sc));
+    if(sc == NULL) {
+        return NULL;
+    }
+    sc->error = NULL;
+    sc->byteCode = byteCode;
+    sc->byteCodeLength = codeLength;
+    sc->readIndex = 0;
+    sc->stackIndex = 0;
+    return sc;
+}
+
+// Releases a script created by sInit: the bytecode it holds and the script
+// itself. Passing NULL is allowed and does nothing.
+void sDelete(Script* sc) {
+    if(sc == NULL) {
+        return;
+    }
+    free(sc->byteCode);
+    // the Script was malloc'ed by sInit and leaked before
+    free(sc);
+}
+
+// Executes instructions until the bytecode is used up or an instruction
+// sets an error; the error message is printed before stopping.
+void sRun(Script* sc) {
+    while(sHasData(sc)) {
+        sConsumeInstruction(sc);
+        if(sc->error == NULL) {
+            continue;
+        }
+        puts(sc->error);
+        break;
+    }
+}

+ 22 - 0
Script.h

@@ -0,0 +1,22 @@
+#ifndef SCRIPT_H
+#define SCRIPT_H
+
+#include "Object.h"
+
+#define SCRIPT_STACK_SIZE 50
+
+typedef struct Script {
+    const char* error;
+    unsigned char* byteCode;
+    int byteCodeLength;
+    int readIndex;
+    Object stack[SCRIPT_STACK_SIZE];
+    int stackIndex;
+} Script;
+
+Script* sInit(unsigned char* byteCode, int codeLength);
+void sDelete(Script* sc);
+
+void sRun(Script* sc);
+
+#endif

+ 153 - 0
Tokenizer.c

@@ -0,0 +1,153 @@
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "Tokenizer.h"
+#include "Utils.h"
+
+#define TOKEN_BUFFER_LENGTH (1024 * 1024)
+
+static char tokenBuffer[TOKEN_BUFFER_LENGTH];
+static int writeIndex = 0;
+static int readIndex = 0;
+
+static const char* TOO_LONG_LITERAL = "literal is too long";
+static const char* UNKNOWN_CHARACTER = "unknown character";
+static FILE* file = NULL;
+static const char* error = NULL;
+
+// Appends `length` bytes from `data` to the token buffer.
+// Returns false and sets the tokenizer error when the buffer is full.
+static bool tAdd(const void* data, int length) {
+    if(writeIndex + length > TOKEN_BUFFER_LENGTH) {
+        // without an error tTokenize would report success for a truncated
+        // token stream
+        error = "the token buffer is too small";
+        return false;
+    }
+    memcpy(tokenBuffer + writeIndex, data, length);
+    writeIndex += length;
+    return true;
+}
+
+// Stores a token as a single byte in the token buffer.
+static bool tAddToken(Token token) {
+    const unsigned char raw = (unsigned char)token;
+    return tAdd(&raw, sizeof(raw));
+}
+
+// Copies the next `length` bytes of the token buffer into `dest` and
+// advances the read position. Returns false, copying nothing, when fewer
+// bytes than requested have been written.
+static bool tReadTokens(void* dest, int length) {
+    int end = readIndex + length;
+    if(end <= writeIndex) {
+        memcpy(dest, tokenBuffer + readIndex, length);
+        readIndex = end;
+        return true;
+    }
+    return false;
+}
+
+static int tRead() {
+    return fgetc(file);
+}
+
+// Returns the next character of the input file (or EOF) without consuming
+// it: the character is read and handed back to the stream right away.
+static int tPeek() {
+    const int next = tRead();
+    ungetc(next, file);
+    return next;
+}
+
+// Reads a word that starts with the already consumed letter `c` and adds
+// the matching keyword token. Words are limited to 63 letters.
+// Returns false with the tokenizer error set for too long or unknown words.
+static bool tParseLiteral(int c) {
+    int index = 1;
+    char buffer[64];
+    buffer[0] = c;
+    while(isLetter(tPeek())) {
+        // keep the last slot free for the terminating '\0'
+        if(index >= 63) {
+            error = TOO_LONG_LITERAL;
+            return false;
+        }
+        buffer[index++] = tRead();
+    }
+    buffer[index] = '\0';
+    if(strcmp(buffer, "print") == 0) {
+        return tAddToken(T_PRINT);
+    }
+    // there is no token for other words yet; reject them instead of
+    // silently removing them from the token stream
+    error = "unknown literal";
+    return false;
+}
+
+// Reads a decimal int that starts with the already consumed digit `c` and
+// adds a T_INT token followed by the raw int value.
+// Returns false with the tokenizer error set when the value does not fit
+// into an int.
+static bool tParseNumber(int c) {
+    int sum = c - '0';
+    while(isNumber(tPeek())) {
+        int digit = tRead() - '0';
+        // sum * 10 + digit would exceed INT_MAX, which is undefined
+        // behaviour for signed ints
+        if(sum > (INT_MAX - digit) / 10) {
+            error = "int literal is too large";
+            return false;
+        }
+        sum = sum * 10 + digit;
+    }
+    return tAddToken(T_INT) && tAdd(&sum, sizeof(int));
+}
+
+// Reads the next character of the input file and turns it into at most one
+// token. Returns true to keep going and false at the end of the file or on
+// an error (the tokenizer error is set in the latter case).
+static bool tParseToken() {
+    int c = tRead();
+    if(c == EOF) {
+        return false;
+    } else if(isLetter(c)) {
+        return tParseLiteral(c);
+    } else if(isNumber(c)) {
+        return tParseNumber(c);
+    }
+    switch(c) {
+        // whitespace separates tokens; tabs and carriage returns are
+        // accepted so indented files and CRLF line endings work
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\n': return true;
+        case '+': return tAddToken(T_ADD);
+        case ';': return tAddToken(T_SEMICOLON);
+        default: break;
+    }
+    error = UNKNOWN_CHARACTER;
+    return false;
+}
+
+static void tParseFile() {
+    readIndex = 0;
+    writeIndex = 0;
+    error = NULL;
+    while(tParseToken()) {
+    }
+}
+
+// Tokenizes the file at `path` into the static token buffer.
+// Returns true when an error occurred (see tGetError) and false on success.
+bool tTokenize(const char* path) {
+    file = fopen(path, "r");
+    if(file != NULL) {
+        tParseFile();
+        fclose(file);
+        return error != NULL;
+    }
+    error = "cannot read file";
+    return true;
+}
+
+const char* tGetError() {
+    return error;
+}
+
+void tResetReader() {
+    readIndex = 0;
+}
+
+// Returns the next token without consuming it, or T_END when the whole
+// buffer has been read.
+Token tPeekToken() {
+    if(readIndex < writeIndex) {
+        return tokenBuffer[readIndex];
+    }
+    return T_END;
+}
+
+// Returns the next token and advances the read position, or T_END when
+// the whole buffer has been read.
+Token tReadToken() {
+    if(readIndex < writeIndex) {
+        Token next = tokenBuffer[readIndex];
+        readIndex++;
+        return next;
+    }
+    return T_END;
+}
+
+// Reads the raw int stored at the current read position into `i`.
+// Returns false when the buffer does not hold enough bytes.
+bool tReadInt(int* i) {
+    return tReadTokens(i, sizeof(int));
+}
+
+// Maps a token to its printable name; values outside of the Token enum
+// yield "Unknown".
+const char* tGetTokenName(Token token) {
+    static const char* const NAMES[] = {
+        [T_INT] = "int", [T_ADD] = "+", [T_PRINT] = "print", [T_SEMICOLON] = ";", [T_END] = "end"};
+    const int count = (int)(sizeof(NAMES) / sizeof(NAMES[0]));
+    const int index = (int)token;
+    if(index < 0 || index >= count) {
+        return "Unknown";
+    }
+    return NAMES[index];
+}

+ 18 - 0
Tokenizer.h

@@ -0,0 +1,18 @@
+#ifndef TOKENIZER_H
+#define TOKENIZER_H
+
+#include <stdbool.h>
+
+typedef enum Token { T_INT, T_ADD, T_PRINT, T_SEMICOLON, T_END } Token;
+
+bool tTokenize(const char* path);
+const char* tGetError();
+
+void tResetReader();
+Token tPeekToken();
+Token tReadToken();
+bool tReadInt(int* i);
+
+const char* tGetTokenName(Token token);
+
+#endif

+ 9 - 0
Utils.c

@@ -0,0 +1,9 @@
+#include "Utils.h"
+
+// Returns true for the ASCII letters a-z and A-Z.
+bool isLetter(int c) {
+    if(c >= 'a' && c <= 'z') {
+        return true;
+    }
+    return c >= 'A' && c <= 'Z';
+}
+
+// Returns true for the ASCII digits 0-9.
+bool isNumber(int c) {
+    if(c < '0') {
+        return false;
+    }
+    return c <= '9';
+}

+ 9 - 0
Utils.h

@@ -0,0 +1,9 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <stdbool.h>
+
+bool isLetter(int c);
+bool isNumber(int c);
+
+#endif

+ 0 - 23
code/ISnuviLogger.h

@@ -1,23 +0,0 @@
-#ifndef ISNUVILOGGER_H
-#define ISNUVILOGGER_H
-
-#include <string>
-#include <exception>
-
-#include "code/Script.h"
-
-class ISnuviLogger
-{
-public:
-    virtual ~ISnuviLogger() = default;
-    virtual void print(
-            const std::string* message = nullptr, 
-            const std::exception* ex = nullptr, 
-            const std::string* function = nullptr, 
-            const std::string* scriptname = nullptr, 
-            const Script* sc = nullptr,
-            int line = -1) = 0;
-};
-
-#endif
-

+ 0 - 14
code/Script.cpp

@@ -1,14 +0,0 @@
-#include "Script.h"
-
-Script::Script()
-{
-}
-
-Script::Script(const Script& orig)
-{
-}
-
-Script::~Script()
-{
-}
-

+ 0 - 15
code/Script.h

@@ -1,15 +0,0 @@
-#ifndef SCRIPT_H
-#define SCRIPT_H
-
-class Script
-{
-public:
-    Script();
-    Script(const Script& orig);
-    virtual ~Script();
-private:
-
-};
-
-#endif
-

+ 3 - 3
meson.build

@@ -1,7 +1,7 @@
-project('lonely tiger', 'cpp')
+project('lonely tiger', 'c')
 
-src = ['Main.cpp', 'test/Test.cpp', 'utils/String.cpp', 'tokenizer/Token.cpp', 'tokenizer/TokenStream.cpp', 'tokenizer/Tokenizer.cpp']
+src = ['Main.c', 'Tokenizer.c', 'Compiler.c', 'Utils.c', 'Script.c']
 
 executable('lonely_tiger', 
     sources: src,
-    cpp_args: ['-Wall', '-Wextra', '-pedantic', '-Werror'])
+    c_args: ['-Wall', '-Wextra', '-pedantic', '-Werror'])

+ 10 - 17
test/Test.cpp

@@ -1,37 +1,32 @@
-#include <iostream>
+#include <cstring>
 #include <fstream>
+#include <iostream>
 
-#include <stdio.h>
-#include <stdbool.h>
-#include <stdlib.h>
 #include <dirent.h>
-#include <string.h>
-#include <unistd.h>
 
 #include "test/Test.h"
-#include "utils/String.h"
 #include "tokenizer/TokenStream.h"
 #include "tokenizer/Tokenizer.h"
+#include "utils/String.h"
 
 static unsigned int done = 0;
 static unsigned int tests = 0;
 
-static bool checkPath(const String& path, const char* ending, bool(*f) (const String&, const String&)) {
+static bool checkPath(const String& path, const char* ending, bool (*f)(const String&, const String&)) {
     DIR* dir = opendir(path);
-    if(dir == NULL) {
+    if(dir == nullptr) {
         std::cout << "cannot open '" << path << "': " << strerror(errno) << "\n";
         return true;
     }
-    struct dirent* entry = nullptr;
     while(true) {
-        entry = readdir(dir);
+        struct dirent* entry = readdir(dir);
         if(entry == nullptr) {
             break;
         } else if(strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
             continue;
         } else if(entry->d_type == DT_DIR) {
             checkPath(path + "/" + entry->d_name, ending, f);
-        } else if(entry->d_type == DT_REG && strchr(entry->d_name, '.') == NULL) {
+        } else if(entry->d_type == DT_REG && strchr(entry->d_name, '.') == nullptr) {
             if(f(path + "/" + entry->d_name, path + "/" + entry->d_name + ending)) {
                 return true;
             }
@@ -90,15 +85,13 @@ static bool testTokenizer(const String& input, const String& output) {
     return false;
 }
 
-static void test_testTokenizer(const char* path) {
+static void startTokenizerTests(const char* path) {
     done = 0;
     tests = 0;
     checkPath(path, ".tout", testTokenizer);
-    std::cout << done << " / " << tests << " tokenizer tests succeeded" << std::endl;
+    std::cout << done << " / " << tests << " tokenizer tests succeeded\n";
 }
 
 void Test::start(const char* path) {
-    test_testTokenizer(path);
-    //testCompiler();
-    //testOutput();
+    startTokenizerTests(path);
 }

+ 0 - 24
test/Test.h

@@ -1,24 +0,0 @@
-#include "tokenizer/Token.h"
-
-const char* getTokenName(Token token) {
-    static const char* data[] = {
-        "number", "string", "literal", "label", "true", "false", "null", "(", ")", "[", "]", "{", "}", ";", ",", "++",
-        "--", "!", "~", "*", "/", "%", "+", "-", "+=", "-=", "*=", "/=", "%=", "<<", ">>", "<<=", ">>=", "&=", "^=",
-        "|=", "<", "<=", ">", ">=", "==", "!=", "&", "^", "|", "&&", "||", "=", "if", "else", "else if", "while", "try",
-        "catch", "for", "function", "break", "continue", "return", "end of file"
-    };
-    return data[token];
-}
-
-const char* getTokenEnumName(Token token) {
-    static const char* data[] = {
-        "NUMBER", "STRING", "LITERAL", "LABEL", "TRUE", "FALSE", "NULL_TOKEN", "OPEN_BRACKET", "CLOSE_BRACKET",
-        "OPEN_SQUARE_BRACKET", "CLOSE_SQUARE_BRACKET", "OPEN_CURVED_BRACKET", "CLOSE_CURVED_BRACKET", "SEMICOLON",
-        "COMMA", "INC", "DEC", "INVERT", "BIT_INVERT", "MUL", "DIV", "MOD", "ADD", "SUB", "ADD_SET", "SUB_SET",
-        "MUL_SET", "DIV_SET", "MOD_SET", "LEFT_SHIFT", "RIGHT_SHIFT", "LEFT_SHIFT_SET", "RIGHT_SHIFT_SET",
-        "BIT_AND_SET", "BIT_XOR_SET", "BIT_OR_SET", "LESS", "LESS_EQUAL", "GREATER", "GREATER_EQUAL", "EQUAL",
-        "NOT_EQUAL", "BIT_AND", "BIT_XOR", "BIT_OR", "AND", "OR", "SET", "IF", "ELSE", "ELSEIF", "WHILE", "TRY",
-        "CATCH", "FOR", "FUNCTION", "BREAK", "CONTINUE", "RETURN", "EOF"
-    };
-    return data[token];
-}

+ 0 - 43
tokenizer/Token.h

@@ -1,43 +0,0 @@
-#ifndef TOKENTYPE_H
-#define TOKENTYPE_H
-
-typedef enum Token {
-    NUMBER,
-    STRING,
-    LITERAL,
-    LABEL,
-    TRUE,
-    FALSE,
-    NULL_TOKEN,
-
-    OPEN_BRACKET, CLOSE_BRACKET,
-    OPEN_SQUARE_BRACKET, CLOSE_SQUARE_BRACKET,
-    OPEN_CURVED_BRACKET, CLOSE_CURVED_BRACKET,
-
-    SEMICOLON, COMMA,
-
-    INC, DEC,
-
-    INVERT, BIT_INVERT,
-
-    MUL, DIV, MOD, ADD, SUB,
-    ADD_SET, SUB_SET, MUL_SET, DIV_SET, MOD_SET,
-
-    LEFT_SHIFT, RIGHT_SHIFT,
-    LEFT_SHIFT_SET, RIGHT_SHIFT_SET, BIT_AND_SET, BIT_XOR_SET, BIT_OR_SET,
-
-    LESS, LESS_EQUAL, GREATER, GREATER_EQUAL, EQUAL, NOT_EQUAL,
-    BIT_AND, BIT_XOR, BIT_OR,
-    AND, OR, SET,
-
-    IF, ELSE, ELSEIF, WHILE, TRY,
-    CATCH, FOR, FUNCTION, BREAK,
-    CONTINUE, RETURN,
-
-    EOF_TOKEN
-} Token;
-
-const char* getTokenName(Token token);
-const char* getTokenEnumName(Token token);
-
-#endif

+ 0 - 81
tokenizer/TokenStream.cpp

@@ -1,81 +0,0 @@
-#include <cstring>
-
-#include "tokenizer/TokenStream.h"
-
-TokenStream::TokenStream() : readIndex(0) {
-}
-
-bool TokenStream::hasToken() const {
-    return readIndex < buffer.size();
-}
-
-String TokenStream::nextTokenString() {
-    String s;
-    s += '(';
-    s += nextLine();
-    s += ", ";
-    Token token = nextToken();
-    s += getTokenEnumName(token);
-    if(token == STRING || token == LITERAL || token == LABEL) {
-        s += ", \"";
-        s += nextString();
-        s += '"';
-    }
-    if(token == NUMBER) {
-        s += ", ";
-        s += nextDouble();
-    }
-    s += ')';
-    return s;
-}
-
-void TokenStream::read(void* data, size_t length) {
-    memcpy(data, buffer.data() + readIndex, length);
-    readIndex += length;
-}
-
-Token TokenStream::nextToken() {
-    Token token = EOF_TOKEN;
-    read(&token, sizeof (unsigned int));
-    return token;
-}
-
-unsigned int TokenStream::nextLine() {
-    unsigned int line = 0;
-    read(&line, sizeof (unsigned int));
-    return line;
-}
-
-const char* TokenStream::nextString() {
-    size_t offset = readIndex;
-    readIndex += strlen(buffer.data() + readIndex) + 1;
-    return buffer.data() + offset;
-}
-
-double TokenStream::nextDouble() {
-    double d;
-    read(&d, sizeof (double));
-    return d;
-}
-
-void TokenStream::write(const void* data, size_t length) {
-    const char* chars = static_cast<const char*> (data);
-    for(size_t i = 0; i < length; i++) {
-        buffer.push_back(chars[i]);
-    }
-}
-
-void TokenStream::add(Token token, unsigned int line) {
-    write(&line, sizeof (unsigned int));
-    write(&token, sizeof (Token));
-}
-
-void TokenStream::add(Token token, unsigned int line, double d) {
-    add(token, line);
-    write(&d, sizeof (double));
-}
-
-void TokenStream::add(Token token, unsigned int line, const char* text) {
-    add(token, line);
-    write(text, strlen(text) + 1);
-}

+ 0 - 32
tokenizer/TokenStream.h

@@ -1,32 +0,0 @@
-#ifndef TOKENSTREAM_H
-#define TOKENSTREAM_H
-
-#include <vector> 
-
-#include "tokenizer/Token.h"
-#include "utils/String.h"
-
-class TokenStream {
-public:
-    TokenStream();
-
-    bool hasToken() const;
-    String nextTokenString();
-    Token nextToken();
-    unsigned int nextLine();
-    const char* nextString();
-    double nextDouble();
-
-    void add(Token token, unsigned int line);
-    void add(Token token, unsigned int line, double d);
-    void add(Token token, unsigned int line, const char* text);
-
-private:
-    void read(void* data, size_t length);
-    void write(const void* data, size_t length);
-    
-    size_t readIndex;
-    std::vector<char> buffer;
-};
-
-#endif

+ 0 - 328
tokenizer/Tokenizer.cpp

@@ -1,328 +0,0 @@
-#include <iostream>
-#include <fstream>
-
-#include "tokenizer/Tokenizer.h"
-#include "utils/String.h"
-
-static void onError(const String& message, unsigned int line) {
-    std::cout << message << " Line: " << line << "\n";
-}
-
-static bool isLetter(char32_t c) {
-    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
-}
-
-static bool isDigit(char32_t c) {
-    return c >= '0' && c <= '9';
-}
-
-static bool isValidNameStart(char32_t c) {
-    return isLetter(c) || c == '.' || c == '_';
-}
-
-static bool isValidNamePart(char32_t c) {
-    return isDigit(c) || isValidNameStart(c);
-}
-
-class Data {
-public:
-
-    Data(const char* inputPath, TokenStream& tokens) : tokens(tokens) {
-        stream.open(inputPath);
-    }
-
-    bool hasFileError() {
-        return !stream.good();
-    }
-
-    bool next(char32_t& c) {
-        if(buffer != 0) {
-            c = buffer;
-            buffer = 0;
-            return true;
-        }
-        c = stream.get();
-        return stream.good();
-    }
-
-    bool peek(char32_t& c) {
-        if(buffer != 0 || next(buffer)) {
-            c = buffer;
-            return true;
-        }
-        return false;
-    }
-
-    bool nextIf(char32_t c) {
-        char32_t nextChar;
-        if(peek(nextChar) && c == nextChar) {
-            next(nextChar);
-            return true;
-        }
-        return false;
-    }
-
-    void addToken(Token token) {
-        tokens.add(token, line);
-    }
-
-    void addToken(Token token, const char* text) {
-        tokens.add(token, line, text);
-    }
-
-    Token chooseToken(char c, Token aCharEqual, Token aChar, Token aEqual, Token other) {
-        if(nextIf(c)) {
-            if(nextIf('=')) {
-                return aCharEqual;
-            }
-            return aChar;
-        } else if(nextIf('=')) {
-            return aEqual;
-        }
-        return other;
-    }
-
-    bool handleLiteral(char32_t c, Token token) {
-        String s;
-        s += (char) c;
-        while(true) {
-            if(s.isFull()) {
-                onError("string buffer to small", line);
-                return true;
-            }
-            char32_t data;
-            if(!peek(data) || !isValidNamePart(data)) {
-                break;
-            }
-            s += (char) data;
-            next(data);
-        }
-        if(s == "if") {
-            addToken(Token::IF);
-        } else if(s == "else") {
-            addToken(Token::ELSE);
-        } else if(s == "elseif") {
-            addToken(Token::ELSEIF);
-        } else if(s == "while") {
-            addToken(Token::WHILE);
-        } else if(s == "try") {
-            addToken(Token::TRY);
-        } else if(s == "catch") {
-            addToken(Token::CATCH);
-        } else if(s == "for") {
-            addToken(Token::FOR);
-        } else if(s == "function") {
-            addToken(Token::FUNCTION);
-        } else if(s == "break") {
-            addToken(Token::BREAK);
-        } else if(s == "continue") {
-            addToken(Token::CONTINUE);
-        } else if(s == "return") {
-            addToken(Token::RETURN);
-        } else if(s == "true") {
-            addToken(Token::TRUE);
-        } else if(s == "false") {
-            addToken(Token::FALSE);
-        } else if(s == "null") {
-            addToken(Token::NULL_TOKEN);
-        } else {
-            addToken(token, s);
-        }
-        return false;
-    }
-
-    bool handleNumber(char32_t c) {
-        double number = c - '0';
-        char32_t data;
-        while(peek(data)) {
-            if(!isDigit(data)) {
-                if(data != '.') {
-                    break;
-                }
-                next(data);
-                double factor = 10;
-                while(peek(data) && isDigit(data)) {
-                    number += (data - '0') / factor;
-                    factor *= 10;
-                    next(data);
-                }
-                break;
-            }
-            number = (number * 10) + (data - '0');
-            next(data);
-        }
-        tokens.add(Token::NUMBER, line, number);
-        return false;
-    }
-
-    bool handleString() {
-        String s;
-        unsigned int oldLine = line;
-        while(!s.isFull()) {
-            char32_t data;
-            if(!next(data)) {
-                onError("non closed string literal", oldLine);
-                return true;
-            }
-            if(data == '"') {
-                addToken(Token::STRING, s);
-                return false;
-            }
-            if(data == '\n') {
-                line++;
-            }
-            if(data == '\\') {
-                char32_t escape;
-                if(!next(escape)) {
-                    onError("missing escaped character", line);
-                    return true;
-                }
-                switch(escape) {
-                    case 'n': data = '\n';
-                        break;
-                    case '\\': data = '\\';
-                        break;
-                    case '"': data = '"';
-                        break;
-                    default:
-                        onError("invalid escaped character", line);
-                        return true;
-                }
-            }
-            s += data;
-        }
-        onError("string buffer to small", line);
-        return true;
-    }
-
-    bool handleOneLineComment() {
-        char32_t data;
-        while(next(data) && data != '\n');
-        line++;
-        return false;
-    }
-
-    bool handleMultiLineComment() {
-        char32_t first;
-        char32_t sec = 0;
-        unsigned int oldLine = line;
-        while(true) {
-            first = sec;
-            if(!next(sec)) {
-                onError("unclosed multiline comment", oldLine);
-                return true;
-            }
-            if(first == '*' && sec == '/') {
-                return false;
-            }
-            line += (sec == '\n');
-        }
-    }
-
-    bool handleSlash() {
-        if(nextIf('/')) {
-            return handleOneLineComment();
-        } else if(nextIf('*')) {
-            return handleMultiLineComment();
-        } else if(nextIf('=')) {
-            addToken(Token::DIV_SET);
-            return false;
-        }
-        addToken(Token::DIV);
-        return false;
-    }
-
-    bool handleSpecial(char32_t c) {
-        switch(c) {
-            case ' ':
-            case '\t':
-            case '\r':
-                return false;
-            case '\n': line++;
-                return false;
-            case '"':
-                return handleString();
-            case '(': addToken(Token::OPEN_BRACKET);
-                return false;
-            case ')': addToken(Token::CLOSE_BRACKET);
-                return false;
-            case '[': addToken(Token::OPEN_SQUARE_BRACKET);
-                return false;
-            case ']': addToken(Token::CLOSE_SQUARE_BRACKET);
-                return false;
-            case '{': addToken(Token::OPEN_CURVED_BRACKET);
-                return false;
-            case '}': addToken(Token::CLOSE_CURVED_BRACKET);
-                return false;
-            case '$':
-                return handleLiteral(c, Token::LITERAL);
-            case '@':
-                return handleLiteral(c, Token::LABEL);
-            case ';': addToken(Token::SEMICOLON);
-                return false;
-            case ',': addToken(Token::COMMA);
-                return false;
-            case '~': addToken(Token::BIT_INVERT);
-                return false;
-            case '+': addToken(nextIf('=') ? Token::ADD_SET: (nextIf('+') ? Token::INC: Token::ADD));
-                return false;
-            case '-': addToken(nextIf('=') ? Token::SUB_SET: (nextIf('-') ? Token::DEC: Token::SUB));
-                return false;
-            case '!': addToken(nextIf('=') ? Token::NOT_EQUAL: Token::INVERT);
-                break;
-            case '=': addToken(nextIf('=') ? Token::EQUAL: Token::SET);
-                return false;
-            case '*': addToken(nextIf('=') ? Token::MUL_SET: Token::MUL);
-                return false;
-            case '/':
-                return handleSlash();
-            case '%': addToken(nextIf('=') ? Token::MOD_SET: Token::MOD);
-                return false;
-            case '&': addToken(nextIf('=') ? Token::BIT_AND_SET: (nextIf('&') ? Token::AND: Token::BIT_AND));
-                return false;
-            case '|': addToken(nextIf('=') ? Token::BIT_OR_SET: (nextIf('|') ? Token::OR: Token::BIT_OR));
-                return false;
-            case '^': addToken(nextIf('=') ? Token::BIT_XOR_SET: Token::BIT_XOR);
-                return false;
-            case '<': addToken(chooseToken('<', Token::LEFT_SHIFT_SET, Token::LEFT_SHIFT, Token::LESS_EQUAL, Token::LESS));
-                return false;
-            case '>': addToken(chooseToken('>', Token::RIGHT_SHIFT_SET, Token::RIGHT_SHIFT, Token::GREATER_EQUAL, Token::GREATER));
-                return false;
-        }
-        String s("unknown token '");
-        s += c;
-        s += '\'';
-        onError(s, line);
-        return true;
-    }
-
-    bool handleChar(char32_t c) {
-        if(isValidNameStart(c)) {
-            return handleLiteral(c, Token::LITERAL);
-        } else if(isDigit(c)) {
-            return handleNumber(c);
-        }
-        return handleSpecial(c);
-    }
-
-private:
-    std::basic_ifstream<char32_t> stream;
-    TokenStream& tokens;
-    unsigned int line = 1;
-    char32_t buffer = 0;
-};
-
-bool Tokenizer::tokenize(TokenStream& tokenStream, const char* inputPath) {
-    Data d(inputPath, tokenStream);
-    if(d.hasFileError()) {
-        return true;
-    }
-    char32_t c;
-    while(d.next(c)) {
-        if(d.handleChar(c)) {
-            return true;
-        }
-    }
-    d.addToken(Token::EOF_TOKEN);
-    return false;
-}

+ 0 - 10
tokenizer/Tokenizer.h

@@ -1,10 +0,0 @@
-#ifndef TOKENIZER_H
-#define TOKENIZER_H
-
-#include "tokenizer/TokenStream.h"
-
-namespace Tokenizer {
-    bool tokenize(TokenStream& tokenStream, const char* inputPath);
-}
-
-#endif

+ 0 - 94
utils/String.cpp

@@ -1,94 +0,0 @@
-#include <cstdio>
-#include <cstring>
-
-#include "utils/String.h"
-
-String::String() : String("") {
-}
-
-String::String(const char* str) : usedCapacity(1) {
-    *this += str;
-}
-
-String& String::operator+=(const char* str) {
-    usedCapacity--;
-    size_t start = usedCapacity;
-    while(usedCapacity + 1 < capacity && str[usedCapacity - start] != '\0') {
-        data[usedCapacity] = str[usedCapacity - start];
-        usedCapacity++;
-    }
-    data[usedCapacity++] = '\0';
-    return *this;
-}
-
-String& String::operator+=(char c) {
-    if(usedCapacity + 1 < capacity) {
-        data[usedCapacity - 1] = c;
-        data[usedCapacity] = '\0';
-        usedCapacity++;
-    }
-    return *this;
-}
-
-String& String::operator+=(char32_t c) {
-    if(c <= 0x7F) {
-        *this += (char) c;
-    } else if(c <= 0x7FF) {
-        *this += (char) (0xC0 | ((c >> 6) & 0x1F));
-        *this += (char) (0x80 | ((c >> 0) & 0x3F));
-    } else if(c <= 0xFFFF) {
-        *this += (char) (0xE0 | ((c >> 12) & 0xF));
-        *this += (char) (0x80 | ((c >> 6) & 0x3F));
-        *this += (char) (0x80 | ((c >> 0) & 0x3F));
-    } else {
-        *this += (char) (0xF0 | ((c >> 18) & 0x7));
-        *this += (char) (0x80 | ((c >> 12) & 0x3F));
-        *this += (char) (0x80 | ((c >> 6) & 0x3F));
-        *this += (char) (0x80 | ((c >> 0) & 0x3F));
-    }
-    return *this;
-}
-
-String& String::operator+=(unsigned int i) {
-    usedCapacity += snprintf(data + usedCapacity - 1, capacity - usedCapacity, "%u", i);
-    return *this;
-}
-
-String& String::operator+=(double d) {
-    usedCapacity += snprintf(data + usedCapacity - 1, capacity - usedCapacity, (d == (long) d) ? "%lg.0" : "%lg", d);
-    return *this;
-}
-
-String String::operator+(const char* str) const {
-    String s(this->data);
-    s += str;
-    return s;
-}
-
-String::operator const char*() const {
-    return data;
-}
-
-size_t String::getLength() const {
-    return usedCapacity - 1;
-}
-
-bool String::isFull() const {
-    return usedCapacity >= capacity;
-}
-
-bool String::operator==(const String& other) const {
-    return usedCapacity == other.usedCapacity && *this == other.data;
-}
-
-bool String::operator!=(const String& other) const {
-    return !(*this == other);
-}
-
-bool String::operator==(const char* other) const {
-    return strcmp(data, other) == 0;
-}
-
-bool String::operator!=(const char* other) const {
-    return !(*this == other);
-}

+ 0 - 30
utils/String.h

@@ -1,30 +0,0 @@
-#ifndef STRING_H
-#define STRING_H
-
-#include <cstddef>
-
-class String final {
-public:
-    String();
-    String(const char* str);
-    String& operator+=(const char* str);
-    String& operator+=(char c);
-    String& operator+=(char32_t c);
-    String& operator+=(unsigned int i);
-    String& operator+=(double d);
-    String operator+(const char* str) const;
-    operator const char*() const;
-    size_t getLength() const;
-    bool isFull() const;
-    bool operator==(const String& other) const;
-    bool operator!=(const String& other) const;
-    bool operator==(const char* other) const;
-    bool operator!=(const char* other) const;
-    
-private:
-    static constexpr size_t capacity = 4096;
-    size_t usedCapacity;
-    char data[capacity];
-};
-
-#endif