Browse Source

preprocessor define, ifndef, endif and include work with the new file
tokenizer

Kajetan Johannes Hammerle 2 years ago
parent
commit
f8fe8969c4
5 changed files with 372 additions and 383 deletions
  1. 1 0
      tests/other
  2. 0 0
      tests/other.out
  3. 238 248
      tokenizer/FileTokens.c
  4. 10 18
      tokenizer/FileTokens.h
  5. 123 117
      tokenizer/Tokenizer.c

+ 1 - 0
tests/other

@@ -4,6 +4,7 @@
 #include "another"
 
 void wusi(int a) {
+    test(a);
 }
 
 #ifndef WUSI

+ 0 - 0
tests/other.out


+ 238 - 248
tokenizer/FileTokens.c

@@ -6,84 +6,110 @@
 #include "tokenizer/FileTokens.h"
 #include "utils/SnuviUtils.h"
 
-/*typedef char String[64];
+static Error* error;
+static int line = 0;
+
+typedef char String[64];
 #define STRING_LENGTH ((int)sizeof(String))
 
-#define END_IF "#endif"
-#define END_IF_LENGTH ((int)sizeof(END_IF) - 1)*/
+typedef struct {
+    String name;
+} Define;
 
-static Error* error;
+#define DEFINES 256
+static Define defines[DEFINES];
+static int defineIndex = 0;
 
-static const char* ftGetPath(const FileTokens* ft, int line) {
-    if(ft->capacity < (2 + (int)sizeof(int)) || ft->data[0] != FT_PATH) {
+static const char* ftGetPath(const FileTokens* ft) { // returns the path literal held by the first token, or "" after reporting an error
+    if(ft->length < 1 || ft->tokens[0].type != FT_PATH) { // token 0 must exist and be the FT_PATH token
         eInitError(error, line, "path not set");
         return "";
     }
-    return (const char*)(ft->data + 1 + sizeof(int));
+    return ft->tokens[0].literal; // still owned by the token (ftDelete frees it); callers must not free it
 }
 
-static void ftSystemError(const char* msg, int line, const FileTokens* ft) {
-    eInitError(error, line, "%s '%s': %s", msg, ftGetPath(ft, line),
-               strerror(errno));
+static void ftSystemError(const char* msg, const FileTokens* ft) { // reports msg together with the path of ft and the strerror(errno) text
+    eInitError(error, line, "%s '%s': %s", msg, ftGetPath(ft), strerror(errno));
 }
 
-static void ftAddData(FileTokens* ft, const void* data, int dataSize) {
-    if(ft->writeIndex + dataSize > ft->capacity) {
-        int newSize = ft->capacity;
-        while(ft->writeIndex + dataSize > newSize) {
-            newSize += 1024;
-        }
-        ft->data = realloc(ft->data, newSize);
-        ft->capacity = newSize;
+static void ftToSpace(FileToken* t) { // blanks a token in place: frees its literal and clears its payload
+    free(t->literal);
+    if(t->type != FT_NEWLINE) { // newline tokens keep their type, every other type becomes FT_SPACE
+        t->type = FT_SPACE;
     }
-    memcpy(ft->data + ft->writeIndex, data, dataSize);
-    ft->writeIndex += dataSize;
+    t->literal = NULL; // the literal was freed above, so drop the pointer
+    t->single = 0;
 }
 
-static void ftAddToken(FileTokens* ft, FileToken t) {
-    unsigned char c = t;
-    ftAddData(ft, &c, 1);
+static FileToken* ftAddToken(FileTokens* ft, FileTokenType type) { // appends an empty token of the given type and returns a pointer to it
+    if(ft->length >= ft->capacity) { // array is full: grow it before appending
+        ft->capacity = (ft->capacity * 5) / 4 + 8; // grow by a quarter plus 8 slots
+        ft->tokens = realloc(ft->tokens, ft->capacity * sizeof(FileToken)); // NOTE(review): the realloc result is not checked for NULL
+    }
+    FileToken* t = ft->tokens + ft->length++; // may be invalidated by a later append, because realloc can move the array
+    t->type = type;
+    t->literal = NULL; // callers fill in literal or single afterwards
+    t->single = 0;
+    return t;
 }
 
-static void ftAddInt(FileTokens* ft, int i) {
-    ftAddData(ft, &i, sizeof(int));
+static void ftAddPath(FileTokens* ft, const char* path) { // appends an FT_PATH token that owns a heap copy of path
+    ftAddToken(ft, FT_PATH)->literal = strdup(path);
 }
 
-static void ftAddString(FileTokens* ft, const void* s, int length) {
-    ftAddInt(ft, length + 1);
-    ftAddData(ft, s, length);
-    const char c = '\0';
-    ftAddData(ft, &c, 1);
+static void ftAddEndPath(FileTokens* ft) { // appends an FT_END_PATH token (no literal, no single)
+    ftAddToken(ft, FT_END_PATH);
 }
 
-static void ftAddTerminatedString(FileTokens* ft, const char* s) {
-    ftAddString(ft, s, strlen(s));
+static void ftAddNewline(FileTokens* ft) { // appends an FT_NEWLINE token
+    ftAddToken(ft, FT_NEWLINE);
 }
 
-/*typedef struct {
-    String name;
-} Define;
+static void ftAddLiteral(FileTokens* ft, const char* path, int length) { // appends an FT_LITERAL token; despite its name, path is the literal text
+    ftAddToken(ft, FT_LITERAL)->literal = strndup(path, length); // copies at most length characters into a terminated heap string
+}
 
-#define DEFINES 256
-static Define defines[DEFINES];
-static int defineIndex = 0;
+static void ftAddSingle(FileTokens* ft, int single) { // appends an FT_SINGLE token carrying one character value
+    ftAddToken(ft, FT_SINGLE)->single = single;
+}
 
-static bool frHasError() {
-    return error->message[0] != '\0';
+static void ftAddSpace(FileTokens* ft) { // appends an FT_SPACE token
+    ftAddToken(ft, FT_SPACE);
 }
 
-static bool frStringCompare(const String a, const char* b) {
+static void ftPrint(FileTokens* ft) { // debug helper: writes every token to stdout, colored per token type with ANSI escape sequences
+    for(int i = 0; i < ft->length; i++) {
+        switch(ft->tokens[i].type) {
+            case FT_PATH:
+                printf("\033[0;34mPath: %s\n", ft->tokens[i].literal);
+                break;
+            case FT_END_PATH: puts("\033[0;34mEnd Path"); break; // puts appends the newline itself
+            case FT_NEWLINE: putchar('\n'); break;
+            case FT_LITERAL:
+                printf("\033[0;33m%s", ft->tokens[i].literal);
+                break;
+            case FT_SINGLE:
+                printf("\033[0;31m%c", (char)ft->tokens[i].single);
+                break;
+            case FT_SPACE: putchar(' '); break;
+        }
+    }
+    printf("\033[0m"); // reset the terminal attributes after the last token
+}
+
+static bool ftHasError() { // forwards to eHasError for the Error installed by ftInit
+    return eHasError(error);
+}
+
+static bool ftStringCompare(const char* a, const char* b) { // true when strcmp reports both strings as equal
     return strcmp(a, b) == 0;
-}*/
+}
 
 static void ftInitFileTokens(FileTokens* ft, const char* path) {
-    ft->data = NULL;
+    ft->tokens = NULL; // start with an empty, unallocated token array
     ft->capacity = 0;
-    ft->writeIndex = 0;
-    ft->readIndex = 0;
-
-    ftAddToken(ft, FT_PATH);
-    ftAddTerminatedString(ft, path);
+    ft->length = 0;
+    ftAddPath(ft, path); // the FT_PATH token becomes token 0, which ftGetPath relies on
 }
 
 static void ftTokenize(FileTokens* ft, unsigned char* data) {
@@ -98,8 +124,7 @@ static void ftTokenize(FileTokens* ft, unsigned char* data) {
             continue;
         }
         if(oldIndex < index) {
-            ftAddToken(ft, FT_LITERAL);
-            ftAddString(ft, data + oldIndex, index - oldIndex);
+            ftAddLiteral(ft, (const char*)(data + oldIndex), index - oldIndex);
         }
         if(c == '"') {
             inString = !inString;
@@ -108,14 +133,14 @@ static void ftTokenize(FileTokens* ft, unsigned char* data) {
             break;
         } else if(c == '\n') {
             line++;
-            ftAddToken(ft, FT_NEWLINE);
+            ftAddNewline(ft);
         } else if(c == ' ') {
-            ftAddToken(ft, FT_SPACE);
+            ftAddSpace(ft);
         } else if(c == '/' && data[index + 1] == '/') {
             while(data[index] != '\0' && data[index] != '\n') {
                 index++;
             }
-            ftAddToken(ft, FT_NEWLINE);
+            ftAddNewline(ft);
         } else if(c == '/' && data[index + 1] == '*') {
             int startLine = line;
             index += 2;
@@ -124,7 +149,7 @@ static void ftTokenize(FileTokens* ft, unsigned char* data) {
                     eInitError(error, startLine, "unclosed multiline comment");
                     return;
                 } else if(data[index] == '\n') {
-                    ftAddToken(ft, FT_NEWLINE);
+                    ftAddNewline(ft);
                     line++;
                 } else if(data[index] == '*' && data[index + 1] == '/') {
                     index++;
@@ -133,22 +158,21 @@ static void ftTokenize(FileTokens* ft, unsigned char* data) {
                 index++;
             }
         } else {
-            ftAddToken(ft, FT_SINGLE);
-            ftAddData(ft, &c, 1);
+            ftAddSingle(ft, c);
         }
         index++;
         oldIndex = index;
     }
 }
 
-static void ftReadFull(FILE* file, FileTokens* ft, int line) {
+static void ftReadFull(FILE* file, FileTokens* ft) {
     if(fseek(file, 0, SEEK_END)) {
-        ftSystemError("cannot seek end of file", line, ft);
+        ftSystemError("cannot seek end of file", ft);
         return;
     }
     long length = ftell(file);
     if(length < 0) {
-        ftSystemError("cannot tell end of file", line, ft);
+        ftSystemError("cannot tell end of file", ft);
         return;
     }
     rewind(file);
@@ -156,275 +180,241 @@ static void ftReadFull(FILE* file, FileTokens* ft, int line) {
     unsigned char* data = malloc(length + 1);
     int readValues = fread(data, 1, length, file);
     if(readValues != length) {
-        ftSystemError("cannot read file", line, ft);
+        ftSystemError("cannot read file", ft);
         free(data);
         return;
     }
     data[length] = '\0';
     ftTokenize(ft, data);
+    ftAddEndPath(ft);
     free(data);
 }
 
-static void ftReadFullFile(FileTokens* ft, int line) {
-    FILE* file = fopen(ftGetPath(ft, line), "r");
+static void ftReadFullFile(FileTokens* ft) { // opens the file named by the path token, hands it to ftReadFull and closes it again
+    FILE* file = fopen(ftGetPath(ft), "r");
     if(file == NULL) {
-        ftSystemError("cannot open file", 1, ft);
+        ftSystemError("cannot open file", ft);
         return;
     }
-    ftReadFull(file, ft, line);
+    ftReadFull(file, ft);
     if(fclose(file) < 0) {
-        ftSystemError("cannot close file", 1, ft);
+        ftSystemError("cannot close file", ft); // NOTE(review): this replaces any error ftReadFull reported just before - confirm that is intended
     }
 }
 
-/*static int frReadUntil(int from, char end, String s, FileReader* fr) {
-    int index = 0;
-    while(from < fr->length && index < STRING_LENGTH - 1 &&
-          fr->data[from] != end) {
-        s[index++] = fr->data[from];
-        from++;
+static void ftReplace(int from, int to, FileTokens* ft, // replaces tokens [from, to) of ft with all tokens of replacement
+                      FileTokens* replacement) {
+    int newCapacity = ft->capacity + replacement->capacity; // at least as large as the resulting token count
+    FileToken* newTokens = malloc(newCapacity * sizeof(FileToken)); // NOTE(review): the malloc result is not checked for NULL
+
+    // tokens in [from, to) are not copied below, so their literals are released here
+    for(int i = from; i < to; i++) {
+        free(ft->tokens[i].literal);
+        ft->tokens[i].literal = NULL;
     }
-    s[index] = '\0';
-    return index;
-}
 
-static int frReadUntilOrLine(int from, char end, String s, FileReader* fr) {
-    int index = 0;
-    while(from < fr->length && index < STRING_LENGTH - 1 &&
-          fr->data[from] != end && fr->data[from] != '\n') {
-        s[index++] = fr->data[from];
-        from++;
+    memcpy(newTokens, ft->tokens, from * sizeof(FileToken)); // head: tokens before from
+    memcpy(newTokens + from, replacement->tokens, // middle: the replacement tokens
+           replacement->length * sizeof(FileToken));
+    memcpy(newTokens + from + replacement->length, ft->tokens + to, // tail: tokens from to onwards
+           (ft->length - to) * sizeof(FileToken));
+
+    free(ft->tokens);
+    ft->tokens = newTokens;
+    ft->capacity = newCapacity;
+    ft->length = from + replacement->length + ft->length - to;
+
+    // the literals now belong to ft; clear them in replacement so deleting it does not free them
+    for(int i = 0; i < replacement->length; i++) {
+        replacement->tokens[i].literal = NULL;
     }
-    s[index] = '\0';
-    return index;
 }
 
-static void frMakePath(String fullPath, const char* parent, String name) {
-    int i = 0;
-    int lastSlash = 0;
-    while(i < STRING_LENGTH - 1 && parent[i] != '\0') {
-        fullPath[i] = parent[i];
-        lastSlash = parent[i] == '/' ? i : lastSlash;
-        i++;
-    }
-    i = lastSlash + 1;
-    int nameI = 0;
-    while(i < STRING_LENGTH - 1 && name[nameI] != '\0') {
-        fullPath[i] = name[nameI];
-        i++;
-        nameI++;
-    }
-    fullPath[i] = '\0';
+static void ftSkipSpaces(int* index, FileTokens* ft) { // advances *index past consecutive FT_SPACE tokens
+    while(*index < ft->length) {
+        FileToken* t = ft->tokens + *index;
+        if(t->type != FT_SPACE) { // stops at the first non-space token; FT_NEWLINE is not skipped
+            return;
+        }
+        (*index)++;
+    }
 }
 
-static void frReplace(int from, int to, FileReader* fr,
-                      FileReader* replacement) {
-    int replaceLength = replacement->length - 1;
-    replaceLength = replaceLength < 0 ? 0 : replaceLength;
-    int newLength = fr->length + replaceLength - (to - from);
-    unsigned char* newData = malloc(newLength + 1);
-    memcpy(newData, fr->data, from);
-    memcpy(newData + from, replacement->data, replaceLength);
-    memcpy(newData + from + replaceLength, fr->data + to, fr->length - to);
-    newData[newLength] = '\0';
-
-    free(fr->data);
-    fr->data = newData;
-    fr->length = newLength;
-}
+const char* INCLUDE_ERROR = "include path must be a string";
 
-static int skipSpaces(int index, FileReader* fr) {
-    while(index < fr->length && fr->data[index] == ' ') {
-        index++;
+static bool ftIncludeDoubleQuote(int* index, FileTokens* ft) { // consumes a '"' single token; returns true on ERROR, false on success
+    if(*index >= ft->length || ft->tokens[*index].type != FT_SINGLE ||
+       ft->tokens[*index].single != '"') {
+        eInitError(error, line, INCLUDE_ERROR);
+        return true; // error already reported, *index is left untouched
     }
-    return index;
+    (*index)++; // step past the quote
+    return false;
 }
 
-static void frHandleInclude(int from, int length, FileReader* fr) {
-    int index = from + length;
-    index = skipSpaces(index, fr);
-    if(index >= fr->length || fr->data[index] != '"') {
-        eInitError(error, fr->line, "expected '\"' and not '%c' in '%s'",
-                   fr->data[index], fr->path);
+static void ftHandleInclude(int* index, int start, FileTokens* ft) { // parses "path" after #include and splices the tokens of that file over the directive
+    ftSkipSpaces(index, ft);
+    if(ftIncludeDoubleQuote(index, ft)) { // opening quote
+        return;
+    }
+    if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) { // the path must arrive as one literal token
+        eInitError(error, line, INCLUDE_ERROR);
         return;
     }
-    index++;
-    String s;
-    int pathLength = frReadUntil(index, '"', s, fr) + 1;
+    const char* path = ft->tokens[(*index)++].literal; // borrowed from the token; only valid until ftReplace frees it below
+    if(ftIncludeDoubleQuote(index, ft)) { // closing quote
+        return;
+    }
+    const char* parentPath = ftGetPath(ft); // NOTE(review): this is always token 0, so nested includes resolve against the top-level file - confirm intended
 
-    String fullPath;
-    frMakePath(fullPath, fr->path, s);
+    int afterLastSlash = 0; // length of the directory prefix of parentPath, including the last '/'
+    for(int i = 0; parentPath[i] != '\0'; i++) {
+        afterLastSlash = parentPath[i] == '/' ? i + 1 : afterLastSlash;
+    }
 
-    FileReader include;
+    int pathLength = strlen(path) + 1; // includes the terminating '\0'
+    char* fullPath = malloc(afterLastSlash + pathLength); // NOTE(review): the malloc result is not checked for NULL
+    memcpy(fullPath, parentPath, afterLastSlash);
+    memcpy(fullPath + afterLastSlash, path, pathLength); // copies the terminator as well
+
+    FileTokens include;
     ftInitFileTokens(&include, fullPath);
+    free(fullPath); // ftInitFileTokens stored its own copy via ftAddPath
+
     ftReadFullFile(&include);
-    if(frHasError()) {
-        frDelete(&include);
-        return;
+    if(!ftHasError()) {
+        ftReplace(start, *index, ft, &include); // tokens from '#' up to the closing quote are replaced
+        *index = start; // continue at the first spliced-in token
     }
-    frReplace(from, index + pathLength, fr, &include);
-    frDelete(&include);
-}
-
-static int frReadWord(String s, int from, FileReader* fr) {
-    int index = skipSpaces(from, fr);
-    int wordLength = frReadUntilOrLine(index, ' ', s, fr);
-    return index + wordLength;
+    ftDelete(&include); // after ftReplace this only frees the array, since the literals were handed over
 }
 
-static bool frIsDefined(String s) {
+static bool ftIsDefined(const char* s) { // linear search through the recorded defines for the name s
     for(int i = 0; i < defineIndex; i++) {
-        if(frStringCompare(defines[i].name, s)) {
+        if(ftStringCompare(defines[i].name, s)) {
             return true;
         }
     }
     return false;
 }
 
-static bool frIsMatching(int from, FileReader* fr, const char* s) {
-    int i = 0;
-    while(true) {
-        if(s[i] == '\0') {
-            return true;
-        } else if(from >= fr->length || fr->data[from] != s[i]) {
-            return false;
-        }
-        from++;
-        i++;
-    }
-}
-
-static int frFindEndIf(int from, FileReader* fr) {
+static int ftFindEndIf(int from, FileTokens* ft) { // returns the index of the '#' of the matching "# endif" at or after from, or -1
     int layer = 0;
-    for(int i = from; i < fr->length; i++) {
-        if(frIsMatching(i, fr, END_IF)) {
+    for(int i = from; i < ft->length - 1; i++) { // stops one early because tokens[i + 1] is inspected
+        if(ft->tokens[i].type != FT_SINGLE || ft->tokens[i].single != '#') {
+            continue;
+        } else if(ft->tokens[i + 1].type != FT_LITERAL) { // the directive name must directly follow '#'
+            continue;
+        }
+        if(ftStringCompare(ft->tokens[i + 1].literal, "ifndef")) {
+            layer++; // a nested ifndef needs its own endif first
+        } else if(ftStringCompare(ft->tokens[i + 1].literal, "endif")) {
             if(layer == 0) {
                 return i;
             }
             layer--;
-        } else if(frIsMatching(i, fr, "#ifndef")) {
-            layer++;
         }
     }
     return -1;
 }
 
-static void frHandleIfNotDefined(int from, int length, FileReader* fr) {
-    String s;
-    int end = frReadWord(s, from + length, fr);
-    int endIf = frFindEndIf(end, fr);
+static void ftHandleIfNotDefined(int* index, int start, FileTokens* ft) { // handles "#ifndef NAME ... #endif"; start is the index of '#'
+    (void)start; // NOTE(review): redundant, start is used below
+    ftSkipSpaces(index, ft);
+    if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
+        eInitError(error, line, "ifndef expects a literal");
+        return;
+    }
+    const char* check = ft->tokens[(*index)++].literal; // borrowed; only read before the ftToSpace calls free it
+    int endIf = ftFindEndIf(*index, ft);
     if(endIf == -1) {
-        eInitError(error, fr->line, "cannot find #endif in '%s'", fr->path);
+        eInitError(error, line, "cannot find #endif");
         return;
     }
-    if(frIsDefined(s)) {
-        memset(fr->data + from, ' ', endIf - from + END_IF_LENGTH);
+    if(ftIsDefined(check)) { // NAME is defined: blank the whole block including "# endif"
+        for(int i = start; i < endIf + 2; i++) { // endIf + 2 covers the '#' and the "endif" literal
+            ftToSpace(ft->tokens + i); // ftToSpace leaves newline tokens as newlines
+        }
     } else {
-        memset(fr->data + from, ' ', end - from);
-        memset(fr->data + endIf, ' ', END_IF_LENGTH);
+        ftToSpace(ft->tokens + start);        // #
+        ftToSpace(ft->tokens + start + 1);    // ifndef
+        ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
+
+        ftToSpace(ft->tokens + endIf);     // #
+        ftToSpace(ft->tokens + endIf + 1); // endif
     }
 }
 
-static void frHandleDefine(int from, int length, FileReader* fr) {
+static void frHandleDefine(int* index, int start, FileTokens* ft) { // records NAME from "#define NAME" and blanks the directive tokens
     if(defineIndex >= DEFINES) {
-        eInitError(error, fr->line, "too much defines in '%s'", fr->path);
+        eInitError(error, line, "too much defines");
         return;
     }
-    String s;
-    int end = frReadWord(s, from + length, fr);
-    if(frIsDefined(s)) {
-        puts("already defined");
+    ftSkipSpaces(index, ft);
+    if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
+        eInitError(error, line, "define expects a literal");
         return;
     }
-    strncpy(defines[defineIndex].name, s, STRING_LENGTH);
+    const char* define = ft->tokens[(*index)++].literal; // borrowed; copied below before ftToSpace frees it
+    if(ftIsDefined(define)) {
+        return; // NOTE(review): the directive tokens are NOT blanked on this path and stay in the stream - confirm intended
+    }
+    strncpy(defines[defineIndex].name, define, STRING_LENGTH); // NOTE(review): no '\0' is written when define has STRING_LENGTH or more characters
     defineIndex++;
-    printf("defined '%s'\n", s);
-    memset(fr->data + from, ' ', end - from);
+
+    ftToSpace(ft->tokens + start);        // #
+    ftToSpace(ft->tokens + start + 1);    // define
+    ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
 }
 
-static void frParseInstruction(int from, FileReader* fr) {
-    String s;
-    int length = frReadUntil(from, ' ', s, fr);
-    if(frStringCompare(s, "#include")) {
-        frHandleInclude(from, length, fr);
-    } else if(frStringCompare(s, "#ifndef")) {
-        frHandleIfNotDefined(from, length, fr);
-    } else if(frStringCompare(s, "#define")) {
-        frHandleDefine(from, length, fr);
+static void ftParseInstruction(int* index, FileTokens* ft) { // dispatches the directive whose '#' token sits at *index
+    int start = *index; // index of the '#', passed on to the handlers
+    (*index)++;
+    if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) { // the directive name must directly follow '#'
+        eInitError(error, line, "expected literal after #");
+        return;
+    }
+    const char* name = ft->tokens[(*index)++].literal;
+    if(ftStringCompare(name, "include")) {
+        ftHandleInclude(index, start, ft);
+    } else if(ftStringCompare(name, "ifndef")) {
+        ftHandleIfNotDefined(index, start, ft);
+    } else if(ftStringCompare(name, "define")) {
+        frHandleDefine(index, start, ft);
+    } else if(ftStringCompare(name, "endif")) { // a matched endif is blanked by ftHandleIfNotDefined, so one seen here has no ifndef
+        eInitError(error, line, "endif without if");
     } else {
-        eInitError(error, fr->line, "unknown preprocessor token '%s' in '%s'",
-                   s, fr->path);
+        eInitError(error, line, "unknown preprocessor literal '%s'", name);
     }
 }
 
-static void frSearchInstruction(FileReader* fr) {
-    if(fr->data == NULL) {
-        return;
-    }
-    for(int i = 0; i < fr->length && !frHasError(); i++) {
-        unsigned char c = fr->data[i];
-        if(c == '#') {
-            frParseInstruction(i, fr);
-            i--;
-        } else if(c == '\n') {
-            fr->line++;
+static void ftSearchInstruction(FileTokens* ft) { // scans all tokens for '#' and processes each directive until an error is reported
+    for(int i = 0; i < ft->length && !ftHasError(); i++) { // ft->length is re-read each pass because includes change it
+        if(ft->tokens[i].type == FT_SINGLE && ft->tokens[i].single == '#') {
+            ftParseInstruction(&i, ft); // NOTE(review): the handler moves i and the loop's i++ then skips one more token unchecked
         }
     }
-}*/
+}
 
 void ftInit(const char* path, FileTokens* ft, Error* e) {
-    // defineIndex = 0;
+    defineIndex = 0; // forget the defines recorded by an earlier ftInit call
     error = e;
     eInitSuccess(error);
     ftInitFileTokens(ft, path);
-    ftReadFullFile(ft, 1);
-    // frSearchInstruction(fr);
-    // if(fr->data != NULL) {
-    //     printf((char*)fr->data);
-    // }
-}
-
-void ftDelete(FileTokens* ft) {
-    free(ft->data);
-    ft->data = NULL;
-    ft->capacity = 0;
-    ft->writeIndex = 0;
-    ft->readIndex = 0;
-}
-
-FileToken ftRead(FileTokens* ft) {
-    if(ft->readIndex < ft->writeIndex) {
-        return ft->data[ft->readIndex++];
-    }
-    return FT_END;
-}
-
-bool ftReadSingleIf(FileTokens* ft, char c) {
-    if(ft->readIndex + 1 >= ft->writeIndex) {
-        return false;
-    } else if(ft->data[ft->readIndex] != FT_SINGLE) {
-        return false;
-    } else if(ft->data[ft->readIndex + 1] != c) {
-        return false;
-    }
-    ft->readIndex += 2;
-    return true;
-}
-
-char ftReadSingle(FileTokens* ft) {
-    if(ft->readIndex < ft->writeIndex) {
-        return ft->data[ft->readIndex++];
+    ftReadFullFile(ft); // read and tokenize the file named by path
+    if(!ftHasError()) { // only run the preprocessor pass when reading succeeded
+        ftSearchInstruction(ft);
     }
-    return -1;
+    (void)ftPrint; // references the debug printer so it does not count as unused
+    // ftPrint(ft);
 }
 
-const char* ftReadString(FileTokens* ft, int* length) {
-    *length = 0;
-    if(ft->readIndex + ((int)sizeof(int)) + 1 >= ft->writeIndex) {
-        return NULL;
+void ftDelete(FileTokens* ft) { // frees every token literal and the token array, then resets ft to the empty state
+    for(int i = 0; i < ft->length; i++) {
+        free(ft->tokens[i].literal); // tokens without a literal hold NULL here, which free accepts
     }
-    memcpy(length, ft->data + ft->readIndex, sizeof(int));
-    const char* s = (const char*)(ft->data + ft->readIndex + sizeof(int));
-    ft->readIndex += *length + sizeof(int);
-    return s;
+    free(ft->tokens);
+    ft->tokens = NULL;
+    ft->capacity = 0;
+    ft->length = 0;
 }

+ 10 - 18
tokenizer/FileTokens.h

@@ -9,36 +9,28 @@ extern "C" {
 
 typedef enum {
     FT_PATH,
+    FT_END_PATH, // appended after the tokens of a file have been read
     FT_NEWLINE,
     FT_LITERAL,
     FT_SINGLE,
-    FT_SPACE,
-    FT_END
-} FileToken;
+    FT_SPACE
+} FileTokenType; // kind of a FileToken; the former FT_END marker is gone because FileTokens now stores a length
 
 typedef struct {
-    unsigned char* data;
-    int capacity;
-    int writeIndex;
-    int readIndex;
-} FileTokens;
+    FileTokenType type;
+    int single; // character value of an FT_SINGLE token, 0 otherwise
+    char* literal; // heap string of FT_PATH and FT_LITERAL tokens, NULL otherwise; freed by ftDelete
+} FileToken;
 
 typedef struct {
-    unsigned char* data;
-    const char* path;
+    FileToken* tokens; // heap array of tokens, released by ftDelete
+    int capacity; // number of allocated slots in tokens
     int length;
-    int readIndex;
-    int line;
-} FileReader;
+} FileTokens; // growable token list; length counts the slots of tokens that are in use
 
 void ftInit(const char* path, FileTokens* ft, Error* e);
 void ftDelete(FileTokens* ft);
 
-FileToken ftRead(FileTokens* ft);
-bool ftReadSingleIf(FileTokens* ft, char c);
-char ftReadSingle(FileTokens* ft);
-const char* ftReadString(FileTokens* ft, int* length);
-
 #ifdef __cplusplus
 }
 #endif

+ 123 - 117
tokenizer/Tokenizer.c

@@ -14,6 +14,7 @@
 #define ERROR_LENGTH 256
 
 static FileTokens fileTokens;
+static int fileTokenIndex;
 static jmp_buf errorJump;
 static Error* error;
 
@@ -53,7 +54,24 @@ static bool tReadTokens(void* dest, int length) {
     return false;
 }
 
-static void tParseLiteral(const char* buffer, int length) {
+static FileToken* tNextToken() { // returns the token at fileTokenIndex and advances the index
+    if(fileTokenIndex >= fileTokens.length) {
+        tError("unexpected end of file"); // presumably does not return (errorJump) - otherwise the access below is out of bounds; TODO confirm
+    }
+    return fileTokens.tokens + fileTokenIndex++;
+}
+
+static bool tReadSingleIf(char c) { // consumes the next token only when it is the FT_SINGLE character c; returns whether it did
+    if(fileTokenIndex >= fileTokens.length ||
+       fileTokens.tokens[fileTokenIndex].type != FT_SINGLE ||
+       fileTokens.tokens[fileTokenIndex].single != c) {
+        return false; // nothing consumed
+    }
+    fileTokenIndex++;
+    return true;
+}
+
+static void tParseLiteral(const char* buffer) {
     if(strcmp(buffer, "return") == 0) {
         tAddToken(T_RETURN);
     } else if(strcmp(buffer, "if") == 0) {
@@ -90,21 +108,13 @@ static void tParseLiteral(const char* buffer, int length) {
         tAdd(&i, sizeof(int32));
     } else {
         tAddToken(T_LITERAL);
-        tAdd(buffer, length);
-    }
-}
-
-static const char* tReadFileString(int* length) {
-    const char* buffer = ftReadString(&fileTokens, length);
-    if(buffer == NULL) {
-        tError("cannot read literal string");
+        tAdd(buffer, strlen(buffer) + 1);
     }
-    return buffer;
 }
 
-static long long tParseInt(const char* s, int length) {
+static long long tParseInt(const char* s) {
     long long l = 0;
-    for(int i = 0; i < length; i++) {
+    for(int i = 0; s[i] != '\0'; i++) {
         if(l > (LLONG_MAX / 10)) {
             tError("invalid number on line %d", line);
         }
@@ -121,16 +131,14 @@ static long long tParseInt(const char* s, int length) {
     return l;
 }
 
-static void tParseNumber(const char* buffer, int length) {
-    long long l = tParseInt(buffer, length);
-    if(ftReadSingleIf(&fileTokens, '.')) {
-        FileToken t = ftRead(&fileTokens);
-        if(t != FT_LITERAL) {
+static void tParseNumber(const char* buffer) {
+    long long l = tParseInt(buffer);
+    if(tReadSingleIf('.')) {
+        FileToken* t = tNextToken();
+        if(t->type != FT_LITERAL) {
             tError("expected literal after comma of number on line %d", line);
         }
-        int commaLength = 0;
-        const char* commaBuffer = tReadFileString(&commaLength);
-        long long comma = tParseInt(commaBuffer, commaLength - 1);
+        long long comma = tParseInt(t->literal);
 
         float f = comma;
         while(f > 1.0f) {
@@ -150,16 +158,15 @@ static void tParseNumber(const char* buffer, int length) {
 }
 
 static int32 tNextUnicodePart() {
-    FileToken t = ftRead(&fileTokens);
-    if(t == FT_SINGLE) {
-        return ftReadSingle(&fileTokens);
-    } else if(t == FT_LITERAL) {
-        int length = 0;
-        const char* s = tReadFileString(&length);
-        if(length != 2) {
+    FileToken* t = tNextToken();
+    if(t->type == FT_SINGLE) {
+        return t->single;
+    } else if(t->type == FT_LITERAL) {
+        int length = strlen(t->literal);
+        if(length != 1) {
             tError("unicode literal has wrong length %d", length);
         }
-        return s[0];
+        return t->literal[0];
     } else {
         tError("cannot read next unicode character part on line %d", line);
         return 0;
@@ -226,22 +233,20 @@ static int32 tStringUnicode(const char* s, int* index, int length) {
 static void tAddString() {
     tAddToken(T_TEXT);
 
-    FileToken t = ftRead(&fileTokens);
-    if(t == FT_SINGLE) {
-        int32 c = ftReadSingle(&fileTokens);
-        if(c != '"') {
-            tError("unexpected single %d", c);
+    FileToken* t = tNextToken();
+    if(t->type == FT_SINGLE) {
+        if(t->single != '"') {
+            tError("unexpected single '%d'", t->single);
         }
-    } else if(t == FT_LITERAL) {
-        int length = 0;
-        const char* s = tReadFileString(&length);
-        length--;
+    } else if(t->type == FT_LITERAL) {
+        int length = strlen(t->literal);
+        const char* s = t->literal;
         int index = 0;
         while(index < length) {
             int32 c = tStringUnicode(s, &index, length);
             tAdd(&c, sizeof(int32));
         }
-        if(!ftReadSingleIf(&fileTokens, '"')) {
+        if(!tReadSingleIf('"')) {
             tError("unclosed string");
         }
     } else {
@@ -253,31 +258,29 @@ static void tAddString() {
 }
 
 static void tAddUnicode() {
-    FileToken t = ftRead(&fileTokens);
-    if(t == FT_LITERAL) {
-        int length = 0;
-        const char* s = tReadFileString(&length);
-        if(length != 2) {
+    FileToken* t = tNextToken(); // the token following the opening quote
+    if(t->type == FT_LITERAL) {
+        int length = strlen(t->literal); // no longer counts the terminator, hence the check against 1 instead of 2
+        if(length != 1) {
             tError("invalid character on line %d", line);
         }
-        int32 c = s[0];
+        int32 c = t->literal[0];
         tAddToken(T_INT_VALUE);
         tAdd(&c, sizeof(int32));
-    } else if(t == FT_SINGLE) {
-        int32 c = ftReadSingle(&fileTokens);
-        c = tUnicode(c);
+    } else if(t->type == FT_SINGLE) {
+        int32 c = tUnicode(t->single); // the single character is passed through tUnicode before it is emitted
         tAddToken(T_INT_VALUE);
         tAdd(&c, sizeof(int32));
     } else {
         tError("invalid character on line %d", line);
     }
-    if(!ftReadSingleIf(&fileTokens, '\'')) {
+    if(!tReadSingleIf('\'')) { // the closing quote must follow
         tError("expecting unicode end");
     }
 }
 
 static void tAddToken2(Token te, Token t) {
-    if(ftReadSingleIf(&fileTokens, '=')) {
+    if(tReadSingleIf('=')) {
         tAddToken(te);
     } else {
         tAddToken(t);
@@ -285,7 +288,7 @@ static void tAddToken2(Token te, Token t) {
 }
 
 static void tAddToken3(int c, Token tc, Token te, Token t) {
-    if(ftReadSingleIf(&fileTokens, c)) {
+    if(tReadSingleIf(c)) {
         tAddToken(tc);
     } else {
         tAddToken2(te, t);
@@ -293,75 +296,81 @@ static void tAddToken3(int c, Token tc, Token te, Token t) {
 }
 
 static void tAddToken4(int c, Token tce, Token tc, Token te, Token t) {
-    if(ftReadSingleIf(&fileTokens, c)) {
+    if(tReadSingleIf(c)) { // c follows: tAddToken2 picks tce or tc; otherwise it picks te or t
         tAddToken2(tce, tc);
     } else {
         tAddToken2(te, t);
     }
 }
 
-static void tParseToken(FileToken t) {
-    if(t == FT_PATH) {
-        int length = 0;
-        const char* s = ftReadString(&fileTokens, &length);
-        (void)s;
-        // TODO: do something useful with the path
-        return;
-    } else if(t == FT_NEWLINE) {
-        line++;
-        return;
-    } else if(t == FT_LITERAL) {
-        int length = 0;
-        const char* buffer = tReadFileString(&length);
-        if(isLetter(buffer[0])) {
-            tParseLiteral(buffer, length);
-        } else if(isNumber(buffer[0])) {
-            tParseNumber(buffer, length - 1);
-        } else {
-            tError("invalid literal string '%s'", buffer);
-        }
-        return;
-    } else if(t == FT_END || t == FT_SPACE) {
-        return;
-    }
-    char c = ftReadSingle(&fileTokens);
-    switch(c) {
-        case ' ': return;
-        case '+': tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD); return;
-        case '-': tAddToken3('-', T_DECREMENT, T_SUB_SET, T_SUB); return;
-        case '*': tAddToken2(T_MUL_SET, T_MUL); return;
-        case '/': tAddToken2(T_DIV_SET, T_DIV); return;
-        case '%': tAddToken2(T_MOD_SET, T_MOD); return;
-        case '<':
-            tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL,
-                       T_LESS);
+static void tParseToken(FileToken* t) { // translates one file token into tokenizer output, dispatching on its type
+    switch(t->type) {
+        case FT_PATH:
+            // TODO: do something useful with the path
             return;
-        case '>':
-            tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT, T_GREATER_EQUAL,
-                       T_GREATER);
+        case FT_END_PATH:
+            // TODO: do something useful
             return;
-        case '=': tAddToken2(T_EQUAL, T_SET); return;
-        case '!': tAddToken2(T_NOT_EQUAL, T_NOT); return;
-        case '&': tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND); return;
-        case '|': tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR); return;
-        case '~': tAddToken(T_BIT_NOT); return;
-        case '^': tAddToken2(T_BIT_XOR_SET, T_BIT_XOR); return;
-        case ',': tAddToken(T_COMMA); return;
-        case ';': tAddToken(T_SEMICOLON); return;
-        case '(': tAddToken(T_OPEN_BRACKET); return;
-        case ')': tAddToken(T_CLOSE_BRACKET); return;
-        case '{': tAddToken(T_OPEN_CURVED_BRACKET); return;
-        case '}': tAddToken(T_CLOSE_CURVED_BRACKET); return;
-        case '"': tAddString(); return;
-        case '\'': tAddUnicode(); return;
-        case '.': tAddToken(T_POINT); return;
-        case '[': tAddToken(T_OPEN_SQUARE_BRACKET); return;
-        case ']': tAddToken(T_CLOSE_SQUARE_BRACKET); return;
-    }
-    if(isprint(c)) {
-        tError("unknown character on line %d: %c", line, c);
-    } else {
-        tError("unknown character on line %d: %d", line, (int)c);
+        case FT_NEWLINE: line++; return; // keeps the line counter used in error messages in step
+        case FT_LITERAL: {
+            const char* buffer = t->literal;
+            if(isLetter(buffer[0])) { // the first character decides between word and number
+                tParseLiteral(buffer);
+            } else if(isNumber(buffer[0])) {
+                tParseNumber(buffer);
+            } else {
+                tError("invalid literal string '%s'", buffer);
+            }
+            return;
+        }
+        case FT_SPACE: return;
+        case FT_SINGLE: {
+            char c = t->single;
+            switch(c) { // multi-character operators are assembled by the tAddToken2/3/4 helpers
+                case ' ': return;
+                case '+':
+                    tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD);
+                    return;
+                case '-':
+                    tAddToken3('-', T_DECREMENT, T_SUB_SET, T_SUB);
+                    return;
+                case '*': tAddToken2(T_MUL_SET, T_MUL); return;
+                case '/': tAddToken2(T_DIV_SET, T_DIV); return;
+                case '%': tAddToken2(T_MOD_SET, T_MOD); return;
+                case '<':
+                    tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT,
+                               T_LESS_EQUAL, T_LESS);
+                    return;
+                case '>':
+                    tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT,
+                               T_GREATER_EQUAL, T_GREATER);
+                    return;
+                case '=': tAddToken2(T_EQUAL, T_SET); return;
+                case '!': tAddToken2(T_NOT_EQUAL, T_NOT); return;
+                case '&':
+                    tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND);
+                    return;
+                case '|': tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR); return;
+                case '~': tAddToken(T_BIT_NOT); return;
+                case '^': tAddToken2(T_BIT_XOR_SET, T_BIT_XOR); return;
+                case ',': tAddToken(T_COMMA); return;
+                case ';': tAddToken(T_SEMICOLON); return;
+                case '(': tAddToken(T_OPEN_BRACKET); return;
+                case ')': tAddToken(T_CLOSE_BRACKET); return;
+                case '{': tAddToken(T_OPEN_CURVED_BRACKET); return;
+                case '}': tAddToken(T_CLOSE_CURVED_BRACKET); return;
+                case '"': tAddString(); return;
+                case '\'': tAddUnicode(); return;
+                case '.': tAddToken(T_POINT); return;
+                case '[': tAddToken(T_OPEN_SQUARE_BRACKET); return;
+                case ']': tAddToken(T_CLOSE_SQUARE_BRACKET); return;
+            }
+            if(isprint(c)) { // NOTE(review): isprint on a plain char is undefined for negative values; a cast to unsigned char would avoid that
+                tError("unknown character on line %d: %c", line, c);
+            } else {
+                tError("unknown character on line %d: %d", line, (int)c);
+            }
+        }
     }
 }
 
@@ -369,12 +378,9 @@ static void tParseFile() {
     readIndex = 0;
     writeIndex = 0;
     line = 1;
-    while(true) {
-        FileToken t = ftRead(&fileTokens);
-        if(t == FT_END) {
-            return;
-        }
-        tParseToken(t);
+    fileTokenIndex = 0;
+    while(fileTokenIndex < fileTokens.length) {
+        tParseToken(fileTokens.tokens + fileTokenIndex++);
     }
 }