Bläddra i källkod

Use heap for buffer, updated tokenizer

Kajetan Johannes Hammerle 2 veckor sedan
förälder
incheckning
67a3033d39
8 ändrade filer med 160 tillägg och 148 borttagningar
  1. 20 5
      src/Buffer.c
  2. 2 1
      src/Buffer.h
  3. 19 19
      src/Compiler.c
  4. 1 1
      src/Compiler.h
  5. 26 28
      src/Main.c
  6. 3 0
      src/Memory.c
  7. 87 84
      src/Tokenizer.c
  8. 2 10
      src/Tokenizer.h

+ 20 - 5
src/Buffer.c

@@ -2,21 +2,36 @@
 
 #include <string.h>
 
-void bufferInit(Buffer* b, u8* data, size_t n) {
-    b->data = data;
-    b->maxIndex = n;
+#include "Memory.h"
+
+void bufferInit(Buffer* b) {
+    b->data = nullptr;
+    b->maxIndex = 0;
     b->readIndex = 0;
     b->writeIndex = 0;
 }
 
+void bufferDestroy(Buffer* b) {
+    memoryFree(b->data);
+    *b = (Buffer){};
+}
+
 void bufferReset(Buffer* b) {
     b->readIndex = 0;
     b->writeIndex = 0;
 }
 
 bool bufferWrite(Buffer* b, const void* p, size_t n) {
-    if(b->writeIndex + n > b->maxIndex) {
-        return true;
+    while(b->writeIndex + n > b->maxIndex) {
+        size_t newSize = b->maxIndex <= 0 ? 16 : (b->maxIndex * 5) / 4;
+        u8* newData = memoryAllocate(newSize);
+        if(newData == nullptr) {
+            return true;
+        }
+        memcpy(newData, b->data, b->writeIndex);
+        memoryFree(b->data);
+        b->data = newData;
+        b->maxIndex = newSize;
     }
     memcpy(b->data + b->writeIndex, p, n);
     b->writeIndex += n;

+ 2 - 1
src/Buffer.h

@@ -10,7 +10,8 @@ typedef struct {
     size_t writeIndex;
 } Buffer;
 
-void bufferInit(Buffer* b, u8* data, size_t n);
+void bufferInit(Buffer* b);
+void bufferDestroy(Buffer* b);
 void bufferReset(Buffer* b);
 [[nodiscard]] bool bufferWrite(Buffer* b, const void* p, size_t n);
 [[nodiscard]] bool bufferRead(Buffer* b, void* p, size_t n);

+ 19 - 19
src/Compiler.c

@@ -8,31 +8,32 @@
 #include "Code.h"
 
 static Error error = {};
+static int line = 0;
 static jmp_buf jumpPosition = {};
 
-#define THROW_ERROR(format, ...)                                       \
-    snprintf(                                                          \
-        error.text, sizeof(error.text), "Line %zu | " format, t->line, \
-        __VA_ARGS__);                                                  \
+#define THROW_ERROR(format, ...)                             \
+    snprintf(                                                \
+        error.text, sizeof(error.text), "Line %d | " format, \
+        line __VA_OPT__(, ) __VA_ARGS__);                    \
     longjmp(jumpPosition, 1)
 
-#define CODE(command)                                         \
-    do {                                                      \
-        if(command) {                                         \
-            THROW_ERROR("Line %zu | Code overflow", t->line); \
-        }                                                     \
+#define CODE(command)                     \
+    do {                                  \
+        if(command) {                     \
+            THROW_ERROR("Code overflow"); \
+        }                                 \
     } while(false)
 
-[[noreturn]] static void unexpectedToken(Tokenizer* t, Token token) {
+[[noreturn]] static void unexpectedToken(Token token) {
     char buffer[128];
     tokenizerPrintToken(&token, buffer, sizeof(buffer));
-    THROW_ERROR("Line %zu | Unexpected %s token", t->line, buffer);
+    THROW_ERROR("Unexpected %s token", buffer);
 }
 
 static Token consumeToken(Tokenizer* t, TokenType type) {
     Token actual = tokenizerNext(t);
     if(actual.type != type) {
-        unexpectedToken(t, actual);
+        unexpectedToken(actual);
     }
     return actual;
 }
@@ -54,7 +55,7 @@ static void compileConstant(Tokenizer* t) {
         CODE(codePushInstruction(PUSH_INT64));
         CODE(codePushI64(token.intValue));
     } else {
-        unexpectedToken(t, token);
+        unexpectedToken(token);
     }
 }
 
@@ -100,7 +101,7 @@ static void compileLine(Tokenizer* t, Token token) {
         return;
     }
     if(token.type != LITERAL) {
-        unexpectedToken(t, token);
+        unexpectedToken(token);
     }
     const char* s = token.stringValue;
     if(strcmp(s, "print") == 0) {
@@ -128,11 +129,10 @@ static void parseTokens(Tokenizer* t) {
     }
 }
 
-const Error* compileFile(Tokenizer* t) {
+Error compileFile(Tokenizer* t) {
     error.text[0] = '\0';
-    if(setjmp(jumpPosition)) {
-        return &error;
+    if(!setjmp(jumpPosition)) {
+        parseTokens(t);
     }
-    parseTokens(t);
-    return &error;
+    return error;
 }

+ 1 - 1
src/Compiler.h

@@ -4,6 +4,6 @@
 #include "Error.h"
 #include "Tokenizer.h"
 
-const Error* compileFile(Tokenizer* t);
+Error compileFile(Tokenizer* t);
 
 #endif

+ 26 - 28
src/Main.c

@@ -1,18 +1,15 @@
 #include <stdint.h>
 #include <stdio.h>
-#include <string.h>
 
 #include "Code.h"
 #include "Compiler.h"
 #include "Memory.h"
 #include "Tokenizer.h"
 
-static u8 tokens[1000];
-
 static void compileAndRun(Tokenizer* t) {
-    const Error* e = compileFile(t);
-    if(hasError(e)) {
-        puts(e->text);
+    Error e = compileFile(t);
+    if(hasError(&e)) {
+        puts(e.text);
         return;
     }
     codeRun();
@@ -23,31 +20,33 @@ int main(int argCount, const char** args) {
         return 0;
     }
 
-    static char heap[100];
+    static char heap[1024];
     memoryInit(heap, sizeof(heap));
-    memoryDump();
+    // memoryDump();
 
-    char* c1 = memoryAllocate(16);
-    char* c2 = memoryAllocate(22);
-    char* c3 = memoryAllocate(32);
-    if(c1 == nullptr || c2 == nullptr || c3 == nullptr) {
-        return 0;
-    }
-    memset(c1, 1, 16);
-    memset(c2, 2, 22);
-    memset(c3, 3, 32);
-    memoryDump();
-    memoryFree(c2);
-    memoryDump();
-    memoryFree(c3);
-    memoryDump();
-    memoryFree(c1);
-    memoryDump();
+    // char* c1 = memoryAllocate(16);
+    // char* c2 = memoryAllocate(22);
+    // char* c3 = memoryAllocate(32);
+    // if(c1 == nullptr || c2 == nullptr || c3 == nullptr) {
+    //     return 0;
+    // }
+    // memset(c1, 1, 16);
+    // memset(c2, 2, 22);
+    // memset(c3, 3, 32);
+    // memoryDump();
+    // memoryFree(c2);
+    // memoryDump();
+    // memoryFree(c3);
+    // memoryDump();
+    // memoryFree(c1);
+    // memoryDump();
 
     Tokenizer t;
-    if(tokenizerInit(&t, args[1], tokens, sizeof(tokens))) {
-        puts(tokenizerGetError(&t));
-        return 0;
+    Error e = tokenizerInit(&t, args[1]);
+    if(hasError(&e)) {
+        puts(e.text);
+    } else {
+        compileAndRun(&t);
     }
 
     // while(true) {
@@ -63,7 +62,6 @@ int main(int argCount, const char** args) {
     //         break;
     //     }
     // }
-    compileAndRun(&t);
     tokenizerDestroy(&t);
 
     //  char line[256];

+ 3 - 0
src/Memory.c

@@ -74,6 +74,9 @@ void* memoryAllocate(size_t bytes) {
 }
 
 void memoryFree(void* p) {
+    if(p == nullptr) {
+        return;
+    }
     Slot* s = (Slot*)p - 1;
     if(s->next != CANARY) {
         fprintf(stderr, "free with corupt canary\n");

+ 87 - 84
src/Tokenizer.c

@@ -1,38 +1,47 @@
 #include "Tokenizer.h"
 
+#include <assert.h>
 #include <errno.h>
+#include <setjmp.h>
 #include <stdarg.h>
+#include <stdio.h>
 #include <stdlib.h>
 
-check_format(2, 3) static void tokenizerError(
-    Tokenizer* t, const char* format, ...) {
-    va_list args;
-    va_start(args, format);
-    vsnprintf(t->error, sizeof(t->error), format, args);
-    va_end(args);
-}
+typedef struct {
+    int line;
+    FILE* file;
+    jmp_buf jump;
+    Error error;
+    Tokenizer* tokenizer;
+} TState;
+
+#define THROW_ERROR(format, ...)                                   \
+    snprintf(                                                      \
+        t->error.text, sizeof(t->error.text), "Line %d | " format, \
+        t->line __VA_OPT__(, ) __VA_ARGS__);                       \
+    longjmp(t->jump, 1)
 
-static void tokenizerTooMuchTokens(Tokenizer* t) {
-    tokenizerError(t, "Line has too much tokens");
+[[noreturn]] static void tTooMuchTokens(TState* t) {
+    THROW_ERROR("Line has too much tokens");
 }
 
-static void tokenizerInvalidToken(Tokenizer* t, char c) {
-    tokenizerError(t, "Unexpected token '%c'", c);
+[[noreturn]] static void tInvalidToken(TState* t, char c) {
+    THROW_ERROR("Unexpected token '%c'", c);
 }
 
-static void tokenizerInvalidNumber(Tokenizer* t) {
-    tokenizerError(t, "Invalid number");
+[[noreturn]] static void tInvalidNumber(TState* t) {
+    THROW_ERROR("Invalid number");
 }
 
-static void tokenizerAddToken(Tokenizer* t, TokenType type) {
-    if(bufferWriteU8(&t->buffer, type)) {
-        tokenizerTooMuchTokens(t);
+static void tAddToken(TState* t, TokenType type) {
+    if(bufferWriteU8(&t->tokenizer->buffer, type)) {
+        tTooMuchTokens(t);
     }
 }
 
-static void tokenizerAddChar(Tokenizer* t, char c) {
-    if(bufferWriteI8(&t->buffer, c)) {
-        tokenizerTooMuchTokens(t);
+static void tAddChar(TState* t, char c) {
+    if(bufferWriteI8(&t->tokenizer->buffer, c)) {
+        tTooMuchTokens(t);
     }
 }
 
@@ -52,25 +61,24 @@ static bool isTokenEnd(char c) {
     return c == ' ' || c == '\0' || c == '\n';
 }
 
-static const char* tokenizerAddLiteral(Tokenizer* t, const char* s) {
-    tokenizerAddToken(t, LITERAL);
-    tokenizerAddChar(t, *s);
+static const char* tokenizerAddLiteral(TState* t, const char* s) {
+    tAddToken(t, LITERAL);
+    tAddChar(t, *s);
     while(true) {
         char c = *(++s);
         if(isAlphaNumeric(c)) {
-            tokenizerAddChar(t, c);
+            tAddChar(t, c);
         } else if(isTokenEnd(c)) {
             break;
         } else {
-            tokenizerInvalidToken(t, c);
-            break;
+            tInvalidToken(t, c);
         }
     }
-    tokenizerAddChar(t, '\0');
+    tAddChar(t, '\0');
     return s;
 }
 
-static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
+static const char* tokenizerAddNumber(TState* t, const char* s) {
     size_t nIndex = 0;
     char number[64] = {};
     number[nIndex++] = *s;
@@ -79,9 +87,9 @@ static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
         if(isTokenEnd(c)) {
             break;
         } else if(!isNumber(c)) {
-            tokenizerInvalidToken(t, c);
+            tInvalidToken(t, c);
         } else if(nIndex >= sizeof(number) - 1) {
-            tokenizerInvalidNumber(t);
+            tInvalidNumber(t);
         }
         number[nIndex++] = c;
     }
@@ -89,36 +97,35 @@ static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
     errno = 0;
     i64 i = strtoll(number, &end, 10);
     if(errno != 0) {
-        tokenizerInvalidNumber(t);
+        tInvalidNumber(t);
     } else if(*end == '\0') {
-        tokenizerAddToken(t, INT64);
-        if(bufferWriteI64(&t->buffer, i)) {
-            tokenizerTooMuchTokens(t);
+        tAddToken(t, INT64);
+        if(bufferWriteI64(&t->tokenizer->buffer, i)) {
+            tTooMuchTokens(t);
         }
         return s;
     }
     return s;
 }
 
-static const char* tokenizerAddString(Tokenizer* t, const char* s) {
-    tokenizerAddToken(t, STRING);
+static const char* tokenizerAddString(TState* t, const char* s) {
+    tAddToken(t, STRING);
     while(true) {
         char c = *(++s);
         if(c == '\0') {
-            tokenizerError(t, "Unclosed string");
-            break;
+            THROW_ERROR("Unclosed string");
         } else if(c == '"') {
             s++;
             break;
         }
-        tokenizerAddChar(t, c);
+        tAddChar(t, c);
     }
-    tokenizerAddChar(t, '\0');
+    tAddChar(t, '\0');
     return s;
 }
 
-static void tokenizerParseLineString(Tokenizer* t, const char* s) {
-    while(!tokenizerHasError(t)) {
+static void tParseLineString(TState* t, const char* s) {
+    while(true) {
         char c = *s;
         if(isLetter(c)) {
             s = tokenizerAddLiteral(t, s);
@@ -127,104 +134,100 @@ static void tokenizerParseLineString(Tokenizer* t, const char* s) {
         } else if(c == '"') {
             s = tokenizerAddString(t, s);
         } else if(c == '\n') {
-            tokenizerAddToken(t, NEWLINE);
+            tAddToken(t, NEWLINE);
             break;
         } else if(c == ' ') {
             s++;
         } else if(c == '+') {
-            tokenizerAddToken(t, PLUS);
+            tAddToken(t, PLUS);
             s++;
         } else if(c == '\0') {
             break;
         } else {
-            tokenizerInvalidToken(t, c);
+            tInvalidToken(t, c);
         }
     }
 }
 
-static void tokenizerParseLine(Tokenizer* t) {
-    bufferReset(&t->buffer);
-    t->line++;
-    char line[256] = {};
-    if(fgets(line, sizeof(line), t->file) == nullptr) {
-        return;
-    }
-    char c = line[sizeof(line) - 2];
-    if(c != '\n' && c != '\0') {
-        tokenizerError(t, "Too long line");
-        return;
+static void tParseLines(TState* t) {
+    while(true) {
+        t->line++;
+        char line[256] = {};
+        if(fgets(line, sizeof(line), t->file) == nullptr) {
+            return;
+        }
+        char c = line[sizeof(line) - 2];
+        if(c != '\n' && c != '\0') {
+            THROW_ERROR("Too long line");
+        }
+        tParseLineString(t, line);
     }
-    tokenizerParseLineString(t, line);
 }
 
-static const char* tokenizerReadString(Tokenizer* t) {
+static const char* tReadString(Tokenizer* t) {
     const char* c = (char*)(t->buffer.data + t->buffer.readIndex);
     i8 i = 1;
     while(i != 0) {
         if(bufferReadI8(&t->buffer, &i)) {
-            tokenizerError(t, "empty buffer on readInt64");
+            assert(false);
             return "";
         }
     }
     return c;
 }
 
-static i64 tokenizerReadInt64(Tokenizer* t) {
+static i64 tReadInt64(Tokenizer* t) {
     i64 i = 0;
     if(bufferReadI64(&t->buffer, &i)) {
-        tokenizerError(t, "empty buffer on readInt64");
+        assert(false);
     }
     return i;
 }
 
 Token tokenizerNext(Tokenizer* t) {
     Token token = {.type = END};
-    if(bufferIsEmpty(&t->buffer)) {
-        tokenizerParseLine(t);
-    }
-    if(tokenizerHasError(t) || bufferReadU8(&t->buffer, &token.type)) {
+    if(bufferReadU8(&t->buffer, &token.type)) {
         return token;
     }
     switch(token.type) {
         case STRING:
-        case LITERAL: token.stringValue = tokenizerReadString(t); break;
-        case INT64: token.intValue = tokenizerReadInt64(t); break;
+        case LITERAL: token.stringValue = tReadString(t); break;
+        case INT64: token.intValue = tReadInt64(t); break;
         default: break;
     }
     return token;
 }
 
 Token tokenizerPeek(Tokenizer* t) {
-    size_t index =
-        bufferIsEmpty(&t->buffer) ? 0 : bufferGetReadIndex(&t->buffer);
+    size_t index = bufferGetReadIndex(&t->buffer);
     Token token = tokenizerNext(t);
     bufferSetReadIndex(&t->buffer, index);
     return token;
 }
 
-bool tokenizerInit(Tokenizer* t, const char* path, u8* tokens, size_t n) {
-    bufferInit(&t->buffer, tokens, n);
-    t->error[0] = '\0';
-    t->line = 0;
+static void tParseFile(TState* t, const char* path) {
     t->file = fopen(path, "r");
     if(t->file == nullptr) {
-        tokenizerError(t, "Cannot read file '%s'", path);
-        return true;
+        THROW_ERROR("Cannot read file '%s'", path);
     }
-    return false;
-}
-
-void tokenizerDestroy(Tokenizer* t) {
-    fclose(t->file);
-    *t = (Tokenizer){};
+    tParseLines(t);
 }
 
-bool tokenizerHasError(const Tokenizer* t) {
-    return t->error[0] != '\0';
+Error tokenizerInit(Tokenizer* t, const char* path) {
+    bufferInit(&t->buffer);
+    TState state = {.tokenizer = t};
+    if(!setjmp(state.jump)) {
+        tParseFile(&state, path);
+    }
+    if(state.file != nullptr) {
+        fclose(state.file);
+    }
+    return state.error;
 }
 
-const char* tokenizerGetError(const Tokenizer* t) {
-    return t->error;
+void tokenizerDestroy(Tokenizer* t) {
+    bufferDestroy(&t->buffer);
+    *t = (Tokenizer){};
 }
 
 void tokenizerPrintToken(const Token* token, char* buffer, size_t n) {

+ 2 - 10
src/Tokenizer.h

@@ -1,9 +1,8 @@
 #ifndef BASIC_TOKENIZER_H
 #define BASIC_TOKENIZER_H
 
-#include <stdio.h>
-
 #include "Buffer.h"
+#include "Error.h"
 
 typedef enum : u8 { LITERAL, INT64, STRING, PLUS, NEWLINE, END } TokenType;
 
@@ -17,22 +16,15 @@ typedef struct {
 } Token;
 
 typedef struct {
-    char error[256];
-    FILE* file;
-    size_t line;
     Buffer buffer;
 } Tokenizer;
 
-[[nodiscard]] bool tokenizerInit(
-    Tokenizer* t, const char* path, u8* tokens, size_t n);
+[[nodiscard]] Error tokenizerInit(Tokenizer* t, const char* path);
 void tokenizerDestroy(Tokenizer* t);
 
 Token tokenizerNext(Tokenizer* t);
 Token tokenizerPeek(Tokenizer* t);
 
-bool tokenizerHasError(const Tokenizer* t);
-const char* tokenizerGetError(const Tokenizer* t);
-
 void tokenizerPrintToken(const Token* token, char* buffer, size_t n);
 
 #endif