Kajetan Johannes Hammerle 2 tygodni temu
rodzic
commit
eac910740d
8 zmienionych plików z 490 dodań i 16 usunięć
  1. 2 0
      CMakeLists.txt
  2. 70 0
      src/Buffer.c
  3. 38 0
      src/Buffer.h
  4. 6 4
      src/Compiler.c
  5. 37 12
      src/Main.c
  6. 276 0
      src/Tokenizer.c
  7. 38 0
      src/Tokenizer.h
  8. 23 0
      src/Types.h

+ 2 - 0
CMakeLists.txt

@@ -10,6 +10,8 @@ set(SRC
     "src/Values.c"
     "src/Error.c"
     "src/Compiler.c"
+    "src/Tokenizer.c"
+    "src/Buffer.c"
 )
 
 set(COMPILER_ARGUMENTS

+ 70 - 0
src/Buffer.c

@@ -0,0 +1,70 @@
+#include "Buffer.h"
+
+#include <string.h>
+
+// Binds the buffer to n bytes of caller-provided storage and puts both
+// cursors at the start. The storage itself is not touched.
+void bufferInit(Buffer* b, u8* data, size_t n) {
+    *b = (Buffer){
+        .data = data, .maxIndex = n, .readIndex = 0, .writeIndex = 0};
+}
+
+// Rewinds the read and the write cursor to offset 0; stored bytes are kept.
+void bufferReset(Buffer* b) {
+    b->readIndex = b->writeIndex = 0;
+}
+
+// Appends n bytes from p at the write cursor and advances the cursor.
+// Returns true on failure (not enough room left, nothing is written) and
+// false on success.
+bool bufferWrite(Buffer* b, const void* p, size_t n) {
+    // Compare n against the remaining space instead of computing
+    // writeIndex + n, which can wrap around for a huge n and slip past the
+    // bound. The first test covers a cursor that was moved beyond the
+    // capacity through bufferSetWriteIndex.
+    if(b->writeIndex > b->maxIndex || n > b->maxIndex - b->writeIndex) {
+        return true;
+    }
+    memcpy(b->data + b->writeIndex, p, n);
+    b->writeIndex += n;
+    return false;
+}
+
+// Copies n bytes from the read cursor into p and advances the cursor.
+// Returns true on failure (fewer than n unread bytes, p is left untouched)
+// and false on success.
+bool bufferRead(Buffer* b, void* p, size_t n) {
+    // Compare n against the number of unread bytes instead of computing
+    // readIndex + n, which can wrap around for a huge n. The first test
+    // covers a read cursor that was moved past the write cursor through
+    // bufferSetReadIndex.
+    if(b->readIndex > b->writeIndex || n > b->writeIndex - b->readIndex) {
+        return true;
+    }
+    memcpy(p, b->data + b->readIndex, n);
+    b->readIndex += n;
+    return false;
+}
+
+// Generates the typed wrappers bufferWrite<Type> and bufferRead<Type>. Both
+// forward to bufferWrite/bufferRead with the size of the value and share
+// their return convention (true means failure).
+#define BUFFER_READ_WRITE_IMPL(Type, type)      \
+    bool bufferWrite##Type(Buffer* b, type t) { \
+        return bufferWrite(b, &t, sizeof t);    \
+    }                                           \
+    bool bufferRead##Type(Buffer* b, type* t) { \
+        return bufferRead(b, t, sizeof *t);     \
+    }
+
+BUFFER_READ_WRITE_IMPL(I8, i8)
+BUFFER_READ_WRITE_IMPL(I16, i16)
+BUFFER_READ_WRITE_IMPL(I32, i32)
+BUFFER_READ_WRITE_IMPL(I64, i64)
+BUFFER_READ_WRITE_IMPL(U8, u8)
+BUFFER_READ_WRITE_IMPL(U16, u16)
+BUFFER_READ_WRITE_IMPL(U32, u32)
+BUFFER_READ_WRITE_IMPL(U64, u64)
+
+// True when the read cursor has reached (or passed) the write cursor, i.e.
+// there is nothing left to read.
+bool bufferIsEmpty(const Buffer* b) {
+    return !(b->readIndex < b->writeIndex);
+}
+
+size_t bufferGetReadIndex(const Buffer* b) {  // Returns the current read cursor.
+    return b->readIndex;
+}
+
+void bufferSetReadIndex(Buffer* b, size_t n) {  // Moves the read cursor to offset n.
+    b->readIndex = n;  // stored as-is, no range check against writeIndex
+}
+
+size_t bufferGetWriteIndex(const Buffer* b) {  // Returns the current write cursor.
+    return b->writeIndex;
+}
+
+void bufferSetWriteIndex(Buffer* b, size_t n) {  // Moves the write cursor to offset n.
+    b->writeIndex = n;  // stored as-is, no range check against maxIndex
+}

+ 38 - 0
src/Buffer.h

@@ -0,0 +1,38 @@
+#ifndef BASIC_BUFFER_H
+#define BASIC_BUFFER_H
+
+#include "Types.h"
+
+typedef struct {  // Byte buffer over caller-provided storage with separate read and write cursors.
+    u8* data;           // storage handed to bufferInit
+    size_t maxIndex;    // capacity in bytes; bufferWrite refuses to go past it
+    size_t readIndex;   // offset of the next byte bufferRead returns
+    size_t writeIndex;  // offset where bufferWrite stores the next byte
+} Buffer;
+
+void bufferInit(Buffer* b, u8* data, size_t n);  // attach n bytes of storage and reset both cursors
+void bufferReset(Buffer* b);  // set both cursors back to 0
+[[nodiscard]] bool bufferWrite(Buffer* b, const void* p, size_t n);  // returns true on failure (not enough room)
+[[nodiscard]] bool bufferRead(Buffer* b, void* p, size_t n);  // returns true on failure (not enough unread bytes)
+
+#define BUFFER_READ_WRITE(Type, type)                        \
+    [[nodiscard]] bool bufferWrite##Type(Buffer* b, type t); \
+    [[nodiscard]] bool bufferRead##Type(Buffer* b, type* t)  // closing ';' is supplied at the use site
+
+BUFFER_READ_WRITE(I8, i8);  // each line declares bufferWrite<Type> and bufferRead<Type>
+BUFFER_READ_WRITE(I16, i16);
+BUFFER_READ_WRITE(I32, i32);
+BUFFER_READ_WRITE(I64, i64);
+BUFFER_READ_WRITE(U8, u8);
+BUFFER_READ_WRITE(U16, u16);
+BUFFER_READ_WRITE(U32, u32);
+BUFFER_READ_WRITE(U64, u64);
+
+bool bufferIsEmpty(const Buffer* b);  // true when readIndex >= writeIndex
+
+size_t bufferGetReadIndex(const Buffer* b);
+void bufferSetReadIndex(Buffer* b, size_t n);  // n is not validated
+size_t bufferGetWriteIndex(const Buffer* b);
+void bufferSetWriteIndex(Buffer* b, size_t n);  // n is not validated
+
+#endif

+ 6 - 4
src/Compiler.c

@@ -23,10 +23,12 @@ typedef enum : u8 { LITERAL, INT64, DOUBLE, STRING, PLUS, NEWLINE, END } Token;
     snprintf(error.text, sizeof(error.text), __VA_ARGS__); \
     longjmp(jumpPosition, 1)
 
-#define CODE(command)                                          \
-    if(command) {                                              \
-        THROW_ERROR("Code overflow on line %zu", lineCounter); \
-    }
+#define CODE(command)                                              \
+    do {                                                           \
+        if(command) {                                              \
+            THROW_ERROR("Code overflow on line %zu", lineCounter); \
+        }                                                          \
+    } while(false)
 
 static void cleanup() {
     if(file != nullptr) {

+ 37 - 12
src/Main.c

@@ -3,25 +3,50 @@
 
 #include "Code.h"
 #include "Compiler.h"
+#include "Tokenizer.h"
+
+static u8 tokens[1000];
 
 int main(int argCount, const char** args) {
     if(argCount < 2) {
         return 0;
     }
-    const Error* e = compileFile(args[1]);
-    if(hasError(e)) {
-        puts(e->text);
+
+    Tokenizer t;
+    if(tokenizerInit(&t, args[1], tokens, sizeof(tokens))) {
+        puts(tokenizerGetError(&t));
         return 0;
     }
-    codeRun();
-    // char line[256];
-    // while(true) {
-    //     fgets(line, sizeof(line), stdin);
-    //     if(strcmp(line, "quit\n") == 0) {
-    //         break;
-    //     }
-    //     puts(line);
+
+    while(true) {
+        Token token = tokenizerNext(&t);
+        if(tokenizerHasError(&t)) {
+            puts(tokenizerGetError(&t));
+            break;
+        }
+        char buffer[256];
+        tokenizerPrintToken(&token, buffer, sizeof(buffer));
+        puts(buffer);
+        if(token.type == END) {
+            break;
+        }
+    }
+
+    // const Error* e = compileFile(args[1]);
+    // if(hasError(e)) {
+    //     puts(e->text);
+    //     return 0;
     // }
-    // puts("quit");
+    // codeRun();
+
+    //  char line[256];
+    //  while(true) {
+    //      fgets(line, sizeof(line), stdin);
+    //      if(strcmp(line, "quit\n") == 0) {
+    //          break;
+    //      }
+    //      puts(line);
+    //  }
+    //  puts("quit");
     return 0;
 }

+ 276 - 0
src/Tokenizer.c

@@ -0,0 +1,276 @@
+#include "Tokenizer.h"
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+check_format(2, 3) static void tokenizerError(
+    Tokenizer* t, const char* format, ...) {
+    va_list args;
+    va_start(args, format);
+    vsnprintf(t->error, sizeof(t->error), format, args);
+    va_end(args);
+}
+
+static void tokenizerTooMuchTokens(Tokenizer* t) {  // Records the error used when a write to the token buffer fails.
+    tokenizerError(t, "Line has too much tokens");
+}
+
+static void tokenizerInvalidToken(Tokenizer* t, char c) {  // Records an error naming the offending character c.
+    tokenizerError(t, "Unexpected token '%c'", c);
+}
+
+static void tokenizerInvalidNumber(Tokenizer* t) {  // Records the error used for numbers that are too long or fail to convert.
+    tokenizerError(t, "Invalid number");
+}
+
+// Appends the one-byte token tag to the token buffer; a full buffer is
+// reported through the tokenizer error.
+static void tokenizerAddToken(Tokenizer* t, TokenType type) {
+    const bool overflow = bufferWriteU8(&t->buffer, type);
+    if(overflow) {
+        tokenizerTooMuchTokens(t);
+    }
+}
+
+// Appends a single payload character to the token buffer; a full buffer is
+// reported through the tokenizer error.
+static void tokenizerAddChar(Tokenizer* t, char c) {
+    const bool overflow = bufferWriteI8(&t->buffer, c);
+    if(overflow) {
+        tokenizerTooMuchTokens(t);
+    }
+}
+
+// True for the letters a-z and A-Z only.
+static bool isLetter(char c) {
+    const bool lower = 'a' <= c && c <= 'z';
+    const bool upper = 'A' <= c && c <= 'Z';
+    return lower || upper;
+}
+
+// True for the decimal digits 0-9.
+static bool isNumber(char c) {
+    return '0' <= c && c <= '9';
+}
+
+// True for every character accepted by isLetter or isNumber.
+static bool isAlphaNumeric(char c) {
+    if(isLetter(c)) {
+        return true;
+    }
+    return isNumber(c);
+}
+
+// True for the characters that terminate a literal or a number: a space,
+// the newline and the string terminator.
+static bool isTokenEnd(char c) {
+    switch(c) {
+        case ' ':
+        case '\0':
+        case '\n': return true;
+        default: return false;
+    }
+}
+
+// Encodes a literal as a LITERAL tag followed by its characters and a
+// terminating '\0'. s points at the first character. The returned pointer
+// is the character that ended the literal, or the offending character when
+// an error was recorded.
+static const char* tokenizerAddLiteral(Tokenizer* t, const char* s) {
+    tokenizerAddToken(t, LITERAL);
+    tokenizerAddChar(t, *s);
+    for(char c = *(++s); !isTokenEnd(c); c = *(++s)) {
+        if(!isAlphaNumeric(c)) {
+            tokenizerInvalidToken(t, c);
+            break;
+        }
+        tokenizerAddChar(t, c);
+    }
+    tokenizerAddChar(t, '\0');
+    return s;
+}
+
+// Encodes a decimal number as an INT64 tag followed by the 8-byte value.
+// s points at the first digit. The returned pointer is the character that
+// ended the number, or the position where parsing stopped when an error was
+// recorded.
+static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
+    char number[64] = {};
+    size_t nIndex = 0;
+    number[nIndex++] = *s;
+    while(true) {
+        char c = *(++s);
+        if(isTokenEnd(c)) {
+            break;
+        }
+        // Stop at the first problem: carrying on would keep storing
+        // characters and eventually write past the end of 'number'.
+        if(!isNumber(c)) {
+            tokenizerInvalidToken(t, c);
+            return s;
+        }
+        if(nIndex >= sizeof(number) - 1) {
+            tokenizerInvalidNumber(t);
+            return s;
+        }
+        number[nIndex++] = c;
+    }
+    char* end = nullptr;
+    errno = 0;
+    i64 i = strtoll(number, &end, 10);
+    // Out-of-range values set errno; anything that was not consumed
+    // completely is rejected as well instead of being dropped silently.
+    if(errno != 0 || end == number || *end != '\0') {
+        tokenizerInvalidNumber(t);
+        return s;
+    }
+    tokenizerAddToken(t, INT64);
+    if(bufferWriteI64(&t->buffer, i)) {
+        tokenizerTooMuchTokens(t);
+    }
+    return s;
+}
+
+// Encodes a quoted string as a STRING tag followed by its characters and a
+// terminating '\0'. s points at the opening quote. On success the returned
+// pointer is the character after the closing quote; for an unclosed string
+// an error is recorded and the pointer is the line's terminating '\0'.
+static const char* tokenizerAddString(Tokenizer* t, const char* s) {
+    tokenizerAddToken(t, STRING);
+    bool closed = false;
+    for(++s; *s != '\0'; ++s) {
+        if(*s == '"') {
+            closed = true;
+            ++s;
+            break;
+        }
+        tokenizerAddChar(t, *s);
+    }
+    if(!closed) {
+        tokenizerError(t, "Unclosed string");
+    }
+    tokenizerAddChar(t, '\0');
+    return s;
+}
+
+// Tokenizes one line of text into the token buffer. Stops at the newline
+// (which yields a NEWLINE token), at the terminating '\0', or as soon as an
+// error has been recorded.
+static void tokenizerParseLineString(Tokenizer* t, const char* s) {
+    while(!tokenizerHasError(t)) {
+        const char c = *s;
+        if(c == '\0') {
+            return;
+        }
+        if(c == '\n') {
+            tokenizerAddToken(t, NEWLINE);
+            return;
+        }
+        if(c == ' ') {
+            s++;
+            continue;
+        }
+        if(c == '+') {
+            tokenizerAddToken(t, PLUS);
+            s++;
+            continue;
+        }
+        if(c == '"') {
+            s = tokenizerAddString(t, s);
+        } else if(isLetter(c)) {
+            s = tokenizerAddLiteral(t, s);
+        } else if(isNumber(c)) {
+            s = tokenizerAddNumber(t, s);
+        } else {
+            tokenizerInvalidToken(t, c);
+        }
+    }
+}
+
+// Reads the next line of the source file and tokenizes it into the token
+// buffer. The buffer is reset first, so it stays empty at end of file.
+// Failures are reported through the tokenizer error.
+static void tokenizerParseLine(Tokenizer* t) {
+    bufferReset(&t->buffer);
+    t->line++;
+    char line[256] = {};
+    if(fgets(line, sizeof(line), t->file) == nullptr) {
+        // fgets returns a null pointer for end of file and for read errors
+        // alike; only the latter is a failure.
+        if(ferror(t->file)) {
+            tokenizerError(t, "Cannot read line %zu", t->line);
+        }
+        return;
+    }
+    // The array starts zeroed, so a non-zero byte in the second to last
+    // slot means fgets filled the whole array. Unless that byte is the
+    // newline, the line did not fit.
+    char c = line[sizeof(line) - 2];
+    if(c != '\n' && c != '\0') {
+        tokenizerError(t, "Too long line");
+        return;
+    }
+    tokenizerParseLineString(t, line);
+}
+
+//[[noreturn]] static void unexpectedToken(Token t) {
+//    switch(t) {
+//        case LITERAL:
+//            THROW_ERROR(
+//                "Unexpected literal(%s) on line %zu", readString(),
+//                lineCounter);
+//            break;
+//        case INT64:
+//            THROW_ERROR(
+//                "Unexpected int(%ld) on line %zu", readInt64(), lineCounter);
+//            break;
+//        case DOUBLE:
+//            THROW_ERROR(
+//                "Unexpected double(%lf) on line %zu", readDouble(),
+//                lineCounter);
+//            break;
+//        case STRING:
+//            THROW_ERROR(
+//                "Unexpected string(%s) on line %zu", readString(),
+//                lineCounter);
+//            break;
+//        case PLUS:
+//            THROW_ERROR("Unexpected plus on line %zu", lineCounter);
+//            break;
+//        case NEWLINE:
+//            THROW_ERROR("Unexpected newline on line %zu", lineCounter);
+//            break;
+//        case END: THROW_ERROR("Unexpected end on line %zu", lineCounter);
+//        break;
+//    }
+//    THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
+//}
+
+// Returns the '\0'-terminated string stored at the read cursor and moves
+// the cursor past its terminator. The pointer refers into the token buffer,
+// so it is only valid until the buffer is filled again. If the buffer ends
+// before a terminator is found, an error is recorded and "" is returned.
+static const char* tokenizerReadString(Tokenizer* t) {
+    const char* c = (const char*)(t->buffer.data + t->buffer.readIndex);
+    i8 i = 1;
+    while(i != 0) {
+        if(bufferReadI8(&t->buffer, &i)) {
+            // The message used to name readInt64, which pointed at the
+            // wrong reader.
+            tokenizerError(t, "empty buffer on readString");
+            return "";
+        }
+    }
+    return c;
+}
+
+// Decodes the integer payload stored at the read cursor. Yields 0 and
+// records an error when the buffer does not hold enough bytes.
+static i64 tokenizerReadInt64(Tokenizer* t) {
+    i64 value = 0;
+    const bool failed = bufferReadI64(&t->buffer, &value);
+    if(failed) {
+        tokenizerError(t, "empty buffer on readInt64");
+    }
+    return value;
+}
+
+// Consumes and returns the next token. When the token buffer is empty the
+// next line is tokenized first. An END token is returned when an error is
+// pending or no token tag can be read.
+Token tokenizerNext(Tokenizer* t) {
+    Token token = {.type = END};
+    if(bufferIsEmpty(&t->buffer)) {
+        tokenizerParseLine(t);
+    }
+    if(tokenizerHasError(t)) {
+        return token;
+    }
+    if(bufferReadU8(&t->buffer, &token.type)) {
+        return token;
+    }
+    // Only literals, strings and integers carry a payload after the tag.
+    if(token.type == STRING || token.type == LITERAL) {
+        token.stringValue = tokenizerReadString(t);
+    } else if(token.type == INT64) {
+        token.intValue = tokenizerReadInt64(t);
+    }
+    return token;
+}
+
+// Returns the next token without consuming it: the token is obtained
+// through tokenizerNext and the read cursor is put back afterwards.
+Token tokenizerPeek(Tokenizer* t) {
+    size_t line = t->line;
+    size_t index = bufferGetReadIndex(&t->buffer);
+    Token token = tokenizerNext(t);
+    // If tokenizerNext had to load a new line, the buffer was reset and the
+    // saved index belongs to the previous line. The peeked token then sits
+    // at the start of the fresh buffer.
+    bufferSetReadIndex(&t->buffer, t->line == line ? index : 0);
+    return token;
+}
+
+bool tokenizerInit(Tokenizer* t, const char* path, u8* tokens, size_t n) {
+    bufferInit(&t->buffer, tokens, n);
+    t->error[0] = '\0';
+    t->line = 0;
+    t->file = fopen(path, "r");
+    if(t->file == nullptr) {
+        tokenizerError(t, "Cannot read file '%s'", path);
+        return true;
+    }
+    return false;
+}
+
+// Closes the source file and zeroes the tokenizer.
+void tokenizerDestroy(Tokenizer* t) {
+    // The file pointer is null after a failed tokenizerInit, and passing a
+    // null pointer to fclose is undefined behaviour.
+    if(t->file != nullptr) {
+        fclose(t->file);
+    }
+    *t = (Tokenizer){};
+}
+
+bool tokenizerHasError(const Tokenizer* t) {
+    return t->error[0] != '\0';
+}
+
+const char* tokenizerGetError(const Tokenizer* t) {  // Returns the stored error text (empty string when no error is recorded).
+    return t->error;  // points into *t, so it is only valid while the tokenizer is
+}
+
+// Writes a readable description of the token into buffer, truncated to n
+// bytes including the terminator (snprintf semantics).
+void tokenizerPrintToken(const Token* token, char* buffer, size_t n) {
+    switch(token->type) {
+        case LITERAL:
+            snprintf(buffer, n, "Literal(%s)", token->stringValue);
+            break;
+        case INT64:
+            // i64 is int64_t, which is not 'long' on every platform; the
+            // cast makes the argument match the %lld conversion everywhere.
+            snprintf(buffer, n, "Int64(%lld)", (long long)token->intValue);
+            break;
+        case STRING:
+            snprintf(buffer, n, "String(%s)", token->stringValue);
+            break;
+        case PLUS: snprintf(buffer, n, "Plus"); break;
+        case NEWLINE: snprintf(buffer, n, "Newline"); break;
+        case END: snprintf(buffer, n, "End"); break;
+        default: snprintf(buffer, n, "Unknown"); break;
+    }
+}

+ 38 - 0
src/Tokenizer.h

@@ -0,0 +1,38 @@
+#ifndef BASIC_TOKENIZER_H
+#define BASIC_TOKENIZER_H
+
+#include <stdio.h>
+
+#include "Buffer.h"
+
+typedef enum : u8 { LITERAL, INT64, STRING, PLUS, NEWLINE, END } TokenType;  // one-byte token tags; END is what tokenizerNext returns when nothing can be read
+
+typedef struct {  // A decoded token; 'type' selects which union member is meaningful.
+    TokenType type;
+
+    union {
+        const char* stringValue;  // set by tokenizerNext for LITERAL and STRING
+        i64 intValue;             // set by tokenizerNext for INT64
+    };
+} Token;
+
+typedef struct {  // State of the line-by-line tokenizer.
+    char error[256];  // last error message; an empty string means no error
+    FILE* file;       // source file opened by tokenizerInit
+    size_t line;      // incremented before each attempt to read a line
+    Buffer buffer;    // encoded tokens of the current line
+} Tokenizer;
+
+[[nodiscard]] bool tokenizerInit(
+    Tokenizer* t, const char* path, u8* tokens, size_t n);  // returns true when 'path' cannot be opened; 'tokens' is n bytes of token storage
+void tokenizerDestroy(Tokenizer* t);  // closes the file and zeroes *t
+
+Token tokenizerNext(Tokenizer* t);  // consumes the next token; END when nothing can be read or an error is pending
+Token tokenizerPeek(Tokenizer* t);  // calls tokenizerNext, then restores the buffer read index
+
+bool tokenizerHasError(const Tokenizer* t);  // true when an error message is stored
+const char* tokenizerGetError(const Tokenizer* t);  // the stored error message
+
+void tokenizerPrintToken(const Token* token, char* buffer, size_t n);  // formats a description of the token into buffer (at most n bytes)
+
+#endif

+ 23 - 0
src/Types.h

@@ -0,0 +1,23 @@
+#ifndef BASIC_TYPES_H
+#define BASIC_TYPES_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef uint64_t u64;  // short aliases for the <stdint.h> fixed-width integer types
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+typedef int64_t i64;
+typedef int32_t i32;
+typedef int16_t i16;
+typedef int8_t i8;
+
+#if defined(__GNUC__)  // check_format marks a function as printf-like so its format arguments get checked
+#define check_format(format_index, arg_start_index)                \
+    __attribute__((format(printf, format_index, arg_start_index)))
+#else
+#error "please add a 'check_format' option"
+#endif
+
+#endif