Parcourir la source

First program compiles

Kajetan Johannes Hammerle il y a 2 semaines
Parent
commit
b28e5c1264
8 fichiers modifiés avec 384 ajouts et 31 suppressions
  1. 2 0
      CMakeLists.txt
  2. 1 1
      src/Code.h
  3. 322 0
      src/Compiler.c
  4. 8 0
      src/Compiler.h
  5. 1 0
      src/Constants.h
  6. 5 0
      src/Error.c
  7. 10 0
      src/Error.h
  8. 35 30
      src/Main.c

+ 2 - 0
CMakeLists.txt

@@ -7,6 +7,8 @@ set(SRC
     "src/Main.c"
     "src/Code.c"
     "src/Values.c"
+    "src/Error.c"
+    "src/Compiler.c"
 )
 
 set(COMPILER_ARGUMENTS

+ 1 - 1
src/Code.h

@@ -6,7 +6,7 @@
 typedef enum : u8 {
     ADD,
     PUSH_CONSTANT_STRING,
-    PUSH_INT,
+    PUSH_INT64,
     PRINT,
     PRINT_NEWLINE,
     STOP

+ 322 - 0
src/Compiler.c

@@ -0,0 +1,322 @@
+#include "Compiler.h"
+
+#include <errno.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Code.h"
+#include "Constants.h"
+
+// Error record handed back by compileFile(); empty text means "no error".
+static Error error = {};
+// Source file currently being read, or nullptr when none is open.
+static FILE* file = nullptr;
+// Incremented once per tokenizeLine() call; used in error messages.
+static size_t lineCounter = 0;
+// longjmp() target used by THROW_ERROR; armed by setjmp() in compileFile().
+static jmp_buf jumpPosition = {};
+// Byte buffer with the tokens of the current line: tag bytes followed by
+// their inline payloads.
+static u8 tokens[MAX_TOKENS] = {};
+// Offset of the next byte to consume from tokens.
+static size_t tokenReadIndex = 0;
+// Offset one past the last byte written to tokens.
+static size_t tokenWriteIndex = 0;
+
+// Token tags. In the buffer, LITERAL and STRING are followed by a
+// NUL-terminated character sequence, INT64 by the raw bytes of an i64 and
+// DOUBLE by the raw bytes of a double. NEWLINE has no payload. END is never
+// stored; peekToken()/nextToken() return it when no more tokens are available.
+typedef enum : u8 { LITERAL, INT64, DOUBLE, STRING, NEWLINE, END } Token;
+
+#define THROW_ERROR(...)                                   \
+    snprintf(error.text, sizeof(error.text), __VA_ARGS__); \
+    longjmp(jumpPosition, 1)
+
+#define CODE(command)                                          \
+    if(command) {                                              \
+        THROW_ERROR("Code overflow on line %zu", lineCounter); \
+    }
+
+// Closes the source file if one is open and clears the handle.
+static void cleanup() {
+    if(file == nullptr) {
+        return;
+    }
+    fclose(file);
+    file = nullptr;
+}
+
+static void reset() {
+    error.text[0] = '\0';
+    lineCounter = 0;
+    tokenReadIndex = 0;
+    tokenWriteIndex = 0;
+}
+
+// Appends n raw bytes starting at p to the token buffer. Throws when the
+// bytes would not fit into the buffer.
+static void addTokenN(const void* p, size_t n) {
+    size_t newWriteIndex = tokenWriteIndex + n;
+    if(newWriteIndex > MAX_TOKENS) {
+        THROW_ERROR("Line %zu has too much tokens", lineCounter);
+    }
+    memcpy(&tokens[tokenWriteIndex], p, n);
+    tokenWriteIndex = newWriteIndex;
+}
+
+// Appends a single token tag to the token buffer.
+static void addToken(Token t) {
+    addTokenN(&t, sizeof(Token));
+}
+
+// Appends a single payload character to the token buffer.
+static void addChar(char c) {
+    addTokenN(&c, sizeof(char));
+}
+
+// True for the ASCII letters a-z and A-Z.
+static bool isLetter(char c) {
+    bool lower = c >= 'a' && c <= 'z';
+    bool upper = c >= 'A' && c <= 'Z';
+    return lower || upper;
+}
+
+// True for the ASCII digits 0-9.
+static bool isNumber(char c) {
+    return '0' <= c && c <= '9';
+}
+
+// True for ASCII letters and digits.
+static bool isAlphaNumeric(char c) {
+    return isNumber(c) || isLetter(c);
+}
+
+// Throws an "Unexpected token" error for character c. `line` is appended to
+// the message after the source line number; callers pass __LINE__ of the
+// call site.
+[[noreturn]] static void invalidToken(char c, int line) {
+    THROW_ERROR("Unexpected token '%c' on line %zu %d", c, lineCounter, line);
+}
+
+// True for the characters that terminate a literal or number: space, NUL
+// and newline.
+static bool isTokenEnd(char c) {
+    switch(c) {
+        case ' ':
+        case '\0':
+        case '\n': return true;
+        default: return false;
+    }
+}
+
+// Emits a LITERAL token whose payload is the NUL-terminated run of
+// alphanumeric characters starting at s. Throws on any character that is
+// neither alphanumeric nor a token end. Returns a pointer to the terminating
+// character, which is left for the caller to handle.
+static const char* tokenizeLiteral(const char* s) {
+    addToken(LITERAL);
+    addChar(*s);
+    for(s++; !isTokenEnd(*s); s++) {
+        if(!isAlphaNumeric(*s)) {
+            invalidToken(*s, __LINE__);
+        }
+        addChar(*s);
+    }
+    addChar('\0');
+    return s;
+}
+
+// Throws an "Invalid number" error for the current line.
+[[noreturn]] static void invalidNumber() {
+    THROW_ERROR("Invalid number on line %zu", lineCounter);
+}
+
+// Scans the numeric literal starting at s (digits and '.') up to the next
+// token end and emits either an INT64 or a DOUBLE token with the parsed
+// value as payload. Throws on foreign characters, on literals of 64 or more
+// characters and on values that cannot be parsed. Returns a pointer to the
+// terminating character.
+static const char* tokenizeNumber(const char* s) {
+    size_t nIndex = 0;
+    char number[64] = {};
+    number[nIndex++] = *s;
+    while(true) {
+        char c = *(++s);
+        if(isTokenEnd(c)) {
+            break;
+        } else if(!isNumber(c) && c != '.') {
+            invalidToken(c, __LINE__);
+        } else if(nIndex >= sizeof(number) - 1) {
+            // keep the last byte as NUL terminator for strtoll/strtod
+            invalidNumber();
+        }
+        number[nIndex++] = c;
+    }
+    char* end = nullptr;
+    errno = 0;
+    i64 i = strtoll(number, &end, 10);
+    if(*end == '\0') {
+        // The whole literal is an integer, so a range error is final.
+        if(errno != 0) {
+            invalidNumber();
+        }
+        addToken(INT64);
+        addTokenN(&i, sizeof(i));
+        return s;
+    }
+    // Not a plain integer: a range error of the integer prefix must not
+    // reject a valid decimal, so start over with a clean errno.
+    errno = 0;
+    double d = strtod(number, &end);
+    if(errno != 0 || *end != '\0') {
+        invalidNumber();
+    }
+    addToken(DOUBLE);
+    addTokenN(&d, sizeof(d));
+    return s;
+}
+
+// Emits a STRING token for the quoted text starting at the opening '"' that
+// s points to. The payload is the text between the quotes, NUL-terminated.
+// Throws when the line ends before the closing quote. Returns a pointer to
+// the character after the closing quote.
+static const char* tokenizeString(const char* s) {
+    addToken(STRING);
+    for(s++; *s != '"'; s++) {
+        if(*s == '\0') {
+            THROW_ERROR("Unclosed string on line %zu", lineCounter);
+        }
+        addChar(*s);
+    }
+    addChar('\0');
+    return s + 1;
+}
+
+// Splits one line of source text into tokens. Stops at the NUL terminator,
+// or after emitting NEWLINE for the first '\n'. Spaces are skipped; any
+// character that cannot start a token throws.
+static void tokenizeLineString(const char* s) {
+    for(char c = *s; c != '\0'; c = *s) {
+        if(c == ' ') {
+            s++;
+        } else if(c == '\n') {
+            addToken(NEWLINE);
+            return;
+        } else if(c == '"') {
+            s = tokenizeString(s);
+        } else if(isLetter(c)) {
+            s = tokenizeLiteral(s);
+        } else if(isNumber(c)) {
+            s = tokenizeNumber(s);
+        } else {
+            invalidToken(c, __LINE__);
+        }
+    }
+}
+
+// Reads the next line from the source file and refills the token buffer
+// with its tokens. The buffer is left empty at end of file. Throws when the
+// file cannot be read or when the line does not fit into the line buffer.
+static void tokenizeLine() {
+    tokenReadIndex = 0;
+    tokenWriteIndex = 0;
+    lineCounter++;
+    char line[256] = {};
+    if(fgets(line, sizeof(line), file) == nullptr) {
+        // nullptr means either end of file or a read error; only the
+        // former may silently end the program.
+        if(ferror(file)) {
+            THROW_ERROR("Cannot read line %zu", lineCounter);
+        }
+        return;
+    }
+    // The buffer starts zeroed, so a non-NUL, non-newline character in the
+    // last usable slot means fgets stopped before the end of the line.
+    char c = line[sizeof(line) - 2];
+    if(c != '\n' && c != '\0') {
+        THROW_ERROR("Line %zu is too long", lineCounter);
+    }
+    tokenizeLineString(line);
+}
+
+// Returns the next token tag without consuming it. Reads a new line when
+// the buffer is exhausted and returns END when that yields no tokens.
+static Token peekToken() {
+    if(tokenReadIndex >= tokenWriteIndex) {
+        tokenizeLine();
+        if(tokenReadIndex >= tokenWriteIndex) {
+            return END;
+        }
+    }
+    return tokens[tokenReadIndex];
+}
+
+// Returns the next token tag and advances past it. Reads a new line when
+// the buffer is exhausted and returns END when that yields no tokens.
+static Token nextToken() {
+    if(tokenReadIndex >= tokenWriteIndex) {
+        tokenizeLine();
+        if(tokenReadIndex >= tokenWriteIndex) {
+            return END;
+        }
+    }
+    Token t = tokens[tokenReadIndex];
+    tokenReadIndex++;
+    return t;
+}
+
+// Consumes a NUL-terminated payload at the read position and returns a
+// pointer to it inside the token buffer. The pointer is only valid until
+// the buffer is refilled. Throws when the buffer holds no unread bytes.
+static const char* readString() {
+    if(tokenReadIndex >= tokenWriteIndex) {
+        THROW_ERROR("readString on empty buffer, line %zu", lineCounter);
+    }
+    const char* start = (char*)&tokens[tokenReadIndex];
+    for(; tokenReadIndex < tokenWriteIndex; tokenReadIndex++) {
+        if(tokens[tokenReadIndex] == '\0') {
+            break;
+        }
+    }
+    // step over the terminator
+    tokenReadIndex++;
+    return start;
+}
+
+// Consumes an i64 payload at the read position and returns its value.
+// Throws when fewer than sizeof(i64) unread bytes remain.
+static i64 readInt64() {
+    // `>` rather than `>=`: a payload that ends exactly at the write index
+    // is complete, e.g. an integer as the last token of a line.
+    if(tokenReadIndex + sizeof(i64) > tokenWriteIndex) {
+        THROW_ERROR("readInt64 on empty buffer, line %zu", lineCounter);
+    }
+    i64 i = 0;
+    memcpy(&i, tokens + tokenReadIndex, sizeof(i));
+    tokenReadIndex += sizeof(i);
+    return i;
+}
+
+// Consumes a double payload at the read position and returns its value.
+// Throws when fewer than sizeof(double) unread bytes remain.
+static double readDouble() {
+    // `>` rather than `>=`: a payload that ends exactly at the write index
+    // is complete, e.g. a number as the last token of a line.
+    if(tokenReadIndex + sizeof(double) > tokenWriteIndex) {
+        THROW_ERROR("readDouble on empty buffer, line %zu", lineCounter);
+    }
+    double d = 0;
+    memcpy(&d, tokens + tokenReadIndex, sizeof(d));
+    tokenReadIndex += sizeof(d);
+    return d;
+}
+
+[[noreturn]] static void unexpectedToken(Token t) {
+    switch(t) {
+        case LITERAL:
+            THROW_ERROR(
+                "Unexpected literal(%s) on line %zu", readString(),
+                lineCounter);
+            break;
+        case INT64:
+            THROW_ERROR(
+                "Unexpected int(%ld) on line %zu", readInt64(), lineCounter);
+            break;
+        case DOUBLE:
+            THROW_ERROR(
+                "Unexpected double(%lf) on line %zu", readDouble(),
+                lineCounter);
+            break;
+        case STRING:
+            THROW_ERROR(
+                "Unexpected string(%s) on line %zu", readString(), lineCounter);
+            break;
+        case NEWLINE:
+            THROW_ERROR("Unexpected newline on line %zu", lineCounter);
+            break;
+        case END: THROW_ERROR("Unexpected end on line %zu", lineCounter); break;
+    }
+    THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
+}
+
+// Compiles one expression: a string constant or an integer constant. Emits
+// the matching push instruction followed by the value; any other token
+// throws.
+static void compileExpression() {
+    Token t = nextToken();
+    if(t == INT64) {
+        CODE(pushInstruction(PUSH_INT64));
+        CODE(pushI64(readInt64()));
+        return;
+    }
+    if(t == STRING) {
+        CODE(pushInstruction(PUSH_CONSTANT_STRING));
+        CODE(pushConstantString(readString()));
+        return;
+    }
+    unexpectedToken(t);
+}
+
+// Compiles one statement whose first token t has already been consumed.
+// Only `print` is supported: every following expression is compiled and
+// followed by PRINT, then PRINT_NEWLINE is emitted. The statement ends at a
+// NEWLINE token or at the end of the file, so the last line of a source file
+// does not need a trailing newline. Anything else throws.
+static void compileLine(Token t) {
+    if(t != LITERAL) {
+        unexpectedToken(t);
+    }
+    const char* s = readString();
+    if(strcmp(s, "print") == 0) {
+        Token next = peekToken();
+        while(next != NEWLINE && next != END) {
+            compileExpression();
+            CODE(pushInstruction(PRINT));
+            next = peekToken();
+        }
+        // Consume the line terminator; END is not stored in the buffer,
+        // so there is nothing to consume in that case.
+        if(next == NEWLINE) {
+            nextToken();
+        }
+        CODE(pushInstruction(PRINT_NEWLINE));
+    } else {
+        THROW_ERROR("Unexpected literal(%s) on line %zu", s, lineCounter);
+    }
+}
+
+// Resets the code buffer and compiles statement after statement until the
+// token stream reports END.
+static void parseTokens() {
+    resetCode();
+    for(Token t = nextToken(); t != END; t = nextToken()) {
+        compileLine(t);
+    }
+}
+
+// Compiles the source file at `path`. Returns a pointer to the compiler's
+// static error record: its text is empty on success and holds the message
+// of the first error otherwise.
+const Error* compileFile(const char* path) {
+    reset();
+    if(setjmp(jumpPosition)) {
+        // An error was thrown somewhere below; error.text is already set.
+        cleanup();
+        return &error;
+    }
+    file = fopen(path, "r");
+    if(file == nullptr) {
+        THROW_ERROR("Cannot read file '%s'", path);
+    }
+    parseTokens();
+    cleanup();
+    return &error;
+}

+ 8 - 0
src/Compiler.h

@@ -0,0 +1,8 @@
+#ifndef BASIC_COMPILER_H
+#define BASIC_COMPILER_H
+
+#include "Error.h"
+
+// Compiles the source file at `path`.
+// Returns a pointer to an error record kept by the compiler: its text is
+// empty on success and holds a message on failure (check with hasError()).
+// The record is cleared again at the start of the next call.
+const Error* compileFile(const char* path);
+
+#endif

+ 1 - 0
src/Constants.h

@@ -10,5 +10,6 @@ typedef int64_t i64;
 
 [[maybe_unused]] constexpr size_t MAX_CODE = 1024 * 1024 * 2;
 [[maybe_unused]] constexpr size_t MAX_VALUES = 1024;
+[[maybe_unused]] constexpr size_t MAX_TOKENS = 1024;
 
 #endif

+ 5 - 0
src/Error.c

@@ -0,0 +1,5 @@
+#include "Error.h"
+
+bool hasError(const Error* e) {
+    return e->text[0] != '\0';
+}

+ 10 - 0
src/Error.h

@@ -0,0 +1,10 @@
+#ifndef BASIC_ERROR_H
+#define BASIC_ERROR_H
+
+// Error record: a fixed-size message buffer. An empty string (first
+// character is NUL) means "no error".
+typedef struct {
+    char text[256];
+} Error;
+
+// Returns true when e holds a non-empty error message.
+bool hasError(const Error* e);
+
+#endif

+ 35 - 30
src/Main.c

@@ -1,8 +1,8 @@
 #include <stdint.h>
 #include <stdio.h>
-#include <string.h>
 
 #include "Code.h"
+#include "Compiler.h"
 #include "Values.h"
 
 #define POP_VALUE(name) \
@@ -42,7 +42,7 @@ static bool execute(Instruction command) {
     switch(command) {
         case ADD: return iAdd();
         case PUSH_CONSTANT_STRING: return iPushConstantString();
-        case PUSH_INT: return iPushInt();
+        case PUSH_INT64: return iPushInt();
         case PRINT: return iPrint();
         case PRINT_NEWLINE: return iPrintNewline();
         case STOP: return true;
@@ -54,36 +54,41 @@ int main(int argCount, const char** args) {
     if(argCount < 2) {
         return 0;
     }
-    if(strcmp(args[1], "./test/Print.basic") == 0) {
-        (void)pushInstruction(PUSH_CONSTANT_STRING);
-        (void)pushConstantString("Hi");
-        (void)pushInstruction(PRINT);
-        (void)pushInstruction(PRINT_NEWLINE);
+    const Error* e = compileFile(args[1]);
+    if(hasError(e)) {
+        puts(e->text);
+        return 0;
+    }
+    // if(strcmp(args[1], "./test/Print.basic") == 0) {
+    //     (void)pushInstruction(PUSH_CONSTANT_STRING);
+    //     (void)pushConstantString("Hi");
+    //     (void)pushInstruction(PRINT);
+    //     (void)pushInstruction(PRINT_NEWLINE);
 
-        (void)pushInstruction(PUSH_INT);
-        (void)pushI64(6);
-        (void)pushInstruction(PRINT);
-        (void)pushInstruction(PRINT_NEWLINE);
+    //    (void)pushInstruction(PUSH_INT);
+    //    (void)pushI64(6);
+    //    (void)pushInstruction(PRINT);
+    //    (void)pushInstruction(PRINT_NEWLINE);
 
-        (void)pushInstruction(PUSH_CONSTANT_STRING);
-        (void)pushConstantString("Hi there ");
-        (void)pushInstruction(PRINT);
-        (void)pushInstruction(PUSH_INT);
-        (void)pushI64(8);
-        (void)pushInstruction(PRINT);
-        (void)pushInstruction(PUSH_CONSTANT_STRING);
-        (void)pushConstantString(" great");
-        (void)pushInstruction(PRINT);
-        (void)pushInstruction(PRINT_NEWLINE);
-    } else if(strcmp(args[1], "./test/Add.basic") == 0) {
-        (void)pushInstruction(PUSH_INT);
-        (void)pushI64(1);
-        (void)pushInstruction(PUSH_INT);
-        (void)pushI64(20);
-        (void)pushInstruction(ADD);
-        (void)pushInstruction(PRINT);
-        (void)pushInstruction(PRINT_NEWLINE);
-    }
+    //    (void)pushInstruction(PUSH_CONSTANT_STRING);
+    //    (void)pushConstantString("Hi there ");
+    //    (void)pushInstruction(PRINT);
+    //    (void)pushInstruction(PUSH_INT);
+    //    (void)pushI64(8);
+    //    (void)pushInstruction(PRINT);
+    //    (void)pushInstruction(PUSH_CONSTANT_STRING);
+    //    (void)pushConstantString(" great");
+    //    (void)pushInstruction(PRINT);
+    //    (void)pushInstruction(PRINT_NEWLINE);
+    //} else if(strcmp(args[1], "./test/Add.basic") == 0) {
+    //    (void)pushInstruction(PUSH_INT);
+    //    (void)pushI64(1);
+    //    (void)pushInstruction(PUSH_INT);
+    //    (void)pushI64(20);
+    //    (void)pushInstruction(ADD);
+    //    (void)pushInstruction(PRINT);
+    //    (void)pushInstruction(PRINT_NEWLINE);
+    //}
     while(!execute(readInstruction())) {}
     // char line[256];
     // while(true) {