3 年之前 · bc537d34ef
--- a/tests/pre/pre
+++ b/tests/pre/pre
@@ -0,0 +1,8 @@
 
				+#define WUSI print 5;
			
 
				+#define BAUM print 6;
			
 
				+#define IF if(true) {
			
 
				+
			
 
				+void main() {
			
 
				+    WUSI WUSI BAUM WUSI
			
 
				+    IF BAUM }
			
 
				+}
			
--- a/tests/pre/pre.out
+++ b/tests/pre/pre.out
@@ -0,0 +1,5 @@
 
				+5
			
 
				+5
			
 
				+6
			
 
				+5
			
 
				+6
			
--- a/tokenizer/File.c
+++ b/tokenizer/File.c
@@ -1,27 +1,161 @@
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				+#include <string.h>
			
 
				 
			
 
				 #include "tokenizer/File.h"
			
 
				+#include "utils/Utils.h"
			
 
				 
			
 
				+#define MAX_DEFINES 50
			
 
				+
			
 
				+static FileError fileError = NULL;
			
 
				 static FILE* file = NULL;
			
 
				+static int readIndex = 0;
			
 
				+static int writeIndex = 0;
			
 
				+static int fileSize = 0;
			
 
				+static char* fileContent = NULL;
			
 
				+
			
 
				+// One "#define" entry as recorded by fDefine.
			
 
				+typedef struct {
			
 
				+    // length of name, stored so fMatch can compare with strncmp
			
 
				+    int nameLength;
			
 
				+    // points into fileContent; fDefine terminates the name in place
			
 
				+    const char* name;
			
 
				+    // points into fileContent at the text that follows the name
			
 
				+    const char* code;
			
 
				+} Defines;
			
 
				+
			
 
				+static int defineIndex = 0;
			
 
				+static Defines defines[MAX_DEFINES];
			
 
				+static int defineReadIndex = 0;
			
 
				+static const char* defineCode = NULL;
			
 
				+
			
 
				+// Appends one character to fileContent and doubles the buffer when it is
			
 
				+// full. A failed realloc is reported through fileError and the character
			
 
				+// is dropped; the old buffer is kept so it can still be freed later.
			
 
				+static void fAdd(int c) {
			
 
				+    if(writeIndex >= fileSize) {
			
 
				+        int newSize = fileSize * 2;
			
 
				+        // keep the old pointer until the new one is known to be valid
			
 
				+        char* newContent = realloc(fileContent, newSize);
			
 
				+        if(newContent == NULL) {
			
 
				+            fileError("out of memory while reading file");
			
 
				+            return;
			
 
				+        }
			
 
				+        fileSize = newSize;
			
 
				+        fileContent = newContent;
			
 
				+    }
			
 
				+    fileContent[writeIndex++] = c;
			
 
				+}
			
 
				+
			
 
				+// Puts the reader back into its initial state: both cursors at the start,
			
 
				+// no defines, no running expansion and a fresh 16 byte content buffer.
			
 
				+static void fReset() {
			
 
				+    readIndex = writeIndex = 0;
			
 
				+    defineIndex = defineReadIndex = 0;
			
 
				+    defineCode = NULL;
			
 
				+    fileSize = 16;
			
 
				+    fileContent = malloc(fileSize);
			
 
				+}
			
 
				 
			
 
				-bool fOpen(const char* path) {
			
 
				+void fOpen(const char* path, FileError fe) {
			
 
				+    fileError = fe;
			
 
				     file = fopen(path, "r");
			
 
				-    return file == NULL;
			
 
				+    if(file == NULL) {
			
 
				+        fileError("cannot read file '%s'", path);
			
 
				+        return;
			
 
				+    }
			
 
				+    fReset();
			
 
				+    while(true) {
			
 
				+        int c = fgetc(file);
			
 
				+        if(c == '#') {
			
 
				+            while(c != EOF && c != '\n') {
			
 
				+                fAdd(c);
			
 
				+                c = fgetc(file);
			
 
				+            }
			
 
				+            fAdd('\0');
			
 
				+        }
			
 
				+        fAdd(c);
			
 
				+        if(c == EOF) {
			
 
				+            break;
			
 
				+        }
			
 
				+    }
			
 
				+    fAdd('\0');
			
 
				+    fclose(file);
			
 
				 }
			
 
				 
			
 
				 void fClose() {
			
 
				-    fclose(file);
			
 
				+    free(fileContent);
			
 
				+}
			
 
				+
			
 
				+// Moves readIndex forward by skip characters and then over any run of
			
 
				+// blanks that follows.
			
 
				+static void fSkipString(int skip) {
			
 
				+    int index = readIndex + skip;
			
 
				+    for(; fileContent[index] == ' '; index++) {
			
 
				+    }
			
 
				+    readIndex = index;
			
 
				+}
			
 
				+
			
 
				+// Parses the "#define NAME code" directive that starts at readIndex and
			
 
				+// records it in defines. NAME is terminated in place inside fileContent;
			
 
				+// code points at the text after it. On an error the rest of the directive
			
 
				+// is skipped and nothing is recorded.
			
 
				+static void fDefine() {
			
 
				+    fSkipString(7); // step over "#define" and the blanks behind it
			
 
				+    const char* name = fileContent + readIndex;
			
 
				+    while(fileContent[readIndex] != ' ') {
			
 
				+        if(!isLetter(fileContent[readIndex])) {
			
 
				+            fileError("invalid define name '%s'", name);
			
 
				+            // fileError is a plain callback and may return: stop here
			
 
				+            // instead of scanning past the end of the directive
			
 
				+            readIndex += strlen(fileContent + readIndex);
			
 
				+            return;
			
 
				+        }
			
 
				+        readIndex++;
			
 
				+    }
			
 
				+    int nameLength = fileContent + readIndex - name;
			
 
				+    fileContent[readIndex++] = '\0';
			
 
				+    if(defineIndex >= MAX_DEFINES) {
			
 
				+        fileError("too many defines");
			
 
				+        // same reason: never write behind the end of defines
			
 
				+        readIndex += strlen(fileContent + readIndex);
			
 
				+        return;
			
 
				+    }
			
 
				+    defines[defineIndex].nameLength = nameLength;
			
 
				+    defines[defineIndex].name = name;
			
 
				+    defines[defineIndex].code = fileContent + readIndex;
			
 
				+    defineIndex++;
			
 
				+    // leave readIndex on the terminator of the directive
			
 
				+    fSkipString(strlen(fileContent + readIndex));
			
 
				+}
			
 
				+
			
 
				+// Reports whether the word starting at code is exactly the name of def.
			
 
				+// The name has to match and must not be followed by another letter, so a
			
 
				+// longer identifier that only starts with the name (WUSIX for WUSI) is not
			
 
				+// treated as a use of the define.
			
 
				+static bool fMatch(Defines* def, const char* code) {
			
 
				+    if(strncmp(def->name, code, def->nameLength) != 0) {
			
 
				+        return false;
			
 
				+    }
			
 
				+    // all nameLength characters matched, so code[nameLength] is still
			
 
				+    // inside the terminated buffer
			
 
				+    return !isLetter(code[def->nameLength]);
			
 
				+}
			
 
				+
			
 
				+// Decides where the next character comes from. Returns true when the
			
 
				+// caller has to take it from defineCode[defineReadIndex] and false when it
			
 
				+// has to take it from fileContent[readIndex]. Along the way it starts
			
 
				+// define expansions and handles '#' directives.
			
 
				+static bool fPrepareChar() {
			
 
				+    if(defineCode != NULL) {
			
 
				+        if(defineCode[defineReadIndex] == '\0') {
			
 
				+            // the expansion is used up, continue with the file content
			
 
				+            defineCode = NULL;
			
 
				+            defineReadIndex = 0;
			
 
				+        } else {
			
 
				+            return true;
			
 
				+        }
			
 
				+    }
			
 
				+    // while no expansion runs, defineReadIndex doubles as a flag: -1 means
			
 
				+    // the current run of letters was already looked up without a match,
			
 
				+    // any non-letter sets it back to 0
			
 
				+    if(!isLetter(fileContent[readIndex])) {
			
 
				+        defineReadIndex = 0;
			
 
				+    }
			
 
				+    if(isLetter(fileContent[readIndex]) && defineReadIndex == 0) {
			
 
				+        const char* replace = fileContent + readIndex;
			
 
				+        for(int i = 0; i < defineIndex; i++) {
			
 
				+            if(fMatch(defines + i, replace)) {
			
 
				+                // skip the name in the file, its code is read instead
			
 
				+                defineCode = defines[i].code;
			
 
				+                readIndex += defines[i].nameLength;
			
 
				+                return true;
			
 
				+            }
			
 
				+        }
			
 
				+        // no define matched, do not look up the rest of this word again
			
 
				+        defineReadIndex = -1;
			
 
				+    }
			
 
				+    if(fileContent[readIndex] == '#') {
			
 
				+        const char* command = fileContent + readIndex + 1;
			
 
				+        if(strncmp(command, "define", 6) == 0) {
			
 
				+            fDefine();
			
 
				+        } else {
			
 
				+            fileError("unknown preprocessor command '%s'", command);
			
 
				+        }
			
 
				+    }
			
 
				+    // a '\0' at this position is stepped over instead of being returned
			
 
				+    if(fileContent[readIndex] == '\0') {
			
 
				+        readIndex++;
			
 
				+    }
			
 
				+    return false;
			
 
				 }
			
 
				 
			
 
				 int fRead() {
			
 
				-    return fgetc(file);
			
 
				+    if(fPrepareChar()) {
			
 
				+        return defineCode[defineReadIndex++];
			
 
				+    }
			
 
				+    return fileContent[readIndex++];
			
 
				 }
			
 
				 
			
 
				 int fPeek() {
			
 
				-    int c = fRead();
			
 
				-    ungetc(c, file);
			
 
				-    return c;
			
 
				+    if(fPrepareChar()) {
			
 
				+        return defineCode[defineReadIndex];
			
 
				+    }
			
 
				+    return fileContent[readIndex];
			
 
				 }
			
 
				 
			
 
				 bool fReadIf(int c) {
			
--- a/tokenizer/File.h
+++ b/tokenizer/File.h
@@ -3,7 +3,9 @@
 
				 
			
 
				 #include <stdbool.h>
			
 
				 
			
 
				-bool fOpen(const char* path);
			
 
				+typedef void (*FileError)(const char*, ...);
			
 
				+
			
 
				+void fOpen(const char* path, FileError fe);
			
 
				 void fClose();
			
 
				 
			
 
				 int fRead();
			
--- a/tokenizer/Tokenizer.c
+++ b/tokenizer/Tokenizer.c
@@ -1,4 +1,5 @@
 
				 #include <limits.h>
			
 
				+#include <setjmp.h>
			
 
				 #include <stdarg.h>
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
@@ -11,6 +12,7 @@
 
				 #define TOKEN_BUFFER_LENGTH (1024 * 1024)
			
 
				 #define ERROR_LENGTH 256
			
 
				 
			
 
				+static jmp_buf errorJump;
			
 
				 static char tokenBuffer[TOKEN_BUFFER_LENGTH];
			
 
				 static int writeIndex = 0;
			
 
				 static int readIndex = 0;
			
@@ -22,21 +24,21 @@ static void tError(const char* format, ...) {
 
				     va_start(args, format);
			
 
				     vsnprintf(error, ERROR_LENGTH, format, args);
			
 
				     va_end(args);
			
 
				+    longjmp(errorJump, 0);
			
 
				 }
			
 
				 
			
 
				-static bool tAdd(const void* data, int length) {
			
 
				+static void tAdd(const void* data, int length) {
			
 
				     if(writeIndex + length > TOKEN_BUFFER_LENGTH) {
			
 
				         tError("the token buffer is too small");
			
 
				-        return false;
			
 
				     }
			
 
				     memcpy(tokenBuffer + writeIndex, data, length);
			
 
				     writeIndex += length;
			
 
				-    return true;
			
 
				 }
			
 
				 
			
 
				-static bool tAddToken(Token token) {
			
 
				+static void tAddToken(Token token) {
			
 
				     unsigned char c = token;
			
 
				-    return tAdd(&c, 1) && tAdd(&line, sizeof(line));
			
 
				+    tAdd(&c, 1);
			
 
				+    tAdd(&line, sizeof(line));
			
 
				 }
			
 
				 
			
 
				 static bool tReadTokens(void* dest, int length) {
			
@@ -48,26 +50,27 @@ static bool tReadTokens(void* dest, int length) {
 
				     return true;
			
 
				 }
			
 
				 
			
 
				-static bool tParseLiteral(int c) {
			
 
				+static void tParseLiteral(int c) {
			
 
				     int index = 1;
			
 
				     char buffer[64];
			
 
				     buffer[0] = c;
			
 
				     while(isLetter(fPeek())) {
			
 
				         if(index >= 63) {
			
 
				             tError("literal is too long");
			
 
				-            return false;
			
 
				         }
			
 
				         buffer[index++] = fRead();
			
 
				     }
			
 
				     buffer[index] = '\0';
			
 
				     Token t = tFromName(buffer);
			
 
				     if(t != T_END) {
			
 
				-        return tAddToken(t);
			
 
				+        tAddToken(t);
			
 
				+    } else {
			
 
				+        tAddToken(T_LITERAL);
			
 
				+        tAdd(buffer, index + 1);
			
 
				     }
			
 
				-    return tAddToken(T_LITERAL) && tAdd(buffer, index + 1);
			
 
				 }
			
 
				 
			
 
				-static bool tParseNumber(int c) {
			
 
				+static void tParseNumber(int c) {
			
 
				     int index = 1;
			
 
				     char buffer[64];
			
 
				     buffer[0] = c;
			
@@ -80,7 +83,6 @@ static bool tParseNumber(int c) {
 
				             break;
			
 
				         } else if(index >= 63) {
			
 
				             tError("number is too long");
			
 
				-            return false;
			
 
				         }
			
 
				         buffer[index++] = fRead();
			
 
				     }
			
@@ -90,25 +92,23 @@ static bool tParseNumber(int c) {
 
				         float f = strtof(buffer, &end);
			
 
				         if(end[0] != '\0') {
			
 
				             tError("invalid float on line %d", line);
			
 
				-            return false;
			
 
				         }
			
 
				-        return tAddToken(T_CONST_FLOAT) && tAdd(&f, sizeof(float));
			
 
				+        tAddToken(T_CONST_FLOAT);
			
 
				+        tAdd(&f, sizeof(float));
			
 
				     } else {
			
 
				         char* end = NULL;
			
 
				         long l = strtol(buffer, &end, 10);
			
 
				         if(end[0] != '\0' || l > INT_MAX) {
			
 
				             tError("invalid int on line %d", line);
			
 
				-            return false;
			
 
				         }
			
 
				         int i = l;
			
 
				-        return tAddToken(T_CONST_INT) && tAdd(&i, sizeof(int));
			
 
				+        tAddToken(T_CONST_INT);
			
 
				+        tAdd(&i, sizeof(int));
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-static bool tAddString() {
			
 
				-    if(!tAddToken(T_TEXT)) {
			
 
				-        return false;
			
 
				-    }
			
 
				+static void tAddString() {
			
 
				+    tAddToken(T_TEXT);
			
 
				     while(true) {
			
 
				         int c = fRead();
			
 
				         if(c == '"') {
			
@@ -117,114 +117,120 @@ static bool tAddString() {
 
				             switch(fRead()) {
			
 
				                 case '"': c = '"'; break;
			
 
				                 case '\\': c = '\\'; break;
			
 
				-                default:
			
 
				-                    tError("unknown escaped character at line %d", line);
			
 
				-                    return false;
			
 
				+                default: tError("unknown escaped character at line %d", line);
			
 
				             }
			
 
				         } else if(c == EOF) {
			
 
				             tError("unclosed string starting at line %d", line);
			
 
				-            return false;
			
 
				-        }
			
 
				-        if(!tAdd(&c, 1)) {
			
 
				-            return false;
			
 
				         }
			
 
				+        tAdd(&c, 1);
			
 
				     }
			
 
				     char c = '\0';
			
 
				-    return tAdd(&c, 1);
			
 
				+    tAdd(&c, 1);
			
 
				 }
			
 
				 
			
 
				-static bool tAddToken2(Token te, Token t) {
			
 
				-    return fReadIf('=') ? tAddToken(te) : tAddToken(t);
			
 
				+static void tAddToken2(Token te, Token t) {
			
 
				+    if(fReadIf('=')) {
			
 
				+        tAddToken(te);
			
 
				+    } else {
			
 
				+        tAddToken(t);
			
 
				+    }
			
 
				 }
			
 
				 
			
 
				-static bool tAddToken3(int c, Token tc, Token te, Token t) {
			
 
				-    return fReadIf(c) ? tAddToken(tc) : tAddToken2(te, t);
			
 
				+static void tAddToken3(int c, Token tc, Token te, Token t) {
			
 
				+    if(fReadIf(c)) {
			
 
				+        tAddToken(tc);
			
 
				+    } else {
			
 
				+        tAddToken2(te, t);
			
 
				+    }
			
 
				 }
			
 
				 
			
 
				-static bool tAddToken4(int c, Token tce, Token tc, Token te, Token t) {
			
 
				-    return fReadIf(c) ? tAddToken2(tce, tc) : tAddToken2(te, t);
			
 
				+static void tAddToken4(int c, Token tce, Token tc, Token te, Token t) {
			
 
				+    if(fReadIf(c)) {
			
 
				+        tAddToken2(tce, tc);
			
 
				+    } else {
			
 
				+        tAddToken2(te, t);
			
 
				+    }
			
 
				 }
			
 
				 
			
 
				-static bool tAddTokenMinus() {
			
 
				-    return tAddToken3('-', T_DECREMENT, T_SUB_SET,
			
 
				-                      fReadIf('>') ? T_ARROW : T_SUB);
			
 
				+static void tAddTokenMinus() {
			
 
				+    tAddToken3('-', T_DECREMENT, T_SUB_SET, fReadIf('>') ? T_ARROW : T_SUB);
			
 
				 }
			
 
				 
			
 
				-static bool tLineComment() {
			
 
				+static void tLineComment() {
			
 
				     while(true) {
			
 
				         int c = fRead();
			
 
				         if(c == EOF || c == '\n') {
			
 
				             line++;
			
 
				-            return true;
			
 
				+            return;
			
 
				         }
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-static bool tMultipleLineComment() {
			
 
				+static void tMultipleLineComment() {
			
 
				     while(true) {
			
 
				         int c = fRead();
			
 
				         if(c == EOF) {
			
 
				             tError("unclosed comment at line %d", line);
			
 
				-            return false;
			
 
				         } else if(c == '\n') {
			
 
				             line++;
			
 
				         } else if(c == '*' && fReadIf('/')) {
			
 
				-            return true;
			
 
				+            return;
			
 
				         }
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-static bool tSlash() {
			
 
				+static void tSlash() {
			
 
				     if(fReadIf('/')) {
			
 
				-        return tLineComment();
			
 
				+        tLineComment();
			
 
				     } else if(fReadIf('*')) {
			
 
				-        return tMultipleLineComment();
			
 
				+        tMultipleLineComment();
			
 
				+    } else {
			
 
				+        tAddToken2(T_DIV_SET, T_DIV);
			
 
				     }
			
 
				-    return tAddToken2(T_DIV_SET, T_DIV);
			
 
				 }
			
 
				 
			
 
				-static bool tParseToken() {
			
 
				-    int c = fRead();
			
 
				-    if(c == EOF) {
			
 
				-        return false;
			
 
				-    } else if(isLetter(c)) {
			
 
				-        return tParseLiteral(c);
			
 
				+static void tParseToken(int c) {
			
 
				+    if(isLetter(c)) {
			
 
				+        tParseLiteral(c);
			
 
				+        return;
			
 
				     } else if(isNumber(c)) {
			
 
				-        return tParseNumber(c);
			
 
				+        tParseNumber(c);
			
 
				+        return;
			
 
				     }
			
 
				     switch(c) {
			
 
				-        case ' ': return true;
			
 
				-        case '\n': line++; return true;
			
 
				-        case '+': return tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD);
			
 
				-        case '-': return tAddTokenMinus();
			
 
				-        case '*': return tAddToken2(T_MUL_SET, T_MUL);
			
 
				-        case '/': return tSlash();
			
 
				-        case '%': return tAddToken2(T_MOD_SET, T_MOD);
			
 
				+        case ' ': return;
			
 
				+        case '\n': line++; return;
			
 
				+        case '+': tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD); return;
			
 
				+        case '-': tAddTokenMinus(); return;
			
 
				+        case '*': tAddToken2(T_MUL_SET, T_MUL); return;
			
 
				+        case '/': tSlash(); return;
			
 
				+        case '%': tAddToken2(T_MOD_SET, T_MOD); return;
			
 
				         case '<':
			
 
				-            return tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL,
			
 
				-                              T_LESS);
			
 
				+            tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL,
			
 
				+                       T_LESS);
			
 
				+            return;
			
 
				         case '>':
			
 
				-            return tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT,
			
 
				-                              T_GREATER_EQUAL, T_GREATER);
			
 
				-        case '=': return tAddToken2(T_EQUAL, T_SET);
			
 
				-        case '!': return tAddToken2(T_NOT_EQUAL, T_NOT);
			
 
				-        case '&': return tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND);
			
 
				-        case '|': return tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR);
			
 
				-        case '~': return tAddToken(T_BIT_NOT);
			
 
				-        case '^': return tAddToken2(T_BIT_XOR_SET, T_BIT_XOR);
			
 
				-        case ',': return tAddToken(T_COMMA);
			
 
				-        case ';': return tAddToken(T_SEMICOLON);
			
 
				-        case '(': return tAddToken(T_OPEN_BRACKET);
			
 
				-        case ')': return tAddToken(T_CLOSE_BRACKET);
			
 
				-        case '{': return tAddToken(T_OPEN_CURVED_BRACKET);
			
 
				-        case '}': return tAddToken(T_CLOSE_CURVED_BRACKET);
			
 
				-        case '"': return tAddString();
			
 
				-        case '.': return tAddToken(T_POINT);
			
 
				-        case '[': return tAddToken(T_OPEN_SQUARE_BRACKET);
			
 
				-        case ']': return tAddToken(T_CLOSE_SQUARE_BRACKET);
			
 
				+            tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT, T_GREATER_EQUAL,
			
 
				+                       T_GREATER);
			
 
				+            return;
			
 
				+        case '=': tAddToken2(T_EQUAL, T_SET); return;
			
 
				+        case '!': tAddToken2(T_NOT_EQUAL, T_NOT); return;
			
 
				+        case '&': tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND); return;
			
 
				+        case '|': tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR); return;
			
 
				+        case '~': tAddToken(T_BIT_NOT); return;
			
 
				+        case '^': tAddToken2(T_BIT_XOR_SET, T_BIT_XOR); return;
			
 
				+        case ',': tAddToken(T_COMMA); return;
			
 
				+        case ';': tAddToken(T_SEMICOLON); return;
			
 
				+        case '(': tAddToken(T_OPEN_BRACKET); return;
			
 
				+        case ')': tAddToken(T_CLOSE_BRACKET); return;
			
 
				+        case '{': tAddToken(T_OPEN_CURVED_BRACKET); return;
			
 
				+        case '}': tAddToken(T_CLOSE_CURVED_BRACKET); return;
			
 
				+        case '"': tAddString(); return;
			
 
				+        case '.': tAddToken(T_POINT); return;
			
 
				+        case '[': tAddToken(T_OPEN_SQUARE_BRACKET); return;
			
 
				+        case ']': tAddToken(T_CLOSE_SQUARE_BRACKET); return;
			
 
				     }
			
 
				     tError("unknown character on line %d: %c", line, c);
			
 
				-    return false;
			
 
				 }
			
 
				 
			
 
				 static void tParseFile() {
			
@@ -232,16 +238,20 @@ static void tParseFile() {
 
				     writeIndex = 0;
			
 
				     line = 1;
			
 
				     error[0] = '\0';
			
 
				-    while(tParseToken()) {
			
 
				+    while(true) {
			
 
				+        int c = fRead();
			
 
				+        if(c == EOF) {
			
 
				+            return;
			
 
				+        }
			
 
				+        tParseToken(c);
			
 
				     }
			
 
				 }
			
 
				 
			
 
				 bool tTokenize(const char* path) {
			
 
				-    if(fOpen(path)) {
			
 
				-        tError("cannot read file '%s'", path);
			
 
				-        return true;
			
 
				+    if(!setjmp(errorJump)) {
			
 
				+        fOpen(path, tError);
			
 
				+        tParseFile();
			
 
				     }
			
 
				-    tParseFile();
			
 
				     fClose();
			
 
				     return error[0] != '\0';
			
 
				 }