#include "Compiler.h"

// NOTE(review): the names of the five system headers were missing from the
// reviewed text. They are reconstructed here from the library facilities this
// file uses (errno, setjmp/longjmp, FILE/fgets/snprintf, strtoll/strtod,
// memcpy/strcmp) - confirm against the original file.
#include <errno.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Code.h"
#include "Constants.h"

// Last error message; its text is empty while no error has been thrown.
static Error error = {};
// Source file currently being compiled, or nullptr when none is open.
static FILE* file = nullptr;
// Number of lines handed to the tokenizer so far (used in error messages).
static size_t lineCounter = 0;
// Target of THROW_ERROR; armed by compileFile().
static jmp_buf jumpPosition = {};
// Byte stream holding the tokens of the current line only.
static u8 tokens[MAX_TOKENS] = {};
static size_t tokenReadIndex = 0;
static size_t tokenWriteIndex = 0;

// Token tags stored in `tokens`. Payload that follows each tag:
//   LITERAL, STRING : the characters followed by a '\0' terminator
//   INT64           : sizeof(i64) raw bytes
//   DOUBLE          : sizeof(double) raw bytes
//   PLUS, NEWLINE   : no payload
// END is never stored; it is returned when no further line can be read.
typedef enum : u8 { LITERAL, INT64, DOUBLE, STRING, PLUS, NEWLINE, END } Token;

// Formats a message into error.text and jumps back to compileFile().
// Wrapped in do/while(0) so it behaves as a single statement.
#define THROW_ERROR(...)                                       \
    do {                                                       \
        snprintf(error.text, sizeof(error.text), __VA_ARGS__); \
        longjmp(jumpPosition, 1);                              \
    } while(0)

// Evaluates a code emitting call and treats a truthy result as code overflow.
#define CODE(command)                                              \
    do {                                                           \
        if(command) {                                              \
            THROW_ERROR("Code overflow on line %zu", lineCounter); \
        }                                                          \
    } while(0)

// Closes the source file if it is still open.
static void cleanup() {
    if(file != nullptr) {
        fclose(file);
        file = nullptr;
    }
}

// Clears the error text, the line counter and the token buffer indices.
static void reset() {
    error.text[0] = '\0';
    lineCounter = 0;
    tokenReadIndex = 0;
    tokenWriteIndex = 0;
}

// Appends n raw bytes starting at p to the token buffer; throws when the
// buffer would exceed MAX_TOKENS bytes.
static void addTokenN(const void* p, size_t n) {
    if(tokenWriteIndex + n > MAX_TOKENS) {
        THROW_ERROR("Line %zu has too much tokens", lineCounter);
    }
    memcpy(tokens + tokenWriteIndex, p, n);
    tokenWriteIndex += n;
}

// Appends a token tag.
static void addToken(Token t) {
    addTokenN(&t, sizeof(t));
}

// Appends one payload character.
static void addChar(char c) {
    addTokenN(&c, sizeof(c));
}

// True for the ASCII letters a-z and A-Z.
static bool isLetter(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for the ASCII digits 0-9.
static bool isNumber(char c) {
    return c >= '0' && c <= '9';
}

static bool isAlphaNumeric(char c) {
    return isLetter(c) || isNumber(c);
}

// Throws an error for character c; `line` is the __LINE__ of the call site in
// this file and is appended to the message after the source line number.
[[noreturn]] static void invalidToken(char c, int line) {
    THROW_ERROR("Unexpected token '%c' on line %zu %d", c, lineCounter, line);
}

// True for the characters that terminate a literal or a number.
static bool isTokenEnd(char c) {
    return c == ' ' || c == '\0' || c == '\n';
}

// Tokenizes an identifier starting at s (the caller has checked that *s is a
// letter). Returns a pointer to the terminating character, which is not
// consumed.
static const char* tokenizeLiteral(const char* s) {
    addToken(LITERAL);
    addChar(*s);
    while(true) {
        char c = *(++s);
        if(isAlphaNumeric(c)) {
            addChar(c);
        } else if(isTokenEnd(c)) {
            break;
        } else {
            invalidToken(c, __LINE__);
        }
    }
    addChar('\0');
    return s;
}

[[noreturn]] static void invalidNumber() {
    THROW_ERROR("Invalid number on line %zu", lineCounter);
}

// Tokenizes a number starting at s (the caller has checked that *s is a
// digit). The text is stored as INT64 when strtoll consumes all of it and as
// DOUBLE when strtod does; anything else is an error. Returns a pointer to the
// terminating character, which is not consumed.
static const char* tokenizeNumber(const char* s) {
    size_t nIndex = 0;
    char number[64] = {};
    number[nIndex++] = *s;
    while(true) {
        char c = *(++s);
        if(isTokenEnd(c)) {
            break;
        } else if(!isNumber(c) && c != '.') {
            invalidToken(c, __LINE__);
        } else if(nIndex >= sizeof(number) - 1) {
            // keep the last byte as terminator for strtoll / strtod
            invalidNumber();
        }
        number[nIndex++] = c;
    }

    char* end = nullptr;
    errno = 0;
    i64 i = strtoll(number, &end, 10);
    if(errno != 0) {
        invalidNumber();
    } else if(*end == '\0') {
        addToken(INT64);
        addTokenN(&i, sizeof(i));
        return s;
    }

    // strtoll stopped early (at a '.'), so try the text as a double
    errno = 0;
    double d = strtod(number, &end);
    if(errno != 0) {
        invalidNumber();
    } else if(*end != '\0') {
        invalidNumber();
    }
    addToken(DOUBLE);
    addTokenN(&d, sizeof(d));
    return s;
}

// Tokenizes a string; s points at the opening quote. The characters between
// the quotes are stored unmodified. Returns a pointer to the character after
// the closing quote.
static const char* tokenizeString(const char* s) {
    addToken(STRING);
    while(true) {
        char c = *(++s);
        if(c == '\0') {
            THROW_ERROR("Unclosed string on line %zu", lineCounter);
        } else if(c == '"') {
            s++;
            break;
        }
        addChar(c);
    }
    addChar('\0');
    return s;
}

// Splits one line of text into tokens. Stops after emitting NEWLINE for '\n'
// or silently at the terminating '\0'.
static void tokenizeLineString(const char* s) {
    while(true) {
        char c = *s;
        if(isLetter(c)) {
            s = tokenizeLiteral(s);
        } else if(isNumber(c)) {
            s = tokenizeNumber(s);
        } else if(c == '"') {
            s = tokenizeString(s);
        } else if(c == '\n') {
            addToken(NEWLINE);
            break;
        } else if(c == ' ') {
            s++;
        } else if(c == '+') {
            addToken(PLUS);
            s++;
        } else if(c == '\0') {
            break;
        } else {
            invalidToken(c, __LINE__);
        }
    }
}

// Discards the current tokens and tokenizes the next line of the file. The
// token buffer stays empty when no further line can be read.
static void tokenizeLine() {
    tokenReadIndex = 0;
    tokenWriteIndex = 0;
    lineCounter++;
    char line[256] = {};
    if(fgets(line, sizeof(line), file) == nullptr) {
        return;
    }
    // The buffer starts zeroed, so the last usable byte is only set when
    // fgets filled the whole buffer. That is fine if the byte is the newline.
    char c = line[sizeof(line) - 2];
    if(c != '\n' && c != '\0') {
        THROW_ERROR("Line %zu is too long", lineCounter);
    }
    tokenizeLineString(line);
}

// Returns the next token tag without consuming it. Reads the next line when
// the buffer is exhausted and returns END if that yields no tokens.
static Token peekToken() {
    if(tokenReadIndex < tokenWriteIndex) {
        return tokens[tokenReadIndex];
    }
    tokenizeLine();
    return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex] : END;
}

// Same as peekToken() but consumes the tag. The payload, if any, must be read
// afterwards with readString(), readInt64() or readDouble().
static Token nextToken() {
    if(tokenReadIndex < tokenWriteIndex) {
        return tokens[tokenReadIndex++];
    }
    tokenizeLine();
    return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex++] : END;
}

// Returns the text of the next token if it is a LITERAL, without consuming
// anything; returns nullptr for every other token.
static const char* peekLiteral() {
    Token t = peekToken();
    if(t != LITERAL) {
        return nullptr;
    }
    return (char*)(tokens + tokenReadIndex + sizeof(Token));
}

// Reads the '\0' terminated payload at the read position and advances past
// its terminator. The returned pointer refers into the token buffer and is
// only valid until the next line is tokenized.
static const char* readString() {
    if(tokenReadIndex >= tokenWriteIndex) {
        THROW_ERROR("readString on empty buffer, line %zu", lineCounter);
    }
    const char* c = (char*)(tokens + tokenReadIndex);
    while(tokenReadIndex < tokenWriteIndex && tokens[tokenReadIndex] != '\0') {
        tokenReadIndex++;
    }
    tokenReadIndex++;
    return c;
}

// Reads an i64 payload at the read position and advances past it.
static i64 readInt64() {
    // '>' instead of '>=': a payload ending exactly at the write index is
    // complete and must be accepted.
    if(tokenReadIndex + sizeof(i64) > tokenWriteIndex) {
        THROW_ERROR("readInt64 on empty buffer, line %zu", lineCounter);
    }
    i64 i = 0;
    memcpy(&i, tokens + tokenReadIndex, sizeof(i));
    tokenReadIndex += sizeof(i);
    return i;
}

// Reads a double payload at the read position and advances past it.
static double readDouble() {
    // '>' instead of '>=': a payload ending exactly at the write index is
    // complete and must be accepted.
    if(tokenReadIndex + sizeof(double) > tokenWriteIndex) {
        THROW_ERROR("readDouble on empty buffer, line %zu", lineCounter);
    }
    double d = 0;
    memcpy(&d, tokens + tokenReadIndex, sizeof(d));
    tokenReadIndex += sizeof(d);
    return d;
}

// Throws an error describing token t. The tag of t must already be consumed
// because the payload is read from the current read position.
[[noreturn]] static void unexpectedToken(Token t) {
    switch(t) {
        case LITERAL:
            THROW_ERROR("Unexpected literal(%s) on line %zu", readString(),
                        lineCounter);
            break;
        case INT64:
            // cast so the specifier matches whatever type i64 is
            THROW_ERROR("Unexpected int(%lld) on line %zu",
                        (long long)readInt64(), lineCounter);
            break;
        case DOUBLE:
            THROW_ERROR("Unexpected double(%lf) on line %zu", readDouble(),
                        lineCounter);
            break;
        case STRING:
            THROW_ERROR("Unexpected string(%s) on line %zu", readString(),
                        lineCounter);
            break;
        case PLUS:
            THROW_ERROR("Unexpected plus on line %zu", lineCounter);
            break;
        case NEWLINE:
            THROW_ERROR("Unexpected newline on line %zu", lineCounter);
            break;
        case END:
            THROW_ERROR("Unexpected end on line %zu", lineCounter);
            break;
    }
    THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
}

// Consumes the next token and throws unless it has tag t.
static void consumeToken(Token t) {
    Token actual = nextToken();
    if(t != actual) {
        unexpectedToken(actual);
    }
}

// Consumes the next token and throws unless it is the literal `name`.
static void consumeLiteral(const char* name) {
    consumeToken(LITERAL);
    const char* actual = readString();
    if(strcmp(actual, name) != 0) {
        THROW_ERROR("Unexpected literal(%s) on line %zu", actual, lineCounter);
    }
}

// Emits the push instruction for a string or integer constant. Every other
// token, including DOUBLE, is rejected.
static void compileConstant() {
    Token t = nextToken();
    if(t == STRING) {
        CODE(codePushInstruction(PUSH_CONSTANT_STRING));
        CODE(codePushConstantString(readString()));
    } else if(t == INT64) {
        CODE(codePushInstruction(PUSH_INT64));
        CODE(codePushI64(readInt64()));
    } else {
        unexpectedToken(t);
    }
}

// Compiles `constant { + constant }`, emitting ADD after each right operand.
static void compileAdd() {
    compileConstant();
    while(peekToken() == PLUS) {
        nextToken();
        compileConstant();
        CODE(codePushInstruction(ADD));
    }
}

static void compileExpression() {
    compileAdd();
}

static void compileLine(Token t);

// Compiles `if <expression> then` NEWLINE <lines> `endif` NEWLINE; the `if`
// literal itself has already been consumed by the caller.
static void compileIf() {
    compileExpression();
    CODE(codePushInstruction(JUMP_ON_0));
    // reserve the operand of the jump; it is patched below once the write
    // position after the body is known
    size_t posIndex = codeGetWritePosition();
    CODE(codePushSize(0));
    consumeLiteral("then");
    consumeToken(NEWLINE);

    while(true) {
        const char* s = peekLiteral();
        if(s != nullptr && strcmp(s, "endif") == 0) {
            break;
        }
        compileLine(nextToken());
    }
    consumeLiteral("endif");
    consumeToken(NEWLINE);

    size_t endIndex = codeGetWritePosition();
    codeSetWritePosition(posIndex);
    CODE(codePushSize(endIndex));
    codeSetWritePosition(endIndex);
}

// Compiles one statement whose first token t has already been consumed.
// Empty lines are accepted; otherwise the line must start with `print` or
// `if`.
static void compileLine(Token t) {
    if(t == NEWLINE) {
        return;
    }
    if(t != LITERAL) {
        unexpectedToken(t);
    }
    const char* s = readString();
    if(strcmp(s, "print") == 0) {
        // every expression up to the end of the line is printed separately
        while(peekToken() != NEWLINE) {
            compileExpression();
            CODE(codePushInstruction(PRINT));
        }
        nextToken();
        CODE(codePushInstruction(PRINT_NEWLINE));
    } else if(strcmp(s, "if") == 0) {
        compileIf();
    } else {
        THROW_ERROR("Unexpected literal(%s) on line %zu", s, lineCounter);
    }
}

// Resets the code buffer and compiles lines until the end of the file.
static void parseTokens() {
    codeReset();
    while(true) {
        Token t = nextToken();
        if(t == END) {
            break;
        }
        compileLine(t);
    }
}

// Compiles the file at `path`. Returns a pointer to a static Error whose text
// is an empty string when nothing was thrown and the error message otherwise.
// The file is closed on both paths.
const Error* compileFile(const char* path) {
    reset();
    // every THROW_ERROR resumes here with a non-zero result
    if(setjmp(jumpPosition)) {
        cleanup();
        return &error;
    }
    file = fopen(path, "r");
    if(file == nullptr) {
        THROW_ERROR("Cannot read file '%s'", path);
    } else {
        parseTokens();
    }
    cleanup();
    return &error;
}