| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395 |
- #include "Compiler.h"
- #include <errno.h>
- #include <setjmp.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "Code.h"
- #include "Constants.h"
- static Error error = {};
- static FILE* file = nullptr;
- static size_t lineCounter = 0;
- static jmp_buf jumpPosition = {};
- static u8 tokens[MAX_TOKENS] = {};
- static size_t tokenReadIndex = 0;
- static size_t tokenWriteIndex = 0;
- typedef enum : u8 { LITERAL, INT64, DOUBLE, STRING, PLUS, NEWLINE, END } Token;
// Formats a printf-style message into error.text and jumps to the setjmp()
// point stored in jumpPosition; execution does not continue after the macro.
// The do/while(0) wrapper makes the two statements behave as a single one, so
// the macro stays correct inside an unbraced if/else.
#define THROW_ERROR(...)                                       \
    do {                                                       \
        snprintf(error.text, sizeof(error.text), __VA_ARGS__); \
        longjmp(jumpPosition, 1);                              \
    } while(0)
// Evaluates command once; a non-zero result is reported through THROW_ERROR
// as a code overflow on the current line. Wrapped in do/while(0) so that
// `CODE(x);` is exactly one statement and cannot capture a following else.
#define CODE(command)                                              \
    do {                                                           \
        if(command) {                                              \
            THROW_ERROR("Code overflow on line %zu", lineCounter); \
        }                                                          \
    } while(0)
- static void cleanup() {
- if(file != nullptr) {
- fclose(file);
- file = nullptr;
- }
- }
- static void reset() {
- error.text[0] = '\0';
- lineCounter = 0;
- tokenReadIndex = 0;
- tokenWriteIndex = 0;
- }
- static void addTokenN(const void* p, size_t n) {
- if(tokenWriteIndex + n > MAX_TOKENS) {
- THROW_ERROR("Line %zu has too much tokens", lineCounter);
- }
- memcpy(tokens + tokenWriteIndex, p, n);
- tokenWriteIndex += n;
- }
- static void addToken(Token t) {
- addTokenN(&t, sizeof(t));
- }
// Appends a single character to the token buffer.
static void addChar(char c) {
    addTokenN(&c, sizeof(char));
}
// Returns true for the ASCII letters a-z and A-Z.
static bool isLetter(char c) {
    if(c >= 'a' && c <= 'z') {
        return true;
    }
    return c >= 'A' && c <= 'Z';
}
// Returns true for the ASCII digits 0-9.
static bool isNumber(char c) {
    return !(c < '0' || c > '9');
}
// Returns true if c is an ASCII letter or an ASCII digit.
static bool isAlphaNumeric(char c) {
    if(isLetter(c)) {
        return true;
    }
    return isNumber(c);
}
- [[noreturn]] static void invalidToken(char c, int line) {
- THROW_ERROR("Unexpected token '%c' on line %zu %d", c, lineCounter, line);
- }
// Returns true if c terminates a literal or number: a space, a newline or
// the end of the string.
static bool isTokenEnd(char c) {
    switch(c) {
        case ' ':
        case '\0':
        case '\n': return true;
        default: return false;
    }
}
- static const char* tokenizeLiteral(const char* s) {
- addToken(LITERAL);
- addChar(*s);
- while(true) {
- char c = *(++s);
- if(isAlphaNumeric(c)) {
- addChar(c);
- } else if(isTokenEnd(c)) {
- break;
- } else {
- invalidToken(c, __LINE__);
- }
- }
- addChar('\0');
- return s;
- }
- [[noreturn]] static void invalidNumber() {
- THROW_ERROR("Invalid number on line %zu", lineCounter);
- }
- static const char* tokenizeNumber(const char* s) {
- size_t nIndex = 0;
- char number[64] = {};
- number[nIndex++] = *s;
- while(true) {
- char c = *(++s);
- if(isTokenEnd(c)) {
- break;
- } else if(!isNumber(c) && c != '.') {
- invalidToken(c, __LINE__);
- } else if(nIndex >= sizeof(number) - 1) {
- invalidNumber();
- }
- number[nIndex++] = c;
- }
- char* end = nullptr;
- errno = 0;
- i64 i = strtoll(number, &end, 10);
- if(errno != 0) {
- invalidNumber();
- } else if(*end == '\0') {
- addToken(INT64);
- addTokenN(&i, sizeof(i));
- return s;
- }
- double d = strtod(number, &end);
- if(errno != 0) {
- invalidNumber();
- } else if(*end != '\0') {
- invalidNumber();
- }
- addToken(DOUBLE);
- addTokenN(&d, sizeof(d));
- return s;
- }
- static const char* tokenizeString(const char* s) {
- addToken(STRING);
- while(true) {
- char c = *(++s);
- if(c == '\0') {
- THROW_ERROR("Unclosed string on line %zu", lineCounter);
- } else if(c == '"') {
- s++;
- break;
- }
- addChar(c);
- }
- addChar('\0');
- return s;
- }
- static void tokenizeLineString(const char* s) {
- while(true) {
- char c = *s;
- if(isLetter(c)) {
- s = tokenizeLiteral(s);
- } else if(isNumber(c)) {
- s = tokenizeNumber(s);
- } else if(c == '"') {
- s = tokenizeString(s);
- } else if(c == '\n') {
- addToken(NEWLINE);
- break;
- } else if(c == ' ') {
- s++;
- } else if(c == '+') {
- addToken(PLUS);
- s++;
- } else if(c == '\0') {
- break;
- } else {
- invalidToken(c, __LINE__);
- }
- }
- }
- static void tokenizeLine() {
- tokenReadIndex = 0;
- tokenWriteIndex = 0;
- lineCounter++;
- char line[256] = {};
- if(fgets(line, sizeof(line), file) == nullptr) {
- return;
- }
- char c = line[sizeof(line) - 2];
- if(c != '\n' && c != '\0') {
- THROW_ERROR("Line %zu is too long", lineCounter);
- }
- tokenizeLineString(line);
- }
- static Token peekToken() {
- if(tokenReadIndex < tokenWriteIndex) {
- return tokens[tokenReadIndex];
- }
- tokenizeLine();
- return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex] : END;
- }
- static Token nextToken() {
- if(tokenReadIndex < tokenWriteIndex) {
- return tokens[tokenReadIndex++];
- }
- tokenizeLine();
- return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex++] : END;
- }
- static const char* peekLiteral() {
- Token t = peekToken();
- if(t != LITERAL) {
- return nullptr;
- }
- return (char*)(tokens + tokenReadIndex + sizeof(Token));
- }
- static const char* readString() {
- if(tokenReadIndex >= tokenWriteIndex) {
- THROW_ERROR("readString on empty buffer, line %zu", lineCounter);
- }
- const char* c = (char*)(tokens + tokenReadIndex);
- while(tokenReadIndex < tokenWriteIndex && tokens[tokenReadIndex] != '\0') {
- tokenReadIndex++;
- }
- tokenReadIndex++;
- return c;
- }
- static i64 readInt64() {
- if(tokenReadIndex + sizeof(i64) >= tokenWriteIndex) {
- THROW_ERROR("readInt64 on empty buffer, line %zu", lineCounter);
- }
- i64 i = 0;
- memcpy(&i, tokens + tokenReadIndex, sizeof(i));
- tokenReadIndex += sizeof(i);
- return i;
- }
- static double readDouble() {
- if(tokenReadIndex + sizeof(double) >= tokenWriteIndex) {
- THROW_ERROR("readDouble on empty buffer, line %zu", lineCounter);
- }
- double d = 0;
- memcpy(&d, tokens + tokenReadIndex, sizeof(d));
- tokenReadIndex += sizeof(d);
- return d;
- }
- [[noreturn]] static void unexpectedToken(Token t) {
- switch(t) {
- case LITERAL:
- THROW_ERROR(
- "Unexpected literal(%s) on line %zu", readString(),
- lineCounter);
- break;
- case INT64:
- THROW_ERROR(
- "Unexpected int(%ld) on line %zu", readInt64(), lineCounter);
- break;
- case DOUBLE:
- THROW_ERROR(
- "Unexpected double(%lf) on line %zu", readDouble(),
- lineCounter);
- break;
- case STRING:
- THROW_ERROR(
- "Unexpected string(%s) on line %zu", readString(), lineCounter);
- break;
- case PLUS:
- THROW_ERROR("Unexpected plus on line %zu", lineCounter);
- break;
- case NEWLINE:
- THROW_ERROR("Unexpected newline on line %zu", lineCounter);
- break;
- case END: THROW_ERROR("Unexpected end on line %zu", lineCounter); break;
- }
- THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
- }
- static void consumeToken(Token t) {
- Token actual = nextToken();
- if(t != actual) {
- unexpectedToken(actual);
- }
- }
- static void consumeLiteral(const char* name) {
- consumeToken(LITERAL);
- const char* actual = readString();
- if(strcmp(actual, name) != 0) {
- THROW_ERROR("Unexpected literal(%s) on line %zu", actual, lineCounter);
- }
- }
- static void compileConstant() {
- Token t = nextToken();
- if(t == STRING) {
- CODE(codePushInstruction(PUSH_CONSTANT_STRING));
- CODE(codePushConstantString(readString()));
- } else if(t == INT64) {
- CODE(codePushInstruction(PUSH_INT64));
- CODE(codePushI64(readInt64()));
- } else {
- unexpectedToken(t);
- }
- }
- static void compileAdd() {
- compileConstant();
- while(peekToken() == PLUS) {
- nextToken();
- compileConstant();
- CODE(codePushInstruction(ADD));
- }
- }
// Compiles an expression. Addition is the only expression form so far, so
// this simply forwards to compileAdd().
static void compileExpression() {
    compileAdd();
}
- static void compileLine(Token t);
- static void compileIf() {
- compileExpression();
- CODE(codePushInstruction(JUMP_ON_0));
- size_t posIndex = codeGetWritePosition();
- CODE(codePushSize(0));
- consumeLiteral("then");
- consumeToken(NEWLINE);
- while(true) {
- const char* s = peekLiteral();
- if(s == nullptr || strcmp(s, "endif") != 0) {
- compileLine(nextToken());
- continue;
- } else {
- break;
- }
- }
- consumeLiteral("endif");
- consumeToken(NEWLINE);
- size_t endIndex = codeGetWritePosition();
- codeSetWritePosition(posIndex);
- CODE(codePushSize(endIndex));
- codeSetWritePosition(endIndex);
- }
- static void compileLine(Token t) {
- if(t == NEWLINE) {
- return;
- }
- if(t != LITERAL) {
- unexpectedToken(t);
- }
- const char* s = readString();
- if(strcmp(s, "print") == 0) {
- while(peekToken() != NEWLINE) {
- compileExpression();
- CODE(codePushInstruction(PRINT));
- }
- nextToken();
- CODE(codePushInstruction(PRINT_NEWLINE));
- } else if(strcmp(s, "if") == 0) {
- compileIf();
- } else {
- THROW_ERROR("Unexpected literal(%s) on line %zu", s, lineCounter);
- }
- }
- static void parseTokens() {
- codeReset();
- while(true) {
- Token t = nextToken();
- if(t == END) {
- break;
- }
- compileLine(t);
- }
- }
- const Error* compileFile(const char* path) {
- reset();
- if(setjmp(jumpPosition)) {
- cleanup();
- return &error;
- }
- file = fopen(path, "r");
- if(file == nullptr) {
- THROW_ERROR("Cannot read file '%s'", path);
- } else {
- parseTokens();
- }
- cleanup();
- return &error;
- }
|