|
@@ -0,0 +1,322 @@
|
|
|
|
|
+#include "Compiler.h"
|
|
|
|
|
+
|
|
|
|
|
+#include <errno.h>
|
|
|
|
|
+#include <setjmp.h>
|
|
|
|
|
+#include <stdio.h>
|
|
|
|
|
+#include <stdlib.h>
|
|
|
|
|
+#include <string.h>
|
|
|
|
|
+
|
|
|
|
|
+#include "Code.h"
|
|
|
|
|
+#include "Constants.h"
|
|
|
|
|
+
|
|
|
|
|
+static Error error = {};
|
|
|
|
|
+static FILE* file = nullptr;
|
|
|
|
|
+static size_t lineCounter = 0;
|
|
|
|
|
+static jmp_buf jumpPosition = {};
|
|
|
|
|
+static u8 tokens[MAX_TOKENS] = {};
|
|
|
|
|
+static size_t tokenReadIndex = 0;
|
|
|
|
|
+static size_t tokenWriteIndex = 0;
|
|
|
|
|
+
|
|
|
|
|
+typedef enum : u8 { LITERAL, INT64, DOUBLE, STRING, NEWLINE, END } Token;
|
|
|
|
|
+
|
|
|
|
|
// Formats a message into error.text (printf-style arguments) and jumps back
// to the setjmp point stored in jumpPosition; control never continues after
// the macro. Wrapped in do/while(0) so the expansion is a single statement
// and stays correct inside unbraced if/else bodies.
#define THROW_ERROR(...)                                       \
    do {                                                       \
        snprintf(error.text, sizeof(error.text), __VA_ARGS__); \
        longjmp(jumpPosition, 1);                              \
    } while(0)
|
|
|
|
|
+
|
|
|
|
|
// Evaluates command once and throws a "Code overflow" error when the result
// is non-zero. Wrapped in do/while(0) so that `CODE(x);` is exactly one
// statement: the inner `if` cannot capture a following `else`, and no stray
// empty statement is left behind.
#define CODE(command)                                              \
    do {                                                           \
        if(command) {                                              \
            THROW_ERROR("Code overflow on line %zu", lineCounter); \
        }                                                          \
    } while(0)
|
|
|
|
|
+
|
|
|
|
|
+static void cleanup() {
|
|
|
|
|
+ if(file != nullptr) {
|
|
|
|
|
+ fclose(file);
|
|
|
|
|
+ file = nullptr;
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void reset() {
|
|
|
|
|
+ error.text[0] = '\0';
|
|
|
|
|
+ lineCounter = 0;
|
|
|
|
|
+ tokenReadIndex = 0;
|
|
|
|
|
+ tokenWriteIndex = 0;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void addTokenN(const void* p, size_t n) {
|
|
|
|
|
+ if(tokenWriteIndex + n > MAX_TOKENS) {
|
|
|
|
|
+ THROW_ERROR("Line %zu has too much tokens", lineCounter);
|
|
|
|
|
+ }
|
|
|
|
|
+ memcpy(tokens + tokenWriteIndex, p, n);
|
|
|
|
|
+ tokenWriteIndex += n;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void addToken(Token t) {
|
|
|
|
|
+ addTokenN(&t, sizeof(t));
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Appends a single character to the token buffer.
static void addChar(char c) {
    addTokenN(&c, sizeof(char));
}
|
|
|
|
|
+
|
|
|
|
|
// Returns true when c lies in 'a'..'z' or 'A'..'Z'.
static bool isLetter(char c) {
    bool lower = c >= 'a' && c <= 'z';
    bool upper = c >= 'A' && c <= 'Z';
    return lower || upper;
}
|
|
|
|
|
+
|
|
|
|
|
// Returns true when c is one of the decimal digits '0'..'9'.
static bool isNumber(char c) {
    return !(c < '0' || c > '9');
}
|
|
|
|
|
+
|
|
|
|
|
// Returns true when c is a letter or a decimal digit.
static bool isAlphaNumeric(char c) {
    if(isLetter(c)) {
        return true;
    }
    return isNumber(c);
}
|
|
|
|
|
+
|
|
|
|
|
+[[noreturn]] static void invalidToken(char c, int line) {
|
|
|
|
|
+ THROW_ERROR("Unexpected token '%c' on line %zu %d", c, lineCounter, line);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Returns true for the characters that terminate a token: space, newline
// and the string terminator.
static bool isTokenEnd(char c) {
    switch(c) {
        case ' ':
        case '\n':
        case '\0': return true;
        default: return false;
    }
}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizeLiteral(const char* s) {
|
|
|
|
|
+ addToken(LITERAL);
|
|
|
|
|
+ addChar(*s);
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *(++s);
|
|
|
|
|
+ if(isAlphaNumeric(c)) {
|
|
|
|
|
+ addChar(c);
|
|
|
|
|
+ } else if(isTokenEnd(c)) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ invalidToken(c, __LINE__);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ addChar('\0');
|
|
|
|
|
+ return s;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+[[noreturn]] static void invalidNumber() {
|
|
|
|
|
+ THROW_ERROR("Invalid number on line %zu", lineCounter);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizeNumber(const char* s) {
|
|
|
|
|
+ size_t nIndex = 0;
|
|
|
|
|
+ char number[64] = {};
|
|
|
|
|
+ number[nIndex++] = *s;
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *(++s);
|
|
|
|
|
+ if(isTokenEnd(c)) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else if(!isNumber(c) && c != '.') {
|
|
|
|
|
+ invalidToken(c, __LINE__);
|
|
|
|
|
+ } else if(nIndex >= sizeof(number) - 1) {
|
|
|
|
|
+ invalidNumber();
|
|
|
|
|
+ }
|
|
|
|
|
+ number[nIndex++] = c;
|
|
|
|
|
+ }
|
|
|
|
|
+ char* end = nullptr;
|
|
|
|
|
+ errno = 0;
|
|
|
|
|
+ i64 i = strtoll(number, &end, 10);
|
|
|
|
|
+ if(errno != 0) {
|
|
|
|
|
+ invalidNumber();
|
|
|
|
|
+ } else if(*end == '\0') {
|
|
|
|
|
+ addToken(INT64);
|
|
|
|
|
+ addTokenN(&i, sizeof(i));
|
|
|
|
|
+ return s;
|
|
|
|
|
+ }
|
|
|
|
|
+ double d = strtod(number, &end);
|
|
|
|
|
+ if(errno != 0) {
|
|
|
|
|
+ invalidNumber();
|
|
|
|
|
+ } else if(*end != '\0') {
|
|
|
|
|
+ invalidNumber();
|
|
|
|
|
+ }
|
|
|
|
|
+ addToken(DOUBLE);
|
|
|
|
|
+ addTokenN(&d, sizeof(d));
|
|
|
|
|
+ return s;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizeString(const char* s) {
|
|
|
|
|
+ addToken(STRING);
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *(++s);
|
|
|
|
|
+ if(c == '\0') {
|
|
|
|
|
+ THROW_ERROR("Unclosed string on line %zu", lineCounter);
|
|
|
|
|
+ } else if(c == '"') {
|
|
|
|
|
+ s++;
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ addChar(c);
|
|
|
|
|
+ }
|
|
|
|
|
+ addChar('\0');
|
|
|
|
|
+ return s;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizeLineString(const char* s) {
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *s;
|
|
|
|
|
+ if(isLetter(c)) {
|
|
|
|
|
+ s = tokenizeLiteral(s);
|
|
|
|
|
+ } else if(isNumber(c)) {
|
|
|
|
|
+ s = tokenizeNumber(s);
|
|
|
|
|
+ } else if(c == '"') {
|
|
|
|
|
+ s = tokenizeString(s);
|
|
|
|
|
+ } else if(c == '\n') {
|
|
|
|
|
+ addToken(NEWLINE);
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else if(c == ' ') {
|
|
|
|
|
+ s++;
|
|
|
|
|
+ } else if(c == '\0') {
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ invalidToken(c, __LINE__);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizeLine() {
|
|
|
|
|
+ tokenReadIndex = 0;
|
|
|
|
|
+ tokenWriteIndex = 0;
|
|
|
|
|
+ lineCounter++;
|
|
|
|
|
+ char line[256] = {};
|
|
|
|
|
+ if(fgets(line, sizeof(line), file) == nullptr) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ char c = line[sizeof(line) - 2];
|
|
|
|
|
+ if(c != '\n' && c != '\0') {
|
|
|
|
|
+ THROW_ERROR("Line %zu is too long", lineCounter);
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizeLineString(line);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static Token peekToken() {
|
|
|
|
|
+ if(tokenReadIndex < tokenWriteIndex) {
|
|
|
|
|
+ return tokens[tokenReadIndex];
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizeLine();
|
|
|
|
|
+ return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex] : END;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static Token nextToken() {
|
|
|
|
|
+ if(tokenReadIndex < tokenWriteIndex) {
|
|
|
|
|
+ return tokens[tokenReadIndex++];
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizeLine();
|
|
|
|
|
+ return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex++] : END;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static const char* readString() {
|
|
|
|
|
+ if(tokenReadIndex >= tokenWriteIndex) {
|
|
|
|
|
+ THROW_ERROR("readString on empty buffer, line %zu", lineCounter);
|
|
|
|
|
+ }
|
|
|
|
|
+ const char* c = (char*)(tokens + tokenReadIndex);
|
|
|
|
|
+ while(tokenReadIndex < tokenWriteIndex && tokens[tokenReadIndex] != '\0') {
|
|
|
|
|
+ tokenReadIndex++;
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenReadIndex++;
|
|
|
|
|
+ return c;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static i64 readInt64() {
|
|
|
|
|
+ if(tokenReadIndex + sizeof(i64) >= tokenWriteIndex) {
|
|
|
|
|
+ THROW_ERROR("readInt64 on empty buffer, line %zu", lineCounter);
|
|
|
|
|
+ }
|
|
|
|
|
+ i64 i = 0;
|
|
|
|
|
+ memcpy(&i, tokens + tokenReadIndex, sizeof(i));
|
|
|
|
|
+ tokenReadIndex += sizeof(i);
|
|
|
|
|
+ return i;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static double readDouble() {
|
|
|
|
|
+ if(tokenReadIndex + sizeof(double) >= tokenWriteIndex) {
|
|
|
|
|
+ THROW_ERROR("readDouble on empty buffer, line %zu", lineCounter);
|
|
|
|
|
+ }
|
|
|
|
|
+ double d = 0;
|
|
|
|
|
+ memcpy(&d, tokens + tokenReadIndex, sizeof(d));
|
|
|
|
|
+ tokenReadIndex += sizeof(d);
|
|
|
|
|
+ return d;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+[[noreturn]] static void unexpectedToken(Token t) {
|
|
|
|
|
+ switch(t) {
|
|
|
|
|
+ case LITERAL:
|
|
|
|
|
+ THROW_ERROR(
|
|
|
|
|
+ "Unexpected literal(%s) on line %zu", readString(),
|
|
|
|
|
+ lineCounter);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case INT64:
|
|
|
|
|
+ THROW_ERROR(
|
|
|
|
|
+ "Unexpected int(%ld) on line %zu", readInt64(), lineCounter);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case DOUBLE:
|
|
|
|
|
+ THROW_ERROR(
|
|
|
|
|
+ "Unexpected double(%lf) on line %zu", readDouble(),
|
|
|
|
|
+ lineCounter);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case STRING:
|
|
|
|
|
+ THROW_ERROR(
|
|
|
|
|
+ "Unexpected string(%s) on line %zu", readString(), lineCounter);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case NEWLINE:
|
|
|
|
|
+ THROW_ERROR("Unexpected newline on line %zu", lineCounter);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case END: THROW_ERROR("Unexpected end on line %zu", lineCounter); break;
|
|
|
|
|
+ }
|
|
|
|
|
+ THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void compileExpression() {
|
|
|
|
|
+ Token t = nextToken();
|
|
|
|
|
+ if(t == STRING) {
|
|
|
|
|
+ CODE(pushInstruction(PUSH_CONSTANT_STRING));
|
|
|
|
|
+ CODE(pushConstantString(readString()));
|
|
|
|
|
+ } else if(t == INT64) {
|
|
|
|
|
+ CODE(pushInstruction(PUSH_INT64));
|
|
|
|
|
+ CODE(pushI64(readInt64()));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ unexpectedToken(t);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void compileLine(Token t) {
|
|
|
|
|
+ if(t != LITERAL) {
|
|
|
|
|
+ unexpectedToken(t);
|
|
|
|
|
+ }
|
|
|
|
|
+ const char* s = readString();
|
|
|
|
|
+ if(strcmp(s, "print") == 0) {
|
|
|
|
|
+ while(peekToken() != NEWLINE) {
|
|
|
|
|
+ compileExpression();
|
|
|
|
|
+ CODE(pushInstruction(PRINT));
|
|
|
|
|
+ }
|
|
|
|
|
+ nextToken();
|
|
|
|
|
+ CODE(pushInstruction(PRINT_NEWLINE));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ THROW_ERROR("Unexpected literal(%s) on line %zu", s, lineCounter);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void parseTokens() {
|
|
|
|
|
+ resetCode();
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ Token t = nextToken();
|
|
|
|
|
+ if(t == END) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ compileLine(t);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const Error* compileFile(const char* path) {
|
|
|
|
|
+ reset();
|
|
|
|
|
+ if(setjmp(jumpPosition)) {
|
|
|
|
|
+ cleanup();
|
|
|
|
|
+ return &error;
|
|
|
|
|
+ }
|
|
|
|
|
+ file = fopen(path, "r");
|
|
|
|
|
+ if(file == nullptr) {
|
|
|
|
|
+ THROW_ERROR("Cannot read file '%s'", path);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ parseTokens();
|
|
|
|
|
+ }
|
|
|
|
|
+ cleanup();
|
|
|
|
|
+ return &error;
|
|
|
|
|
+}
|