|
@@ -0,0 +1,276 @@
|
|
|
|
|
+#include "Tokenizer.h"
|
|
|
|
|
+
|
|
|
|
|
+#include <errno.h>
|
|
|
|
|
+#include <stdarg.h>
|
|
|
|
|
+#include <stdlib.h>
|
|
|
|
|
+
|
|
|
|
|
+check_format(2, 3) static void tokenizerError(
|
|
|
|
|
+ Tokenizer* t, const char* format, ...) {
|
|
|
|
|
+ va_list args;
|
|
|
|
|
+ va_start(args, format);
|
|
|
|
|
+ vsnprintf(t->error, sizeof(t->error), format, args);
|
|
|
|
|
+ va_end(args);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerTooMuchTokens(Tokenizer* t) {
|
|
|
|
|
+ tokenizerError(t, "Line has too much tokens");
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerInvalidToken(Tokenizer* t, char c) {
|
|
|
|
|
+ tokenizerError(t, "Unexpected token '%c'", c);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerInvalidNumber(Tokenizer* t) {
|
|
|
|
|
+ tokenizerError(t, "Invalid number");
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerAddToken(Tokenizer* t, TokenType type) {
|
|
|
|
|
+ if(bufferWriteU8(&t->buffer, type)) {
|
|
|
|
|
+ tokenizerTooMuchTokens(t);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerAddChar(Tokenizer* t, char c) {
|
|
|
|
|
+ if(bufferWriteI8(&t->buffer, c)) {
|
|
|
|
|
+ tokenizerTooMuchTokens(t);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Tells whether c is an ASCII letter ('a'..'z' or 'A'..'Z').
 *
 * @param c character to classify
 * @return true for an ASCII letter, false otherwise
 */
static bool isLetter(char c) {
    bool lower = 'a' <= c && c <= 'z';
    bool upper = 'A' <= c && c <= 'Z';
    return lower || upper;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Tells whether c is an ASCII decimal digit ('0'..'9').
 *
 * @param c character to classify
 * @return true for a digit, false otherwise
 */
static bool isNumber(char c) {
    if(c < '0') {
        return false;
    }
    return c <= '9';
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Tells whether c is an ASCII letter or an ASCII decimal digit.
 *
 * @param c character to classify
 * @return true when isNumber(c) or isLetter(c) holds
 */
static bool isAlphaNumeric(char c) {
    if(isNumber(c)) {
        return true;
    }
    return isLetter(c);
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Tells whether c terminates a token: a space, a newline or the string
 * terminator.
 *
 * @param c character to classify
 * @return true when c ends the current token
 */
static bool isTokenEnd(char c) {
    switch(c) {
        case ' ':
        case '\0':
        case '\n': return true;
        default: return false;
    }
}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizerAddLiteral(Tokenizer* t, const char* s) {
|
|
|
|
|
+ tokenizerAddToken(t, LITERAL);
|
|
|
|
|
+ tokenizerAddChar(t, *s);
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *(++s);
|
|
|
|
|
+ if(isAlphaNumeric(c)) {
|
|
|
|
|
+ tokenizerAddChar(t, c);
|
|
|
|
|
+ } else if(isTokenEnd(c)) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ tokenizerInvalidToken(t, c);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizerAddChar(t, '\0');
|
|
|
|
|
+ return s;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
|
|
|
|
|
+ size_t nIndex = 0;
|
|
|
|
|
+ char number[64] = {};
|
|
|
|
|
+ number[nIndex++] = *s;
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *(++s);
|
|
|
|
|
+ if(isTokenEnd(c)) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else if(!isNumber(c)) {
|
|
|
|
|
+ tokenizerInvalidToken(t, c);
|
|
|
|
|
+ } else if(nIndex >= sizeof(number) - 1) {
|
|
|
|
|
+ tokenizerInvalidNumber(t);
|
|
|
|
|
+ }
|
|
|
|
|
+ number[nIndex++] = c;
|
|
|
|
|
+ }
|
|
|
|
|
+ char* end = nullptr;
|
|
|
|
|
+ errno = 0;
|
|
|
|
|
+ i64 i = strtoll(number, &end, 10);
|
|
|
|
|
+ if(errno != 0) {
|
|
|
|
|
+ tokenizerInvalidNumber(t);
|
|
|
|
|
+ } else if(*end == '\0') {
|
|
|
|
|
+ tokenizerAddToken(t, INT64);
|
|
|
|
|
+ if(bufferWriteI64(&t->buffer, i)) {
|
|
|
|
|
+ tokenizerTooMuchTokens(t);
|
|
|
|
|
+ }
|
|
|
|
|
+ return s;
|
|
|
|
|
+ }
|
|
|
|
|
+ return s;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizerAddString(Tokenizer* t, const char* s) {
|
|
|
|
|
+ tokenizerAddToken(t, STRING);
|
|
|
|
|
+ while(true) {
|
|
|
|
|
+ char c = *(++s);
|
|
|
|
|
+ if(c == '\0') {
|
|
|
|
|
+ tokenizerError(t, "Unclosed string");
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else if(c == '"') {
|
|
|
|
|
+ s++;
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizerAddChar(t, c);
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizerAddChar(t, '\0');
|
|
|
|
|
+ return s;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerParseLineString(Tokenizer* t, const char* s) {
|
|
|
|
|
+ while(!tokenizerHasError(t)) {
|
|
|
|
|
+ char c = *s;
|
|
|
|
|
+ if(isLetter(c)) {
|
|
|
|
|
+ s = tokenizerAddLiteral(t, s);
|
|
|
|
|
+ } else if(isNumber(c)) {
|
|
|
|
|
+ s = tokenizerAddNumber(t, s);
|
|
|
|
|
+ } else if(c == '"') {
|
|
|
|
|
+ s = tokenizerAddString(t, s);
|
|
|
|
|
+ } else if(c == '\n') {
|
|
|
|
|
+ tokenizerAddToken(t, NEWLINE);
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else if(c == ' ') {
|
|
|
|
|
+ s++;
|
|
|
|
|
+ } else if(c == '+') {
|
|
|
|
|
+ tokenizerAddToken(t, PLUS);
|
|
|
|
|
+ s++;
|
|
|
|
|
+ } else if(c == '\0') {
|
|
|
|
|
+ break;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ tokenizerInvalidToken(t, c);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static void tokenizerParseLine(Tokenizer* t) {
|
|
|
|
|
+ bufferReset(&t->buffer);
|
|
|
|
|
+ t->line++;
|
|
|
|
|
+ char line[256] = {};
|
|
|
|
|
+ if(fgets(line, sizeof(line), t->file) == nullptr) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ char c = line[sizeof(line) - 2];
|
|
|
|
|
+ if(c != '\n' && c != '\0') {
|
|
|
|
|
+ tokenizerError(t, "Too long line");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ tokenizerParseLineString(t, line);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+//[[noreturn]] static void unexpectedToken(Token t) {
|
|
|
|
|
+// switch(t) {
|
|
|
|
|
+// case LITERAL:
|
|
|
|
|
+// THROW_ERROR(
|
|
|
|
|
+// "Unexpected literal(%s) on line %zu", readString(),
|
|
|
|
|
+// lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// case INT64:
|
|
|
|
|
+// THROW_ERROR(
|
|
|
|
|
+// "Unexpected int(%ld) on line %zu", readInt64(), lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// case DOUBLE:
|
|
|
|
|
+// THROW_ERROR(
|
|
|
|
|
+// "Unexpected double(%lf) on line %zu", readDouble(),
|
|
|
|
|
+// lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// case STRING:
|
|
|
|
|
+// THROW_ERROR(
|
|
|
|
|
+// "Unexpected string(%s) on line %zu", readString(),
|
|
|
|
|
+// lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// case PLUS:
|
|
|
|
|
+// THROW_ERROR("Unexpected plus on line %zu", lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// case NEWLINE:
|
|
|
|
|
+// THROW_ERROR("Unexpected newline on line %zu", lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// case END: THROW_ERROR("Unexpected end on line %zu", lineCounter);
|
|
|
|
|
+// break;
|
|
|
|
|
+// }
|
|
|
|
|
+// THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
|
|
|
|
|
+//}
|
|
|
|
|
+
|
|
|
|
|
+static const char* tokenizerReadString(Tokenizer* t) {
|
|
|
|
|
+ const char* c = (char*)(t->buffer.data + t->buffer.readIndex);
|
|
|
|
|
+ i8 i = 1;
|
|
|
|
|
+ while(i != 0) {
|
|
|
|
|
+ if(bufferReadI8(&t->buffer, &i)) {
|
|
|
|
|
+ tokenizerError(t, "empty buffer on readInt64");
|
|
|
|
|
+ return "";
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return c;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static i64 tokenizerReadInt64(Tokenizer* t) {
|
|
|
|
|
+ i64 i = 0;
|
|
|
|
|
+ if(bufferReadI64(&t->buffer, &i)) {
|
|
|
|
|
+ tokenizerError(t, "empty buffer on readInt64");
|
|
|
|
|
+ }
|
|
|
|
|
+ return i;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+Token tokenizerNext(Tokenizer* t) {
|
|
|
|
|
+ Token token = {.type = END};
|
|
|
|
|
+ if(bufferIsEmpty(&t->buffer)) {
|
|
|
|
|
+ tokenizerParseLine(t);
|
|
|
|
|
+ }
|
|
|
|
|
+ if(tokenizerHasError(t) || bufferReadU8(&t->buffer, &token.type)) {
|
|
|
|
|
+ return token;
|
|
|
|
|
+ }
|
|
|
|
|
+ switch(token.type) {
|
|
|
|
|
+ case STRING:
|
|
|
|
|
+ case LITERAL: token.stringValue = tokenizerReadString(t); break;
|
|
|
|
|
+ case INT64: token.intValue = tokenizerReadInt64(t); break;
|
|
|
|
|
+ default: break;
|
|
|
|
|
+ }
|
|
|
|
|
+ return token;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+Token tokenizerPeek(Tokenizer* t) {
|
|
|
|
|
+ size_t index = bufferGetReadIndex(&t->buffer);
|
|
|
|
|
+ Token token = tokenizerNext(t);
|
|
|
|
|
+ bufferSetReadIndex(&t->buffer, index);
|
|
|
|
|
+ return token;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+bool tokenizerInit(Tokenizer* t, const char* path, u8* tokens, size_t n) {
|
|
|
|
|
+ bufferInit(&t->buffer, tokens, n);
|
|
|
|
|
+ t->error[0] = '\0';
|
|
|
|
|
+ t->line = 0;
|
|
|
|
|
+ t->file = fopen(path, "r");
|
|
|
|
|
+ if(t->file == nullptr) {
|
|
|
|
|
+ tokenizerError(t, "Cannot read file '%s'", path);
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+ return false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+void tokenizerDestroy(Tokenizer* t) {
|
|
|
|
|
+ fclose(t->file);
|
|
|
|
|
+ *t = (Tokenizer){};
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+bool tokenizerHasError(const Tokenizer* t) {
|
|
|
|
|
+ return t->error[0] != '\0';
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const char* tokenizerGetError(const Tokenizer* t) {
|
|
|
|
|
+ return t->error;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+void tokenizerPrintToken(const Token* token, char* buffer, size_t n) {
|
|
|
|
|
+ switch(token->type) {
|
|
|
|
|
+ case LITERAL:
|
|
|
|
|
+ snprintf(buffer, n, "Literal(%s)", token->stringValue);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case INT64: snprintf(buffer, n, "Int64(%ld)", token->intValue); break;
|
|
|
|
|
+ case STRING:
|
|
|
|
|
+ snprintf(buffer, n, "String(%s)", token->stringValue);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case PLUS: snprintf(buffer, n, "Plus"); break;
|
|
|
|
|
+ case NEWLINE: snprintf(buffer, n, "Newline"); break;
|
|
|
|
|
+ case END: snprintf(buffer, n, "End"); break;
|
|
|
|
|
+ default: snprintf(buffer, n, "Unknown"); break;
|
|
|
|
|
+ }
|
|
|
|
|
+}
|