|
|
@@ -1,38 +1,47 @@
|
|
|
#include "Tokenizer.h"
|
|
|
|
|
|
+#include <assert.h>
|
|
|
#include <errno.h>
|
|
|
+#include <setjmp.h>
|
|
|
#include <stdarg.h>
|
|
|
+#include <stdio.h>
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
-check_format(2, 3) static void tokenizerError(
|
|
|
- Tokenizer* t, const char* format, ...) {
|
|
|
- va_list args;
|
|
|
- va_start(args, format);
|
|
|
- vsnprintf(t->error, sizeof(t->error), format, args);
|
|
|
- va_end(args);
|
|
|
-}
|
|
|
+typedef struct { // Working state for one parse run; a pointer to it is passed to every t* helper.
|
|
|
+ int line; // line counter: tParseLines increments it before each read, THROW_ERROR prints it
|
|
|
+ FILE* file; // input stream: opened in tParseFile, closed in tokenizerInit
|
|
|
+ jmp_buf jump; // armed by setjmp in tokenizerInit; THROW_ERROR longjmps to it
|
|
|
+ Error error; // THROW_ERROR formats the message into error.text; tokenizerInit returns it by value
|
|
|
+ Tokenizer* tokenizer; // tokenizer whose buffer receives the encoded tokens
|
|
|
+} TState;
|
|
|
+ // THROW_ERROR(format, ...): writes "Line <t->line> | <formatted message>" into t->error.text (snprintf bounds it to sizeof(t->error.text)), then longjmps to t->jump with value 1.
|
|
|
+#define THROW_ERROR(format, ...) \
|
|
|
+ snprintf( \
|
|
|
+ t->error.text, sizeof(t->error.text), "Line %d | " format, \
|
|
|
+ t->line __VA_OPT__(, ) __VA_ARGS__); \
|
|
|
+ longjmp(t->jump, 1) // NOTE(review): expands to two statements with no do { } while(0) wrapper, so only use it inside braces; needs a `t` with these members in scope
|
|
|
|
|
|
-static void tokenizerTooMuchTokens(Tokenizer* t) {
|
|
|
- tokenizerError(t, "Line has too much tokens");
|
|
|
+[[noreturn]] static void tTooMuchTokens(TState* t) { // Reports a failed write to the token buffer; never returns.
|
|
|
+ THROW_ERROR("Line has too much tokens"); // leaves via longjmp(t->jump, 1)
|
|
|
}
|
|
|
|
|
|
-static void tokenizerInvalidToken(Tokenizer* t, char c) {
|
|
|
- tokenizerError(t, "Unexpected token '%c'", c);
|
|
|
+[[noreturn]] static void tInvalidToken(TState* t, char c) { // Reports the offending character c; never returns.
|
|
|
+ THROW_ERROR("Unexpected token '%c'", c); // leaves via longjmp(t->jump, 1)
|
|
|
}
|
|
|
|
|
|
-static void tokenizerInvalidNumber(Tokenizer* t) {
|
|
|
- tokenizerError(t, "Invalid number");
|
|
|
+[[noreturn]] static void tInvalidNumber(TState* t) { // Reports a malformed numeric literal; never returns.
|
|
|
+ THROW_ERROR("Invalid number"); // leaves via longjmp(t->jump, 1)
|
|
|
}
|
|
|
|
|
|
-static void tokenizerAddToken(Tokenizer* t, TokenType type) {
|
|
|
- if(bufferWriteU8(&t->buffer, type)) {
|
|
|
- tokenizerTooMuchTokens(t);
|
|
|
+static void tAddToken(TState* t, TokenType type) { // Writes the token type tag as one unsigned byte to the tokenizer's buffer.
|
|
|
+ if(bufferWriteU8(&t->tokenizer->buffer, type)) { // a non-zero result is treated as a failed write
|
|
|
+ tTooMuchTokens(t); // does not return
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static void tokenizerAddChar(Tokenizer* t, char c) {
|
|
|
- if(bufferWriteI8(&t->buffer, c)) {
|
|
|
- tokenizerTooMuchTokens(t);
|
|
|
+static void tAddChar(TState* t, char c) { // Writes one payload character to the tokenizer's buffer.
|
|
|
+ if(bufferWriteI8(&t->tokenizer->buffer, c)) { // a non-zero result is treated as a failed write
|
|
|
+ tTooMuchTokens(t); // does not return
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -52,25 +61,24 @@ static bool isTokenEnd(char c) {
|
|
|
return c == ' ' || c == '\0' || c == '\n';
|
|
|
}
|
|
|
|
|
|
-static const char* tokenizerAddLiteral(Tokenizer* t, const char* s) {
|
|
|
- tokenizerAddToken(t, LITERAL);
|
|
|
- tokenizerAddChar(t, *s);
|
|
|
+static const char* tokenizerAddLiteral(TState* t, const char* s) { // Encodes the literal starting at *s as LITERAL tag + characters + '\0'; returns a pointer to the character that ended it.
|
|
|
+ tAddToken(t, LITERAL);
|
|
|
+ tAddChar(t, *s); // first character is stored without being re-checked here
|
|
|
while(true) {
|
|
|
char c = *(++s);
|
|
|
if(isAlphaNumeric(c)) {
|
|
|
- tokenizerAddChar(t, c);
|
|
|
+ tAddChar(t, c);
|
|
|
} else if(isTokenEnd(c)) {
|
|
|
break;
|
|
|
} else {
|
|
|
- tokenizerInvalidToken(t, c);
|
|
|
- break;
|
|
|
+ tInvalidToken(t, c); // [[noreturn]]: leaves via longjmp, so no break is needed after it
|
|
|
}
|
|
|
}
|
|
|
- tokenizerAddChar(t, '\0');
|
|
|
+ tAddChar(t, '\0'); // terminator stored in the buffer after the literal's characters
|
|
|
return s;
|
|
|
}
|
|
|
|
|
|
-static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
|
|
|
+static const char* tokenizerAddNumber(TState* t, const char* s) {
|
|
|
size_t nIndex = 0;
|
|
|
char number[64] = {};
|
|
|
number[nIndex++] = *s;
|
|
|
@@ -79,9 +87,9 @@ static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
|
|
|
if(isTokenEnd(c)) {
|
|
|
break;
|
|
|
} else if(!isNumber(c)) {
|
|
|
- tokenizerInvalidToken(t, c);
|
|
|
+ tInvalidToken(t, c);
|
|
|
} else if(nIndex >= sizeof(number) - 1) {
|
|
|
- tokenizerInvalidNumber(t);
|
|
|
+ tInvalidNumber(t);
|
|
|
}
|
|
|
number[nIndex++] = c;
|
|
|
}
|
|
|
@@ -89,36 +97,35 @@ static const char* tokenizerAddNumber(Tokenizer* t, const char* s) {
|
|
|
errno = 0;
|
|
|
i64 i = strtoll(number, &end, 10);
|
|
|
if(errno != 0) {
|
|
|
- tokenizerInvalidNumber(t);
|
|
|
+ tInvalidNumber(t);
|
|
|
} else if(*end == '\0') {
|
|
|
- tokenizerAddToken(t, INT64);
|
|
|
- if(bufferWriteI64(&t->buffer, i)) {
|
|
|
- tokenizerTooMuchTokens(t);
|
|
|
+ tAddToken(t, INT64);
|
|
|
+ if(bufferWriteI64(&t->tokenizer->buffer, i)) {
|
|
|
+ tTooMuchTokens(t);
|
|
|
}
|
|
|
return s;
|
|
|
}
|
|
|
return s;
|
|
|
}
|
|
|
|
|
|
-static const char* tokenizerAddString(Tokenizer* t, const char* s) {
|
|
|
- tokenizerAddToken(t, STRING);
|
|
|
+static const char* tokenizerAddString(TState* t, const char* s) { // Encodes a quoted string as STRING tag + characters + '\0'; s points at the opening quote; returns a pointer just past the closing quote.
|
|
|
+ tAddToken(t, STRING);
|
|
|
while(true) {
|
|
|
char c = *(++s);
|
|
|
if(c == '\0') {
|
|
|
- tokenizerError(t, "Unclosed string");
|
|
|
- break;
|
|
|
+ THROW_ERROR("Unclosed string"); // end of input reached before a closing '"'; leaves via longjmp
|
|
|
} else if(c == '"') {
|
|
|
s++;
|
|
|
break;
|
|
|
}
|
|
|
- tokenizerAddChar(t, c);
|
|
|
+ tAddChar(t, c); // every other character is copied verbatim; no escape sequences are interpreted here
|
|
|
}
|
|
|
- tokenizerAddChar(t, '\0');
|
|
|
+ tAddChar(t, '\0'); // terminator stored in the buffer after the string's characters
|
|
|
return s;
|
|
|
}
|
|
|
|
|
|
-static void tokenizerParseLineString(Tokenizer* t, const char* s) {
|
|
|
- while(!tokenizerHasError(t)) {
|
|
|
+static void tParseLineString(TState* t, const char* s) {
|
|
|
+ while(true) {
|
|
|
char c = *s;
|
|
|
if(isLetter(c)) {
|
|
|
s = tokenizerAddLiteral(t, s);
|
|
|
@@ -127,104 +134,100 @@ static void tokenizerParseLineString(Tokenizer* t, const char* s) {
|
|
|
} else if(c == '"') {
|
|
|
s = tokenizerAddString(t, s);
|
|
|
} else if(c == '\n') {
|
|
|
- tokenizerAddToken(t, NEWLINE);
|
|
|
+ tAddToken(t, NEWLINE);
|
|
|
break;
|
|
|
} else if(c == ' ') {
|
|
|
s++;
|
|
|
} else if(c == '+') {
|
|
|
- tokenizerAddToken(t, PLUS);
|
|
|
+ tAddToken(t, PLUS);
|
|
|
s++;
|
|
|
} else if(c == '\0') {
|
|
|
break;
|
|
|
} else {
|
|
|
- tokenizerInvalidToken(t, c);
|
|
|
+ tInvalidToken(t, c);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static void tokenizerParseLine(Tokenizer* t) {
|
|
|
- bufferReset(&t->buffer);
|
|
|
- t->line++;
|
|
|
- char line[256] = {};
|
|
|
- if(fgets(line, sizeof(line), t->file) == nullptr) {
|
|
|
- return;
|
|
|
- }
|
|
|
- char c = line[sizeof(line) - 2];
|
|
|
- if(c != '\n' && c != '\0') {
|
|
|
- tokenizerError(t, "Too long line");
|
|
|
- return;
|
|
|
+static void tParseLines(TState* t) { // Reads t->file line by line and tokenizes each line until fgets returns nullptr.
|
|
|
+ while(true) {
|
|
|
+ t->line++; // incremented before the read, so the first line is reported as line 1
|
|
|
+ char line[256] = {}; // zeroed every iteration so the length check below sees no stale bytes
|
|
|
+ if(fgets(line, sizeof(line), t->file) == nullptr) { // nullptr covers both end-of-file and a read error; ferror is not checked
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ char c = line[sizeof(line) - 2]; // last data slot; stays '\0' unless fgets filled the whole buffer
|
|
|
+ if(c != '\n' && c != '\0') {
|
|
|
+ THROW_ERROR("Too long line"); // buffer was filled without reaching a newline; leaves via longjmp
|
|
|
+ }
|
|
|
+ tParseLineString(t, line);
|
|
|
}
|
|
|
- tokenizerParseLineString(t, line);
|
|
|
}
|
|
|
|
|
|
-static const char* tokenizerReadString(Tokenizer* t) {
|
|
|
+static const char* tReadString(Tokenizer* t) { // Returns a pointer to the string stored at the buffer's current read position and consumes bytes up to and including its 0 terminator.
|
|
|
const char* c = (char*)(t->buffer.data + t->buffer.readIndex);
|
|
|
i8 i = 1;
|
|
|
while(i != 0) {
|
|
|
if(bufferReadI8(&t->buffer, &i)) {
|
|
|
- tokenizerError(t, "empty buffer on readInt64");
|
|
|
+ assert(false); // buffer ran out before a terminator; with NDEBUG defined this is a no-op and "" is returned below
|
|
|
return "";
|
|
|
}
|
|
|
}
|
|
|
return c;
|
|
|
}
|
|
|
|
|
|
-static i64 tokenizerReadInt64(Tokenizer* t) {
|
|
|
+static i64 tReadInt64(Tokenizer* t) { // Reads one i64 payload from the tokenizer's buffer and returns it.
|
|
|
i64 i = 0;
|
|
|
if(bufferReadI64(&t->buffer, &i)) {
|
|
|
- tokenizerError(t, "empty buffer on readInt64");
|
|
|
+ assert(false); // failed read; with NDEBUG defined execution falls through and returns i (0 unless bufferReadI64 wrote to it - TODO confirm)
|
|
|
}
|
|
|
return i;
|
|
|
}
|
|
|
|
|
|
Token tokenizerNext(Tokenizer* t) { // Decodes and consumes the next token from t->buffer.
|
|
|
Token token = {.type = END};
|
|
|
- if(bufferIsEmpty(&t->buffer)) {
|
|
|
- tokenizerParseLine(t);
|
|
|
- }
|
|
|
- if(tokenizerHasError(t) || bufferReadU8(&t->buffer, &token.type)) {
|
|
|
+ if(bufferReadU8(&t->buffer, &token.type)) { // failed tag read: return the token as initialised above (type END)
|
|
|
return token;
|
|
|
}
|
|
|
switch(token.type) {
|
|
|
case STRING:
|
|
|
- case LITERAL: token.stringValue = tokenizerReadString(t); break;
|
|
|
- case INT64: token.intValue = tokenizerReadInt64(t); break;
|
|
|
+ case LITERAL: token.stringValue = tReadString(t); break; // STRING falls through: both carry a 0-terminated string payload
|
|
|
+ case INT64: token.intValue = tReadInt64(t); break; // INT64 carries an i64 payload
|
|
|
default: break;
|
|
|
}
|
|
|
return token;
|
|
|
}
|
|
|
|
|
|
Token tokenizerPeek(Tokenizer* t) { // Returns the next token without consuming it.
|
|
|
- size_t index =
|
|
|
- bufferIsEmpty(&t->buffer) ? 0 : bufferGetReadIndex(&t->buffer);
|
|
|
+ size_t index = bufferGetReadIndex(&t->buffer); // remember the read position before decoding
|
|
|
Token token = tokenizerNext(t);
|
|
|
bufferSetReadIndex(&t->buffer, index);
|
|
|
return token;
|
|
|
}
|
|
|
|
|
|
-bool tokenizerInit(Tokenizer* t, const char* path, u8* tokens, size_t n) {
|
|
|
- bufferInit(&t->buffer, tokens, n);
|
|
|
- t->error[0] = '\0';
|
|
|
- t->line = 0;
|
|
|
+static void tParseFile(TState* t, const char* path) { // Opens path for reading and tokenizes all of its lines; the stream is left open in t->file for the caller to close.
|
|
|
t->file = fopen(path, "r");
|
|
|
if(t->file == nullptr) {
|
|
|
- tokenizerError(t, "Cannot read file '%s'", path);
|
|
|
- return true;
|
|
|
+ THROW_ERROR("Cannot read file '%s'", path); // leaves via longjmp; the message is prefixed with the current t->line
|
|
|
}
|
|
|
- return false;
|
|
|
-}
|
|
|
-
|
|
|
-void tokenizerDestroy(Tokenizer* t) {
|
|
|
- fclose(t->file);
|
|
|
- *t = (Tokenizer){};
|
|
|
+ tParseLines(t); // returns when fgets yields nullptr, or exits through longjmp on a tokenizing error
|
|
|
}
|
|
|
|
|
|
-bool tokenizerHasError(const Tokenizer* t) {
|
|
|
- return t->error[0] != '\0';
|
|
|
+Error tokenizerInit(Tokenizer* t, const char* path) { // Initialises t->buffer, tokenizes the file at path into it, and returns the Error filled in by THROW_ERROR (left zero-initialised if nothing was thrown).
|
|
|
+ bufferInit(&t->buffer);
|
|
|
+ TState state = {.tokenizer = t}; // remaining members are zero-initialised: file == nullptr, line == 0
|
|
|
+ if(!setjmp(state.jump)) { // 0 on the direct call; THROW_ERROR re-enters here with 1, which skips the body
|
|
|
+ tParseFile(&state, path);
|
|
|
+ }
|
|
|
+ if(state.file != nullptr) { // NOTE(review): state is a non-volatile automatic modified after setjmp; ISO C makes such objects indeterminate after longjmp - confirm this is acceptable on the target compilers
|
|
|
+ fclose(state.file); // runs on both the normal and the longjmp path
|
|
|
+ }
|
|
|
+ return state.error;
|
|
|
}
|
|
|
|
|
|
-const char* tokenizerGetError(const Tokenizer* t) {
|
|
|
- return t->error;
|
|
|
+void tokenizerDestroy(Tokenizer* t) { // Releases the token buffer and resets *t to an all-zero Tokenizer.
|
|
|
+ bufferDestroy(&t->buffer);
|
|
|
+ *t = (Tokenizer){}; // zero every member of *t after the buffer has been destroyed
|
|
|
}
|
|
|
|
|
|
void tokenizerPrintToken(const Token* token, char* buffer, size_t n) {
|