#include #include #include #include #include #include #include "tokenizer/File.h" #include "tokenizer/Tokenizer.h" #include "utils/Utils.h" #define TOKEN_BUFFER_LENGTH (1024 * 1024) #define ERROR_LENGTH 256 static jmp_buf errorJump; static char tokenBuffer[TOKEN_BUFFER_LENGTH]; static int writeIndex = 0; static int readIndex = 0; static int16 line = 1; static char error[ERROR_LENGTH] = {'\0'}; static void tError(const char* format, ...) { va_list args; va_start(args, format); vsnprintf(error, ERROR_LENGTH, format, args); va_end(args); longjmp(errorJump, 0); } static void tAdd(const void* data, int length) { if(writeIndex + length > TOKEN_BUFFER_LENGTH) { tError("the token buffer is too small"); } memcpy(tokenBuffer + writeIndex, data, length); writeIndex += length; } static void tAddToken(Token token) { unsigned char c = token; tAdd(&c, 1); tAdd(&line, sizeof(line)); } static bool tReadTokens(void* dest, int length) { if(readIndex + length > writeIndex) { return false; } memcpy(dest, tokenBuffer + readIndex, length); readIndex += length; return true; } static void tParseLiteral(int c) { int index = 1; char buffer[64]; buffer[0] = c; while(isLetter(fPeek())) { if(index >= 63) { tError("literal is too long"); } buffer[index++] = fRead(); } buffer[index] = '\0'; Token t = tFromName(buffer); if(t != T_END) { tAddToken(t); } else { tAddToken(T_LITERAL); tAdd(buffer, index + 1); } } static void tParseNumber(int c) { int index = 1; char buffer[64]; buffer[0] = c; bool point = false; while(true) { int c = fPeek(); if(c == '.') { point = true; } else if(!isNumber(c)) { break; } else if(index >= 63) { tError("number is too long"); } buffer[index++] = fRead(); } buffer[index] = '\0'; if(point) { char* end = NULL; float f = strtof(buffer, &end); if(end[0] != '\0') { tError("invalid float on line %d", line); } tAddToken(T_CONST_FLOAT); tAdd(&f, sizeof(float)); } else { char* end = NULL; long l = strtol(buffer, &end, 10); if(end[0] != '\0' || l > INT_MAX) { tError("invalid int on line %d", line); } int i = l; tAddToken(T_CONST_INT); tAdd(&i, sizeof(int)); } } static void tAddString() { tAddToken(T_TEXT); while(true) { int c = fRead(); if(c == '"') { break; } else if(c == '\\') { switch(fRead()) { case '"': c = '"'; break; case '\\': c = '\\'; break; default: tError("unknown escaped character at line %d", line); } } else if(c == EOF) { tError("unclosed string starting at line %d", line); } tAdd(&c, 1); } char c = '\0'; tAdd(&c, 1); } static void tAddToken2(Token te, Token t) { if(fReadIf('=')) { tAddToken(te); } else { tAddToken(t); } } static void tAddToken3(int c, Token tc, Token te, Token t) { if(fReadIf(c)) { tAddToken(tc); } else { tAddToken2(te, t); } } static void tAddToken4(int c, Token tce, Token tc, Token te, Token t) { if(fReadIf(c)) { tAddToken2(tce, tc); } else { tAddToken2(te, t); } } static void tAddTokenMinus() { tAddToken3('-', T_DECREMENT, T_SUB_SET, fReadIf('>') ? T_ARROW : T_SUB); } static void tLineComment() { while(true) { int c = fRead(); if(c == EOF || c == '\n') { line++; return; } } } static void tMultipleLineComment() { while(true) { int c = fRead(); if(c == EOF) { tError("unclosed comment at line %d", line); } else if(c == '\n') { line++; } else if(c == '*' && fReadIf('/')) { return; } } } static void tSlash() { if(fReadIf('/')) { tLineComment(); } else if(fReadIf('*')) { tMultipleLineComment(); } else { tAddToken2(T_DIV_SET, T_DIV); } } static void tParseToken(int c) { if(isLetter(c)) { tParseLiteral(c); return; } else if(isNumber(c)) { tParseNumber(c); return; } switch(c) { case ' ': return; case '\n': line++; return; case '+': tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD); return; case '-': tAddTokenMinus(); return; case '*': tAddToken2(T_MUL_SET, T_MUL); return; case '/': tSlash(); return; case '%': tAddToken2(T_MOD_SET, T_MOD); return; case '<': tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL, T_LESS); return; case '>': tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT, T_GREATER_EQUAL, T_GREATER); return; case '=': tAddToken2(T_EQUAL, T_SET); return; case '!': tAddToken2(T_NOT_EQUAL, T_NOT); return; case '&': tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND); return; case '|': tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR); return; case '~': tAddToken(T_BIT_NOT); return; case '^': tAddToken2(T_BIT_XOR_SET, T_BIT_XOR); return; case ',': tAddToken(T_COMMA); return; case ';': tAddToken(T_SEMICOLON); return; case '(': tAddToken(T_OPEN_BRACKET); return; case ')': tAddToken(T_CLOSE_BRACKET); return; case '{': tAddToken(T_OPEN_CURVED_BRACKET); return; case '}': tAddToken(T_CLOSE_CURVED_BRACKET); return; case '"': tAddString(); return; case '.': tAddToken(T_POINT); return; case '[': tAddToken(T_OPEN_SQUARE_BRACKET); return; case ']': tAddToken(T_CLOSE_SQUARE_BRACKET); return; } tError("unknown character on line %d: %c", line, c); } static void tParseFile() { readIndex = 0; writeIndex = 0; line = 1; error[0] = '\0'; while(true) { int c = fRead(); if(c == EOF) { return; } tParseToken(c); } } bool tTokenize(const char* path) { if(!setjmp(errorJump)) { fOpen(path, tError); tParseFile(); } fClose(); return error[0] != '\0'; } const char* tGetError() { return error; } void tResetReader() { readIndex = 0; } Token tPeekToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex]; } Token tReadToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex++]; } bool tReadInt(int* i) { if(tReadTokens(i, sizeof(int))) { return true; } return false; } bool tReadInt16(int16* i) { if(tReadTokens(i, sizeof(int16))) { return true; } return false; } bool tReadFloat(float* f) { if(tReadTokens(f, sizeof(float))) { return true; } return false; } const char* tReadString() { const char* s = tokenBuffer + readIndex; while(readIndex <= writeIndex) { if(tokenBuffer[readIndex++] == '\0') { return s; } } return NULL; } int tGetMarker() { return readIndex; } void tReset(int marker) { readIndex = marker; }