#include #include #include #include #include #include "tokenizer/File.h" #include "tokenizer/Tokenizer.h" #include "utils/Utils.h" #define TOKEN_BUFFER_LENGTH (1024 * 1024) #define ERROR_LENGTH 256 static char tokenBuffer[TOKEN_BUFFER_LENGTH]; static int writeIndex = 0; static int readIndex = 0; static int16 line = 1; static char error[ERROR_LENGTH] = {'\0'}; static void tError(const char* format, ...) { va_list args; va_start(args, format); vsnprintf(error, ERROR_LENGTH, format, args); va_end(args); } static bool tAdd(const void* data, int length) { if(writeIndex + length > TOKEN_BUFFER_LENGTH) { tError("the token buffer is too small"); return false; } memcpy(tokenBuffer + writeIndex, data, length); writeIndex += length; return true; } static bool tAddToken(Token token) { unsigned char c = token; return tAdd(&c, 1) && tAdd(&line, sizeof(line)); } static bool tReadTokens(void* dest, int length) { if(readIndex + length > writeIndex) { return false; } memcpy(dest, tokenBuffer + readIndex, length); readIndex += length; return true; } static bool tParseLiteral(int c) { int index = 1; char buffer[64]; buffer[0] = c; while(isLetter(fPeek())) { if(index >= 63) { tError("literal is too long"); return false; } buffer[index++] = fRead(); } buffer[index] = '\0'; Token t = tFromName(buffer); if(t != T_END) { return tAddToken(t); } return tAddToken(T_LITERAL) && tAdd(buffer, index + 1); } static bool tParseNumber(int c) { int index = 1; char buffer[64]; buffer[0] = c; bool point = false; while(true) { int c = fPeek(); if(c == '.') { point = true; } else if(!isNumber(c)) { break; } else if(index >= 63) { tError("number is too long"); return false; } buffer[index++] = fRead(); } buffer[index] = '\0'; if(point) { char* end = NULL; float f = strtof(buffer, &end); if(end[0] != '\0') { tError("invalid float on line %d", line); return false; } return tAddToken(T_FLOAT) && tAdd(&f, sizeof(float)); } else { char* end = NULL; long l = strtol(buffer, &end, 10); if(end[0] != '\0' || l > INT_MAX) { tError("invalid int on line %d", line); return false; } int i = l; return tAddToken(T_INT) && tAdd(&i, sizeof(int)); } } static bool tAddString() { if(!tAddToken(T_TEXT)) { return false; } while(true) { int c = fRead(); if(c == '"') { break; } else if(c == '\\') { switch(fRead()) { case '"': c = '"'; break; case '\\': c = '\\'; break; default: tError("unknown escaped character at line %d", line); return false; } } else if(c == EOF) { tError("unclosed string starting at line %d", line); return false; } if(!tAdd(&c, 1)) { return false; } } char c = '\0'; return tAdd(&c, 1); } static bool tAddToken2(Token te, Token t) { return fReadIf('=') ? tAddToken(te) : tAddToken(t); } static bool tAddToken3(int c, Token tc, Token te, Token t) { return fReadIf(c) ? tAddToken(tc) : tAddToken2(te, t); } static bool tAddToken4(int c, Token tce, Token tc, Token te, Token t) { return fReadIf(c) ? tAddToken2(tce, tc) : tAddToken2(te, t); } static bool tLineComment() { while(true) { int c = fRead(); if(c == EOF || c == '\n') { line++; return true; } } } static bool tMultipleLineComment() { while(true) { int c = fRead(); if(c == EOF) { tError("unclosed comment at line %d", line); return false; } else if(c == '\n') { line++; } else if(c == '*' && fReadIf('/')) { return true; } } } static bool tSlash() { if(fReadIf('/')) { return tLineComment(); } else if(fReadIf('*')) { return tMultipleLineComment(); } return tAddToken2(T_DIV_SET, T_DIV); } static bool tParseToken() { int c = fRead(); if(c == EOF) { return false; } else if(isLetter(c)) { return tParseLiteral(c); } else if(isNumber(c)) { return tParseNumber(c); } switch(c) { case ' ': return true; case '\n': line++; return true; case '+': return tAddToken3('+', T_INCREMENT, T_ADD_SET, T_ADD); case '-': return tAddToken3('-', T_DECREMENT, T_SUB_SET, T_SUB); case '*': return tAddToken2(T_MUL_SET, T_MUL); case '/': return tSlash(); case '%': return tAddToken2(T_MOD_SET, T_MOD); case '<': return tAddToken4('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL, T_LESS); case '>': return tAddToken4('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT, T_GREATER_EQUAL, T_GREATER); case '=': return tAddToken2(T_EQUAL, T_SET); case '!': return tAddToken2(T_NOT_EQUAL, T_NOT); case '&': return tAddToken3('&', T_AND, T_BIT_AND_SET, T_BIT_AND); case '|': return tAddToken3('|', T_OR, T_BIT_OR_SET, T_BIT_OR); case '~': return tAddToken(T_BIT_NOT); case '^': return tAddToken2(T_BIT_XOR_SET, T_BIT_XOR); case ',': return tAddToken(T_COMMA); case ';': return tAddToken(T_SEMICOLON); case '(': return tAddToken(T_OPEN_BRACKET); case ')': return tAddToken(T_CLOSE_BRACKET); case '{': return tAddToken(T_OPEN_CURVED_BRACKET); case '}': return tAddToken(T_CLOSE_CURVED_BRACKET); case '"': return tAddString(); case '.': return tAddToken(T_POINT); case '[': return tAddToken(T_OPEN_SQUARE_BRACKET); case ']': return tAddToken(T_CLOSE_SQUARE_BRACKET); } tError("unknown character on line %d: %c", line, c); return false; } static void tParseFile() { readIndex = 0; writeIndex = 0; line = 1; error[0] = '\0'; while(tParseToken()) { } } bool tTokenize(const char* path) { if(fOpen(path)) { tError("cannot read file '%s'", path); return true; } tParseFile(); fClose(); return error[0] != '\0'; } const char* tGetError() { return error; } void tResetReader() { readIndex = 0; } Token tPeekToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex]; } Token tReadToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex++]; } bool tReadInt(int* i) { if(tReadTokens(i, sizeof(int))) { return true; } return false; } bool tReadInt16(int16* i) { if(tReadTokens(i, sizeof(int16))) { return true; } return false; } bool tReadFloat(float* f) { if(tReadTokens(f, sizeof(float))) { return true; } return false; } const char* tReadString(int* length) { *length = 0; const char* s = tokenBuffer + readIndex; while(readIndex <= writeIndex) { (*length)++; if(tokenBuffer[readIndex++] == '\0') { return s; } } return NULL; }