#include #include #include #include #include #include "Tokenizer.h" #include "Utils.h" #define TOKEN_BUFFER_LENGTH (1024 * 1024) #define ERROR_LENGTH 256 static char tokenBuffer[TOKEN_BUFFER_LENGTH]; static int writeIndex = 0; static int readIndex = 0; static int16 line = 1; static FILE* file = NULL; static char error[ERROR_LENGTH] = {'\0'}; typedef struct Literal { const char* name; Token token; } Literal; Literal LITERALS[] = {{"print", T_PRINT}, {"null", T_NULL}, {"true", T_TRUE}, {"false", T_FALSE}, {"function", T_FUNCTION}, {"return", T_RETURN}, {"if", T_IF}, {"else", T_ELSE}, {"while", T_WHILE}}; const int LITERAL_AMOUNT = sizeof(LITERALS) / sizeof(Literal); static void tError(const char* format, ...) { va_list args; va_start(args, format); vsnprintf(error, ERROR_LENGTH, format, args); va_end(args); } static bool tAdd(const void* data, int length) { if(writeIndex + length > TOKEN_BUFFER_LENGTH) { tError("the token buffer is too small"); return false; } memcpy(tokenBuffer + writeIndex, data, length); writeIndex += length; return true; } static bool tAddToken(Token token) { unsigned char c = token; return tAdd(&c, 1) && tAdd(&line, sizeof(line)); } static bool tReadTokens(void* dest, int length) { if(readIndex + length > writeIndex) { return false; } memcpy(dest, tokenBuffer + readIndex, length); readIndex += length; return true; } static int tRead() { return fgetc(file); } static int tPeek() { int c = tRead(); ungetc(c, file); return c; } static int tReadIf(int c) { if(tPeek() == c) { tRead(); return true; } return false; } static bool tParseLiteral(int c) { int index = 1; char buffer[64]; buffer[0] = c; while(isLetter(tPeek())) { if(index >= 63) { tError("literal is too long"); return false; } buffer[index++] = tRead(); } buffer[index] = '\0'; for(int i = 0; i < LITERAL_AMOUNT; i++) { if(strcmp(buffer, LITERALS[i].name) == 0) { return tAddToken(LITERALS[i].token); } } return tAddToken(T_LITERAL) && tAdd(buffer, index + 1); } static bool tParseNumber(int c) { int index = 1; char buffer[64]; buffer[0] = c; bool point = false; while(true) { int c = tPeek(); if(c == '.') { point = true; } else if(!isNumber(c)) { break; } else if(index >= 63) { tError("number is too long"); return false; } buffer[index++] = tRead(); } buffer[index] = '\0'; if(point) { char* end = NULL; float f = strtof(buffer, &end); if(end[0] != '\0') { tError("invalid float on line %d", line); return false; } return tAddToken(T_FLOAT) && tAdd(&f, sizeof(float)); } else { char* end = NULL; long l = strtol(buffer, &end, 10); if(end[0] != '\0' || l > INT_MAX) { tError("invalid int on line %d", line); return false; } int i = l; return tAddToken(T_INT) && tAdd(&i, sizeof(int)); } } static bool tParseToken() { int c = tRead(); if(c == EOF) { return false; } else if(isLetter(c)) { return tParseLiteral(c); } else if(isNumber(c)) { return tParseNumber(c); } switch(c) { case ' ': return true; case '\n': line++; return true; case '+': return tReadIf('+') ? tAddToken(T_INCREMENT) : tAddToken(T_ADD); case '-': return tReadIf('-') ? tAddToken(T_DECREMENT) : tAddToken(T_SUB); case '*': return tAddToken(T_MUL); case '/': return tAddToken(T_DIV); case '%': return tAddToken(T_MOD); case '<': return tReadIf('=') ? tAddToken(T_LESS_EQUAL) : tAddToken(T_LESS); case '>': return tReadIf('=') ? tAddToken(T_GREATER_EQUAL) : tAddToken(T_GREATER); case '=': return tReadIf('=') ? tAddToken(T_EQUAL) : tAddToken(T_SET); case '!': return tReadIf('=') ? tAddToken(T_NOT_EQUAL) : tAddToken(T_NOT); case '&': return tReadIf('&') ? tAddToken(T_AND) : tAddToken(T_BIT_AND); case '|': return tReadIf('|') ? tAddToken(T_OR) : tAddToken(T_BIT_OR); case ',': return tAddToken(T_COMMA); case ';': return tAddToken(T_SEMICOLON); case '(': return tAddToken(T_OPEN_BRACKET); case ')': return tAddToken(T_CLOSE_BRACKET); case '{': return tAddToken(T_OPEN_CURVED_BRACKET); case '}': return tAddToken(T_CLOSE_CURVED_BRACKET); } tError("unknown character on line %d: %c", line, c); return false; } static void tParseFile() { readIndex = 0; writeIndex = 0; line = 1; error[0] = '\0'; while(tParseToken()) { } } bool tTokenize(const char* path) { file = fopen(path, "r"); if(file == NULL) { tError("cannot read file '%s'", path); return true; } tParseFile(); fclose(file); return error[0] != '\0'; } const char* tGetError() { return error; } void tResetReader() { readIndex = 0; } Token tPeekToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex]; } Token tReadToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex++]; } bool tReadInt(int* i) { if(tReadTokens(i, sizeof(int))) { return true; } return false; } bool tReadInt16(int16* i) { if(tReadTokens(i, sizeof(int16))) { return true; } return false; } bool tReadFloat(float* f) { if(tReadTokens(f, sizeof(float))) { return true; } return false; } const char* tReadString() { const char* s = tokenBuffer + readIndex; while(readIndex <= writeIndex) { if(tokenBuffer[readIndex] == '\0') { readIndex++; return s; } readIndex++; } return NULL; } const char* tGetTokenName(Token token) { switch(token) { case T_INT: return "int"; case T_FLOAT: return "float"; case T_NULL: return "null"; case T_TRUE: return "true"; case T_FALSE: return "false"; case T_ADD: return "+"; case T_SUB: return "-"; case T_MUL: return "*"; case T_DIV: return "/"; case T_MOD: return "%"; case T_LESS: return "<"; case T_LESS_EQUAL: return "<="; case T_GREATER: return ">"; case T_GREATER_EQUAL: return ">="; case T_EQUAL: return "=="; case T_NOT_EQUAL: return "!="; case T_NOT: return "!"; case T_AND: return "&&"; case T_OR: return "||"; case T_BIT_AND: return "&"; case T_BIT_OR: return "|"; case T_SET: return "="; case T_INCREMENT: return "++"; case T_DECREMENT: return "--"; case T_LITERAL: return "literal"; case T_PRINT: return "print"; case T_IF: return "if"; case T_ELSE: return "else"; case T_WHILE: return "while"; case T_FUNCTION: return "function"; case T_RETURN: return "return"; case T_COMMA: return ","; case T_SEMICOLON: return ";"; case T_OPEN_BRACKET: return "("; case T_CLOSE_BRACKET: return ")"; case T_OPEN_CURVED_BRACKET: return "{"; case T_CLOSE_CURVED_BRACKET: return "}"; case T_END: return "end"; } return "Unknown"; } int tGetMarker() { return readIndex; } void tResetToMarker(int marker) { readIndex = marker; } void tPrint() { puts("----------------"); while(true) { Token t = tReadToken(); if(t == T_END) { break; } int line = 0; tReadInt(&line); printf("%d: %s\n", line, tGetTokenName(t)); if(t == T_INT) { tReadInt(&line); } } tResetReader(); }