#include #include #include #include #include #include "Tokenizer.h" #include "Utils.h" #define TOKEN_BUFFER_LENGTH (1024 * 1024) #define ERROR_LENGTH 256 static char tokenBuffer[TOKEN_BUFFER_LENGTH]; static int writeIndex = 0; static int readIndex = 0; static int16 line = 1; static FILE* file = NULL; static char error[ERROR_LENGTH] = {'\0'}; static void tError(const char* format, ...) { va_list args; va_start(args, format); vsnprintf(error, ERROR_LENGTH, format, args); va_end(args); } static bool tAdd(const void* data, int length) { if(writeIndex + length > TOKEN_BUFFER_LENGTH) { tError("the token buffer is too small"); return false; } memcpy(tokenBuffer + writeIndex, data, length); writeIndex += length; return true; } static bool tAddToken(Token token) { unsigned char c = token; return tAdd(&c, 1) && tAdd(&line, sizeof(line)); } static bool tReadTokens(void* dest, int length) { if(readIndex + length > writeIndex) { return false; } memcpy(dest, tokenBuffer + readIndex, length); readIndex += length; return true; } static int tRead() { return fgetc(file); } static int tPeek() { int c = tRead(); ungetc(c, file); return c; } static bool tParseLiteral(int c) { int index = 1; char buffer[64]; buffer[0] = c; while(isLetter(tPeek())) { if(index >= 63) { tError("literal is too long"); return false; } buffer[index++] = tRead(); } buffer[index] = '\0'; if(strcmp(buffer, "print") == 0) { return tAddToken(T_PRINT); } else if(strcmp(buffer, "null") == 0) { return tAddToken(T_NULL); } else if(strcmp(buffer, "true") == 0) { return tAddToken(T_TRUE); } else if(strcmp(buffer, "false") == 0) { return tAddToken(T_FALSE); } else if(strcmp(buffer, "function") == 0) { return tAddToken(T_FUNCTION); } else if(strcmp(buffer, "return") == 0) { return tAddToken(T_RETURN); } return tAddToken(T_LITERAL) && tAdd(buffer, index + 1); } static bool tParseNumber(int c) { int index = 1; char buffer[64]; buffer[0] = c; bool point = false; while(true) { int c = tPeek(); if(c == '.') { point = true; } else if(!isNumber(c)) { break; } else if(index >= 63) { tError("number is too long"); return false; } buffer[index++] = tRead(); } buffer[index] = '\0'; if(point) { char* end = NULL; float f = strtof(buffer, &end); if(end[0] != '\0') { tError("invalid float on line %d", line); return false; } return tAddToken(T_FLOAT) && tAdd(&f, sizeof(float)); } else { char* end = NULL; long l = strtol(buffer, &end, 10); if(end[0] != '\0' || l > INT_MAX) { tError("invalid int on line %d", line); return false; } int i = l; return tAddToken(T_INT) && tAdd(&i, sizeof(int)); } } static bool tParseToken() { int c = tRead(); if(c == EOF) { return false; } else if(isLetter(c)) { return tParseLiteral(c); } else if(isNumber(c)) { return tParseNumber(c); } switch(c) { case ' ': return true; case '\n': line++; return true; case '+': return tAddToken(T_ADD); case '*': return tAddToken(T_MUL); case '=': return tAddToken(T_SET); case ',': return tAddToken(T_COMMA); case ';': return tAddToken(T_SEMICOLON); case '(': return tAddToken(T_OPEN_BRACKET); case ')': return tAddToken(T_CLOSE_BRACKET); case '{': return tAddToken(T_OPEN_CURVED_BRACKET); case '}': return tAddToken(T_CLOSE_CURVED_BRACKET); } tError("unknown character on line %d: %c", line, c); return false; } static void tParseFile() { readIndex = 0; writeIndex = 0; line = 1; error[0] = '\0'; while(tParseToken()) { } } bool tTokenize(const char* path) { file = fopen(path, "r"); if(file == NULL) { tError("cannot read file '%s'", path); return true; } tParseFile(); fclose(file); return error[0] != '\0'; } const char* tGetError() { return error; } void tResetReader() { readIndex = 0; } Token tPeekToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex]; } Token tReadToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex++]; } bool tReadInt(int* i) { if(tReadTokens(i, sizeof(int))) { return true; } return false; } bool tReadInt16(int16* i) { if(tReadTokens(i, sizeof(int16))) { return true; } return false; } bool tReadFloat(float* f) { if(tReadTokens(f, sizeof(float))) { return true; } return false; } const char* tReadString() { const char* s = tokenBuffer + readIndex; while(readIndex <= writeIndex) { if(tokenBuffer[readIndex] == '\0') { readIndex++; return s; } readIndex++; } return NULL; } const char* tGetTokenName(Token token) { switch(token) { case T_INT: return "int"; case T_FLOAT: return "float"; case T_NULL: return "null"; case T_TRUE: return "true"; case T_FALSE: return "false"; case T_ADD: return "+"; case T_MUL: return "*"; case T_SET: return "="; case T_LITERAL: return "literal"; case T_PRINT: return "print"; case T_FUNCTION: return "function"; case T_RETURN: return "return"; case T_COMMA: return ","; case T_SEMICOLON: return ";"; case T_OPEN_BRACKET: return "("; case T_CLOSE_BRACKET: return ")"; case T_OPEN_CURVED_BRACKET: return "{"; case T_CLOSE_CURVED_BRACKET: return "}"; case T_END: return "end"; } return "Unknown"; } int tGetMarker() { return readIndex; } void tResetToMarker(int marker) { readIndex = marker; } void tPrint() { puts("----------------"); while(true) { Token t = tReadToken(); if(t == T_END) { break; } int line; tReadInt(&line); printf("%d: %s\n", line, tGetTokenName(t)); if(t == T_INT) { tReadInt(&line); } } tResetReader(); }