#include #include #include "Tokenizer.h" #include "Utils.h" #define TOKEN_BUFFER_LENGTH (1024 * 1024) static char tokenBuffer[TOKEN_BUFFER_LENGTH]; static int writeIndex = 0; static int readIndex = 0; static const char* TOO_LONG_LITERAL = "literal is too long"; static const char* UNKNOWN_CHARACTER = "unknown character"; static FILE* file = NULL; static const char* error = NULL; static bool tAdd(const void* data, int length) { if(writeIndex + length > TOKEN_BUFFER_LENGTH) { return false; } memcpy(tokenBuffer + writeIndex, data, length); writeIndex += length; return true; } static bool tAddToken(Token token) { unsigned char c = token; return tAdd(&c, 1); } static bool tReadTokens(void* dest, int length) { if(readIndex + length > writeIndex) { return false; } memcpy(dest, tokenBuffer + readIndex, length); readIndex += length; return true; } static int tRead() { return fgetc(file); } static int tPeek() { int c = tRead(); ungetc(c, file); return c; } static bool tParseLiteral(int c) { int index = 1; char buffer[64]; buffer[0] = c; while(isLetter(tPeek())) { if(index >= 63) { error = TOO_LONG_LITERAL; return false; } buffer[index++] = tRead(); } buffer[index] = '\0'; if(strcmp(buffer, "print") == 0) { return tAddToken(T_PRINT); } return true; } static bool tParseNumber(int c) { int sum = c - '0'; while(isNumber(tPeek())) { sum = sum * 10 + (tRead() - '0'); } return tAddToken(T_INT) && tAdd(&sum, sizeof(int)); } static bool tParseToken() { int c = tRead(); if(c == EOF) { return false; } else if(isLetter(c)) { return tParseLiteral(c); } else if(isNumber(c)) { return tParseNumber(c); } switch(c) { case ' ': case '\n': return true; case '+': return tAddToken(T_ADD); case ';': return tAddToken(T_SEMICOLON); } error = UNKNOWN_CHARACTER; return false; } static void tParseFile() { readIndex = 0; writeIndex = 0; error = NULL; while(tParseToken()) { } } bool tTokenize(const char* path) { file = fopen(path, "r"); if(file == NULL) { error = "cannot read file"; return true; } tParseFile(); fclose(file); return error != NULL; } const char* tGetError() { return error; } void tResetReader() { readIndex = 0; } Token tPeekToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex]; } Token tReadToken() { if(readIndex >= writeIndex) { return T_END; } return tokenBuffer[readIndex++]; } bool tReadInt(int* i) { if(tReadTokens(i, sizeof(int))) { return true; } return false; } const char* tGetTokenName(Token token) { switch(token) { case T_INT: return "int"; case T_ADD: return "+"; case T_PRINT: return "print"; case T_SEMICOLON: return ";"; case T_END: return "end"; } return "Unknown"; }