|
@@ -1,390 +0,0 @@
|
|
|
-#include <limits.h>
|
|
|
-#include <stdarg.h>
|
|
|
-#include <stdio.h>
|
|
|
-#include <stdlib.h>
|
|
|
-#include <string.h>
|
|
|
-
|
|
|
-#include "Tokenizer.h"
|
|
|
-#include "Utils.h"
|
|
|
-
|
|
|
-#define TOKEN_BUFFER_LENGTH (1024 * 1024)
|
|
|
-#define ERROR_LENGTH 256
|
|
|
-
|
|
|
-static char tokenBuffer[TOKEN_BUFFER_LENGTH];
|
|
|
-static int writeIndex = 0;
|
|
|
-static int readIndex = 0;
|
|
|
-static int16 line = 1;
|
|
|
-static FILE* file = NULL;
|
|
|
-static char error[ERROR_LENGTH] = {'\0'};
|
|
|
-
|
|
|
-typedef struct Literal {
|
|
|
- const char* name;
|
|
|
- Token token;
|
|
|
-} Literal;
|
|
|
-
|
|
|
-Literal LITERALS[] = {{"print", T_PRINT}, {"null", T_NULL}, {"true", T_TRUE}, {"false", T_FALSE},
|
|
|
- {"function", T_FUNCTION}, {"return", T_RETURN}, {"if", T_IF}, {"else", T_ELSE},
|
|
|
- {"while", T_WHILE}, {"for", T_FOR}, {"break", T_BREAK}, {"continue", T_CONTINUE},
|
|
|
- {"array", T_ARRAY}};
|
|
|
-const int LITERAL_AMOUNT = sizeof(LITERALS) / sizeof(Literal);
|
|
|
-
|
|
|
-static void tError(const char* format, ...) {
|
|
|
- va_list args;
|
|
|
- va_start(args, format);
|
|
|
- vsnprintf(error, ERROR_LENGTH, format, args);
|
|
|
- va_end(args);
|
|
|
-}
|
|
|
-
|
|
|
-static bool tAdd(const void* data, int length) {
|
|
|
- if(writeIndex + length > TOKEN_BUFFER_LENGTH) {
|
|
|
- tError("the token buffer is too small");
|
|
|
- return false;
|
|
|
- }
|
|
|
- memcpy(tokenBuffer + writeIndex, data, length);
|
|
|
- writeIndex += length;
|
|
|
- return true;
|
|
|
-}
|
|
|
-
|
|
|
-static bool tAddToken(Token token) {
|
|
|
- unsigned char c = token;
|
|
|
- return tAdd(&c, 1) && tAdd(&line, sizeof(line));
|
|
|
-}
|
|
|
-
|
|
|
-static bool tReadTokens(void* dest, int length) {
|
|
|
- if(readIndex + length > writeIndex) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- memcpy(dest, tokenBuffer + readIndex, length);
|
|
|
- readIndex += length;
|
|
|
- return true;
|
|
|
-}
|
|
|
-
|
|
|
-static int tRead() {
|
|
|
- return fgetc(file);
|
|
|
-}
|
|
|
-
|
|
|
-static int tPeek() {
|
|
|
- int c = tRead();
|
|
|
- ungetc(c, file);
|
|
|
- return c;
|
|
|
-}
|
|
|
-
|
|
|
// Consumes the next character only if it equals `c`.
// Returns true when the character was consumed, false otherwise.
static int tReadIf(int c) {
    if(tPeek() != c) {
        return false;
    }
    tRead();
    return true;
}
|
|
|
-
|
|
|
-static bool tParseLiteral(int c) {
|
|
|
- int index = 1;
|
|
|
- char buffer[64];
|
|
|
- buffer[0] = c;
|
|
|
- while(isLetter(tPeek())) {
|
|
|
- if(index >= 63) {
|
|
|
- tError("literal is too long");
|
|
|
- return false;
|
|
|
- }
|
|
|
- buffer[index++] = tRead();
|
|
|
- }
|
|
|
- buffer[index] = '\0';
|
|
|
- for(int i = 0; i < LITERAL_AMOUNT; i++) {
|
|
|
- if(strcmp(buffer, LITERALS[i].name) == 0) {
|
|
|
- return tAddToken(LITERALS[i].token);
|
|
|
- }
|
|
|
- }
|
|
|
- return tAddToken(T_LITERAL) && tAdd(buffer, index + 1);
|
|
|
-}
|
|
|
-
|
|
|
-static bool tParseNumber(int c) {
|
|
|
- int index = 1;
|
|
|
- char buffer[64];
|
|
|
- buffer[0] = c;
|
|
|
- bool point = false;
|
|
|
- while(true) {
|
|
|
- int c = tPeek();
|
|
|
- if(c == '.') {
|
|
|
- point = true;
|
|
|
- } else if(!isNumber(c)) {
|
|
|
- break;
|
|
|
- } else if(index >= 63) {
|
|
|
- tError("number is too long");
|
|
|
- return false;
|
|
|
- }
|
|
|
- buffer[index++] = tRead();
|
|
|
- }
|
|
|
- buffer[index] = '\0';
|
|
|
- if(point) {
|
|
|
- char* end = NULL;
|
|
|
- float f = strtof(buffer, &end);
|
|
|
- if(end[0] != '\0') {
|
|
|
- tError("invalid float on line %d", line);
|
|
|
- return false;
|
|
|
- }
|
|
|
- return tAddToken(T_FLOAT) && tAdd(&f, sizeof(float));
|
|
|
- } else {
|
|
|
- char* end = NULL;
|
|
|
- long l = strtol(buffer, &end, 10);
|
|
|
- if(end[0] != '\0' || l > INT_MAX) {
|
|
|
- tError("invalid int on line %d", line);
|
|
|
- return false;
|
|
|
- }
|
|
|
- int i = l;
|
|
|
- return tAddToken(T_INT) && tAdd(&i, sizeof(int));
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-static bool tAddString() {
|
|
|
- if(!tAddToken(T_TEXT)) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- while(true) {
|
|
|
- int c = tRead();
|
|
|
- if(c == '"') {
|
|
|
- break;
|
|
|
- } else if(c == '\\') {
|
|
|
- switch(tRead()) {
|
|
|
- case '"': c = '"'; break;
|
|
|
- case '\\': c = '\\'; break;
|
|
|
- default: tError("unknown escaped character at line %d", line); return false;
|
|
|
- }
|
|
|
- } else if(c == EOF) {
|
|
|
- tError("unclosed string starting at line %d", line);
|
|
|
- return false;
|
|
|
- }
|
|
|
- if(!tAdd(&c, 1)) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- }
|
|
|
- char c = '\0';
|
|
|
- return tAdd(&c, 1);
|
|
|
-}
|
|
|
-
|
|
|
-static bool tAddTokenChecked(int c, Token tc, Token te, Token t) {
|
|
|
- if(tReadIf(c)) {
|
|
|
- return tAddToken(tc);
|
|
|
- } else if(tReadIf('=')) {
|
|
|
- return tAddToken(te);
|
|
|
- }
|
|
|
- return tAddToken(t);
|
|
|
-}
|
|
|
-
|
|
|
-static bool tAddLongTokenChecked(int c, Token tce, Token tc, Token te, Token t) {
|
|
|
- if(tReadIf(c)) {
|
|
|
- if(tReadIf('=')) {
|
|
|
- return tAddToken(tce);
|
|
|
- } else {
|
|
|
- return tAddToken(tc);
|
|
|
- }
|
|
|
- } else if(tReadIf('=')) {
|
|
|
- return tAddToken(te);
|
|
|
- }
|
|
|
- return tAddToken(t);
|
|
|
-}
|
|
|
-
|
|
|
-static bool tParseToken() {
|
|
|
- int c = tRead();
|
|
|
- if(c == EOF) {
|
|
|
- return false;
|
|
|
- } else if(isLetter(c)) {
|
|
|
- return tParseLiteral(c);
|
|
|
- } else if(isNumber(c)) {
|
|
|
- return tParseNumber(c);
|
|
|
- }
|
|
|
- switch(c) {
|
|
|
- case ' ': return true;
|
|
|
- case '\n': line++; return true;
|
|
|
- case '+': return tAddTokenChecked('+', T_INCREMENT, T_ADD_SET, T_ADD);
|
|
|
- case '-': return tAddTokenChecked('-', T_DECREMENT, T_SUB_SET, T_SUB);
|
|
|
- case '*': return tReadIf('=') ? tAddToken(T_MUL_SET) : tAddToken(T_MUL);
|
|
|
- case '/': return tReadIf('=') ? tAddToken(T_DIV_SET) : tAddToken(T_DIV);
|
|
|
- case '%': return tReadIf('=') ? tAddToken(T_MOD_SET) : tAddToken(T_MOD);
|
|
|
- case '<': return tAddLongTokenChecked('<', T_LEFT_SHIFT_SET, T_LEFT_SHIFT, T_LESS_EQUAL, T_LESS);
|
|
|
- case '>': return tAddLongTokenChecked('>', T_RIGHT_SHIFT_SET, T_RIGHT_SHIFT, T_GREATER_EQUAL, T_GREATER);
|
|
|
- case '=': return tReadIf('=') ? tAddToken(T_EQUAL) : tAddToken(T_SET);
|
|
|
- case '!': return tReadIf('=') ? tAddToken(T_NOT_EQUAL) : tAddToken(T_NOT);
|
|
|
- case '&': return tAddTokenChecked('&', T_AND, T_BIT_AND_SET, T_BIT_AND);
|
|
|
- case '|': return tAddTokenChecked('|', T_OR, T_BIT_OR_SET, T_BIT_OR);
|
|
|
- case '~': return tAddToken(T_BIT_NOT);
|
|
|
- case '^': return tReadIf('=') ? tAddToken(T_BIT_XOR_SET) : tAddToken(T_BIT_XOR);
|
|
|
- case ',': return tAddToken(T_COMMA);
|
|
|
- case ';': return tAddToken(T_SEMICOLON);
|
|
|
- case '(': return tAddToken(T_OPEN_BRACKET);
|
|
|
- case ')': return tAddToken(T_CLOSE_BRACKET);
|
|
|
- case '{': return tAddToken(T_OPEN_CURVED_BRACKET);
|
|
|
- case '}': return tAddToken(T_CLOSE_CURVED_BRACKET);
|
|
|
- case '"': return tAddString();
|
|
|
- case '.': return tAddToken(T_POINT);
|
|
|
- case '[': return tAddToken(T_OPEN_SQUARE_BRACKET);
|
|
|
- case ']': return tAddToken(T_CLOSE_SQUARE_BRACKET);
|
|
|
- }
|
|
|
- tError("unknown character on line %d: %c", line, c);
|
|
|
- return false;
|
|
|
-}
|
|
|
-
|
|
|
-static void tParseFile() {
|
|
|
- readIndex = 0;
|
|
|
- writeIndex = 0;
|
|
|
- line = 1;
|
|
|
- error[0] = '\0';
|
|
|
- while(tParseToken()) {
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-bool tTokenize(const char* path) {
|
|
|
- file = fopen(path, "r");
|
|
|
- if(file == NULL) {
|
|
|
- tError("cannot read file '%s'", path);
|
|
|
- return true;
|
|
|
- }
|
|
|
- tParseFile();
|
|
|
- fclose(file);
|
|
|
- return error[0] != '\0';
|
|
|
-}
|
|
|
-
|
|
|
-const char* tGetError() {
|
|
|
- return error;
|
|
|
-}
|
|
|
-
|
|
|
-void tResetReader() {
|
|
|
- readIndex = 0;
|
|
|
-}
|
|
|
-
|
|
|
-Token tPeekToken() {
|
|
|
- if(readIndex >= writeIndex) {
|
|
|
- return T_END;
|
|
|
- }
|
|
|
- return tokenBuffer[readIndex];
|
|
|
-}
|
|
|
-
|
|
|
-Token tReadToken() {
|
|
|
- if(readIndex >= writeIndex) {
|
|
|
- return T_END;
|
|
|
- }
|
|
|
- return tokenBuffer[readIndex++];
|
|
|
-}
|
|
|
-
|
|
|
// Reads sizeof(int) raw bytes from the token stream into `*i`.
// Returns false, leaving `*i` untouched, when not enough bytes remain.
bool tReadInt(int* i) {
    return tReadTokens(i, sizeof(int));
}
|
|
|
-
|
|
|
-bool tReadInt16(int16* i) {
|
|
|
- if(tReadTokens(i, sizeof(int16))) {
|
|
|
- return true;
|
|
|
- }
|
|
|
- return false;
|
|
|
-}
|
|
|
-
|
|
|
// Reads sizeof(float) raw bytes from the token stream into `*f`.
// Returns false, leaving `*f` untouched, when not enough bytes remain.
bool tReadFloat(float* f) {
    return tReadTokens(f, sizeof(float));
}
|
|
|
-
|
|
|
-const char* tReadString(int* length) {
|
|
|
- *length = 0;
|
|
|
- const char* s = tokenBuffer + readIndex;
|
|
|
- while(readIndex <= writeIndex) {
|
|
|
- (*length)++;
|
|
|
- if(tokenBuffer[readIndex++] == '\0') {
|
|
|
- return s;
|
|
|
- }
|
|
|
- }
|
|
|
- return NULL;
|
|
|
-}
|
|
|
-
|
|
|
-const char* tGetTokenName(Token token) {
|
|
|
- switch(token) {
|
|
|
- case T_INT: return "int";
|
|
|
- case T_FLOAT: return "float";
|
|
|
- case T_TEXT: return "text";
|
|
|
- case T_NULL: return "null";
|
|
|
- case T_TRUE: return "true";
|
|
|
- case T_FALSE: return "false";
|
|
|
- case T_ADD: return "+";
|
|
|
- case T_SUB: return "-";
|
|
|
- case T_MUL: return "*";
|
|
|
- case T_DIV: return "/";
|
|
|
- case T_MOD: return "%";
|
|
|
- case T_LESS: return "<";
|
|
|
- case T_LESS_EQUAL: return "<=";
|
|
|
- case T_GREATER: return ">";
|
|
|
- case T_GREATER_EQUAL: return ">=";
|
|
|
- case T_EQUAL: return "==";
|
|
|
- case T_NOT_EQUAL: return "!=";
|
|
|
- case T_NOT: return "!";
|
|
|
- case T_AND: return "&&";
|
|
|
- case T_OR: return "||";
|
|
|
- case T_BIT_NOT: return "~";
|
|
|
- case T_BIT_AND: return "&";
|
|
|
- case T_BIT_OR: return "|";
|
|
|
- case T_BIT_XOR: return "^";
|
|
|
- case T_LEFT_SHIFT: return "<<";
|
|
|
- case T_RIGHT_SHIFT: return ">>";
|
|
|
- case T_SET: return "=";
|
|
|
- case T_ADD_SET: return "+=";
|
|
|
- case T_SUB_SET: return "-=";
|
|
|
- case T_MUL_SET: return "*=";
|
|
|
- case T_DIV_SET: return "/=";
|
|
|
- case T_MOD_SET: return "%=";
|
|
|
- case T_BIT_AND_SET: return "&=";
|
|
|
- case T_BIT_OR_SET: return "|=";
|
|
|
- case T_BIT_XOR_SET: return "^=";
|
|
|
- case T_LEFT_SHIFT_SET: return "<<=";
|
|
|
- case T_RIGHT_SHIFT_SET: return ">>=";
|
|
|
- case T_INCREMENT: return "++";
|
|
|
- case T_DECREMENT: return "--";
|
|
|
- case T_LITERAL: return "literal";
|
|
|
- case T_PRINT: return "print";
|
|
|
- case T_IF: return "if";
|
|
|
- case T_ELSE: return "else";
|
|
|
- case T_WHILE: return "while";
|
|
|
- case T_FOR: return "for";
|
|
|
- case T_BREAK: return "break";
|
|
|
- case T_CONTINUE: return "continue";
|
|
|
- case T_FUNCTION: return "function";
|
|
|
- case T_RETURN: return "return";
|
|
|
- case T_COMMA: return ",";
|
|
|
- case T_SEMICOLON: return ";";
|
|
|
- case T_OPEN_BRACKET: return "(";
|
|
|
- case T_CLOSE_BRACKET: return ")";
|
|
|
- case T_OPEN_CURVED_BRACKET: return "{";
|
|
|
- case T_CLOSE_CURVED_BRACKET: return "}";
|
|
|
- case T_ARRAY: return "array";
|
|
|
- case T_POINT: return ".";
|
|
|
- case T_OPEN_SQUARE_BRACKET: return "[";
|
|
|
- case T_CLOSE_SQUARE_BRACKET: return "]";
|
|
|
- case T_END: return "end";
|
|
|
- }
|
|
|
- return "Unknown";
|
|
|
-}
|
|
|
-
|
|
|
-int tGetMarker() {
|
|
|
- return readIndex;
|
|
|
-}
|
|
|
-
|
|
|
-void tResetToMarker(int marker) {
|
|
|
- readIndex = marker;
|
|
|
-}
|
|
|
-
|
|
|
-void tPrint() {
|
|
|
- puts("----------------");
|
|
|
- while(true) {
|
|
|
- Token t = tReadToken();
|
|
|
- if(t == T_END) {
|
|
|
- break;
|
|
|
- }
|
|
|
- int line = 0;
|
|
|
- tReadInt(&line);
|
|
|
- printf("%d: %s\n", line, tGetTokenName(t));
|
|
|
- if(t == T_INT) {
|
|
|
- tReadInt(&line);
|
|
|
- }
|
|
|
- }
|
|
|
- tResetReader();
|
|
|
-}
|