@@ -1,380 +1,328 @@
-#include <stdio.h>
-#include <uchar.h>
-#include <string.h>
+#include <iostream>
+#include <fstream>

 #include "tokenizer/Tokenizer.h"
+#include "utils/String.h"

-static FILE* input = NULL;
-static TokenStream* tokens = NULL;
-static unsigned int line = 1;
-static char32_t buffer = 0;
-
-static void tokenizer_onError(const char* message, unsigned int line) {
-    printf("%s Line: %u\n", message, line);
-}
-
-static size_t tokenizer_printChar(char32_t c, char* buffer) {
-    if(c <= 0x7F) {
-        buffer[0] = (char) c;
-        return 1;
-    } else if(c < 0xE00000) {
-        buffer[0] = (char) ((c >> 8) & 0xFF);
-        buffer[1] = (char) ((c >> 0) & 0xFF);
-        return 2;
-    } else if(c <= 0xF0000000) {
-        buffer[0] = (char) ((c >> 16) & 0xFF);
-        buffer[1] = (char) ((c >> 8) & 0xFF);
-        buffer[2] = (char) ((c >> 0) & 0xFF);
-        return 3;
-    }
-    buffer[0] = (char) ((c >> 24) & 0xFF);
-    buffer[1] = (char) ((c >> 16) & 0xFF);
-    buffer[2] = (char) ((c >> 8) & 0xFF);
-    buffer[3] = (char) ((c >> 0) & 0xFF);
-    return 4;
+static void onError(const String& message, unsigned int line) {
+    std::cout << message << " Line: " << line << "\n";
 }

-static bool tokenizer_isLetter(char32_t c) {
+static bool isLetter(char32_t c) {
     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
 }

-static bool tokenizer_isDigit(char32_t c) {
+static bool isDigit(char32_t c) {
     return c >= '0' && c <= '9';
 }

-static bool tokenizer_isValidNameStart(char32_t c) {
-    return tokenizer_isLetter(c) || c == '.' || c == '_';
+static bool isValidNameStart(char32_t c) {
+    return isLetter(c) || c == '.' || c == '_';
 }

-static bool tokenizer_isValidNamePart(char32_t c) {
-    return tokenizer_isDigit(c) || tokenizer_isValidNameStart(c);
+static bool isValidNamePart(char32_t c) {
+    return isDigit(c) || isValidNameStart(c);
 }

-static bool tokenizer_next(char32_t* c) {
-    if(buffer != 0) {
-        *c = buffer;
-        buffer = 0;
-        return true;
-    }
-    int in = fgetc(input);
-    if(in == EOF) {
-        return false;
-    }
-    if((in & 0x80) == 0) {
-        *c = in;
-        return true;
-    }
-    if((in >> 5) == 0x6) {
-        *c = (in << 8) | fgetc(input);
-        return true;
-    }
-    if((in >> 4) == 0xE) {
-        *c = (in << 16) | (fgetc(input) << 8) | fgetc(input);
-        return true;
-    }
-    if((in >> 3) == 0x1E) {
-        *c = (in << 24) | (fgetc(input) << 16) | (fgetc(input) << 8) | fgetc(input);
-        return true;
-    }
-    return true;
-}
-
-static bool tokenizer_peek(char32_t* c) {
-    if(buffer != 0 || tokenizer_next(&buffer)) {
-        *c = buffer;
+class Data {
+public:

-        return true;
+    Data(const char* inputPath, TokenStream& tokens) : tokens(tokens) {
+        stream.open(inputPath);
     }
-    return false;
-}
-
-static bool tokenizer_nextIf(char32_t c) {
-    char32_t nextChar;
-    if(tokenizer_peek(&nextChar) && c == nextChar) {
-        tokenizer_next(&nextChar);

-        return true;
+    bool hasFileError() {
+        return !stream.good();
     }
-    return false;
-}
-
-static void tokenizer_addToken(Token token) {
-    tokens->add(token, line);
-}

-static void tokenizer_addStringToken(Token token, const char* text) {
-    tokens->add(token, line, text);
-}
-
-static Token tokenizer_chooseToken(char c, Token aCharEqual, Token aChar, Token aEqual, Token other) {
-    if(tokenizer_nextIf(c)) {
-        if(tokenizer_nextIf('=')) {
-            return aCharEqual;
+    bool next(char32_t& c) {
+        if(buffer != 0) {
+            c = buffer;
+            buffer = 0;
+            return true;
         }
-        return aChar;
-    } else if(tokenizer_nextIf('=')) {
-
-        return aEqual;
+        c = stream.get();
+        return stream.good();
     }
-    return other;
-}

-static bool tokenizer_handleLiteral(char32_t c, Token token) {
-    const size_t bufferSize = 1024;
-    char buffer[bufferSize];
-    size_t index = 1;
-    buffer[0] = c;
+    bool peek(char32_t& c) {
+        if(buffer != 0 || next(buffer)) {
+            c = buffer;
+            return true;
+        }
+        return false;
+    }

-    while(index < bufferSize - 1) {
-        char32_t data;
-        if(!tokenizer_peek(&data) || !tokenizer_isValidNamePart(data)) {
-            break;
+    bool nextIf(char32_t c) {
+        char32_t nextChar;
+        if(peek(nextChar) && c == nextChar) {
+            next(nextChar);
+            return true;
         }
-        buffer[index++] = data;
-        tokenizer_next(&data);
+        return false;
     }
-    buffer[index] = '\0';

-    if(strcmp(buffer, "if") == 0) {
-        tokenizer_addToken(Token::IF);
-    } else if(strcmp(buffer, "else") == 0) {
-        tokenizer_addToken(Token::ELSE);
-    } else if(strcmp(buffer, "elseif") == 0) {
-        tokenizer_addToken(Token::ELSEIF);
-    } else if(strcmp(buffer, "while") == 0) {
-        tokenizer_addToken(Token::WHILE);
-    } else if(strcmp(buffer, "try") == 0) {
-        tokenizer_addToken(Token::TRY);
-    } else if(strcmp(buffer, "catch") == 0) {
-        tokenizer_addToken(Token::CATCH);
-    } else if(strcmp(buffer, "for") == 0) {
-        tokenizer_addToken(Token::FOR);
-    } else if(strcmp(buffer, "function") == 0) {
-        tokenizer_addToken(Token::FUNCTION);
-    } else if(strcmp(buffer, "break") == 0) {
-        tokenizer_addToken(Token::BREAK);
-    } else if(strcmp(buffer, "continue") == 0) {
-        tokenizer_addToken(Token::CONTINUE);
-    } else if(strcmp(buffer, "return") == 0) {
-        tokenizer_addToken(Token::RETURN);
-    } else if(strcmp(buffer, "true") == 0) {
-        tokenizer_addToken(Token::TRUE);
-    } else if(strcmp(buffer, "false") == 0) {
-        tokenizer_addToken(Token::FALSE);
-    } else if(strcmp(buffer, "null") == 0) {
-        tokenizer_addToken(Token::NULL_TOKEN);
-    } else {
+    void addToken(Token token) {
+        tokens.add(token, line);
+    }

-        tokenizer_addStringToken(token, buffer);
+    void addToken(Token token, const char* text) {
+        tokens.add(token, line, text);
     }
-    return false;
-}

-static bool tokenizer_handleNumber(char32_t c) {
-    double number = c - '0';
-    char32_t data;
-    while(tokenizer_peek(&data)) {
-        if(!tokenizer_isDigit(data)) {
-            if(data != '.') {
-                break;
+    Token chooseToken(char c, Token aCharEqual, Token aChar, Token aEqual, Token other) {
+        if(nextIf(c)) {
+            if(nextIf('=')) {
+                return aCharEqual;
             }
-            tokenizer_next(&data);
-            double factor = 10;
-            while(tokenizer_peek(&data) && tokenizer_isDigit(data)) {
-                number += (data - '0') / factor;
-                factor *= 10;
-                tokenizer_next(&data);
-            }
-            break;
+            return aChar;
+        } else if(nextIf('=')) {
+            return aEqual;
         }
-        number = (number * 10) + (data - '0');
-        tokenizer_next(&data);
+        return other;
     }
-    tokens->add(Token::NUMBER, line, number);
-    return false;
-}

-static bool tokenizer_handleString() {
-    const size_t bufferSize = 1024;
-    char buffer[bufferSize];
-    size_t index = 0;
-
-    unsigned int oldLine = line;
-    while(index + 4 < bufferSize) {
-        char32_t data;
-        if(!tokenizer_next(&data)) {
-            tokenizer_onError("non closed string literal", oldLine);
-            return true;
+    bool handleLiteral(char32_t c, Token token) {
+        String s;
+        s += (char) c;
+        while(true) {
+            if(s.isFull()) {
+ onError("string buffer to small", line);
+                return true;
+            }
+            char32_t data;
+            if(!peek(data) || !isValidNamePart(data)) {
+                break;
+            }
+            s += (char) data;
+            next(data);
         }
-        if(data == '"') {
-            buffer[index] = '\0';
-            tokenizer_addStringToken(Token::STRING, buffer);
-            return false;
+        if(s == "if") {
+            addToken(Token::IF);
+        } else if(s == "else") {
+            addToken(Token::ELSE);
+        } else if(s == "elseif") {
+            addToken(Token::ELSEIF);
+        } else if(s == "while") {
+            addToken(Token::WHILE);
+        } else if(s == "try") {
+            addToken(Token::TRY);
+        } else if(s == "catch") {
+            addToken(Token::CATCH);
+        } else if(s == "for") {
+            addToken(Token::FOR);
+        } else if(s == "function") {
+            addToken(Token::FUNCTION);
+        } else if(s == "break") {
+            addToken(Token::BREAK);
+        } else if(s == "continue") {
+            addToken(Token::CONTINUE);
+        } else if(s == "return") {
+            addToken(Token::RETURN);
+        } else if(s == "true") {
+            addToken(Token::TRUE);
+        } else if(s == "false") {
+            addToken(Token::FALSE);
+        } else if(s == "null") {
+            addToken(Token::NULL_TOKEN);
+        } else {
+            addToken(token, s);
         }
-        if(data == '\n') {
-            line++;
+        return false;
+    }
+
+    bool handleNumber(char32_t c) {
+        double number = c - '0';
+        char32_t data;
+        while(peek(data)) {
+            if(!isDigit(data)) {
+                if(data != '.') {
+                    break;
+                }
+                next(data);
+                double factor = 10;
+                while(peek(data) && isDigit(data)) {
+                    number += (data - '0') / factor;
+                    factor *= 10;
+                    next(data);
+                }
+                break;
+            }
+            number = (number * 10) + (data - '0');
+            next(data);
         }
-        if(data == '\\') {
-            char32_t escape;
-            if(!tokenizer_next(&escape)) {
-                tokenizer_onError("missing escaped character", line);
+        tokens.add(Token::NUMBER, line, number);
+        return false;
+    }
+
+    bool handleString() {
+        String s;
+        unsigned int oldLine = line;
+        while(!s.isFull()) {
+            char32_t data;
+            if(!next(data)) {
+ onError("non closed string literal", oldLine);
                 return true;
             }
-            switch(escape) {
-                case 'n': data = '\n';
-                    break;
-                case '\\': data = '\\';
-                    break;
-                case '"': data = '"';
-                    break;
-                default:
-                    tokenizer_onError("invalid escaped character", line);
+            if(data == '"') {
+                addToken(Token::STRING, s);
+                return false;
+            }
+            if(data == '\n') {
+                line++;
+            }
+            if(data == '\\') {
+                char32_t escape;
+                if(!next(escape)) {
+                    onError("missing escaped character", line);
                     return true;
+                }
+                switch(escape) {
+                    case 'n': data = '\n';
+                        break;
+                    case '\\': data = '\\';
+                        break;
+                    case '"': data = '"';
+                        break;
+                    default:
+                        onError("invalid escaped character", line);
+                        return true;
+                }
             }
+            s += data;
         }
-        index += tokenizer_printChar(data, buffer + index);
+ onError("string buffer to small", line);
+        return true;
     }
-    tokenizer_onError("string buffer to small", line);
-
-    return true;
-}
-
-static bool tokenizer_handleOneLineComment() {
-    char32_t data;
-    while(tokenizer_next(&data) && data != '\n');
-    line++;

-    return false;
-}
+    bool handleOneLineComment() {
+        char32_t data;
+        while(next(data) && data != '\n');
+        line++;
+        return false;
+    }

-static bool tokenizer_handleMultiLineComment() {
-    char32_t first;
-    char32_t sec = 0;
-    unsigned int oldLine = line;
-    while(true) {
-        first = sec;
-        if(!tokenizer_next(&sec)) {
-            tokenizer_onError("unclosed multiline comment", oldLine);
-            return true;
+    bool handleMultiLineComment() {
+        char32_t first;
+        char32_t sec = 0;
+        unsigned int oldLine = line;
+        while(true) {
+            first = sec;
+            if(!next(sec)) {
+                onError("unclosed multiline comment", oldLine);
+                return true;
+            }
+            if(first == '*' && sec == '/') {
+                return false;
+            }
+            line += (sec == '\n');
         }
-        if(first == '*' && sec == '/') {
+    }

+    bool handleSlash() {
+        if(nextIf('/')) {
+            return handleOneLineComment();
+        } else if(nextIf('*')) {
+            return handleMultiLineComment();
+        } else if(nextIf('=')) {
+            addToken(Token::DIV_SET);
             return false;
         }
-        line += (sec == '\n');
-    }
-}
-
-static bool tokenizer_handleSlash() {
-    if(tokenizer_nextIf('/')) {
-        return tokenizer_handleOneLineComment();
-    } else if(tokenizer_nextIf('*')) {
-        return tokenizer_handleMultiLineComment();
-    } else if(tokenizer_nextIf('=')) {
-        tokenizer_addToken(Token::DIV_SET);
+        addToken(Token::DIV);
         return false;
     }
-    tokenizer_addToken(Token::DIV);
-
-    return false;
-}

-static bool tokenizer_handleSpecial(char32_t c) {
-    switch(c) {
-        case ' ':
-        case '\t':
-        case '\r':
-            return false;
-        case '\n': line++;
-            return false;
-        case '"':
-            return tokenizer_handleString();
-        case '(': tokenizer_addToken(Token::OPEN_BRACKET);
-            return false;
-        case ')': tokenizer_addToken(Token::CLOSE_BRACKET);
-            return false;
-        case '[': tokenizer_addToken(Token::OPEN_SQUARE_BRACKET);
-            return false;
-        case ']': tokenizer_addToken(Token::CLOSE_SQUARE_BRACKET);
-            return false;
-        case '{': tokenizer_addToken(Token::OPEN_CURVED_BRACKET);
-            return false;
-        case '}': tokenizer_addToken(Token::CLOSE_CURVED_BRACKET);
-            return false;
-        case '$':
-            return tokenizer_handleLiteral(c, Token::LITERAL);
-        case '@':
-            return tokenizer_handleLiteral(c, Token::LABEL);
-        case ';': tokenizer_addToken(Token::SEMICOLON);
-            return false;
-        case ',': tokenizer_addToken(Token::COMMA);
-            return false;
-        case '~': tokenizer_addToken(Token::BIT_INVERT);
-            return false;
-        case '+': tokenizer_addToken(tokenizer_nextIf('=') ? Token::ADD_SET: (tokenizer_nextIf('+') ? Token::INC: Token::ADD));
-            return false;
-        case '-': tokenizer_addToken(tokenizer_nextIf('=') ? Token::SUB_SET: (tokenizer_nextIf('-') ? Token::DEC: Token::SUB));
-            return false;
-        case '!': tokenizer_addToken(tokenizer_nextIf('=') ? Token::NOT_EQUAL: Token::INVERT);
-            break;
-        case '=': tokenizer_addToken(tokenizer_nextIf('=') ? Token::EQUAL: Token::SET);
-            return false;
-        case '*': tokenizer_addToken(tokenizer_nextIf('=') ? Token::MUL_SET: Token::MUL);
-            return false;
-        case '/':
-            return tokenizer_handleSlash();
-        case '%': tokenizer_addToken(tokenizer_nextIf('=') ? Token::MOD_SET: Token::MOD);
-            return false;
-        case '&': tokenizer_addToken(tokenizer_nextIf('=') ? Token::BIT_AND_SET: (tokenizer_nextIf('&') ? Token::AND: Token::BIT_AND));
-            return false;
-        case '|': tokenizer_addToken(tokenizer_nextIf('=') ? Token::BIT_OR_SET: (tokenizer_nextIf('|') ? Token::OR: Token::BIT_OR));
-            return false;
-        case '^': tokenizer_addToken(tokenizer_nextIf('=') ? Token::BIT_XOR_SET: Token::BIT_XOR);
-            return false;
-        case '<': tokenizer_addToken(tokenizer_chooseToken('<', Token::LEFT_SHIFT_SET, Token::LEFT_SHIFT, Token::LESS_EQUAL, Token::LESS));
-            return false;
-        case '>': tokenizer_addToken(tokenizer_chooseToken('>', Token::RIGHT_SHIFT_SET, Token::RIGHT_SHIFT, Token::GREATER_EQUAL, Token::GREATER));
-            return false;
+    bool handleSpecial(char32_t c) {
+        switch(c) {
+            case ' ':
+            case '\t':
+            case '\r':
+                return false;
+            case '\n': line++;
+                return false;
+            case '"':
+                return handleString();
+            case '(': addToken(Token::OPEN_BRACKET);
+                return false;
+            case ')': addToken(Token::CLOSE_BRACKET);
+                return false;
+            case '[': addToken(Token::OPEN_SQUARE_BRACKET);
+                return false;
+            case ']': addToken(Token::CLOSE_SQUARE_BRACKET);
+                return false;
+            case '{': addToken(Token::OPEN_CURVED_BRACKET);
+                return false;
+            case '}': addToken(Token::CLOSE_CURVED_BRACKET);
+                return false;
+            case '$':
+                return handleLiteral(c, Token::LITERAL);
+            case '@':
+                return handleLiteral(c, Token::LABEL);
+            case ';': addToken(Token::SEMICOLON);
+                return false;
+            case ',': addToken(Token::COMMA);
+                return false;
+            case '~': addToken(Token::BIT_INVERT);
+                return false;
+            case '+': addToken(nextIf('=') ? Token::ADD_SET: (nextIf('+') ? Token::INC: Token::ADD));
+                return false;
+            case '-': addToken(nextIf('=') ? Token::SUB_SET: (nextIf('-') ? Token::DEC: Token::SUB));
+                return false;
+            case '!': addToken(nextIf('=') ? Token::NOT_EQUAL: Token::INVERT);
+                return false;
+            case '=': addToken(nextIf('=') ? Token::EQUAL: Token::SET);
+                return false;
+            case '*': addToken(nextIf('=') ? Token::MUL_SET: Token::MUL);
+                return false;
+            case '/':
+                return handleSlash();
+            case '%': addToken(nextIf('=') ? Token::MOD_SET: Token::MOD);
+                return false;
+            case '&': addToken(nextIf('=') ? Token::BIT_AND_SET: (nextIf('&') ? Token::AND: Token::BIT_AND));
+                return false;
+            case '|': addToken(nextIf('=') ? Token::BIT_OR_SET: (nextIf('|') ? Token::OR: Token::BIT_OR));
+                return false;
+            case '^': addToken(nextIf('=') ? Token::BIT_XOR_SET: Token::BIT_XOR);
+                return false;
+            case '<': addToken(chooseToken('<', Token::LEFT_SHIFT_SET, Token::LEFT_SHIFT, Token::LESS_EQUAL, Token::LESS));
+                return false;
+            case '>': addToken(chooseToken('>', Token::RIGHT_SHIFT_SET, Token::RIGHT_SHIFT, Token::GREATER_EQUAL, Token::GREATER));
+                return false;
+        }
+        String s("unknown token '");
+        s += c;
+        s += '\'';
+        onError(s, line);
+        return true;
     }
-    char buffer[32];
-    strncpy(buffer, "unknown token '", 32);
-    size_t index = strlen(buffer);
-    index += tokenizer_printChar(c, buffer + index);
-    buffer[index] = '\'';
-    buffer[index + 1] = '\0';
-    tokenizer_onError(buffer, line);
-
-    return true;
-}

-static bool tokenizer_handleChar(char32_t c) {
-    if(tokenizer_isValidNameStart(c)) {
-        return tokenizer_handleLiteral(c, Token::LITERAL);
-    } else if(tokenizer_isDigit(c)) {
-
-        return tokenizer_handleNumber(c);
+    bool handleChar(char32_t c) {
+        if(isValidNameStart(c)) {
+            return handleLiteral(c, Token::LITERAL);
+        } else if(isDigit(c)) {
+            return handleNumber(c);
+        }
+        return handleSpecial(c);
     }
-    return tokenizer_handleSpecial(c);
-}

-bool tokenize(TokenStream* tokenStream, const char* inputPath) {
-    input = fopen(inputPath, "r");
-    if(input == NULL) {
+private:
+    std::basic_ifstream<char32_t> stream;
+    TokenStream& tokens;
+    unsigned int line = 1;
+    char32_t buffer = 0;
+};
+
+bool Tokenizer::tokenize(TokenStream& tokenStream, const char* inputPath) {
+    Data d(inputPath, tokenStream);
+    if(d.hasFileError()) {
         return true;
     }
-    tokens = tokenStream;
-    line = 1;
-    buffer = 0;
-
     char32_t c;
-    while(tokenizer_next(&c)) {
-        if(tokenizer_handleChar(c)) {
+    while(d.next(c)) {
+        if(d.handleChar(c)) {
             return true;
         }
     }
-    tokenizer_addToken(Token::EOF_TOKEN);
-
-    fclose(input);
-    input = NULL;
+    d.addToken(Token::EOF_TOKEN);
     return false;
-}
+}
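
A minimal driver for the refactored entry point might look like the sketch below. It assumes tokenize is callable as a static member of Tokenizer and that TokenStream is default-constructible; neither detail is shown in this diff, so treat both as assumptions. The error convention it relies on is visible above: tokenize returns true on failure after reporting through onError.

// Hypothetical usage sketch; Tokenizer/TokenStream details are assumed, not confirmed by the diff.
#include "tokenizer/Tokenizer.h"

int main(int argc, char** argv) {
    if(argc < 2) {
        return 1;
    }
    TokenStream tokens;                        // assumed default-constructible
    if(Tokenizer::tokenize(tokens, argv[1])) { // true means an error was already reported via onError
        return 1;
    }
    // hand the filled TokenStream to the parser from here
    return 0;
}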
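
The new code also leans on a small String class from utils/String.h that the diff does not show. The operations used above are isFull(), operator+= for char and char32_t, comparison against C string literals, implicit conversion to const char* (for tokens.add and onError), and stream output. A fixed-capacity stand-in that would satisfy those uses is sketched below; the 1024-byte capacity mirrors the stack buffers the old C code used, but the real header may well differ.

// Hypothetical stand-in for utils/String.h; the real class may differ.
#include <cstddef>
#include <cstring>
#include <ostream>

class String {
public:
    String() = default;
    String(const char* s) { while(*s != '\0') { *this += *s++; } }

    // full when only the terminating '\0' slot is left
    bool isFull() const { return length >= sizeof(data) - 1; }

    String& operator+=(char c) {
        if(!isFull()) {
            data[length++] = c;
        }
        return *this;
    }

    // note: this truncates non-ASCII code points; the old C code emitted
    // the raw UTF-8 bytes through tokenizer_printChar instead
    String& operator+=(char32_t c) { return *this += (char) c; }

    bool operator==(const char* other) const { return std::strcmp(data, other) == 0; }

    operator const char*() const { return data; }  // lets tokens.add(...) take it as text

private:
    char data[1024] = {};
    std::size_t length = 0;
};

inline std::ostream& operator<<(std::ostream& os, const String& s) {
    return os << (const char*) s;
}

One behavioral change worth flagging: the old next() read multi-byte UTF-8 sequences byte by byte from fgetc and packed the raw bytes into a char32_t, while the new Data::next reads whole char32_t units from std::basic_ifstream<char32_t>, so multi-byte input now depends on the stream's locale supplying a char32_t/char codecvt facet.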