|
@@ -1,220 +1,198 @@
|
|
#include <sstream>
|
|
#include <sstream>
|
|
|
|
|
|
#include "tokenizer/Tokenizer.h"
|
|
#include "tokenizer/Tokenizer.h"
|
|
-#include "exceptions/PreScriptException.h"
|
|
|
|
|
|
|
|
static unsigned int line = 1;
|
|
static unsigned int line = 1;
|
|
-static std::vector<Token>* tokens = nullptr;
|
|
|
|
-static std::istream* input = nullptr;
|
|
|
|
-static int buffer = -1;
|
|
|
|
|
|
+static TokenStream* tokens = nullptr;
|
|
|
|
+static Tokenizer::i32stream* input = nullptr;
|
|
|
|
+static char32_t buffer = 0;
|
|
|
|
|
|
// Reports a tokenizer error by printing "<message> Line: <line>" to std::cout.
// The parameter `line` intentionally shadows the file-level counter so callers
// can report a line other than the current one (e.g. where a string started).
static void onError(const std::string& message, unsigned int line) {
    std::cout << message << " Line: " << line << std::endl;
}
|
|
|
|
+
|
|
|
|
// Encodes the code point `c` as a NUL-terminated UTF-8 sequence.
//
// `buffer` must have room for at least 5 chars (up to 4 encoded bytes plus the
// terminator). Code points are bucketed purely by magnitude: 1 byte up to
// 0x7F, 2 bytes up to 0x7FF, 3 bytes up to 0xFFFF and 4 bytes for everything
// above; no validation of surrogates or of values beyond U+10FFFF is done.
static void convertChar(char32_t c, char* buffer) {
    if(c <= 0x7F) {
        buffer[0] = static_cast<char>(c);
        buffer[1] = '\0';
    } else if(c <= 0x7FF) {
        buffer[0] = static_cast<char>(0xC0 | ((c >> 6) & 0x1F));
        buffer[1] = static_cast<char>(0x80 | (c & 0x3F));
        buffer[2] = '\0';
    } else if(c <= 0xFFFF) {
        buffer[0] = static_cast<char>(0xE0 | ((c >> 12) & 0x0F));
        buffer[1] = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
        buffer[2] = static_cast<char>(0x80 | (c & 0x3F));
        buffer[3] = '\0';
    } else {
        buffer[0] = static_cast<char>(0xF0 | ((c >> 18) & 0x07));
        buffer[1] = static_cast<char>(0x80 | ((c >> 12) & 0x3F));
        buffer[2] = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
        buffer[3] = static_cast<char>(0x80 | (c & 0x3F));
        buffer[4] = '\0';
    }
}
|
|
|
|
+
|
|
|
|
// True for the ASCII letters a-z and A-Z only; every other code point is rejected.
static bool isLetter(char32_t c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
|
|
|
|
|
|
// True for the ASCII decimal digits 0-9.
static bool isDigit(char32_t c) {
    return c >= '0' && c <= '9';
}
|
|
|
|
|
|
-static bool isValidNamePart(int c)
|
|
|
|
-{
|
|
|
|
- return isLetter(c) || isDigit(c) || c == '.' || c == '_';
|
|
|
|
|
|
+static bool isValidNameStart(char32_t c) {
|
|
|
|
+ return isLetter(c) || c == '.' || c == '_';
|
|
}
|
|
}
|
|
|
|
|
|
-static int next()
|
|
|
|
-{
|
|
|
|
- if(buffer != -1)
|
|
|
|
- {
|
|
|
|
- int r = buffer;
|
|
|
|
- buffer = -1;
|
|
|
|
- return r;
|
|
|
|
- }
|
|
|
|
- int data = input->get();
|
|
|
|
- if(!input->good())
|
|
|
|
- {
|
|
|
|
- return -1;
|
|
|
|
- }
|
|
|
|
- if((data & 0x80) != 0 && data != -1) // special char
|
|
|
|
- {
|
|
|
|
- if((data & 0x40) != 0) // this should always be true
|
|
|
|
- {
|
|
|
|
- if((data & 0x20) != 0) // 3 byte unicode
|
|
|
|
- {
|
|
|
|
- int a = input->get();
|
|
|
|
- int b = input->get();
|
|
|
|
- data = ((data & 0xFF) << 16) | ((a & 0xFF) << 8) | (b & 0xFF);
|
|
|
|
- }
|
|
|
|
- else // 2 byte unicode
|
|
|
|
- {
|
|
|
|
- data = ((data & 0xFF) << 8) | (input->get() & 0xFF);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- else
|
|
|
|
- {
|
|
|
|
- // should not happen as unicode starts with 11
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return data;
|
|
|
|
|
|
+static bool isValidNamePart(char32_t c) {
|
|
|
|
+ return isDigit(c) || isValidNameStart(c);
|
|
}
|
|
}
|
|
|
|
|
|
-static int peek()
|
|
|
|
-{
|
|
|
|
- if(buffer == -1)
|
|
|
|
- {
|
|
|
|
- buffer = next();
|
|
|
|
- return buffer;
|
|
|
|
|
|
+static bool next(char32_t& c) {
|
|
|
|
+ if(buffer != 0) {
|
|
|
|
+ c = buffer;
|
|
|
|
+ buffer = 0;
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
- return buffer;
|
|
|
|
|
|
+ c = input->get();
|
|
|
|
+ return input->good();
|
|
}
|
|
}
|
|
|
|
|
|
-static bool next(char c)
|
|
|
|
-{
|
|
|
|
- if(peek() == c)
|
|
|
|
- {
|
|
|
|
- next();
|
|
|
|
|
|
+static bool peek(char32_t& c) {
|
|
|
|
+ if(buffer != 0 || next(buffer)) {
|
|
|
|
+ c = buffer;
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
|
|
-static void add(TokenType type)
|
|
|
|
-{
|
|
|
|
- tokens->push_back(Token(type, line));
|
|
|
|
|
|
+static bool nextIf(char32_t c) {
|
|
|
|
+ char32_t nextChar;
|
|
|
|
+ if(peek(nextChar) && c == nextChar) {
|
|
|
|
+ next(nextChar);
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
|
|
-static void add(TokenType type, double number)
|
|
|
|
-{
|
|
|
|
- tokens->push_back(Token(type, line, number));
|
|
|
|
|
|
+static void add(TokenType type) {
|
|
|
|
+ tokens->add(type, line);
|
|
}
|
|
}
|
|
|
|
|
|
-static void add(TokenType type, const std::string& text)
|
|
|
|
-{
|
|
|
|
- tokens->push_back(Token(type, line, text));
|
|
|
|
|
|
+static void add(TokenType type, const std::string& text) {
|
|
|
|
+ tokens->add(type, line, text);
|
|
}
|
|
}
|
|
|
|
|
|
-static void add(char c, TokenType t1, TokenType t2, TokenType t3, TokenType t4)
|
|
|
|
-{
|
|
|
|
- int p = peek();
|
|
|
|
- if(p == c)
|
|
|
|
- {
|
|
|
|
- next();
|
|
|
|
- if(peek() == '=')
|
|
|
|
- {
|
|
|
|
- next();
|
|
|
|
- add(t1);
|
|
|
|
|
|
+static TokenType chooseTokenType(char c, TokenType aCharEqual, TokenType aChar, TokenType aEqual, TokenType other) {
|
|
|
|
+ if(nextIf(c)) {
|
|
|
|
+ if(nextIf('=')) {
|
|
|
|
+ return aCharEqual;
|
|
}
|
|
}
|
|
- else
|
|
|
|
- {
|
|
|
|
- add(t2);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- else if(p == '=')
|
|
|
|
- {
|
|
|
|
- next();
|
|
|
|
- add(t3);
|
|
|
|
- }
|
|
|
|
- else
|
|
|
|
- {
|
|
|
|
- add(t4);
|
|
|
|
|
|
+ return aChar;
|
|
|
|
+ } else if(nextIf('=')) {
|
|
|
|
+ return aEqual;
|
|
}
|
|
}
|
|
|
|
+ return other;
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleLiteral(int c, TokenType type)
|
|
|
|
-{
|
|
|
|
|
|
+static bool handleLiteral(char32_t c, TokenType type) {
|
|
std::stringstream sBuilder;
|
|
std::stringstream sBuilder;
|
|
sBuilder << (char) c;
|
|
sBuilder << (char) c;
|
|
|
|
|
|
- while(true)
|
|
|
|
- {
|
|
|
|
- int data = peek();
|
|
|
|
- if(!isValidNamePart(data))
|
|
|
|
- {
|
|
|
|
|
|
+ while(true) {
|
|
|
|
+ char32_t data;
|
|
|
|
+ if(!peek(data) || !isValidNamePart(data)) {
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
sBuilder << (char) data;
|
|
sBuilder << (char) data;
|
|
- next();
|
|
|
|
|
|
+ next(data);
|
|
}
|
|
}
|
|
|
|
|
|
std::string s = sBuilder.str();
|
|
std::string s = sBuilder.str();
|
|
- if(s == "if") { add(TokenType::IF); }
|
|
|
|
- else if(s == "if") { add(TokenType::IF); }
|
|
|
|
- else if(s == "else") { add(TokenType::ELSE); }
|
|
|
|
- else if(s == "elseif") { add(TokenType::ELSEIF); }
|
|
|
|
- else if(s == "while") { add(TokenType::WHILE); }
|
|
|
|
- else if(s == "try") { add(TokenType::TRY); }
|
|
|
|
- else if(s == "catch") { add(TokenType::CATCH); }
|
|
|
|
- else if(s == "for") { add(TokenType::FOR); }
|
|
|
|
- else if(s == "function") { add(TokenType::FUNCTION); }
|
|
|
|
- else if(s == "break") { add(TokenType::BREAK); }
|
|
|
|
- else if(s == "continue") { add(TokenType::CONTINUE); }
|
|
|
|
- else if(s == "return") { add(TokenType::RETURN); }
|
|
|
|
- else if(s == "true") { add(TokenType::TRUE); }
|
|
|
|
- else if(s == "false") { add(TokenType::FALSE); }
|
|
|
|
- else if(s == "null") { add(TokenType::NULL_TOKEN); }
|
|
|
|
- else { add(type, s); };
|
|
|
|
-
|
|
|
|
|
|
+ if(s == "if") {
|
|
|
|
+ add(TokenType::IF);
|
|
|
|
+ } else if(s == "if") {
|
|
|
|
+ add(TokenType::IF);
|
|
|
|
+ } else if(s == "else") {
|
|
|
|
+ add(TokenType::ELSE);
|
|
|
|
+ } else if(s == "elseif") {
|
|
|
|
+ add(TokenType::ELSEIF);
|
|
|
|
+ } else if(s == "while") {
|
|
|
|
+ add(TokenType::WHILE);
|
|
|
|
+ } else if(s == "try") {
|
|
|
|
+ add(TokenType::TRY);
|
|
|
|
+ } else if(s == "catch") {
|
|
|
|
+ add(TokenType::CATCH);
|
|
|
|
+ } else if(s == "for") {
|
|
|
|
+ add(TokenType::FOR);
|
|
|
|
+ } else if(s == "function") {
|
|
|
|
+ add(TokenType::FUNCTION);
|
|
|
|
+ } else if(s == "break") {
|
|
|
|
+ add(TokenType::BREAK);
|
|
|
|
+ } else if(s == "continue") {
|
|
|
|
+ add(TokenType::CONTINUE);
|
|
|
|
+ } else if(s == "return") {
|
|
|
|
+ add(TokenType::RETURN);
|
|
|
|
+ } else if(s == "true") {
|
|
|
|
+ add(TokenType::TRUE);
|
|
|
|
+ } else if(s == "false") {
|
|
|
|
+ add(TokenType::FALSE);
|
|
|
|
+ } else if(s == "null") {
|
|
|
|
+ add(TokenType::NULL_TOKEN);
|
|
|
|
+ } else {
|
|
|
|
+ add(type, s);
|
|
|
|
+ }
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleNumber(int c)
|
|
|
|
-{
|
|
|
|
- double d = c - '0';
|
|
|
|
- while(true)
|
|
|
|
- {
|
|
|
|
- int data = peek();
|
|
|
|
- if(!isDigit(data))
|
|
|
|
- {
|
|
|
|
- if(data == '.')
|
|
|
|
- {
|
|
|
|
- next();
|
|
|
|
- double factor = 10;
|
|
|
|
- while(true)
|
|
|
|
- {
|
|
|
|
- int data = peek();
|
|
|
|
- if(!isDigit(data))
|
|
|
|
- {
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- d += (data - '0') / factor;
|
|
|
|
- factor *= 10;
|
|
|
|
- next();
|
|
|
|
- }
|
|
|
|
|
|
+static bool handleNumber(char32_t c) {
|
|
|
|
+ double number = c - '0';
|
|
|
|
+ char32_t data;
|
|
|
|
+ while(peek(data)) {
|
|
|
|
+ if(!isDigit(data)) {
|
|
|
|
+ if(data != '.') {
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ next(data);
|
|
|
|
+ double factor = 10;
|
|
|
|
+ while(peek(data) && isDigit(data)) {
|
|
|
|
+ number += (data - '0') / factor;
|
|
|
|
+ factor *= 10;
|
|
|
|
+ next(data);
|
|
}
|
|
}
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
- d = (d * 10) + (data - '0');
|
|
|
|
- next();
|
|
|
|
|
|
+ number = (number * 10) + (data - '0');
|
|
|
|
+ next(data);
|
|
}
|
|
}
|
|
-
|
|
|
|
- add(NUMBER, d);
|
|
|
|
|
|
+ tokens->add(NUMBER, line, number);
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleString()
|
|
|
|
-{
|
|
|
|
|
|
+static bool handleString() {
|
|
std::stringstream ss;
|
|
std::stringstream ss;
|
|
- int oldLine = line;
|
|
|
|
- while(true)
|
|
|
|
- {
|
|
|
|
- int data = next();
|
|
|
|
- if(data == -1)
|
|
|
|
- {
|
|
|
|
- throw PreScriptException("non closed string literal", oldLine);
|
|
|
|
|
|
+ unsigned int oldLine = line;
|
|
|
|
+ while(true) {
|
|
|
|
+ char32_t data;
|
|
|
|
+ if(!next(data)) {
|
|
|
|
+ onError("non closed string literal", oldLine);
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
- if(data == '"')
|
|
|
|
- {
|
|
|
|
|
|
+ if(data == '"') {
|
|
add(STRING, ss.str());
|
|
add(STRING, ss.str());
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
- if(data == '\n')
|
|
|
|
- {
|
|
|
|
|
|
+ if(data == '\n') {
|
|
line++;
|
|
line++;
|
|
}
|
|
}
|
|
- if(data == '\\')
|
|
|
|
- {
|
|
|
|
- int escape = next();
|
|
|
|
- switch(escape)
|
|
|
|
- {
|
|
|
|
|
|
+ if(data == '\\') {
|
|
|
|
+ char32_t escape;
|
|
|
|
+ if(!next(escape)) {
|
|
|
|
+ onError("missing escaped character", line);
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
+ switch(escape) {
|
|
case 'n': data = '\n';
|
|
case 'n': data = '\n';
|
|
break;
|
|
break;
|
|
case '\\': data = '\\';
|
|
case '\\': data = '\\';
|
|
@@ -222,168 +200,136 @@ static void handleString()
|
|
case '"': data = '"';
|
|
case '"': data = '"';
|
|
break;
|
|
break;
|
|
default:
|
|
default:
|
|
- throw PreScriptException("invalid escaped character", line);
|
|
|
|
|
|
+ onError("invalid escaped character", line);
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- if(data > 0xFFFF)
|
|
|
|
- {
|
|
|
|
- ss << (char) ((data & 0xFF0000) >> 16);
|
|
|
|
- ss << (char) ((data & 0xFF00) >> 8);
|
|
|
|
- ss << (char) (data & 0xFF);
|
|
|
|
- }
|
|
|
|
- else if(data > 0xFF)
|
|
|
|
- {
|
|
|
|
- ss << (char) ((data & 0xFF00) >> 8);
|
|
|
|
- ss << (char) (data & 0xFF);
|
|
|
|
- }
|
|
|
|
- else
|
|
|
|
- {
|
|
|
|
- ss << (char) data;
|
|
|
|
- }
|
|
|
|
|
|
+ char buffer[5];
|
|
|
|
+ convertChar(data, buffer);
|
|
|
|
+ ss << buffer;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleOneLineComment()
|
|
|
|
-{
|
|
|
|
- while(true)
|
|
|
|
- {
|
|
|
|
- int data = next();
|
|
|
|
- if(data == -1 || data == '\n')
|
|
|
|
- {
|
|
|
|
- line++;
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+static bool handleOneLineComment() {
|
|
|
|
+ char32_t data;
|
|
|
|
+ while(next(data) && data != '\n');
|
|
|
|
+ line++;
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleMultiLineComment()
|
|
|
|
-{
|
|
|
|
- int first;
|
|
|
|
- int sec = -1;
|
|
|
|
- while(true)
|
|
|
|
- {
|
|
|
|
|
|
+static bool handleMultiLineComment() {
|
|
|
|
+ char32_t first;
|
|
|
|
+ char32_t sec = 0;
|
|
|
|
+ unsigned int oldLine = line;
|
|
|
|
+ while(true) {
|
|
first = sec;
|
|
first = sec;
|
|
- sec = next();
|
|
|
|
- if(sec == -1 || (first == '*' && sec == '/'))
|
|
|
|
- {
|
|
|
|
- break;
|
|
|
|
|
|
+ if(!next(sec)) {
|
|
|
|
+ onError("unclosed multiline comment", oldLine);
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
- if(sec == '\n')
|
|
|
|
- {
|
|
|
|
- line++;
|
|
|
|
|
|
+ if(first == '*' && sec == '/') {
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
+ line += (sec == '\n');
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleSlash()
|
|
|
|
-{
|
|
|
|
- switch(peek())
|
|
|
|
- {
|
|
|
|
- case '/':
|
|
|
|
- next();
|
|
|
|
- handleOneLineComment();
|
|
|
|
- break;
|
|
|
|
- case '*':
|
|
|
|
- next();
|
|
|
|
- handleMultiLineComment();
|
|
|
|
- break;
|
|
|
|
- case '=':
|
|
|
|
- next();
|
|
|
|
- add(DIV_SET);
|
|
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- add(DIV);
|
|
|
|
|
|
+static bool handleSlash() {
|
|
|
|
+ if(nextIf('/')) {
|
|
|
|
+ return handleOneLineComment();
|
|
|
|
+ } else if(nextIf('*')) {
|
|
|
|
+ return handleMultiLineComment();
|
|
|
|
+ } else if(nextIf('=')) {
|
|
|
|
+ add(DIV_SET);
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
+ add(DIV);
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleSpecial(int c)
|
|
|
|
-{
|
|
|
|
- switch(c)
|
|
|
|
- {
|
|
|
|
|
|
+static bool handleSpecial(char32_t c) {
|
|
|
|
+ switch(c) {
|
|
case ' ':
|
|
case ' ':
|
|
case '\t':
|
|
case '\t':
|
|
- case '\r': break;
|
|
|
|
|
|
+ case '\r':
|
|
|
|
+ return false;
|
|
case '\n': line++;
|
|
case '\n': line++;
|
|
- break;
|
|
|
|
- case '"': handleString();
|
|
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
|
|
+ case '"':
|
|
|
|
+ return handleString();
|
|
case '(': add(OPEN_BRACKET);
|
|
case '(': add(OPEN_BRACKET);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case ')': add(CLOSE_BRACKET);
|
|
case ')': add(CLOSE_BRACKET);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case '[': add(OPEN_SQUARE_BRACKET);
|
|
case '[': add(OPEN_SQUARE_BRACKET);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case ']': add(CLOSE_SQUARE_BRACKET);
|
|
case ']': add(CLOSE_SQUARE_BRACKET);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case '{': add(OPEN_CURVED_BRACKET);
|
|
case '{': add(OPEN_CURVED_BRACKET);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case '}': add(CLOSE_CURVED_BRACKET);
|
|
case '}': add(CLOSE_CURVED_BRACKET);
|
|
- break;
|
|
|
|
- case '$': handleLiteral(c, LITERAL);
|
|
|
|
- break;
|
|
|
|
- case '@': handleLiteral(c, LABEL);
|
|
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
|
|
+ case '$':
|
|
|
|
+ return handleLiteral(c, LITERAL);
|
|
|
|
+ case '@':
|
|
|
|
+ return handleLiteral(c, LABEL);
|
|
case ';': add(SEMICOLON);
|
|
case ';': add(SEMICOLON);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case ',': add(COMMA);
|
|
case ',': add(COMMA);
|
|
- break;
|
|
|
|
|
|
+ return false;
|
|
case '~': add(BIT_INVERT);
|
|
case '~': add(BIT_INVERT);
|
|
- break;
|
|
|
|
- case '+': add(next('=') ? ADD_SET: (next('+') ? INC: ADD));
|
|
|
|
- break;
|
|
|
|
- case '-': add(next('=') ? SUB_SET: (next('-') ? DEC: SUB));
|
|
|
|
- break;
|
|
|
|
- case '!': add(next('=') ? NOT_EQUAL: INVERT);
|
|
|
|
- break;
|
|
|
|
- case '=': add(next('=') ? EQUAL: SET);
|
|
|
|
- break;
|
|
|
|
- case '*': add(next('=') ? MUL_SET: MUL);
|
|
|
|
- break;
|
|
|
|
- case '/': handleSlash();
|
|
|
|
- break;
|
|
|
|
- case '%': add(next('=') ? MOD_SET: MOD);
|
|
|
|
- break;
|
|
|
|
- case '&': add(next('=') ? BIT_AND_SET: (next('&') ? AND: BIT_AND));
|
|
|
|
- break;
|
|
|
|
- case '|': add(next('=') ? BIT_OR_SET: (next('|') ? OR: BIT_OR));
|
|
|
|
- break;
|
|
|
|
- case '^': add(next('=') ? BIT_XOR_SET: BIT_XOR);
|
|
|
|
- break;
|
|
|
|
- case '<': add('<', LEFT_SHIFT_SET, LEFT_SHIFT, LESS_EQUAL, LESS);
|
|
|
|
- break;
|
|
|
|
- case '>': add('>', RIGHT_SHIFT_SET, RIGHT_SHIFT, GREATER_EQUAL, GREATER);
|
|
|
|
- break;
|
|
|
|
- default: throw PreScriptException("unknown token " + c, line);
|
|
|
|
|
|
+ return false;
|
|
|
|
+ case '+': add(nextIf('=') ? ADD_SET: (nextIf('+') ? INC: ADD));
|
|
|
|
+ return false;
|
|
|
|
+ case '-': add(nextIf('=') ? SUB_SET: (nextIf('-') ? DEC: SUB));
|
|
|
|
+ return false;
|
|
|
|
+ case '!': add(nextIf('=') ? NOT_EQUAL: INVERT);
|
|
|
|
+ break;
|
|
|
|
+ case '=': add(nextIf('=') ? EQUAL: SET);
|
|
|
|
+ return false;
|
|
|
|
+ case '*': add(nextIf('=') ? MUL_SET: MUL);
|
|
|
|
+ return false;
|
|
|
|
+ case '/':
|
|
|
|
+ return handleSlash();
|
|
|
|
+ case '%': add(nextIf('=') ? MOD_SET: MOD);
|
|
|
|
+ return false;
|
|
|
|
+ case '&': add(nextIf('=') ? BIT_AND_SET: (nextIf('&') ? AND: BIT_AND));
|
|
|
|
+ return false;
|
|
|
|
+ case '|': add(nextIf('=') ? BIT_OR_SET: (nextIf('|') ? OR: BIT_OR));
|
|
|
|
+ return false;
|
|
|
|
+ case '^': add(nextIf('=') ? BIT_XOR_SET: BIT_XOR);
|
|
|
|
+ return false;
|
|
|
|
+ case '<': add(chooseTokenType('<', LEFT_SHIFT_SET, LEFT_SHIFT, LESS_EQUAL, LESS));
|
|
|
|
+ return false;
|
|
|
|
+ case '>': add(chooseTokenType('>', RIGHT_SHIFT_SET, RIGHT_SHIFT, GREATER_EQUAL, GREATER));
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
+ char buffer[5];
|
|
|
|
+ convertChar(c, buffer);
|
|
|
|
+ onError(std::string("unknown token '") + buffer + "'", line);
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
|
|
|
|
-static void handleChar(int c)
|
|
|
|
-{
|
|
|
|
- if(isLetter(c) || c == '_' || c == '.')
|
|
|
|
- {
|
|
|
|
- handleLiteral(c, TokenType::LITERAL);
|
|
|
|
- }
|
|
|
|
- else if(isDigit(c))
|
|
|
|
- {
|
|
|
|
- handleNumber(c);
|
|
|
|
- }
|
|
|
|
- else
|
|
|
|
- {
|
|
|
|
- handleSpecial(c);
|
|
|
|
|
|
+static bool handleChar(char32_t c) {
|
|
|
|
+ if(isValidNameStart(c)) {
|
|
|
|
+ return handleLiteral(c, TokenType::LITERAL);
|
|
|
|
+ } else if(isDigit(c)) {
|
|
|
|
+ return handleNumber(c);
|
|
}
|
|
}
|
|
|
|
+ return handleSpecial(c);
|
|
}
|
|
}
|
|
|
|
|
|
-void Tokenizer::tokenize(std::vector<Token>& inTokens, std::istream& inInput)
|
|
|
|
-{
|
|
|
|
|
|
+bool Tokenizer::tokenize(TokenStream& inTokens, i32stream& inInput) {
|
|
tokens = &inTokens;
|
|
tokens = &inTokens;
|
|
input = &inInput;
|
|
input = &inInput;
|
|
-
|
|
|
|
line = 1;
|
|
line = 1;
|
|
- buffer = -1;
|
|
|
|
- int c;
|
|
|
|
- while((c = next()) != -1)
|
|
|
|
- {
|
|
|
|
- handleChar(c);
|
|
|
|
|
|
+ buffer = 0;
|
|
|
|
+ char32_t c;
|
|
|
|
+ while(next(c)) {
|
|
|
|
+ if(handleChar(c)) {
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
add(EOF_TOKEN);
|
|
add(EOF_TOKEN);
|
|
|
|
+ return false;
|
|
}
|
|
}
|