|
@@ -1,328 +0,0 @@
|
|
|
-#include <iostream>
|
|
|
-#include <fstream>
|
|
|
-
|
|
|
-#include "tokenizer/Tokenizer.h"
|
|
|
-#include "utils/String.h"
|
|
|
-
|
|
|
-static void onError(const String& message, unsigned int line) {
|
|
|
- std::cout << message << " Line: " << line << "\n";
|
|
|
-}
|
|
|
-
|
|
|
/**
 * Checks whether a character is an ASCII letter.
 *
 * @param c character to classify
 * @return true for 'a'..'z' and 'A'..'Z', false for everything else
 */
static bool isLetter(char32_t c) {
    const bool lowerCase = 'a' <= c && c <= 'z';
    const bool upperCase = 'A' <= c && c <= 'Z';
    return lowerCase || upperCase;
}
|
|
|
-
|
|
|
/**
 * Checks whether a character is an ASCII decimal digit.
 *
 * @param c character to classify
 * @return true for '0'..'9', false for everything else
 */
static bool isDigit(char32_t c) {
    if(c < '0') {
        return false;
    }
    return c <= '9';
}
|
|
|
-
|
|
|
-static bool isValidNameStart(char32_t c) {
|
|
|
- return isLetter(c) || c == '.' || c == '_';
|
|
|
-}
|
|
|
-
|
|
|
-static bool isValidNamePart(char32_t c) {
|
|
|
- return isDigit(c) || isValidNameStart(c);
|
|
|
-}
|
|
|
-
|
|
|
-class Data {
|
|
|
-public:
|
|
|
-
|
|
|
- Data(const char* inputPath, TokenStream& tokens) : tokens(tokens) {
|
|
|
- stream.open(inputPath);
|
|
|
- }
|
|
|
-
|
|
|
- bool hasFileError() {
|
|
|
- return !stream.good();
|
|
|
- }
|
|
|
-
|
|
|
- bool next(char32_t& c) {
|
|
|
- if(buffer != 0) {
|
|
|
- c = buffer;
|
|
|
- buffer = 0;
|
|
|
- return true;
|
|
|
- }
|
|
|
- c = stream.get();
|
|
|
- return stream.good();
|
|
|
- }
|
|
|
-
|
|
|
- bool peek(char32_t& c) {
|
|
|
- if(buffer != 0 || next(buffer)) {
|
|
|
- c = buffer;
|
|
|
- return true;
|
|
|
- }
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- bool nextIf(char32_t c) {
|
|
|
- char32_t nextChar;
|
|
|
- if(peek(nextChar) && c == nextChar) {
|
|
|
- next(nextChar);
|
|
|
- return true;
|
|
|
- }
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- void addToken(Token token) {
|
|
|
- tokens.add(token, line);
|
|
|
- }
|
|
|
-
|
|
|
- void addToken(Token token, const char* text) {
|
|
|
- tokens.add(token, line, text);
|
|
|
- }
|
|
|
-
|
|
|
- Token chooseToken(char c, Token aCharEqual, Token aChar, Token aEqual, Token other) {
|
|
|
- if(nextIf(c)) {
|
|
|
- if(nextIf('=')) {
|
|
|
- return aCharEqual;
|
|
|
- }
|
|
|
- return aChar;
|
|
|
- } else if(nextIf('=')) {
|
|
|
- return aEqual;
|
|
|
- }
|
|
|
- return other;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleLiteral(char32_t c, Token token) {
|
|
|
- String s;
|
|
|
- s += (char) c;
|
|
|
- while(true) {
|
|
|
- if(s.isFull()) {
|
|
|
- onError("string buffer to small", line);
|
|
|
- return true;
|
|
|
- }
|
|
|
- char32_t data;
|
|
|
- if(!peek(data) || !isValidNamePart(data)) {
|
|
|
- break;
|
|
|
- }
|
|
|
- s += (char) data;
|
|
|
- next(data);
|
|
|
- }
|
|
|
- if(s == "if") {
|
|
|
- addToken(Token::IF);
|
|
|
- } else if(s == "else") {
|
|
|
- addToken(Token::ELSE);
|
|
|
- } else if(s == "elseif") {
|
|
|
- addToken(Token::ELSEIF);
|
|
|
- } else if(s == "while") {
|
|
|
- addToken(Token::WHILE);
|
|
|
- } else if(s == "try") {
|
|
|
- addToken(Token::TRY);
|
|
|
- } else if(s == "catch") {
|
|
|
- addToken(Token::CATCH);
|
|
|
- } else if(s == "for") {
|
|
|
- addToken(Token::FOR);
|
|
|
- } else if(s == "function") {
|
|
|
- addToken(Token::FUNCTION);
|
|
|
- } else if(s == "break") {
|
|
|
- addToken(Token::BREAK);
|
|
|
- } else if(s == "continue") {
|
|
|
- addToken(Token::CONTINUE);
|
|
|
- } else if(s == "return") {
|
|
|
- addToken(Token::RETURN);
|
|
|
- } else if(s == "true") {
|
|
|
- addToken(Token::TRUE);
|
|
|
- } else if(s == "false") {
|
|
|
- addToken(Token::FALSE);
|
|
|
- } else if(s == "null") {
|
|
|
- addToken(Token::NULL_TOKEN);
|
|
|
- } else {
|
|
|
- addToken(token, s);
|
|
|
- }
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleNumber(char32_t c) {
|
|
|
- double number = c - '0';
|
|
|
- char32_t data;
|
|
|
- while(peek(data)) {
|
|
|
- if(!isDigit(data)) {
|
|
|
- if(data != '.') {
|
|
|
- break;
|
|
|
- }
|
|
|
- next(data);
|
|
|
- double factor = 10;
|
|
|
- while(peek(data) && isDigit(data)) {
|
|
|
- number += (data - '0') / factor;
|
|
|
- factor *= 10;
|
|
|
- next(data);
|
|
|
- }
|
|
|
- break;
|
|
|
- }
|
|
|
- number = (number * 10) + (data - '0');
|
|
|
- next(data);
|
|
|
- }
|
|
|
- tokens.add(Token::NUMBER, line, number);
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleString() {
|
|
|
- String s;
|
|
|
- unsigned int oldLine = line;
|
|
|
- while(!s.isFull()) {
|
|
|
- char32_t data;
|
|
|
- if(!next(data)) {
|
|
|
- onError("non closed string literal", oldLine);
|
|
|
- return true;
|
|
|
- }
|
|
|
- if(data == '"') {
|
|
|
- addToken(Token::STRING, s);
|
|
|
- return false;
|
|
|
- }
|
|
|
- if(data == '\n') {
|
|
|
- line++;
|
|
|
- }
|
|
|
- if(data == '\\') {
|
|
|
- char32_t escape;
|
|
|
- if(!next(escape)) {
|
|
|
- onError("missing escaped character", line);
|
|
|
- return true;
|
|
|
- }
|
|
|
- switch(escape) {
|
|
|
- case 'n': data = '\n';
|
|
|
- break;
|
|
|
- case '\\': data = '\\';
|
|
|
- break;
|
|
|
- case '"': data = '"';
|
|
|
- break;
|
|
|
- default:
|
|
|
- onError("invalid escaped character", line);
|
|
|
- return true;
|
|
|
- }
|
|
|
- }
|
|
|
- s += data;
|
|
|
- }
|
|
|
- onError("string buffer to small", line);
|
|
|
- return true;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleOneLineComment() {
|
|
|
- char32_t data;
|
|
|
- while(next(data) && data != '\n');
|
|
|
- line++;
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleMultiLineComment() {
|
|
|
- char32_t first;
|
|
|
- char32_t sec = 0;
|
|
|
- unsigned int oldLine = line;
|
|
|
- while(true) {
|
|
|
- first = sec;
|
|
|
- if(!next(sec)) {
|
|
|
- onError("unclosed multiline comment", oldLine);
|
|
|
- return true;
|
|
|
- }
|
|
|
- if(first == '*' && sec == '/') {
|
|
|
- return false;
|
|
|
- }
|
|
|
- line += (sec == '\n');
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- bool handleSlash() {
|
|
|
- if(nextIf('/')) {
|
|
|
- return handleOneLineComment();
|
|
|
- } else if(nextIf('*')) {
|
|
|
- return handleMultiLineComment();
|
|
|
- } else if(nextIf('=')) {
|
|
|
- addToken(Token::DIV_SET);
|
|
|
- return false;
|
|
|
- }
|
|
|
- addToken(Token::DIV);
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleSpecial(char32_t c) {
|
|
|
- switch(c) {
|
|
|
- case ' ':
|
|
|
- case '\t':
|
|
|
- case '\r':
|
|
|
- return false;
|
|
|
- case '\n': line++;
|
|
|
- return false;
|
|
|
- case '"':
|
|
|
- return handleString();
|
|
|
- case '(': addToken(Token::OPEN_BRACKET);
|
|
|
- return false;
|
|
|
- case ')': addToken(Token::CLOSE_BRACKET);
|
|
|
- return false;
|
|
|
- case '[': addToken(Token::OPEN_SQUARE_BRACKET);
|
|
|
- return false;
|
|
|
- case ']': addToken(Token::CLOSE_SQUARE_BRACKET);
|
|
|
- return false;
|
|
|
- case '{': addToken(Token::OPEN_CURVED_BRACKET);
|
|
|
- return false;
|
|
|
- case '}': addToken(Token::CLOSE_CURVED_BRACKET);
|
|
|
- return false;
|
|
|
- case '$':
|
|
|
- return handleLiteral(c, Token::LITERAL);
|
|
|
- case '@':
|
|
|
- return handleLiteral(c, Token::LABEL);
|
|
|
- case ';': addToken(Token::SEMICOLON);
|
|
|
- return false;
|
|
|
- case ',': addToken(Token::COMMA);
|
|
|
- return false;
|
|
|
- case '~': addToken(Token::BIT_INVERT);
|
|
|
- return false;
|
|
|
- case '+': addToken(nextIf('=') ? Token::ADD_SET: (nextIf('+') ? Token::INC: Token::ADD));
|
|
|
- return false;
|
|
|
- case '-': addToken(nextIf('=') ? Token::SUB_SET: (nextIf('-') ? Token::DEC: Token::SUB));
|
|
|
- return false;
|
|
|
- case '!': addToken(nextIf('=') ? Token::NOT_EQUAL: Token::INVERT);
|
|
|
- break;
|
|
|
- case '=': addToken(nextIf('=') ? Token::EQUAL: Token::SET);
|
|
|
- return false;
|
|
|
- case '*': addToken(nextIf('=') ? Token::MUL_SET: Token::MUL);
|
|
|
- return false;
|
|
|
- case '/':
|
|
|
- return handleSlash();
|
|
|
- case '%': addToken(nextIf('=') ? Token::MOD_SET: Token::MOD);
|
|
|
- return false;
|
|
|
- case '&': addToken(nextIf('=') ? Token::BIT_AND_SET: (nextIf('&') ? Token::AND: Token::BIT_AND));
|
|
|
- return false;
|
|
|
- case '|': addToken(nextIf('=') ? Token::BIT_OR_SET: (nextIf('|') ? Token::OR: Token::BIT_OR));
|
|
|
- return false;
|
|
|
- case '^': addToken(nextIf('=') ? Token::BIT_XOR_SET: Token::BIT_XOR);
|
|
|
- return false;
|
|
|
- case '<': addToken(chooseToken('<', Token::LEFT_SHIFT_SET, Token::LEFT_SHIFT, Token::LESS_EQUAL, Token::LESS));
|
|
|
- return false;
|
|
|
- case '>': addToken(chooseToken('>', Token::RIGHT_SHIFT_SET, Token::RIGHT_SHIFT, Token::GREATER_EQUAL, Token::GREATER));
|
|
|
- return false;
|
|
|
- }
|
|
|
- String s("unknown token '");
|
|
|
- s += c;
|
|
|
- s += '\'';
|
|
|
- onError(s, line);
|
|
|
- return true;
|
|
|
- }
|
|
|
-
|
|
|
- bool handleChar(char32_t c) {
|
|
|
- if(isValidNameStart(c)) {
|
|
|
- return handleLiteral(c, Token::LITERAL);
|
|
|
- } else if(isDigit(c)) {
|
|
|
- return handleNumber(c);
|
|
|
- }
|
|
|
- return handleSpecial(c);
|
|
|
- }
|
|
|
-
|
|
|
-private:
|
|
|
- std::basic_ifstream<char32_t> stream;
|
|
|
- TokenStream& tokens;
|
|
|
- unsigned int line = 1;
|
|
|
- char32_t buffer = 0;
|
|
|
-};
|
|
|
-
|
|
|
-bool Tokenizer::tokenize(TokenStream& tokenStream, const char* inputPath) {
|
|
|
- Data d(inputPath, tokenStream);
|
|
|
- if(d.hasFileError()) {
|
|
|
- return true;
|
|
|
- }
|
|
|
- char32_t c;
|
|
|
- while(d.next(c)) {
|
|
|
- if(d.handleChar(c)) {
|
|
|
- return true;
|
|
|
- }
|
|
|
- }
|
|
|
- d.addToken(Token::EOF_TOKEN);
|
|
|
- return false;
|
|
|
-}
|