#include "Tokenizer.h"
#include "../Utils.h"
#include "../Exception.h"

// Tokenizer: converts raw source text into a TokenList. `line` tracks the
// current 1-based source line so every Token carries a position for error
// reporting; `pos` is the scan cursor and `length` caches data.size().
Tokenizer::Tokenizer(string s) {
    data = s;
    line = 0;
    pos = 0;
    length = 0;
}

// Copy constructor.
// FIX: the body was previously empty, so a copied Tokenizer had every member
// uninitialized; now performs a proper member-wise copy.
Tokenizer::Tokenizer(const Tokenizer& orig) {
    data = orig.data;
    line = orig.line;
    pos = orig.pos;
    length = orig.length;
}

Tokenizer::~Tokenizer() {
}

// Two-way lookahead helper: if the next character is `c`, emits `type1` and
// consumes it; otherwise emits `type2`. Used for pairs such as "!=" vs "!".
void Tokenizer::tokenize(TokenList& tokens, char c, TokenType type1, TokenType type2) {
    if(pos + 1 < length && data[pos + 1] == c) {
        tokens.add(new Token(type1, line));
        pos++;
    } else {
        tokens.add(new Token(type2, line));
    }
}

// Three-way lookahead helper: emits `type2` when followed by `c2`, `type3`
// when followed by `c3`, otherwise the bare `type1`.
// Used for e.g. '+' / "++" / "+=".
void Tokenizer::tokenize(TokenList& tokens, TokenType type1, char c2, TokenType type2, char c3, TokenType type3) {
    if(pos + 1 >= length) {
        tokens.add(new Token(type1, line));
    } else if(data[pos + 1] == c2) {
        tokens.add(new Token(type2, line));
        pos++;
    } else if(data[pos + 1] == c3) {
        tokens.add(new Token(type3, line));
        pos++;
    } else {
        tokens.add(new Token(type1, line));
    }
}

// Four-way lookahead helper for shift-style operators, e.g.
// '<' / "<<=" / "<<" / "<=": `type2` requires c2 then c3, `type3` just c2,
// `type4` just c4, and `type1` is the bare single-character operator.
void Tokenizer::tokenize(TokenList& tokens, TokenType type1, char c2, char c3, TokenType type2, TokenType type3, char c4, TokenType type4) {
    if(pos + 1 >= length) {
        tokens.add(new Token(type1, line));
    } else if(data[pos + 1] == c2) {
        if(pos + 2 < length && data[pos + 2] == c3) {
            tokens.add(new Token(type2, line));
            pos += 2;
        } else {
            tokens.add(new Token(type3, line));
            pos++;
        }
    } else if(data[pos + 1] == c4) {
        tokens.add(new Token(type4, line));
        pos++;
    } else {
        tokens.add(new Token(type1, line));
    }
}

// Main entry point: scans `data` from the start and appends one Token per
// lexeme to `tokens`, terminated by END_OF_FILE.
// Throws Exception for malformed float literals and unknown characters.
void Tokenizer::tokenize(TokenList& tokens) {
    line = 1;
    pos = 0;
    length = data.size();
    while(pos < length) {
        if(isLetter(data[pos])) {
            // Identifier or keyword: letter followed by name characters.
            int old = pos;
            pos++;
            while(pos < length && isAllowedInName(data[pos])) {
                pos++;
            }
            string s = data.substr(old, pos - old);
            if(s == "if") {tokens.add(new Token(Tokens::IF, line));}
            else if(s == "elseif") {tokens.add(new Token(Tokens::ELSE_IF, line));}
            else if(s == "else") {tokens.add(new Token(Tokens::ELSE, line));}
            else if(s == "for") {tokens.add(new Token(Tokens::FOR, line));}
            else if(s == "while") {tokens.add(new Token(Tokens::WHILE, line));}
            else if(s == "function") {tokens.add(new Token(Tokens::FUNCTION, line));}
            else if(s == "break") {tokens.add(new Token(Tokens::BREAK, line));}
            else if(s == "continue") {tokens.add(new Token(Tokens::CONTINUE, line));}
            else if(s == "return") {tokens.add(new Token(Tokens::RETURN, line));}
            else if(s == "try") {tokens.add(new Token(Tokens::TRY, line));}
            else if(s == "catch") {tokens.add(new Token(Tokens::CATCH, line));}
            else if(s == "true") {tokens.add(new Token(Tokens::TRUE, line));}
            else if(s == "false") {tokens.add(new Token(Tokens::FALSE, line));}
            else if(s == "null") {tokens.add(new Token(Tokens::TNULL, line));}
            else {
                // Not a keyword: plain variable name.
                Token* t = new Token(Tokens::VAR, line);
                t->setString(s);
                tokens.add(t);
            }
            pos--; // compensate for the unconditional pos++ at loop end
        } else if(isDigit(data[pos])) {
            // Number literal: digits with an optional fractional part.
            // All numbers are stored as FLOAT tokens.
            int old = pos;
            pos++;
            while(pos < length && isDigit(data[pos])) {
                pos++;
            }
            if(pos < length && data[pos] == '.') {
                pos++;
                while(pos < length && isDigit(data[pos])) {
                    pos++;
                }
            }
            string s = data.substr(old, pos - old);
            try {
                float f = stof(s);
                Token* t = new Token(Tokens::FLOAT, line);
                t->setFloat(f);
                tokens.add(t);
            // FIX: catch by const reference instead of by value (avoids an
            // unnecessary copy of the exception object).
            } catch(const std::out_of_range& ex) {
                throw Exception("invalid float", line);
            } catch(const std::invalid_argument& ex) {
                throw Exception("invalid float", line);
            }
            pos--; // compensate for the unconditional pos++ at loop end
        } else {
            switch(data[pos]) {
                case '@': {
                    // Label: '@' plus name characters; the stored string
                    // includes the leading '@' (old points at it).
                    int old = pos;
                    pos++;
                    while(pos < length && isAllowedInName(data[pos])) {
                        pos++;
                    }
                    string s = data.substr(old, pos - old);
                    Token* t = new Token(Tokens::LABEL, line);
                    t->setString(s);
                    tokens.add(t);
                    pos--;
                    break;
                }
                case '"': {
                    // String literal; no escape sequences are supported.
                    // NOTE(review): an unterminated string silently consumes
                    // to end of input (unchanged behavior).
                    pos++;
                    int old = pos;
                    int startLine = line;
                    while(pos < length && data[pos] != '"') {
                        // FIX: newlines inside a string previously did not
                        // advance `line`, so all following tokens carried
                        // wrong line numbers.
                        if(data[pos] == '\n') {
                            line++;
                        }
                        pos++;
                    }
                    string s = data.substr(old, pos - old);
                    // The token reports the line the literal started on.
                    Token* t = new Token(Tokens::TEXT, startLine);
                    t->setString(s);
                    tokens.add(t);
                    break;
                }
                case '/': {
                    // '/' begins a line comment, a block comment, "/=", or
                    // plain division.
                    if(pos + 1 >= length) {
                        tokens.add(new Token(Tokens::DIV, line));
                    } else {
                        switch(data[pos + 1]) {
                            case '/':
                                // Line comment: skip to the newline, then step
                                // back one so the '\n' is re-read and counted.
                                pos += 2;
                                while(pos < length && data[pos] != '\n') {
                                    pos++;
                                }
                                pos--;
                                break;
                            case '*':
                                // Block comment: skip to "*/", counting
                                // newlines along the way.
                                pos += 2;
                                while(pos + 1 < length && (data[pos] != '*' || data[pos + 1] != '/')) {
                                    if(data[pos] == '\n') {
                                        line++;
                                    }
                                    pos++;
                                }
                                pos++;
                                break;
                            case '=':
                                tokens.add(new Token(Tokens::DIV_SET, line));
                                pos++;
                                break;
                            default:
                                tokens.add(new Token(Tokens::DIV, line));
                        }
                    }
                    break;
                }
                case '<': tokenize(tokens, Tokens::LESS, '<', '=', Tokens::LEFT_SHIFT_SET, Tokens::LEFT_SHIFT, '=', Tokens::LESS_EQUAL); break;
                case '>': tokenize(tokens, Tokens::GREATER, '>', '=', Tokens::RIGHT_SHIFT_SET, Tokens::RIGHT_SHIFT, '=', Tokens::GREATER_EQUAL); break;
                case '&': tokenize(tokens, Tokens::BIT_AND, '&', Tokens::AND, '=', Tokens::BIT_AND_SET); break;
                case '|': tokenize(tokens, Tokens::BIT_OR, '|', Tokens::OR, '=', Tokens::BIT_OR_SET); break;
                case '+': tokenize(tokens, Tokens::ADD, '+', Tokens::INC, '=', Tokens::ADD_SET); break;
                case '-': tokenize(tokens, Tokens::SUB, '-', Tokens::DEC, '=', Tokens::SUB_SET); break;
                case '*': tokenize(tokens, '=', Tokens::MUL_SET, Tokens::MUL); break;
                case '\n': line++; break;
                case '!': tokenize(tokens, '=', Tokens::NOT_EQUAL, Tokens::INVERT); break;
                case '%': tokenize(tokens, '=', Tokens::MOD_SET, Tokens::MOD); break;
                case '=': tokenize(tokens, '=', Tokens::EQUAL, Tokens::SET); break;
                case '^': tokenize(tokens, '=', Tokens::BIT_XOR_SET, Tokens::BIT_XOR); break;
                case '~': tokens.add(new Token(Tokens::BIT_INVERT, line)); break;
                case ',': tokens.add(new Token(Tokens::COMMA, line)); break;
                case '(': tokens.add(new Token(Tokens::OPEN_BRACKET, line)); break;
                case ')': tokens.add(new Token(Tokens::CLOSE_BRACKET, line)); break;
                case '[': tokens.add(new Token(Tokens::OPEN_SQUARE_BRACKET, line)); break;
                case ']': tokens.add(new Token(Tokens::CLOSE_SQUARE_BRACKET, line)); break;
                case '{': tokens.add(new Token(Tokens::OPEN_CURVED_BRACKET, line)); break;
                case '}': tokens.add(new Token(Tokens::CLOSE_CURVED_BRACKET, line)); break;
                case ';': tokens.add(new Token(Tokens::SEMICOLON, line)); break;
                case '$': tokens.add(new Token(Tokens::GLOBAL, line)); break;
                // FIX: tabs and carriage returns previously fell through to
                // the default case and threw "invalid token"; treat them as
                // whitespace like the space character.
                case ' ':
                case '\t':
                case '\r':
                    break;
                default: throw Exception(string("invalid token ") + data[pos], line);
            }
        }
        pos++;
    }
    tokens.add(new Token(Tokens::END_OF_FILE, line));
}