package me.hammerle.snuviscript.token;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import me.hammerle.snuviscript.exceptions.PreScriptException;
import static me.hammerle.snuviscript.token.TokenType.*;

public class Tokenizer
|
|
public class Tokenizer
|
|
{
|
|
{
|
|
|
|
+ private StreamCharReader stream = null;
|
|
private final ArrayList<Token> tokens = new ArrayList<>();
|
|
private final ArrayList<Token> tokens = new ArrayList<>();
|
|
- private String code = "";
|
|
|
|
- private int index = 0;
|
|
|
|
- private int old = 0;
|
|
|
|
private int line = 1;
|
|
private int line = 1;
|
|
|
|
|
|
- private boolean isLiteralCharacter(char c)
|
|
+ private int next()
|
|
{
|
|
{
|
|
- return Character.isLetterOrDigit(c) || c == '_' || c == '.';
|
|
+ return stream.readChar();
|
|
}
|
|
}
|
|
|
|
|
|
- private char charAt(int index)
|
|
+ private int peek()
|
|
{
|
|
{
|
|
- if(index < 0 || index >= code.length())
|
|
+ return stream.peekChar();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private boolean next(char c)
|
|
|
|
+ {
|
|
|
|
+ if(peek() == c)
|
|
{
|
|
{
|
|
- return ' ';
|
|
+ next();
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
- return code.charAt(index);
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private void add(TokenType type)
|
|
|
|
+ {
|
|
|
|
+ tokens.add(new Token(type, line));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private void add(TokenType type, Object data)
|
|
|
|
+ {
|
|
|
|
+ tokens.add(new DataToken(type, line, data));
|
|
}
|
|
}
|
|
|
|
|
|
- private void add(int from, int to)
|
|
+ private void add(char c, TokenType t1, TokenType t2, TokenType t3, TokenType t4)
|
|
{
|
|
{
|
|
- String part = code.substring(from, to);
|
|
+ int peek = peek();
|
|
- try
|
|
+ if(peek == c)
|
|
|
|
+ {
|
|
|
|
+ next();
|
|
|
|
+ if(peek() == '=')
|
|
|
|
+ {
|
|
|
|
+ next();
|
|
|
|
+ add(t1);
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ add(t2);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ else if(peek == '=')
|
|
{
|
|
{
|
|
- double d = Double.parseDouble(part);
|
|
+ next();
|
|
- tokens.add(new DataToken(TokenType.NUMBER, line, d));
|
|
+ add(t3);
|
|
}
|
|
}
|
|
- catch(NumberFormatException ex)
|
|
+ else
|
|
{
|
|
{
|
|
- tokens.add(new DataToken(TokenType.LITERAL, line, part));
|
|
+ add(t4);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private void addIfNotEmpty(int from, int to)
|
|
+ public Token[] tokenize(InputStream in)
|
|
{
|
|
{
|
|
- if(to - from <= 0)
|
|
+ stream = new StreamCharReader(in);
|
|
|
|
+ tokens.clear();
|
|
|
|
+ line = 1;
|
|
|
|
+ int c;
|
|
|
|
+ while((c = Tokenizer.this.next()) != -1)
|
|
{
|
|
{
|
|
- return;
|
|
+ handleChar(c);
|
|
}
|
|
}
|
|
- add(from, to);
|
|
+ return tokens.toArray(new Token[tokens.size()]);
|
|
}
|
|
}
|
|
|
|
|
|
- private void skipOneLineComment()
|
|
+ private void handleChar(int c)
|
|
{
|
|
{
|
|
- while(index < code.length() && code.charAt(index) != '\n')
|
|
+ if(Character.isLetter(c) || c == '_')
|
|
|
|
+ {
|
|
|
|
+ handleLiteral(c, TokenType.LITERAL);
|
|
|
|
+ }
|
|
|
|
+ else if(Character.isDigit(c))
|
|
{
|
|
{
|
|
- index++;
|
|
+ handleNumber(c);
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ handleSpecial(c);
|
|
}
|
|
}
|
|
- line++;
|
|
|
|
- old = index + 1;
|
|
|
|
}
|
|
}
|
|
|
|
|
|
- private void skipComment()
|
|
+ private void handleLiteral(int c, TokenType type)
|
|
{
|
|
{
|
|
- while(index < code.length())
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
+ sb.append((char) c);
|
|
|
|
+
|
|
|
|
+ while(true)
|
|
{
|
|
{
|
|
- if(code.charAt(index) == '\n')
|
|
+ int data = peek();
|
|
- {
|
|
+ if(!Character.isLetterOrDigit(data) && data != '_')
|
|
- line++;
|
|
|
|
- }
|
|
|
|
- else if(code.charAt(index) == '*' && charAt(index + 1) == '/')
|
|
|
|
{
|
|
{
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
- index++;
|
|
+ sb.append((char) data);
|
|
|
|
+ next();
|
|
}
|
|
}
|
|
- old = index + 2;
|
|
+
|
|
- index++;
|
|
+ String s = sb.toString();
|
|
|
|
+ switch(s)
|
|
|
|
+ {
|
|
|
|
+ case "if": add(IF); break;
|
|
|
|
+ case "else": add(ELSE); break;
|
|
|
|
+ case "elseif": add(ELSEIF); break;
|
|
|
|
+ case "while": add(WHILE); break;
|
|
|
|
+ case "try": add(TRY); break;
|
|
|
|
+ case "catch": add(CATCH); break;
|
|
|
|
+ case "for": add(FOR); break;
|
|
|
|
+ case "function": add(FUNCTION); break;
|
|
|
|
+ case "break": add(BREAK); break;
|
|
|
|
+ case "continue": add(CONTINUE); break;
|
|
|
|
+ case "return": add(RETURN); break;
|
|
|
|
+ case "true": add(TRUE); break;
|
|
|
|
+ case "false": add(FALSE); break;
|
|
|
|
+ case "null": add(NULL); break;
|
|
|
|
+ default: add(type, s);
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
|
|
- private void handleString()
|
|
+ private void handleNumber(int c)
|
|
{
|
|
{
|
|
- addIfNotEmpty(old, index);
|
|
+ StringBuilder sb = new StringBuilder();
|
|
- old = index + 1;
|
|
+ sb.append((char) c);
|
|
- index++;
|
|
+
|
|
- while(index < code.length() && code.charAt(index) != '"')
|
|
+ while(true)
|
|
{
|
|
{
|
|
- if(code.charAt(index) == '\n')
|
|
+ int data = peek();
|
|
|
|
+ if(!Character.isLetterOrDigit(data) && data != '.')
|
|
{
|
|
{
|
|
- line++;
|
|
+ break;
|
|
}
|
|
}
|
|
- index++;
|
|
+ next();
|
|
|
|
+ sb.append((char) data);
|
|
}
|
|
}
|
|
- add(old, index);
|
|
+
|
|
- old = index + 1;
|
|
+ add(NUMBER, Double.parseDouble(sb.toString()));
|
|
}
|
|
}
|
|
|
|
|
|
- private void handleNewLine()
|
|
+ private void handleSpecial(int c)
|
|
{
|
|
{
|
|
- addIfNotEmpty(old, index);
|
|
+ switch(c)
|
|
- old = index + 1;
|
|
+ {
|
|
- line++;
|
|
+ case ' ':
|
|
|
|
+ case '\t':
|
|
|
|
+ case '\r': break;
|
|
|
|
+ case '\n': line++; break;
|
|
|
|
+ case '"': handleString(); break;
|
|
|
|
+ case '(': add(OPEN_BRACKET); break;
|
|
|
|
+ case ')': add(CLOSE_BRACKET); break;
|
|
|
|
+ case '[': add(OPEN_SQUARE_BRACKET); break;
|
|
|
|
+ case ']': add(CLOSE_SQUARE_BRACKET); break;
|
|
|
|
+ case '{': add(OPEN_CURVED_BRACKET); break;
|
|
|
|
+ case '}': add(CLOSE_CURVED_BRACKET); break;
|
|
|
|
+ case '$': handleLiteral(c, LITERAL); break;
|
|
|
|
+ case '@': handleLiteral(c, LABEL); break;
|
|
|
|
+ case ';': add(SEMICOLON); break;
|
|
|
|
+ case ',': add(COMMA); break;
|
|
|
|
+ case '~': add(BIT_INVERT); break;
|
|
|
|
+ case '+': add(next('=') ? ADD_SET : (next('+') ? INC : ADD)); break;
|
|
|
|
+ case '-': add(next('=') ? SUB_SET : (next('-') ? DEC : SUB)); break;
|
|
|
|
+ case '!': add(next('=') ? NOT_EQUAL : INVERT); break;
|
|
|
|
+ case '=': add(next('=') ? EQUAL : SET); break;
|
|
|
|
+ case '*': add(next('=') ? MUL_SET : MUL); break;
|
|
|
|
+ case '/': handleSlash(); break;
|
|
|
|
+ case '%': add(next('=') ? MOD_SET : MOD); break;
|
|
|
|
+ case '&': add(next('=') ? BIT_AND_SET : (next('&') ? AND : BIT_AND)); break;
|
|
|
|
+ case '|': add(next('=') ? BIT_OR_SET : (next('|') ? OR : BIT_OR)); break;
|
|
|
|
+ case '^': add(next('=') ? BIT_XOR_SET : BIT_XOR); break;
|
|
|
|
+ case '<': add('<', LEFT_SHIFT_SET, LEFT_SHIFT, LESS_EQUAL, LESS); break;
|
|
|
|
+ case '>': add('>', RIGHT_SHIFT_SET, RIGHT_SHIFT, GREATER_EQUAL, GREATER); break;
|
|
|
|
+ default: throw new PreScriptException("unknown token " + c, line);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
- private void handleSpace()
|
|
+ private void handleString()
|
|
{
|
|
{
|
|
- addIfNotEmpty(old, index);
|
|
+ StringBuilder sb = new StringBuilder();
|
|
- old = index + 1;
|
|
+ while(true)
|
|
|
|
+ {
|
|
|
|
+ int data = next();
|
|
|
|
+ if(data == '"')
|
|
|
|
+ {
|
|
|
|
+ add(STRING, sb.toString());
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ if(data == '\n')
|
|
|
|
+ {
|
|
|
|
+ line++;
|
|
|
|
+ }
|
|
|
|
+ sb.append((char) data);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
- private void handleSpecialCharacter()
|
|
+ private void handleSlash()
|
|
{
|
|
{
|
|
- addIfNotEmpty(old, index);
|
|
+ switch(peek())
|
|
-
|
|
|
|
- TokenType type = TokenType.getMatching(code.charAt(index), charAt(index + 1), charAt(index + 2));
|
|
|
|
- switch(type)
|
|
|
|
{
|
|
{
|
|
- case UNKNOWN: throw new PreScriptException("unknown token", line);
|
|
+ case '/':
|
|
- case ONE_LINE_COMMENT:
|
|
+ next();
|
|
- skipOneLineComment();
|
|
+ handleOneLineComment();
|
|
break;
|
|
break;
|
|
- case COMMENT:
|
|
+ case '*':
|
|
- skipComment();
|
|
+ next();
|
|
|
|
+ handleMultiLineComment();
|
|
|
|
+ break;
|
|
|
|
+ case '=':
|
|
|
|
+ next();
|
|
|
|
+ add(DIV_SET);
|
|
break;
|
|
break;
|
|
default:
|
|
default:
|
|
- tokens.add(new Token(type, line));
|
|
+ add(DIV);
|
|
- old = index + type.getLength();
|
|
|
|
- index = old - 1;
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- public Token[] tokenize(InputStream in)
|
|
+ private void handleOneLineComment()
|
|
{
|
|
{
|
|
- code = readStream(in);
|
|
+ while(true)
|
|
- index = 0;
|
|
|
|
- old = 0;
|
|
|
|
- tokens.clear();
|
|
|
|
-
|
|
|
|
- for(; index < code.length(); index++)
|
|
|
|
{
|
|
{
|
|
- char c = code.charAt(index);
|
|
+ int data = next();
|
|
- if(isLiteralCharacter(c))
|
|
+ if(data == -1 || data == '\n')
|
|
{
|
|
{
|
|
- continue;
|
|
+ line++;
|
|
- }
|
|
+ break;
|
|
- switch(c)
|
|
|
|
- {
|
|
|
|
- case '\n': handleNewLine(); break;
|
|
|
|
- case '"': handleString(); break;
|
|
|
|
- case '\t':
|
|
|
|
- case ' ': handleSpace(); break;
|
|
|
|
- default: handleSpecialCharacter();
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- return tokens.toArray(new Token[tokens.size()]);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
- private String readStream(InputStream in)
|
|
+ private void handleMultiLineComment()
|
|
{
|
|
{
|
|
- try
|
|
+ int first;
|
|
|
|
+ int sec = -1;
|
|
|
|
+ while(true)
|
|
{
|
|
{
|
|
- int bufferSize = in.available();
|
|
+ first = sec;
|
|
- StringBuilder sb = new StringBuilder(bufferSize);
|
|
+ sec = next();
|
|
-
|
|
+ if(sec == -1 || (first == '*' && sec == '/'))
|
|
- while(in.available() > 0)
|
|
|
|
{
|
|
{
|
|
- int data = in.read();
|
|
+ break;
|
|
- if((data & 0x80) != 0)
|
|
+ }
|
|
- {
|
|
+ if(sec == '\n')
|
|
- data = ((data & 0x1F) << 6) | (in.read() & 0x3F);
|
|
+ {
|
|
- }
|
|
+ line++;
|
|
- sb.append((char) data);
|
|
|
|
}
|
|
}
|
|
-
|
|
|
|
- return sb.toString();
|
|
|
|
- }
|
|
|
|
- catch(IOException ex)
|
|
|
|
- {
|
|
|
|
- throw new PreScriptException("cannot read stream - " + ex.getMessage(), -1);
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|