Tokenizer.java 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. package me.hammerle.snuviscript.token;
  2. import java.io.InputStream;
  3. import java.util.ArrayList;
  4. import me.hammerle.snuviscript.exceptions.PreScriptException;
  5. import static me.hammerle.snuviscript.token.TokenType.*;
  6. public class Tokenizer
  7. {
  8. private StreamCharReader stream = null;
  9. private final ArrayList<Token> tokens = new ArrayList<>();
  10. private int line = 1;
  11. private int next()
  12. {
  13. return stream.readChar();
  14. }
  15. private int peek()
  16. {
  17. return stream.peekChar();
  18. }
  19. private boolean next(char c)
  20. {
  21. if(peek() == c)
  22. {
  23. next();
  24. return true;
  25. }
  26. return false;
  27. }
  28. private void add(TokenType type)
  29. {
  30. tokens.add(new Token(type, line));
  31. }
  32. private void add(TokenType type, Object data)
  33. {
  34. tokens.add(new DataToken(type, line, data));
  35. }
  36. private void add(char c, TokenType t1, TokenType t2, TokenType t3, TokenType t4)
  37. {
  38. int peek = peek();
  39. if(peek == c)
  40. {
  41. next();
  42. if(peek() == '=')
  43. {
  44. next();
  45. add(t1);
  46. }
  47. else
  48. {
  49. add(t2);
  50. }
  51. }
  52. else if(peek == '=')
  53. {
  54. next();
  55. add(t3);
  56. }
  57. else
  58. {
  59. add(t4);
  60. }
  61. }
  62. public Token[] tokenize(InputStream in)
  63. {
  64. stream = new StreamCharReader(in);
  65. tokens.clear();
  66. line = 1;
  67. int c;
  68. while((c = Tokenizer.this.next()) != -1)
  69. {
  70. handleChar(c);
  71. }
  72. return tokens.toArray(new Token[tokens.size()]);
  73. }
  74. private void handleChar(int c)
  75. {
  76. if(Character.isLetter(c) || c == '_')
  77. {
  78. handleLiteral(c, TokenType.LITERAL);
  79. }
  80. else if(Character.isDigit(c))
  81. {
  82. handleNumber(c);
  83. }
  84. else
  85. {
  86. handleSpecial(c);
  87. }
  88. }
  89. private void handleLiteral(int c, TokenType type)
  90. {
  91. StringBuilder sb = new StringBuilder();
  92. sb.append((char) c);
  93. while(true)
  94. {
  95. int data = peek();
  96. if(!Character.isLetterOrDigit(data) && data != '_')
  97. {
  98. break;
  99. }
  100. sb.append((char) data);
  101. next();
  102. }
  103. String s = sb.toString();
  104. switch(s)
  105. {
  106. case "if": add(IF); break;
  107. case "else": add(ELSE); break;
  108. case "elseif": add(ELSEIF); break;
  109. case "while": add(WHILE); break;
  110. case "try": add(TRY); break;
  111. case "catch": add(CATCH); break;
  112. case "for": add(FOR); break;
  113. case "function": add(FUNCTION); break;
  114. case "break": add(BREAK); break;
  115. case "continue": add(CONTINUE); break;
  116. case "return": add(RETURN); break;
  117. case "true": add(TRUE); break;
  118. case "false": add(FALSE); break;
  119. case "null": add(NULL); break;
  120. default: add(type, s);
  121. }
  122. }
  123. private void handleNumber(int c)
  124. {
  125. StringBuilder sb = new StringBuilder();
  126. sb.append((char) c);
  127. while(true)
  128. {
  129. int data = peek();
  130. if(!Character.isLetterOrDigit(data) && data != '.')
  131. {
  132. break;
  133. }
  134. next();
  135. sb.append((char) data);
  136. }
  137. add(NUMBER, Double.parseDouble(sb.toString()));
  138. }
  139. private void handleSpecial(int c)
  140. {
  141. switch(c)
  142. {
  143. case ' ':
  144. case '\t':
  145. case '\r': break;
  146. case '\n': line++; break;
  147. case '"': handleString(); break;
  148. case '(': add(OPEN_BRACKET); break;
  149. case ')': add(CLOSE_BRACKET); break;
  150. case '[': add(OPEN_SQUARE_BRACKET); break;
  151. case ']': add(CLOSE_SQUARE_BRACKET); break;
  152. case '{': add(OPEN_CURVED_BRACKET); break;
  153. case '}': add(CLOSE_CURVED_BRACKET); break;
  154. case '$': handleLiteral(c, LITERAL); break;
  155. case '@': handleLiteral(c, LABEL); break;
  156. case ';': add(SEMICOLON); break;
  157. case ',': add(COMMA); break;
  158. case '~': add(BIT_INVERT); break;
  159. case '+': add(next('=') ? ADD_SET : (next('+') ? INC : ADD)); break;
  160. case '-': add(next('=') ? SUB_SET : (next('-') ? DEC : SUB)); break;
  161. case '!': add(next('=') ? NOT_EQUAL : INVERT); break;
  162. case '=': add(next('=') ? EQUAL : SET); break;
  163. case '*': add(next('=') ? MUL_SET : MUL); break;
  164. case '/': handleSlash(); break;
  165. case '%': add(next('=') ? MOD_SET : MOD); break;
  166. case '&': add(next('=') ? BIT_AND_SET : (next('&') ? AND : BIT_AND)); break;
  167. case '|': add(next('=') ? BIT_OR_SET : (next('|') ? OR : BIT_OR)); break;
  168. case '^': add(next('=') ? BIT_XOR_SET : BIT_XOR); break;
  169. case '<': add('<', LEFT_SHIFT_SET, LEFT_SHIFT, LESS_EQUAL, LESS); break;
  170. case '>': add('>', RIGHT_SHIFT_SET, RIGHT_SHIFT, GREATER_EQUAL, GREATER); break;
  171. default: throw new PreScriptException("unknown token " + c, line);
  172. }
  173. }
  174. private void handleString()
  175. {
  176. StringBuilder sb = new StringBuilder();
  177. while(true)
  178. {
  179. int data = next();
  180. if(data == '"')
  181. {
  182. add(STRING, sb.toString());
  183. break;
  184. }
  185. if(data == '\n')
  186. {
  187. line++;
  188. }
  189. sb.append((char) data);
  190. }
  191. }
  192. private void handleSlash()
  193. {
  194. switch(peek())
  195. {
  196. case '/':
  197. next();
  198. handleOneLineComment();
  199. break;
  200. case '*':
  201. next();
  202. handleMultiLineComment();
  203. break;
  204. case '=':
  205. next();
  206. add(DIV_SET);
  207. break;
  208. default:
  209. add(DIV);
  210. }
  211. }
  212. private void handleOneLineComment()
  213. {
  214. while(true)
  215. {
  216. int data = next();
  217. if(data == -1 || data == '\n')
  218. {
  219. line++;
  220. break;
  221. }
  222. }
  223. }
  224. private void handleMultiLineComment()
  225. {
  226. int first;
  227. int sec = -1;
  228. while(true)
  229. {
  230. first = sec;
  231. sec = next();
  232. if(sec == -1 || (first == '*' && sec == '/'))
  233. {
  234. break;
  235. }
  236. if(sec == '\n')
  237. {
  238. line++;
  239. }
  240. }
  241. }
  242. }