Tokenizer.java 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. package me.hammerle.snuviscript.tokenizer;
  2. import java.io.InputStream;
  3. import java.util.ArrayList;
  4. import me.hammerle.snuviscript.exceptions.PreScriptException;
  5. import static me.hammerle.snuviscript.tokenizer.TokenType.*;
  6. public class Tokenizer
  7. {
  8. private StreamCharReader stream = null;
  9. private final ArrayList<Token> tokens = new ArrayList<>();
  10. private int line = 1;
  11. private int next()
  12. {
  13. return stream.readChar();
  14. }
  15. private int peek()
  16. {
  17. return stream.peekChar();
  18. }
  19. private boolean next(char c)
  20. {
  21. if(peek() == c)
  22. {
  23. next();
  24. return true;
  25. }
  26. return false;
  27. }
  28. private void add(TokenType type)
  29. {
  30. tokens.add(new Token(type, line));
  31. }
  32. private void add(TokenType type, Object data)
  33. {
  34. tokens.add(new DataToken(type, line, data));
  35. }
  36. private void add(char c, TokenType t1, TokenType t2, TokenType t3, TokenType t4)
  37. {
  38. int peek = peek();
  39. if(peek == c)
  40. {
  41. next();
  42. if(peek() == '=')
  43. {
  44. next();
  45. add(t1);
  46. }
  47. else
  48. {
  49. add(t2);
  50. }
  51. }
  52. else if(peek == '=')
  53. {
  54. next();
  55. add(t3);
  56. }
  57. else
  58. {
  59. add(t4);
  60. }
  61. }
  62. public Token[] tokenize(InputStream... streams)
  63. {
  64. tokens.clear();
  65. for(InputStream in : streams)
  66. {
  67. line = 1;
  68. stream = new StreamCharReader(in);
  69. int c;
  70. while((c = Tokenizer.this.next()) != -1)
  71. {
  72. handleChar(c);
  73. }
  74. }
  75. add(EOF);
  76. return tokens.toArray(new Token[tokens.size()]);
  77. }
  78. private void handleChar(int c)
  79. {
  80. if(Character.isLetter(c) || c == '_' || c == '.')
  81. {
  82. handleLiteral(c, TokenType.LITERAL);
  83. }
  84. else if(Character.isDigit(c))
  85. {
  86. handleNumber(c);
  87. }
  88. else
  89. {
  90. handleSpecial(c);
  91. }
  92. }
  93. private void handleLiteral(int c, TokenType type)
  94. {
  95. StringBuilder sb = new StringBuilder();
  96. sb.append((char) c);
  97. while(true)
  98. {
  99. int data = peek();
  100. if(!Character.isLetterOrDigit(data) && data != '_' && data != '.')
  101. {
  102. break;
  103. }
  104. sb.append((char) data);
  105. next();
  106. }
  107. String s = sb.toString();
  108. switch(s)
  109. {
  110. case "if": add(IF); break;
  111. case "else": add(ELSE); break;
  112. case "elseif": add(ELSEIF); break;
  113. case "while": add(WHILE); break;
  114. case "try": add(TRY); break;
  115. case "catch": add(CATCH); break;
  116. case "for": add(FOR); break;
  117. case "function": add(FUNCTION); break;
  118. case "break": add(BREAK); break;
  119. case "continue": add(CONTINUE); break;
  120. case "return": add(RETURN); break;
  121. case "true": add(TRUE); break;
  122. case "false": add(FALSE); break;
  123. case "null": add(NULL); break;
  124. default: add(type, s);
  125. }
  126. }
  127. private void handleNumber(int c)
  128. {
  129. StringBuilder sb = new StringBuilder();
  130. sb.append((char) c);
  131. while(true)
  132. {
  133. int data = peek();
  134. if(!Character.isLetterOrDigit(data) && data != '.')
  135. {
  136. break;
  137. }
  138. next();
  139. sb.append((char) data);
  140. }
  141. add(NUMBER, Double.parseDouble(sb.toString()));
  142. }
  143. private void handleSpecial(int c)
  144. {
  145. switch(c)
  146. {
  147. case ' ':
  148. case '\t':
  149. case '\r': break;
  150. case '\n': line++; break;
  151. case '"': handleString(); break;
  152. case '(': add(OPEN_BRACKET); break;
  153. case ')': add(CLOSE_BRACKET); break;
  154. case '[': add(OPEN_SQUARE_BRACKET); break;
  155. case ']': add(CLOSE_SQUARE_BRACKET); break;
  156. case '{': add(OPEN_CURVED_BRACKET); break;
  157. case '}': add(CLOSE_CURVED_BRACKET); break;
  158. case '$': handleLiteral(c, LITERAL); break;
  159. case '@': handleLiteral(c, LABEL); break;
  160. case ';': add(SEMICOLON); break;
  161. case ',': add(COMMA); break;
  162. case '~': add(BIT_INVERT); break;
  163. case '+': add(next('=') ? ADD_SET : (next('+') ? INC : ADD)); break;
  164. case '-': add(next('=') ? SUB_SET : (next('-') ? DEC : SUB)); break;
  165. case '!': add(next('=') ? NOT_EQUAL : INVERT); break;
  166. case '=': add(next('=') ? EQUAL : SET); break;
  167. case '*': add(next('=') ? MUL_SET : MUL); break;
  168. case '/': handleSlash(); break;
  169. case '%': add(next('=') ? MOD_SET : MOD); break;
  170. case '&': add(next('=') ? BIT_AND_SET : (next('&') ? AND : BIT_AND)); break;
  171. case '|': add(next('=') ? BIT_OR_SET : (next('|') ? OR : BIT_OR)); break;
  172. case '^': add(next('=') ? BIT_XOR_SET : BIT_XOR); break;
  173. case '<': add('<', LEFT_SHIFT_SET, LEFT_SHIFT, LESS_EQUAL, LESS); break;
  174. case '>': add('>', RIGHT_SHIFT_SET, RIGHT_SHIFT, GREATER_EQUAL, GREATER); break;
  175. default: throw new PreScriptException("unknown token " + c, line);
  176. }
  177. }
  178. private void handleString()
  179. {
  180. StringBuilder sb = new StringBuilder();
  181. while(true)
  182. {
  183. int data = next();
  184. if(data == '"')
  185. {
  186. add(STRING, sb.toString());
  187. break;
  188. }
  189. if(data == '\n')
  190. {
  191. line++;
  192. }
  193. sb.append((char) data);
  194. }
  195. }
  196. private void handleSlash()
  197. {
  198. switch(peek())
  199. {
  200. case '/':
  201. next();
  202. handleOneLineComment();
  203. break;
  204. case '*':
  205. next();
  206. handleMultiLineComment();
  207. break;
  208. case '=':
  209. next();
  210. add(DIV_SET);
  211. break;
  212. default:
  213. add(DIV);
  214. }
  215. }
  216. private void handleOneLineComment()
  217. {
  218. while(true)
  219. {
  220. int data = next();
  221. if(data == -1 || data == '\n')
  222. {
  223. line++;
  224. break;
  225. }
  226. }
  227. }
  228. private void handleMultiLineComment()
  229. {
  230. int first;
  231. int sec = -1;
  232. while(true)
  233. {
  234. first = sec;
  235. sec = next();
  236. if(sec == -1 || (first == '*' && sec == '/'))
  237. {
  238. break;
  239. }
  240. if(sec == '\n')
  241. {
  242. line++;
  243. }
  244. }
  245. }
  246. }