Compiler.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. #include "Compiler.h"
  2. #include <errno.h>
  3. #include <setjmp.h>
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "Code.h"
  8. #include "Constants.h"
  9. static Error error = {};
  10. static FILE* file = nullptr;
  11. static size_t lineCounter = 0;
  12. static jmp_buf jumpPosition = {};
  13. static u8 tokens[MAX_TOKENS] = {};
  14. static size_t tokenReadIndex = 0;
  15. static size_t tokenWriteIndex = 0;
  16. typedef enum : u8 { LITERAL, INT64, DOUBLE, STRING, PLUS, NEWLINE, END } Token;
  17. #define THROW_ERROR(...) \
  18. snprintf(error.text, sizeof(error.text), __VA_ARGS__); \
  19. longjmp(jumpPosition, 1)
  20. #define CODE(command) \
  21. if(command) { \
  22. THROW_ERROR("Code overflow on line %zu", lineCounter); \
  23. }
  24. static void cleanup() {
  25. if(file != nullptr) {
  26. fclose(file);
  27. file = nullptr;
  28. }
  29. }
  30. static void reset() {
  31. error.text[0] = '\0';
  32. lineCounter = 0;
  33. tokenReadIndex = 0;
  34. tokenWriteIndex = 0;
  35. }
  36. static void addTokenN(const void* p, size_t n) {
  37. if(tokenWriteIndex + n > MAX_TOKENS) {
  38. THROW_ERROR("Line %zu has too much tokens", lineCounter);
  39. }
  40. memcpy(tokens + tokenWriteIndex, p, n);
  41. tokenWriteIndex += n;
  42. }
  43. static void addToken(Token t) {
  44. addTokenN(&t, sizeof(t));
  45. }
  46. static void addChar(char c) {
  47. addTokenN(&c, sizeof(c));
  48. }
  49. static bool isLetter(char c) {
  50. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  51. }
  52. static bool isNumber(char c) {
  53. return c >= '0' && c <= '9';
  54. }
  55. static bool isAlphaNumeric(char c) {
  56. return isLetter(c) || isNumber(c);
  57. }
  58. [[noreturn]] static void invalidToken(char c, int line) {
  59. THROW_ERROR("Unexpected token '%c' on line %zu %d", c, lineCounter, line);
  60. }
  61. static bool isTokenEnd(char c) {
  62. return c == ' ' || c == '\0' || c == '\n';
  63. }
  64. static const char* tokenizeLiteral(const char* s) {
  65. addToken(LITERAL);
  66. addChar(*s);
  67. while(true) {
  68. char c = *(++s);
  69. if(isAlphaNumeric(c)) {
  70. addChar(c);
  71. } else if(isTokenEnd(c)) {
  72. break;
  73. } else {
  74. invalidToken(c, __LINE__);
  75. }
  76. }
  77. addChar('\0');
  78. return s;
  79. }
  80. [[noreturn]] static void invalidNumber() {
  81. THROW_ERROR("Invalid number on line %zu", lineCounter);
  82. }
  83. static const char* tokenizeNumber(const char* s) {
  84. size_t nIndex = 0;
  85. char number[64] = {};
  86. number[nIndex++] = *s;
  87. while(true) {
  88. char c = *(++s);
  89. if(isTokenEnd(c)) {
  90. break;
  91. } else if(!isNumber(c) && c != '.') {
  92. invalidToken(c, __LINE__);
  93. } else if(nIndex >= sizeof(number) - 1) {
  94. invalidNumber();
  95. }
  96. number[nIndex++] = c;
  97. }
  98. char* end = nullptr;
  99. errno = 0;
  100. i64 i = strtoll(number, &end, 10);
  101. if(errno != 0) {
  102. invalidNumber();
  103. } else if(*end == '\0') {
  104. addToken(INT64);
  105. addTokenN(&i, sizeof(i));
  106. return s;
  107. }
  108. double d = strtod(number, &end);
  109. if(errno != 0) {
  110. invalidNumber();
  111. } else if(*end != '\0') {
  112. invalidNumber();
  113. }
  114. addToken(DOUBLE);
  115. addTokenN(&d, sizeof(d));
  116. return s;
  117. }
  118. static const char* tokenizeString(const char* s) {
  119. addToken(STRING);
  120. while(true) {
  121. char c = *(++s);
  122. if(c == '\0') {
  123. THROW_ERROR("Unclosed string on line %zu", lineCounter);
  124. } else if(c == '"') {
  125. s++;
  126. break;
  127. }
  128. addChar(c);
  129. }
  130. addChar('\0');
  131. return s;
  132. }
  133. static void tokenizeLineString(const char* s) {
  134. while(true) {
  135. char c = *s;
  136. if(isLetter(c)) {
  137. s = tokenizeLiteral(s);
  138. } else if(isNumber(c)) {
  139. s = tokenizeNumber(s);
  140. } else if(c == '"') {
  141. s = tokenizeString(s);
  142. } else if(c == '\n') {
  143. addToken(NEWLINE);
  144. break;
  145. } else if(c == ' ') {
  146. s++;
  147. } else if(c == '+') {
  148. addToken(PLUS);
  149. s++;
  150. } else if(c == '\0') {
  151. break;
  152. } else {
  153. invalidToken(c, __LINE__);
  154. }
  155. }
  156. }
  157. static void tokenizeLine() {
  158. tokenReadIndex = 0;
  159. tokenWriteIndex = 0;
  160. lineCounter++;
  161. char line[256] = {};
  162. if(fgets(line, sizeof(line), file) == nullptr) {
  163. return;
  164. }
  165. char c = line[sizeof(line) - 2];
  166. if(c != '\n' && c != '\0') {
  167. THROW_ERROR("Line %zu is too long", lineCounter);
  168. }
  169. tokenizeLineString(line);
  170. }
  171. static Token peekToken() {
  172. if(tokenReadIndex < tokenWriteIndex) {
  173. return tokens[tokenReadIndex];
  174. }
  175. tokenizeLine();
  176. return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex] : END;
  177. }
  178. static Token nextToken() {
  179. if(tokenReadIndex < tokenWriteIndex) {
  180. return tokens[tokenReadIndex++];
  181. }
  182. tokenizeLine();
  183. return tokenReadIndex < tokenWriteIndex ? tokens[tokenReadIndex++] : END;
  184. }
  185. static const char* peekLiteral() {
  186. Token t = peekToken();
  187. if(t != LITERAL) {
  188. return nullptr;
  189. }
  190. return (char*)(tokens + tokenReadIndex + sizeof(Token));
  191. }
  192. static const char* readString() {
  193. if(tokenReadIndex >= tokenWriteIndex) {
  194. THROW_ERROR("readString on empty buffer, line %zu", lineCounter);
  195. }
  196. const char* c = (char*)(tokens + tokenReadIndex);
  197. while(tokenReadIndex < tokenWriteIndex && tokens[tokenReadIndex] != '\0') {
  198. tokenReadIndex++;
  199. }
  200. tokenReadIndex++;
  201. return c;
  202. }
  203. static i64 readInt64() {
  204. if(tokenReadIndex + sizeof(i64) >= tokenWriteIndex) {
  205. THROW_ERROR("readInt64 on empty buffer, line %zu", lineCounter);
  206. }
  207. i64 i = 0;
  208. memcpy(&i, tokens + tokenReadIndex, sizeof(i));
  209. tokenReadIndex += sizeof(i);
  210. return i;
  211. }
  212. static double readDouble() {
  213. if(tokenReadIndex + sizeof(double) >= tokenWriteIndex) {
  214. THROW_ERROR("readDouble on empty buffer, line %zu", lineCounter);
  215. }
  216. double d = 0;
  217. memcpy(&d, tokens + tokenReadIndex, sizeof(d));
  218. tokenReadIndex += sizeof(d);
  219. return d;
  220. }
  221. [[noreturn]] static void unexpectedToken(Token t) {
  222. switch(t) {
  223. case LITERAL:
  224. THROW_ERROR(
  225. "Unexpected literal(%s) on line %zu", readString(),
  226. lineCounter);
  227. break;
  228. case INT64:
  229. THROW_ERROR(
  230. "Unexpected int(%ld) on line %zu", readInt64(), lineCounter);
  231. break;
  232. case DOUBLE:
  233. THROW_ERROR(
  234. "Unexpected double(%lf) on line %zu", readDouble(),
  235. lineCounter);
  236. break;
  237. case STRING:
  238. THROW_ERROR(
  239. "Unexpected string(%s) on line %zu", readString(), lineCounter);
  240. break;
  241. case PLUS:
  242. THROW_ERROR("Unexpected plus on line %zu", lineCounter);
  243. break;
  244. case NEWLINE:
  245. THROW_ERROR("Unexpected newline on line %zu", lineCounter);
  246. break;
  247. case END: THROW_ERROR("Unexpected end on line %zu", lineCounter); break;
  248. }
  249. THROW_ERROR("Unexpected unknown token on line %zu", lineCounter);
  250. }
  251. static void consumeToken(Token t) {
  252. Token actual = nextToken();
  253. if(t != actual) {
  254. unexpectedToken(actual);
  255. }
  256. }
  257. static void consumeLiteral(const char* name) {
  258. consumeToken(LITERAL);
  259. const char* actual = readString();
  260. if(strcmp(actual, name) != 0) {
  261. THROW_ERROR("Unexpected literal(%s) on line %zu", actual, lineCounter);
  262. }
  263. }
  264. static void compileConstant() {
  265. Token t = nextToken();
  266. if(t == STRING) {
  267. CODE(codePushInstruction(PUSH_CONSTANT_STRING));
  268. CODE(codePushConstantString(readString()));
  269. } else if(t == INT64) {
  270. CODE(codePushInstruction(PUSH_INT64));
  271. CODE(codePushI64(readInt64()));
  272. } else {
  273. unexpectedToken(t);
  274. }
  275. }
  276. static void compileAdd() {
  277. compileConstant();
  278. while(peekToken() == PLUS) {
  279. nextToken();
  280. compileConstant();
  281. CODE(codePushInstruction(ADD));
  282. }
  283. }
  284. static void compileExpression() {
  285. compileAdd();
  286. }
  287. static void compileLine(Token t);
  288. static void compileIf() {
  289. compileExpression();
  290. CODE(codePushInstruction(JUMP_ON_0));
  291. size_t posIndex = codeGetWritePosition();
  292. CODE(codePushSize(0));
  293. consumeLiteral("then");
  294. consumeToken(NEWLINE);
  295. while(true) {
  296. const char* s = peekLiteral();
  297. if(s == nullptr || strcmp(s, "endif") != 0) {
  298. compileLine(nextToken());
  299. continue;
  300. } else {
  301. break;
  302. }
  303. }
  304. consumeLiteral("endif");
  305. consumeToken(NEWLINE);
  306. size_t endIndex = codeGetWritePosition();
  307. codeSetWritePosition(posIndex);
  308. CODE(codePushSize(endIndex));
  309. codeSetWritePosition(endIndex);
  310. }
  311. static void compileLine(Token t) {
  312. if(t == NEWLINE) {
  313. return;
  314. }
  315. if(t != LITERAL) {
  316. unexpectedToken(t);
  317. }
  318. const char* s = readString();
  319. if(strcmp(s, "print") == 0) {
  320. while(peekToken() != NEWLINE) {
  321. compileExpression();
  322. CODE(codePushInstruction(PRINT));
  323. }
  324. nextToken();
  325. CODE(codePushInstruction(PRINT_NEWLINE));
  326. } else if(strcmp(s, "if") == 0) {
  327. compileIf();
  328. } else {
  329. THROW_ERROR("Unexpected literal(%s) on line %zu", s, lineCounter);
  330. }
  331. }
  332. static void parseTokens() {
  333. codeReset();
  334. while(true) {
  335. Token t = nextToken();
  336. if(t == END) {
  337. break;
  338. }
  339. compileLine(t);
  340. }
  341. }
  342. const Error* compileFile(const char* path) {
  343. reset();
  344. if(setjmp(jumpPosition)) {
  345. cleanup();
  346. return &error;
  347. }
  348. file = fopen(path, "r");
  349. if(file == nullptr) {
  350. THROW_ERROR("Cannot read file '%s'", path);
  351. } else {
  352. parseTokens();
  353. }
  354. cleanup();
  355. return &error;
  356. }