Compiler.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. #include "Compiler.h"
  2. #include <setjmp.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "Code.h"
  7. #include "Memory.h"
  8. typedef struct Variable Variable;
  9. struct Variable {
  10. Variable* next;
  11. i32 index;
  12. char name[];
  13. };
  14. typedef struct Function Function;
  15. struct Function {
  16. Function* next;
  17. i32 address;
  18. i32 valueAddress;
  19. i32 arguments;
  20. char name[];
  21. };
  22. typedef struct Return Return;
  23. struct Return {
  24. Return* next;
  25. i32 address;
  26. bool hasArgument;
  27. };
  28. typedef struct {
  29. Tokenizer* tokenizer;
  30. Code* code;
  31. Error error;
  32. int line;
  33. jmp_buf jump;
  34. Variable* variables;
  35. Function* functions;
  36. Return* returns;
  37. bool inFunction;
  38. } Context;
  // Formats "Line <n> | <message>" into c->error.text and unwinds to the
  // recovery point stored in c->jump; control never continues after the macro.
  // Requires a `Context* c` in the calling scope and a string literal as
  // format. Both statements are wrapped in do/while so that they stay together
  // when the macro is the body of an unbraced if/else.
  #define THROW_ERROR(format, ...)                                           \
      do {                                                                   \
          snprintf(                                                          \
              c->error.text, sizeof(c->error.text), "Line %d | " format,     \
              c->line __VA_OPT__(, ) __VA_ARGS__);                           \
          longjmp(c->jump, 1);                                               \
      } while(false)
  44. [[noreturn]] static void throwOutOfCodeMemory(Context* c) {
  45. THROW_ERROR("Too less memory for code");
  46. }
  47. static void codePushInstruction(Context* c, Instruction i) {
  48. if(bufferWriteU8(&c->code->code, i)) {
  49. throwOutOfCodeMemory(c);
  50. }
  51. }
  52. static void codePushI32(Context* c, i32 i) {
  53. if(bufferWriteI32(&c->code->code, i)) {
  54. throwOutOfCodeMemory(c);
  55. }
  56. }
  57. static void codePushConstantString(Context* c, const char* s) {
  58. if(bufferWriteString(&c->code->code, s)) {
  59. throwOutOfCodeMemory(c);
  60. }
  61. }
  62. static size_t codeGetWritePosition(const Context* c) {
  63. return c->code->code.writeIndex;
  64. }
  65. static size_t codePushInstructionI32(Context* c, Instruction i, i32 v) {
  66. codePushInstruction(c, i);
  67. size_t pos = codeGetWritePosition(c);
  68. codePushI32(c, v);
  69. return pos;
  70. }
  71. static void codeRewriteI32(Context* c, size_t pos, i32 i) {
  72. size_t oldPos = codeGetWritePosition(c);
  73. c->code->code.writeIndex = pos;
  74. codePushI32(c, i);
  75. c->code->code.writeIndex = oldPos;
  76. }
  77. static bool hasLocalVar(const Context* c) {
  78. return c->variables != nullptr && c->variables->index < 0;
  79. }
  80. static i32 addVariable(Context* c, const char* name, bool forceGlobal) {
  81. for(Variable* v = c->variables; v != nullptr; v = v->next) {
  82. if(!forceGlobal && c->inFunction && v->index >= 0) {
  83. break;
  84. }
  85. if(strcmp(v->name, name) == 0) {
  86. return v->index;
  87. }
  88. }
  89. size_t l = strlen(name) + 1;
  90. Variable* v = memoryAllocate(sizeof(Variable) + l);
  91. if(v == nullptr) {
  92. THROW_ERROR("Too less memory for variables");
  93. }
  94. v->next = c->variables;
  95. if(c->inFunction) {
  96. v->index = hasLocalVar(c) ? c->variables->index - 1 : -1;
  97. } else {
  98. v->index = c->variables != nullptr ? c->variables->index + 1 : 0;
  99. }
  100. memcpy(v->name, name, l);
  101. c->variables = v;
  102. return v->index;
  103. }
  104. static void addRawFunction(
  105. Context* c, const char* name, i32 address, i32 valueAddress,
  106. i32 arguments) {
  107. size_t l = strlen(name) + 1;
  108. Function* f = memoryAllocate(sizeof(Function) + l);
  109. if(f == nullptr) {
  110. THROW_ERROR("Too less memory for functions");
  111. }
  112. f->next = c->functions;
  113. f->address = address;
  114. f->valueAddress = valueAddress;
  115. f->arguments = arguments;
  116. memcpy(f->name, name, l);
  117. c->functions = f;
  118. }
  119. static void addFunction(
  120. Context* c, const char* name, i32 address, i32 arguments) {
  121. for(Function* f = c->functions; f != nullptr; f = f->next) {
  122. if(strcmp(f->name, name) != 0) {
  123. continue;
  124. } else if(f->address >= 0) {
  125. THROW_ERROR("Function '%s' registered again", name);
  126. } else if(f->arguments != arguments) {
  127. THROW_ERROR("Function '%s' with inconsistent arguments", name);
  128. }
  129. f->address = address;
  130. codeRewriteI32(c, (size_t)f->valueAddress, address);
  131. }
  132. addRawFunction(c, name, address, -1, arguments);
  133. }
  134. static i32 addCallFunction(
  135. Context* c, const char* name, i32 address, i32 arguments) {
  136. for(Function* f = c->functions; f != nullptr; f = f->next) {
  137. if(strcmp(f->name, name) == 0 && f->address >= 0) {
  138. if(f->arguments != arguments) {
  139. THROW_ERROR("Function '%s' with inconsistent arguments", name);
  140. }
  141. return f->address;
  142. }
  143. }
  144. addRawFunction(c, name, -1, address, arguments);
  145. return -1;
  146. }
  147. static void checkForInvalidFunctions(Context* c) {
  148. for(Function* f = c->functions; f != nullptr; f = f->next) {
  149. if(f->address < 0) {
  150. THROW_ERROR("Unmapped function call '%s'", f->name);
  151. }
  152. }
  153. }
  154. static void addReturn(Context* c, i32 address, bool hasArgument) {
  155. if(c->returns != nullptr && c->returns->hasArgument != hasArgument) {
  156. THROW_ERROR("Inconsistent returns in function");
  157. }
  158. Return* r = memoryAllocate(sizeof(Return));
  159. if(r == nullptr) {
  160. THROW_ERROR("Too less memory for returns");
  161. }
  162. r->next = c->returns;
  163. r->hasArgument = hasArgument;
  164. r->address = address;
  165. c->returns = r;
  166. }
  // Frees every node of the linked list c->field (nodes of type Type with a
  // next member) and resets the list head to nullptr. Requires a `Context* c`
  // in the calling scope.
  #define CLEAN_LIST(Type, field)                                 \
      do {                                                        \
          for(Type* node = c->field; node != nullptr;) {          \
              Type* following = node->next;                       \
              memoryFree(node);                                   \
              node = following;                                   \
          }                                                       \
          c->field = nullptr;                                     \
      } while(false)
  177. static void cleanReturns(Context* c) {
  178. CLEAN_LIST(Return, returns);
  179. }
  180. static void cleanContext(Context* c) {
  181. CLEAN_LIST(Variable, variables);
  182. CLEAN_LIST(Function, functions);
  183. cleanReturns(c);
  184. }
  185. [[noreturn]] static void unexpectedToken(Context* c, Token token, int line) {
  186. char buffer[128];
  187. tokenizerPrintToken(&token, buffer, sizeof(buffer));
  188. THROW_ERROR("Unexpected %s token %d", buffer, line);
  189. }
  190. static Token consumeToken(Context* c, TokenType type) {
  191. Token actual = tokenizerNext(c->tokenizer);
  192. if(actual.type != type) {
  193. unexpectedToken(c, actual, __LINE__);
  194. }
  195. return actual;
  196. }
  197. static bool peekToken(Context* c, TokenType type) {
  198. return tokenizerPeek(c->tokenizer).type == type;
  199. }
  200. static bool consumeTokenIf(Context* c, TokenType type) {
  201. if(peekToken(c, type)) {
  202. consumeToken(c, type);
  203. return true;
  204. }
  205. return false;
  206. }
  207. static void consumeNewline(Context* c) {
  208. if(peekToken(c, TT_INVALID)) {
  209. return;
  210. }
  211. consumeToken(c, TT_NEWLINE);
  212. c->line++;
  213. }
  214. static void compileReadVariable(Context* c, Token token, bool forceGlobal) {
  215. i32 index = addVariable(c, token.stringValue, forceGlobal);
  216. codePushInstructionI32(c, READ_VARIABLE, index);
  217. }
  218. static void compileCallFunction(Context* c, Token t);
  219. static void compileConstant(Context* c) {
  220. Token token = tokenizerNext(c->tokenizer);
  221. if(token.type == TT_STRING) {
  222. codePushInstruction(c, PUSH_CONSTANT_STRING);
  223. codePushConstantString(c, token.stringValue);
  224. } else if(token.type == TT_INT32) {
  225. codePushInstructionI32(c, PUSH_INT32, token.intValue);
  226. } else if(token.type == TT_LITERAL) {
  227. if(peekToken(c, TT_OPEN_ROUND_BRACKET)) {
  228. compileCallFunction(c, token);
  229. } else {
  230. compileReadVariable(c, token, false);
  231. }
  232. } else if(token.type == TT_DOLLAR) {
  233. token = consumeToken(c, TT_LITERAL);
  234. compileReadVariable(c, token, true);
  235. } else {
  236. unexpectedToken(c, token, __LINE__);
  237. }
  238. }
  239. static void compileUnary(Context* c) {
  240. if(consumeTokenIf(c, TT_NOT)) {
  241. compileUnary(c);
  242. codePushInstruction(c, NOT);
  243. } else {
  244. compileConstant(c);
  245. }
  246. }
  247. static void compileMul(Context* c) {
  248. compileUnary(c);
  249. while(true) {
  250. if(consumeTokenIf(c, TT_MULTIPLY)) {
  251. compileUnary(c);
  252. codePushInstruction(c, MUL);
  253. } else if(consumeTokenIf(c, TT_DIVIDE)) {
  254. compileUnary(c);
  255. codePushInstruction(c, DIV);
  256. } else {
  257. break;
  258. }
  259. }
  260. }
  261. static void compileAdd(Context* c) {
  262. compileMul(c);
  263. while(true) {
  264. if(consumeTokenIf(c, TT_ADD)) {
  265. compileMul(c);
  266. codePushInstruction(c, ADD);
  267. } else if(consumeTokenIf(c, TT_SUBTRACT)) {
  268. compileMul(c);
  269. codePushInstruction(c, SUB);
  270. } else {
  271. break;
  272. }
  273. }
  274. }
  275. static void compileCompare(Context* c) {
  276. compileAdd(c);
  277. while(true) {
  278. if(consumeTokenIf(c, TT_EQUAL)) {
  279. compileAdd(c);
  280. codePushInstruction(c, EQUAL);
  281. } else if(consumeTokenIf(c, TT_NOT_EQUAL)) {
  282. compileAdd(c);
  283. codePushInstruction(c, NOT_EQUAL);
  284. } else if(consumeTokenIf(c, TT_GREATER)) {
  285. compileAdd(c);
  286. codePushInstruction(c, GREATER);
  287. } else if(consumeTokenIf(c, TT_SMALLER)) {
  288. compileAdd(c);
  289. codePushInstruction(c, SMALLER);
  290. } else if(consumeTokenIf(c, TT_GREATER_OR_EQUAL)) {
  291. compileAdd(c);
  292. codePushInstruction(c, GREATER_OR_EQUAL);
  293. } else if(consumeTokenIf(c, TT_SMALLER_OR_EQUAL)) {
  294. compileAdd(c);
  295. codePushInstruction(c, SMALLER_OR_EQUAL);
  296. } else {
  297. break;
  298. }
  299. }
  300. }
  301. static void compileLogical(Context* c) {
  302. compileCompare(c);
  303. while(true) {
  304. if(consumeTokenIf(c, TT_AND)) {
  305. compileCompare(c);
  306. codePushInstruction(c, AND);
  307. } else if(consumeTokenIf(c, TT_OR)) {
  308. compileCompare(c);
  309. codePushInstruction(c, OR);
  310. } else {
  311. break;
  312. }
  313. }
  314. }
  315. static void compileExpression(Context* c) {
  316. compileLogical(c);
  317. }
  318. static void compileLine(Context* c, Token token);
  319. static void compileIf(Context* c) {
  320. compileExpression(c);
  321. size_t posIndex = codePushInstructionI32(c, JUMP_ON_0, 0);
  322. consumeNewline(c);
  323. while(!peekToken(c, TT_END)) {
  324. compileLine(c, tokenizerNext(c->tokenizer));
  325. }
  326. consumeToken(c, TT_END);
  327. consumeNewline(c);
  328. codeRewriteI32(c, posIndex, (i32)codeGetWritePosition(c));
  329. }
  330. static void compileSetVariable(Context* c, const char* name, bool forceGlobal) {
  331. consumeToken(c, TT_ASSIGN);
  332. compileExpression(c);
  333. codePushInstructionI32(c, SET_VARIABLE, addVariable(c, name, forceGlobal));
  334. }
  335. static i32 compileFunctionArguments(Context* c) {
  336. consumeToken(c, TT_OPEN_ROUND_BRACKET);
  337. i32 vars = 0;
  338. while(!peekToken(c, TT_CLOSE_ROUND_BRACKET)) {
  339. if(vars > 0) {
  340. consumeToken(c, TT_COMMA);
  341. }
  342. Token varToken = consumeToken(c, TT_LITERAL);
  343. addVariable(c, varToken.stringValue, false);
  344. vars++;
  345. }
  346. consumeToken(c, TT_CLOSE_ROUND_BRACKET);
  347. return vars;
  348. }
  349. static void rewriteReturns(Context* c, i32 address) {
  350. for(Return* r = c->returns; r != nullptr; r = r->next) {
  351. codeRewriteI32(c, (size_t)r->address, address);
  352. }
  353. cleanReturns(c);
  354. }
  355. static void compileFunction(Context* c) {
  356. if(c->inFunction) {
  357. THROW_ERROR("Functions in functions are not allowed");
  358. }
  359. c->inFunction = true;
  360. size_t functionEnd = codePushInstructionI32(c, JUMP, 0);
  361. i32 functionStart = (i32)codeGetWritePosition(c);
  362. Token t = consumeToken(c, TT_LITERAL);
  363. size_t stackVarsLoc = codePushInstructionI32(c, PUSH_STACK_VARIABLES, 0);
  364. Variable* resetVar = c->variables;
  365. i32 vars = compileFunctionArguments(c);
  366. addFunction(c, t.stringValue, functionStart, vars);
  367. while(!peekToken(c, TT_END)) {
  368. compileLine(c, tokenizerNext(c->tokenizer));
  369. }
  370. consumeToken(c, TT_END);
  371. i32 popVars = hasLocalVar(c) ? -c->variables->index : 0;
  372. codePushInstructionI32(c, PUSH_INT32, 0);
  373. codePushInstructionI32(c, RETURN_VALUE, popVars);
  374. rewriteReturns(c, popVars);
  375. codeRewriteI32(c, stackVarsLoc, popVars - vars);
  376. while(c->variables != resetVar) {
  377. Variable* next = c->variables->next;
  378. memoryFree(c->variables);
  379. c->variables = next;
  380. }
  381. codeRewriteI32(c, functionEnd, (i32)codeGetWritePosition(c));
  382. c->inFunction = false;
  383. }
  384. static void compileCallFunction(Context* c, Token t) {
  385. // used to store return address and variable index
  386. codePushInstructionI32(c, PUSH_INT32, 0);
  387. codePushInstructionI32(c, PUSH_INT32, 0);
  388. consumeToken(c, TT_OPEN_ROUND_BRACKET);
  389. i32 offset = 0;
  390. while(!peekToken(c, TT_CLOSE_ROUND_BRACKET)) {
  391. if(offset > 0) {
  392. consumeToken(c, TT_COMMA);
  393. }
  394. offset++;
  395. compileExpression(c);
  396. }
  397. consumeToken(c, TT_CLOSE_ROUND_BRACKET);
  398. codePushInstruction(c, JUMP_SUB);
  399. size_t pos = codeGetWritePosition(c);
  400. codePushI32(c, addCallFunction(c, t.stringValue, (i32)pos, offset));
  401. codePushI32(c, offset);
  402. }
  403. static void compileReturn(Context* c) {
  404. if(peekToken(c, TT_NEWLINE)) {
  405. consumeNewline(c);
  406. codePushInstructionI32(c, PUSH_INT32, 0);
  407. } else {
  408. compileExpression(c);
  409. }
  410. i32 address = (i32)codePushInstructionI32(c, RETURN_VALUE, 0);
  411. addReturn(c, address, true);
  412. }
  413. static void compileLineLiteral(Context* c, Token token) {
  414. const char* s = token.stringValue;
  415. if(strcmp(s, "print") == 0) {
  416. while(!peekToken(c, TT_NEWLINE)) {
  417. compileExpression(c);
  418. codePushInstruction(c, PRINT);
  419. }
  420. consumeNewline(c);
  421. codePushInstruction(c, PRINT_NEWLINE);
  422. } else if(peekToken(c, TT_ASSIGN)) {
  423. compileSetVariable(c, s, false);
  424. } else if(peekToken(c, TT_OPEN_ROUND_BRACKET)) {
  425. compileCallFunction(c, token);
  426. } else {
  427. THROW_ERROR("Unexpected literal(%s)", s);
  428. }
  429. }
  430. static void compileLine(Context* c, Token token) {
  431. if(token.type == TT_NEWLINE) {
  432. c->line++;
  433. } else if(token.type == TT_IF) {
  434. compileIf(c);
  435. } else if(token.type == TT_FUNCTION) {
  436. compileFunction(c);
  437. } else if(token.type == TT_RETURN) {
  438. compileReturn(c);
  439. } else if(token.type == TT_DOLLAR) {
  440. token = consumeToken(c, TT_LITERAL);
  441. compileSetVariable(c, token.stringValue, true);
  442. } else if(token.type != TT_LITERAL) {
  443. unexpectedToken(c, token, __LINE__);
  444. } else {
  445. compileLineLiteral(c, token);
  446. }
  447. }
  448. static void parseTokens(Context* c) {
  449. codeReset(c->code);
  450. size_t stackVarsLoc = codePushInstructionI32(c, PUSH_STACK_VARIABLES, 0);
  451. while(true) {
  452. Token token = tokenizerNext(c->tokenizer);
  453. if(token.type == TT_INVALID) {
  454. break;
  455. }
  456. compileLine(c, token);
  457. }
  458. if(c->variables != nullptr) {
  459. codeRewriteI32(c, stackVarsLoc, c->variables->index + 1);
  460. }
  461. checkForInvalidFunctions(c);
  462. }
  463. Error compileFile(Tokenizer* t, Code* code) {
  464. Context c = {.tokenizer = t, .code = code, .line = 1};
  465. if(!setjmp(c.jump)) {
  466. parseTokens(&c);
  467. }
  468. cleanContext(&c);
  469. return c.error;
  470. }