Compiler.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. #include "Compiler.h"
  2. #include <setjmp.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "Code.h"
  7. #include "Constants.h"
  8. #include "Memory.h"
  9. #include "SystemFunctions.h"
  10. typedef struct Variable Variable;
  11. struct Variable {
  12. Variable* next;
  13. i32 index;
  14. char name[];
  15. };
  16. typedef struct Function Function;
  17. struct Function {
  18. Function* next;
  19. i32 address;
  20. i32 valueAddress;
  21. i32 arguments;
  22. char name[];
  23. };
  24. typedef struct Return Return;
  25. struct Return {
  26. Return* next;
  27. i32 address;
  28. bool hasArgument;
  29. };
  30. typedef struct {
  31. Tokenizer* tokenizer;
  32. Code* code;
  33. Error error;
  34. int line;
  35. jmp_buf jump;
  36. Variable* variables;
  37. Function* functions;
  38. Return* returns;
  39. bool inFunction;
  40. } Context;
  41. #define THROW_ERROR(format, ...) \
  42. snprintf( \
  43. c->error.text, sizeof(c->error.text), "Line %d | " format, \
  44. c->line __VA_OPT__(, ) __VA_ARGS__); \
  45. longjmp(c->jump, 1)
  46. [[noreturn]] static void throwOutOfCodeMemory(Context* c) {
  47. THROW_ERROR("Too less memory for code");
  48. }
  49. static void codePushInstruction(Context* c, Instruction i) {
  50. if(bufferWriteU8(&c->code->code, i)) {
  51. throwOutOfCodeMemory(c);
  52. }
  53. }
  54. static void codePushI32(Context* c, i32 i) {
  55. if(bufferWriteI32(&c->code->code, i)) {
  56. throwOutOfCodeMemory(c);
  57. }
  58. }
  59. static void codePushConstantString(Context* c, const char* s) {
  60. if(bufferWriteString(&c->code->code, s)) {
  61. throwOutOfCodeMemory(c);
  62. }
  63. }
  64. static size_t codeGetWritePosition(const Context* c) {
  65. return c->code->code.writeIndex;
  66. }
  67. static size_t codePushInstructionI32(Context* c, Instruction i, i32 v) {
  68. codePushInstruction(c, i);
  69. size_t pos = codeGetWritePosition(c);
  70. codePushI32(c, v);
  71. return pos;
  72. }
  73. static void codeRewriteI32(Context* c, size_t pos, i32 i) {
  74. size_t oldPos = codeGetWritePosition(c);
  75. c->code->code.writeIndex = pos;
  76. codePushI32(c, i);
  77. c->code->code.writeIndex = oldPos;
  78. }
  79. static bool hasLocalVar(const Context* c) {
  80. return c->variables != nullptr && c->variables->index < 0;
  81. }
  82. static i32 addVariable(Context* c, const char* name, bool forceGlobal) {
  83. for(Variable* v = c->variables; v != nullptr; v = v->next) {
  84. if(!forceGlobal && c->inFunction && v->index >= 0) {
  85. break;
  86. }
  87. if(strcmp(v->name, name) == 0) {
  88. return v->index;
  89. }
  90. }
  91. size_t l = strlen(name) + 1;
  92. Variable* v = memoryAllocate(sizeof(Variable) + l);
  93. if(v == nullptr) {
  94. THROW_ERROR("Too less memory for variables");
  95. }
  96. v->next = c->variables;
  97. if(c->inFunction) {
  98. v->index = hasLocalVar(c) ? c->variables->index - 1 : -1;
  99. } else {
  100. v->index = c->variables != nullptr ? c->variables->index + 1 : 0;
  101. }
  102. memcpy(v->name, name, l);
  103. c->variables = v;
  104. return v->index;
  105. }
  106. static void addRawFunction(
  107. Context* c, const char* name, i32 address, i32 valueAddress,
  108. i32 arguments) {
  109. size_t l = strlen(name) + 1;
  110. Function* f = memoryAllocate(sizeof(Function) + l);
  111. if(f == nullptr) {
  112. THROW_ERROR("Too less memory for functions");
  113. }
  114. f->next = c->functions;
  115. f->address = address;
  116. f->valueAddress = valueAddress;
  117. f->arguments = arguments;
  118. memcpy(f->name, name, l);
  119. c->functions = f;
  120. }
  121. static void addFunction(
  122. Context* c, const char* name, i32 address, i32 arguments) {
  123. for(Function* f = c->functions; f != nullptr; f = f->next) {
  124. if(strcmp(f->name, name) != 0) {
  125. continue;
  126. } else if(f->address >= 0) {
  127. THROW_ERROR("Function '%s' registered again", name);
  128. } else if(f->arguments != arguments) {
  129. THROW_ERROR("Function '%s' with inconsistent arguments", name);
  130. }
  131. f->address = address;
  132. codeRewriteI32(c, (size_t)f->valueAddress, address);
  133. }
  134. addRawFunction(c, name, address, -1, arguments);
  135. }
  136. static i32 addCallFunction(
  137. Context* c, const char* name, i32 address, i32 arguments) {
  138. for(Function* f = c->functions; f != nullptr; f = f->next) {
  139. if(strcmp(f->name, name) == 0 && f->address >= 0) {
  140. if(f->arguments != arguments) {
  141. THROW_ERROR("Function '%s' with inconsistent arguments", name);
  142. }
  143. return f->address;
  144. }
  145. }
  146. addRawFunction(c, name, -1, address, arguments);
  147. return -1;
  148. }
  149. static void checkForInvalidFunctions(Context* c) {
  150. for(Function* f = c->functions; f != nullptr; f = f->next) {
  151. if(f->address < 0) {
  152. THROW_ERROR("Unmapped function call '%s'", f->name);
  153. }
  154. }
  155. }
  156. static void addReturn(Context* c, i32 address, bool hasArgument) {
  157. if(c->returns != nullptr && c->returns->hasArgument != hasArgument) {
  158. THROW_ERROR("Inconsistent returns in function");
  159. }
  160. Return* r = memoryAllocate(sizeof(Return));
  161. if(r == nullptr) {
  162. THROW_ERROR("Too less memory for returns");
  163. }
  164. r->next = c->returns;
  165. r->hasArgument = hasArgument;
  166. r->address = address;
  167. c->returns = r;
  168. }
  169. #define CLEAN_LIST(Type, field) \
  170. do { \
  171. Type* v = c->field; \
  172. while(v != nullptr) { \
  173. Type* next = v->next; \
  174. memoryFree(v); \
  175. v = next; \
  176. } \
  177. c->field = nullptr; \
  178. } while(false)
  179. static void cleanReturns(Context* c) {
  180. CLEAN_LIST(Return, returns);
  181. }
  182. static void cleanContext(Context* c) {
  183. CLEAN_LIST(Variable, variables);
  184. CLEAN_LIST(Function, functions);
  185. cleanReturns(c);
  186. }
  187. [[noreturn]] static void unexpectedToken(Context* c, Token token, int line) {
  188. char buffer[128];
  189. tokenizerPrintToken(&token, buffer, sizeof(buffer));
  190. THROW_ERROR("Unexpected %s token %d", buffer, line);
  191. }
  192. static Token consumeToken(Context* c, TokenType type) {
  193. Token actual = tokenizerNext(c->tokenizer);
  194. if(actual.type != type) {
  195. unexpectedToken(c, actual, __LINE__);
  196. }
  197. return actual;
  198. }
  199. static bool peekToken(Context* c, TokenType type) {
  200. return tokenizerPeek(c->tokenizer).type == type;
  201. }
  202. static bool consumeTokenIf(Context* c, TokenType type) {
  203. if(peekToken(c, type)) {
  204. consumeToken(c, type);
  205. return true;
  206. }
  207. return false;
  208. }
  209. static void consumeNewline(Context* c) {
  210. if(peekToken(c, TT_INVALID)) {
  211. return;
  212. }
  213. consumeToken(c, TT_NEWLINE);
  214. c->line++;
  215. }
  216. static void compileReadVariable(Context* c, Token token, bool forceGlobal) {
  217. i32 index = addVariable(c, token.stringValue, forceGlobal);
  218. codePushInstructionI32(c, READ_VARIABLE, index);
  219. }
  220. static void compileCallFunction(Context* c, Token t);
  221. static void compileConstant(Context* c) {
  222. Token token = tokenizerNext(c->tokenizer);
  223. if(token.type == TT_STRING) {
  224. codePushInstruction(c, PUSH_CONSTANT_STRING);
  225. codePushConstantString(c, token.stringValue);
  226. } else if(token.type == TT_INT32) {
  227. codePushInstructionI32(c, PUSH_INT32, token.intValue);
  228. } else if(token.type == TT_LITERAL) {
  229. if(peekToken(c, TT_OPEN_ROUND_BRACKET)) {
  230. compileCallFunction(c, token);
  231. } else {
  232. compileReadVariable(c, token, false);
  233. }
  234. } else if(token.type == TT_DOLLAR) {
  235. token = consumeToken(c, TT_LITERAL);
  236. compileReadVariable(c, token, true);
  237. } else {
  238. unexpectedToken(c, token, __LINE__);
  239. }
  240. }
  241. static void compileUnary(Context* c) {
  242. if(consumeTokenIf(c, TT_NOT)) {
  243. compileUnary(c);
  244. codePushInstruction(c, NOT);
  245. } else {
  246. compileConstant(c);
  247. }
  248. }
  249. static void compileMul(Context* c) {
  250. compileUnary(c);
  251. while(true) {
  252. if(consumeTokenIf(c, TT_MULTIPLY)) {
  253. compileUnary(c);
  254. codePushInstruction(c, MUL);
  255. } else if(consumeTokenIf(c, TT_DIVIDE)) {
  256. compileUnary(c);
  257. codePushInstruction(c, DIV);
  258. } else {
  259. break;
  260. }
  261. }
  262. }
  263. static void compileAdd(Context* c) {
  264. compileMul(c);
  265. while(true) {
  266. if(consumeTokenIf(c, TT_ADD)) {
  267. compileMul(c);
  268. codePushInstruction(c, ADD);
  269. } else if(consumeTokenIf(c, TT_SUBTRACT)) {
  270. compileMul(c);
  271. codePushInstruction(c, SUB);
  272. } else {
  273. break;
  274. }
  275. }
  276. }
  277. static void compileCompare(Context* c) {
  278. compileAdd(c);
  279. while(true) {
  280. if(consumeTokenIf(c, TT_EQUAL)) {
  281. compileAdd(c);
  282. codePushInstruction(c, EQUAL);
  283. } else if(consumeTokenIf(c, TT_NOT_EQUAL)) {
  284. compileAdd(c);
  285. codePushInstruction(c, NOT_EQUAL);
  286. } else if(consumeTokenIf(c, TT_GREATER)) {
  287. compileAdd(c);
  288. codePushInstruction(c, GREATER);
  289. } else if(consumeTokenIf(c, TT_SMALLER)) {
  290. compileAdd(c);
  291. codePushInstruction(c, SMALLER);
  292. } else if(consumeTokenIf(c, TT_GREATER_OR_EQUAL)) {
  293. compileAdd(c);
  294. codePushInstruction(c, GREATER_OR_EQUAL);
  295. } else if(consumeTokenIf(c, TT_SMALLER_OR_EQUAL)) {
  296. compileAdd(c);
  297. codePushInstruction(c, SMALLER_OR_EQUAL);
  298. } else {
  299. break;
  300. }
  301. }
  302. }
  303. static void compileLogical(Context* c) {
  304. compileCompare(c);
  305. while(true) {
  306. if(consumeTokenIf(c, TT_AND)) {
  307. compileCompare(c);
  308. codePushInstruction(c, AND);
  309. } else if(consumeTokenIf(c, TT_OR)) {
  310. compileCompare(c);
  311. codePushInstruction(c, OR);
  312. } else {
  313. break;
  314. }
  315. }
  316. }
  317. static void compileExpression(Context* c) {
  318. compileLogical(c);
  319. }
  320. static void compileLine(Context* c, Token token);
  321. static void compileIf(Context* c) {
  322. compileExpression(c);
  323. size_t posIndex = codePushInstructionI32(c, JUMP_ON_0, 0);
  324. consumeNewline(c);
  325. while(!peekToken(c, TT_END)) {
  326. compileLine(c, tokenizerNext(c->tokenizer));
  327. }
  328. consumeToken(c, TT_END);
  329. consumeNewline(c);
  330. codeRewriteI32(c, posIndex, (i32)codeGetWritePosition(c));
  331. }
  332. static void compileSetVariable(Context* c, const char* name, bool forceGlobal) {
  333. consumeToken(c, TT_ASSIGN);
  334. compileExpression(c);
  335. codePushInstructionI32(c, SET_VARIABLE, addVariable(c, name, forceGlobal));
  336. }
  337. static i32 compileFunctionArguments(Context* c) {
  338. consumeToken(c, TT_OPEN_ROUND_BRACKET);
  339. i32 vars = 0;
  340. while(!peekToken(c, TT_CLOSE_ROUND_BRACKET)) {
  341. if(vars > 0) {
  342. consumeToken(c, TT_COMMA);
  343. }
  344. Token varToken = consumeToken(c, TT_LITERAL);
  345. addVariable(c, varToken.stringValue, false);
  346. vars++;
  347. }
  348. consumeToken(c, TT_CLOSE_ROUND_BRACKET);
  349. return vars;
  350. }
  351. static void rewriteReturns(Context* c, i32 address) {
  352. for(Return* r = c->returns; r != nullptr; r = r->next) {
  353. codeRewriteI32(c, (size_t)r->address, address);
  354. }
  355. cleanReturns(c);
  356. }
  357. static void compileFunction(Context* c) {
  358. if(c->inFunction) {
  359. THROW_ERROR("Functions in functions are not allowed");
  360. }
  361. c->inFunction = true;
  362. size_t functionEnd = codePushInstructionI32(c, JUMP, 0);
  363. i32 functionStart = (i32)codeGetWritePosition(c);
  364. Token t = consumeToken(c, TT_LITERAL);
  365. size_t stackVarsLoc = codePushInstructionI32(c, PUSH_STACK_VARIABLES, 0);
  366. Variable* resetVar = c->variables;
  367. i32 vars = compileFunctionArguments(c);
  368. addFunction(c, t.stringValue, functionStart, vars);
  369. while(!peekToken(c, TT_END)) {
  370. compileLine(c, tokenizerNext(c->tokenizer));
  371. }
  372. consumeToken(c, TT_END);
  373. i32 popVars = hasLocalVar(c) ? -c->variables->index : 0;
  374. codePushInstructionI32(c, PUSH_INT32, 0);
  375. codePushInstructionI32(c, RETURN_VALUE, popVars);
  376. rewriteReturns(c, popVars);
  377. codeRewriteI32(c, stackVarsLoc, popVars - vars);
  378. while(c->variables != resetVar) {
  379. Variable* next = c->variables->next;
  380. memoryFree(c->variables);
  381. c->variables = next;
  382. }
  383. codeRewriteI32(c, functionEnd, (i32)codeGetWritePosition(c));
  384. c->inFunction = false;
  385. }
  386. static i32 compileCallFunctionArguments(Context* c) {
  387. consumeToken(c, TT_OPEN_ROUND_BRACKET);
  388. i32 offset = 0;
  389. while(!peekToken(c, TT_CLOSE_ROUND_BRACKET)) {
  390. if(offset > 0) {
  391. consumeToken(c, TT_COMMA);
  392. }
  393. offset++;
  394. compileExpression(c);
  395. }
  396. consumeToken(c, TT_CLOSE_ROUND_BRACKET);
  397. return offset;
  398. }
  399. static void compileCallFunction(Context* c, Token t) {
  400. i32 index = getSystemFunctionIndex(t.stringValue);
  401. if(index >= 0) {
  402. i32 args = compileCallFunctionArguments(c);
  403. i32 wantedArgs = getSystemFunctionArguments(index);
  404. if(wantedArgs >= 0 && args != wantedArgs) {
  405. THROW_ERROR(
  406. "System function '%s' called with invalid amount of arguments",
  407. t.stringValue);
  408. }
  409. static_assert(MAX_SYSTEM_FUNCTION_ARGUMENTS < (1 << 8));
  410. codePushInstructionI32(c, CALL_SYSTEM, (index << 8) | args);
  411. } else {
  412. // used to store return address and variable index
  413. codePushInstructionI32(c, PUSH_INT32, 0);
  414. codePushInstructionI32(c, PUSH_INT32, 0);
  415. i32 offset = compileCallFunctionArguments(c);
  416. codePushInstruction(c, JUMP_SUB);
  417. size_t pos = codeGetWritePosition(c);
  418. codePushI32(c, addCallFunction(c, t.stringValue, (i32)pos, offset));
  419. codePushI32(c, offset);
  420. }
  421. }
  422. static void compileReturn(Context* c) {
  423. if(peekToken(c, TT_NEWLINE)) {
  424. consumeNewline(c);
  425. codePushInstructionI32(c, PUSH_INT32, 0);
  426. } else {
  427. compileExpression(c);
  428. }
  429. i32 address = (i32)codePushInstructionI32(c, RETURN_VALUE, 0);
  430. addReturn(c, address, true);
  431. }
  432. static void compileLineLiteral(Context* c, Token token) {
  433. const char* s = token.stringValue;
  434. if(peekToken(c, TT_ASSIGN)) {
  435. compileSetVariable(c, s, false);
  436. } else if(peekToken(c, TT_OPEN_ROUND_BRACKET)) {
  437. compileCallFunction(c, token);
  438. codePushInstruction(c, POP);
  439. } else {
  440. THROW_ERROR("Unexpected literal(%s)", s);
  441. }
  442. }
  443. static void compileLine(Context* c, Token token) {
  444. if(token.type == TT_NEWLINE) {
  445. c->line++;
  446. } else if(token.type == TT_IF) {
  447. compileIf(c);
  448. } else if(token.type == TT_FUNCTION) {
  449. compileFunction(c);
  450. } else if(token.type == TT_RETURN) {
  451. compileReturn(c);
  452. } else if(token.type == TT_DOLLAR) {
  453. token = consumeToken(c, TT_LITERAL);
  454. compileSetVariable(c, token.stringValue, true);
  455. } else if(token.type != TT_LITERAL) {
  456. unexpectedToken(c, token, __LINE__);
  457. } else {
  458. compileLineLiteral(c, token);
  459. }
  460. }
  461. static void parseTokens(Context* c) {
  462. codeReset(c->code);
  463. size_t stackVarsLoc = codePushInstructionI32(c, PUSH_STACK_VARIABLES, 0);
  464. while(true) {
  465. Token token = tokenizerNext(c->tokenizer);
  466. if(token.type == TT_INVALID) {
  467. break;
  468. }
  469. compileLine(c, token);
  470. }
  471. if(c->variables != nullptr) {
  472. codeRewriteI32(c, stackVarsLoc, c->variables->index + 1);
  473. }
  474. checkForInvalidFunctions(c);
  475. }
  476. Error compileFile(Tokenizer* t, Code* code) {
  477. Context c = {.tokenizer = t, .code = code, .line = 1};
  478. if(!setjmp(c.jump)) {
  479. parseTokens(&c);
  480. }
  481. cleanContext(&c);
  482. return c.error;
  483. }