// FileTokens.c (16 KB)
  1. #include <errno.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer/FileTokens.h"
  6. #include "utils/SnuviUtils.h"
  7. static Error* error;
  8. typedef char String[64];
  9. #define STRING_LENGTH ((int)sizeof(String))
  10. typedef struct {
  11. String name;
  12. FileTokens tokens;
  13. } Define;
  14. #define DEFINES 256
  15. static Define defines[DEFINES];
  16. static int defineIndex = 0;
  17. static int ftGetLine(const FileTokens* ft, int index) {
  18. int fileLayer = 0;
  19. int lines = 1;
  20. for(int i = index; i >= 0; i--) {
  21. FileToken* t = ft->tokens + i;
  22. if(t->type == FT_PATH) {
  23. if(fileLayer == 0) {
  24. return lines;
  25. }
  26. fileLayer--;
  27. } else if(t->type == FT_END_PATH) {
  28. fileLayer++;
  29. } else if(t->type == FT_NEWLINE) {
  30. lines += fileLayer == 0;
  31. }
  32. }
  33. return 0;
  34. }
  35. static const char* ftGetPath(const FileTokens* ft, int index) {
  36. int fileLayer = 0;
  37. for(int i = index; i >= 0; i--) {
  38. FileToken* t = ft->tokens + i;
  39. if(t->type == FT_PATH) {
  40. if(fileLayer == 0) {
  41. return t->literal;
  42. }
  43. fileLayer--;
  44. } else if(t->type == FT_END_PATH) {
  45. fileLayer++;
  46. }
  47. }
  48. return "unknown path";
  49. }
  50. static void ftError(const FileTokens* ft, int index, const char* format, ...) {
  51. va_list args;
  52. va_start(args, format);
  53. eInitErrorV(error, "", ftGetLine(ft, index), format, args);
  54. int fileLayer = 0;
  55. for(int i = index; i >= 0; i--) {
  56. FileToken* t = ft->tokens + i;
  57. if(t->type == FT_PATH) {
  58. if(fileLayer == 0) {
  59. eAddPath(error, t->literal);
  60. } else {
  61. fileLayer--;
  62. }
  63. } else if(t->type == FT_END_PATH) {
  64. fileLayer++;
  65. }
  66. }
  67. va_end(args);
  68. }
  69. static void ftSystemError(const char* msg, const FileTokens* parent,
  70. const FileTokens* ft, int index) {
  71. ftError(parent, index, "%s '%s': %s", msg, ftGetPath(ft, 0),
  72. strerror(errno));
  73. }
  74. static void ftToSpace(FileToken* t) {
  75. free(t->literal);
  76. if(t->type != FT_NEWLINE) {
  77. t->type = FT_SPACE;
  78. }
  79. t->literal = NULL;
  80. t->single = 0;
  81. }
  82. static FileToken* ftAddToken(FileTokens* ft, FileTokenType type) {
  83. if(ft->length >= ft->capacity) {
  84. ft->capacity = (ft->capacity * 5) / 4 + 8;
  85. ft->tokens = realloc(ft->tokens, ft->capacity * sizeof(FileToken));
  86. }
  87. FileToken* t = ft->tokens + ft->length++;
  88. t->type = type;
  89. t->literal = NULL;
  90. t->single = 0;
  91. return t;
  92. }
  93. static FileToken* ftAddCopyToken(FileTokens* ft, const FileToken* t) {
  94. FileToken* added = ftAddToken(ft, t->type);
  95. added->literal = t->literal == NULL ? NULL : strdup(t->literal);
  96. added->single = t->single;
  97. return added;
  98. }
  99. static void ftAddPath(FileTokens* ft, const char* path) {
  100. ftAddToken(ft, FT_PATH)->literal = strdup(path);
  101. }
  102. static void ftAddEndPath(FileTokens* ft) {
  103. ftAddToken(ft, FT_END_PATH);
  104. }
  105. static void ftAddNewline(FileTokens* ft) {
  106. ftAddToken(ft, FT_NEWLINE);
  107. }
  108. static void ftAddLiteral(FileTokens* ft, const char* path, int length) {
  109. ftAddToken(ft, FT_LITERAL)->literal = strndup(path, length);
  110. }
  111. static void ftAddSingle(FileTokens* ft, int single) {
  112. ftAddToken(ft, FT_SINGLE)->single = single;
  113. }
  114. static void ftAddSpace(FileTokens* ft) {
  115. ftAddToken(ft, FT_SPACE);
  116. }
  117. void ftPrint(FileTokens* ft) {
  118. for(int i = 0; i < ft->length; i++) {
  119. switch(ft->tokens[i].type) {
  120. case FT_PATH:
  121. printf("\033[0;34mPath: %s\n", ft->tokens[i].literal);
  122. break;
  123. case FT_END_PATH: puts("\033[0;34mEnd Path"); break;
  124. case FT_NEWLINE: putchar('\n'); break;
  125. case FT_LITERAL:
  126. printf("\033[0;33m%s", ft->tokens[i].literal);
  127. break;
  128. case FT_SINGLE:
  129. printf("\033[0;31m%c", (char)ft->tokens[i].single);
  130. break;
  131. case FT_SPACE: putchar(' '); break;
  132. }
  133. }
  134. printf("\033[0m");
  135. }
  136. static bool ftHasError() {
  137. return eHasError(error);
  138. }
  139. static bool ftStringCompare(const char* a, const char* b) {
  140. return strcmp(a, b) == 0;
  141. }
  142. static void ftInitFileTokens(FileTokens* ft) {
  143. ft->tokens = NULL;
  144. ft->capacity = 0;
  145. ft->length = 0;
  146. }
  147. static void ftInitFileTokensWithPath(FileTokens* ft, const char* path) {
  148. ftInitFileTokens(ft);
  149. ftAddPath(ft, path);
  150. }
  151. static void ftTokenize(FileTokens* ft, unsigned char* data) {
  152. int index = 0;
  153. int oldIndex = 0;
  154. bool inString = false;
  155. while(true) {
  156. unsigned char c = data[index];
  157. if(isAllowedInName(c) || (inString && c != '"' && c != '\0')) {
  158. index++;
  159. continue;
  160. }
  161. if(oldIndex < index) {
  162. ftAddLiteral(ft, (const char*)(data + oldIndex), index - oldIndex);
  163. }
  164. if(c == '"') {
  165. inString = !inString;
  166. }
  167. if(c == '\0') {
  168. break;
  169. } else if(c == '\n') {
  170. ftAddNewline(ft);
  171. } else if(c == ' ') {
  172. ftAddSpace(ft);
  173. } else if(c == '/' && data[index + 1] == '/') {
  174. while(data[index] != '\0' && data[index] != '\n') {
  175. index++;
  176. }
  177. ftAddNewline(ft);
  178. } else if(c == '/' && data[index + 1] == '*') {
  179. int startIndex = ft->length - 1;
  180. index += 2;
  181. while(true) {
  182. if(data[index] == '\0') {
  183. ftError(ft, startIndex, "test");
  184. return;
  185. } else if(data[index] == '\n') {
  186. ftAddNewline(ft);
  187. } else if(data[index] == '*' && data[index + 1] == '/') {
  188. index++;
  189. break;
  190. }
  191. index++;
  192. }
  193. } else {
  194. ftAddSingle(ft, c);
  195. }
  196. index++;
  197. oldIndex = index;
  198. }
  199. }
  200. static void ftReadFull(FILE* file, FileTokens* parent, FileTokens* ft,
  201. int index) {
  202. if(fseek(file, 0, SEEK_END)) {
  203. ftSystemError("cannot seek end of file", parent, ft, index);
  204. return;
  205. }
  206. long length = ftell(file);
  207. if(length < 0) {
  208. ftSystemError("cannot tell end of file", parent, ft, index);
  209. return;
  210. }
  211. rewind(file);
  212. unsigned char* data = malloc(length + 1);
  213. int readValues = fread(data, 1, length, file);
  214. if(readValues != length) {
  215. ftSystemError("cannot read file", parent, ft, index);
  216. free(data);
  217. return;
  218. }
  219. data[length] = '\0';
  220. ftTokenize(ft, data);
  221. ftAddEndPath(ft);
  222. free(data);
  223. }
  224. static void ftReadFullFile(FileTokens* parent, FileTokens* ft, int index) {
  225. FILE* file = fopen(ftGetPath(ft, 0), "r");
  226. if(file == NULL) {
  227. ftSystemError("cannot open file", parent, ft, index);
  228. return;
  229. }
  230. ftReadFull(file, parent, ft, index);
  231. if(fclose(file) < 0) {
  232. ftSystemError("cannot close file", parent, ft, index);
  233. }
  234. }
  235. static void ftReplace(int from, int toEx, FileTokens* ft,
  236. const FileTokens* replacement) {
  237. int newCapacity = ft->capacity + replacement->capacity;
  238. FileToken* newTokens = malloc(newCapacity * sizeof(FileToken));
  239. // these will be overwritten
  240. for(int i = from; i < toEx; i++) {
  241. free(ft->tokens[i].literal);
  242. ft->tokens[i].literal = NULL;
  243. }
  244. memcpy(newTokens, ft->tokens, from * sizeof(FileToken));
  245. for(int i = 0; i < replacement->length; i++) {
  246. FileToken* src = replacement->tokens + i;
  247. FileToken* dst = newTokens + from + i;
  248. dst->literal = src->literal == NULL ? NULL : strdup(src->literal);
  249. dst->single = src->single;
  250. dst->type = src->type;
  251. }
  252. memcpy(newTokens + from + replacement->length, ft->tokens + toEx,
  253. (ft->length - toEx) * sizeof(FileToken));
  254. free(ft->tokens);
  255. ft->tokens = newTokens;
  256. ft->capacity = newCapacity;
  257. ft->length = from + replacement->length + ft->length - toEx;
  258. }
  259. static void ftSkipSpaces(int* index, FileTokens* ft) {
  260. while(*index < ft->length) {
  261. FileToken* t = ft->tokens + *index;
  262. if(t->type != FT_SPACE) {
  263. return;
  264. }
  265. (*index)++;
  266. }
  267. }
  268. const char* INCLUDE_ERROR = "include path must be a string";
  269. static bool ftIncludeDoubleQuote(int* index, FileTokens* ft) {
  270. if(*index >= ft->length || ft->tokens[*index].type != FT_SINGLE ||
  271. ft->tokens[*index].single != '"') {
  272. ftError(ft, *index - 1, INCLUDE_ERROR);
  273. return true;
  274. }
  275. (*index)++;
  276. return false;
  277. }
  278. static void ftHandleInclude(int* index, int start, FileTokens* ft) {
  279. ftSkipSpaces(index, ft);
  280. if(ftIncludeDoubleQuote(index, ft)) {
  281. return;
  282. }
  283. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  284. ftError(ft, *index - 1, INCLUDE_ERROR);
  285. return;
  286. }
  287. const char* path = ft->tokens[(*index)++].literal;
  288. if(ftIncludeDoubleQuote(index, ft)) {
  289. return;
  290. }
  291. const char* parentPath = ftGetPath(ft, *index - 1);
  292. int afterLastSlash = 0;
  293. for(int i = 0; parentPath[i] != '\0'; i++) {
  294. afterLastSlash = parentPath[i] == '/' ? i + 1 : afterLastSlash;
  295. }
  296. int pathLength = strlen(path) + 1;
  297. char* fullPath = malloc(afterLastSlash + pathLength);
  298. memcpy(fullPath, parentPath, afterLastSlash);
  299. memcpy(fullPath + afterLastSlash, path, pathLength);
  300. FileTokens include;
  301. ftInitFileTokensWithPath(&include, fullPath);
  302. free(fullPath);
  303. ftReadFullFile(ft, &include, *index - 1);
  304. if(!ftHasError()) {
  305. ftReplace(start, *index, ft, &include);
  306. *index = start;
  307. }
  308. ftDelete(&include);
  309. }
  310. static int ftGetDefineIndex(const char* s) {
  311. for(int i = 0; i < defineIndex; i++) {
  312. if(ftStringCompare(defines[i].name, s)) {
  313. return i;
  314. }
  315. }
  316. return -1;
  317. }
  318. static bool ftIsDefined(const char* s) {
  319. return ftGetDefineIndex(s) != -1;
  320. }
  321. static int ftFindEndIf(int from, FileTokens* ft) {
  322. int layer = 0;
  323. for(int i = from; i < ft->length - 1; i++) {
  324. if(ft->tokens[i].type != FT_SINGLE || ft->tokens[i].single != '#') {
  325. continue;
  326. } else if(ft->tokens[i + 1].type != FT_LITERAL) {
  327. continue;
  328. }
  329. if(ftStringCompare(ft->tokens[i + 1].literal, "ifndef")) {
  330. layer++;
  331. } else if(ftStringCompare(ft->tokens[i + 1].literal, "endif")) {
  332. if(layer == 0) {
  333. return i;
  334. }
  335. layer--;
  336. }
  337. }
  338. return -1;
  339. }
  340. static void ftHandleIfNotDefined(int* index, int start, FileTokens* ft) {
  341. ftSkipSpaces(index, ft);
  342. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  343. ftError(ft, start, "ifndef expects a literal");
  344. return;
  345. }
  346. const char* check = ft->tokens[(*index)++].literal;
  347. int endIf = ftFindEndIf(*index, ft);
  348. if(endIf == -1) {
  349. ftError(ft, start, "cannot find #endif");
  350. return;
  351. }
  352. if(ftIsDefined(check)) {
  353. for(int i = start; i < endIf + 2; i++) {
  354. ftToSpace(ft->tokens + i);
  355. }
  356. } else {
  357. ftToSpace(ft->tokens + start); // #
  358. ftToSpace(ft->tokens + start + 1); // ifndef
  359. ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
  360. ftToSpace(ft->tokens + endIf); // #
  361. ftToSpace(ft->tokens + endIf + 1); // endif
  362. }
  363. }
  364. static void ftHandleDefine(int* index, int start, FileTokens* ft) {
  365. if(defineIndex >= DEFINES) {
  366. ftError(ft, *index - 1, "too much defines");
  367. return;
  368. }
  369. ftSkipSpaces(index, ft);
  370. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  371. ftError(ft, *index - 1, "define expects a literal");
  372. return;
  373. }
  374. const char* defineName = ft->tokens[(*index)++].literal;
  375. ftSkipSpaces(index, ft);
  376. int defineStart = *index;
  377. bool skip = false;
  378. while(*index < ft->length) {
  379. FileToken* t = ft->tokens + *index;
  380. if(!skip && t->type == FT_SINGLE && t->single == '\\') {
  381. skip = true;
  382. ftToSpace(t);
  383. } else if(t->type == FT_NEWLINE) {
  384. if(skip) {
  385. skip = false;
  386. } else {
  387. break;
  388. }
  389. } else if(skip) {
  390. ftError(ft, *index - 1, "expected newline after \\");
  391. return;
  392. }
  393. (*index)++;
  394. }
  395. int foundIndex = ftGetDefineIndex(defineName);
  396. if(foundIndex != -1) {
  397. ftError(ft, *index - 1, "'%s' redefined", defineName);
  398. return;
  399. }
  400. Define* define = NULL;
  401. if(foundIndex == -1) {
  402. define = defines + defineIndex++;
  403. strncpy(define->name, defineName, STRING_LENGTH - 1);
  404. define->name[STRING_LENGTH - 1] = '\0';
  405. } else {
  406. define = defines + foundIndex;
  407. ftDelete(&define->tokens);
  408. }
  409. ftInitFileTokens(&define->tokens);
  410. for(int i = defineStart; i < *index; i++) {
  411. ftAddCopyToken(&define->tokens, ft->tokens + i);
  412. if(ft->tokens[i].type == FT_NEWLINE) {
  413. FileToken* t = define->tokens.tokens + (define->tokens.length - 1);
  414. ftToSpace(t);
  415. t->type = FT_SPACE;
  416. }
  417. }
  418. for(int i = start; i < *index; i++) {
  419. ftToSpace(ft->tokens + i);
  420. }
  421. }
  422. static void ftHandleUndefine(int* index, int start, FileTokens* ft) {
  423. ftSkipSpaces(index, ft);
  424. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  425. ftError(ft, *index - 1, "undefine expects a literal");
  426. return;
  427. }
  428. const char* defineName = ft->tokens[(*index)++].literal;
  429. int foundIndex = ftGetDefineIndex(defineName);
  430. if(foundIndex != -1) {
  431. Define* define = defines + foundIndex;
  432. ftDelete(&define->tokens);
  433. defineIndex--;
  434. if(defineIndex != foundIndex) {
  435. memcpy(define, defines + defineIndex, sizeof(Define));
  436. }
  437. }
  438. for(int i = start; i < *index; i++) {
  439. ftToSpace(ft->tokens + i);
  440. }
  441. }
  442. static void ftParseInstruction(int* index, FileTokens* ft) {
  443. int start = *index;
  444. (*index)++;
  445. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  446. ftError(ft, *index - 1, "expected literal after #");
  447. return;
  448. }
  449. const char* name = ft->tokens[(*index)++].literal;
  450. if(ftStringCompare(name, "include")) {
  451. ftHandleInclude(index, start, ft);
  452. } else if(ftStringCompare(name, "ifndef")) {
  453. ftHandleIfNotDefined(index, start, ft);
  454. } else if(ftStringCompare(name, "define")) {
  455. ftHandleDefine(index, start, ft);
  456. } else if(ftStringCompare(name, "undef")) {
  457. ftHandleUndefine(index, start, ft);
  458. } else if(ftStringCompare(name, "endif")) {
  459. ftError(ft, *index - 1, "endif without if");
  460. } else {
  461. ftError(ft, *index - 1, "unknown preprocessor literal '%s'", name);
  462. }
  463. }
  464. static bool ftReplaceMakros(FileTokens* ft, int index) {
  465. FileToken* t = ft->tokens + index;
  466. int foundIndex = ftGetDefineIndex(t->literal);
  467. if(foundIndex != -1) {
  468. ftReplace(index, index + 1, ft, &(defines[foundIndex].tokens));
  469. return true;
  470. }
  471. return false;
  472. }
  473. static void ftSearchInstruction(FileTokens* ft) {
  474. for(int i = 0; i < ft->length && !ftHasError(); i++) {
  475. if(ft->tokens[i].type == FT_SINGLE && ft->tokens[i].single == '#') {
  476. ftParseInstruction(&i, ft);
  477. } else if(ft->tokens[i].type == FT_LITERAL) {
  478. if(ftReplaceMakros(ft, i)) {
  479. i--;
  480. }
  481. }
  482. }
  483. }
  484. void ftInit(const char* path, FileTokens* ft, Error* e) {
  485. defineIndex = 0;
  486. error = e;
  487. eInitSuccess(error);
  488. ftInitFileTokensWithPath(ft, path);
  489. ftReadFullFile(ft, ft, 0);
  490. if(!ftHasError()) {
  491. ftSearchInstruction(ft);
  492. }
  493. for(int i = 0; i < defineIndex; i++) {
  494. ftDelete(&(defines[i].tokens));
  495. }
  496. }
  497. void ftDelete(FileTokens* ft) {
  498. for(int i = 0; i < ft->length; i++) {
  499. free(ft->tokens[i].literal);
  500. }
  501. free(ft->tokens);
  502. ft->tokens = NULL;
  503. ft->capacity = 0;
  504. ft->length = 0;
  505. }