FileTokens.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. #include <errno.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer/FileTokens.h"
  6. #include "utils/SnuviUtils.h"
  7. static Error* error = NULL;
  8. typedef char String[64];
  9. #define STRING_LENGTH ((int)sizeof(String))
  10. typedef struct {
  11. String name;
  12. FileTokens tokens;
  13. } Define;
  14. #define DEFINES 256
  15. static Define defines[DEFINES];
  16. static int defineIndex = 0;
  17. static int ftGetLine(const FileTokens* ft, int index) {
  18. int fileLayer = 0;
  19. int lines = 1;
  20. for(int i = index; i >= 0; i--) {
  21. FileToken* t = ft->tokens + i;
  22. if(t->type == FT_PATH) {
  23. if(fileLayer == 0) {
  24. return lines;
  25. }
  26. fileLayer--;
  27. } else if(t->type == FT_END_PATH) {
  28. fileLayer++;
  29. } else if(t->type == FT_NEWLINE) {
  30. lines += fileLayer == 0;
  31. }
  32. }
  33. return 0;
  34. }
  35. static const char* ftGetPath(const FileTokens* ft, int index) {
  36. int fileLayer = 0;
  37. for(int i = index; i >= 0; i--) {
  38. FileToken* t = ft->tokens + i;
  39. if(t->type == FT_PATH) {
  40. if(fileLayer == 0) {
  41. return t->literal;
  42. }
  43. fileLayer--;
  44. } else if(t->type == FT_END_PATH) {
  45. fileLayer++;
  46. }
  47. }
  48. return "unknown path";
  49. }
  50. static void ftError(const FileTokens* ft, int index, const char* format, ...) {
  51. va_list args;
  52. va_start(args, format);
  53. eInitErrorV(error, "", ftGetLine(ft, index), format, args);
  54. int fileLayer = 0;
  55. for(int i = index; i >= 0; i--) {
  56. FileToken* t = ft->tokens + i;
  57. if(t->type == FT_PATH) {
  58. if(fileLayer == 0) {
  59. eAddPath(error, t->literal);
  60. } else {
  61. fileLayer--;
  62. }
  63. } else if(t->type == FT_END_PATH) {
  64. fileLayer++;
  65. }
  66. }
  67. va_end(args);
  68. }
  69. static void ftSystemError(const char* msg, const FileTokens* parent,
  70. const FileTokens* ft, int index) {
  71. ftError(parent, index, "%s '%s': %s", msg, ftGetPath(ft, 0),
  72. strerror(errno));
  73. }
  74. static void ftToSpace(FileToken* t) {
  75. free(t->literal);
  76. if(t->type != FT_NEWLINE) {
  77. t->type = FT_SPACE;
  78. }
  79. t->literal = NULL;
  80. t->single = 0;
  81. }
  82. static FileToken* ftAddToken(FileTokens* ft, FileTokenType type) {
  83. if(ft->length >= ft->capacity) {
  84. ft->capacity = (ft->capacity * 5) / 4 + 8;
  85. ft->tokens = (FileToken*)realloc(ft->tokens, (size_t)ft->capacity *
  86. sizeof(FileToken));
  87. }
  88. FileToken* t = ft->tokens + ft->length++;
  89. t->type = type;
  90. t->literal = NULL;
  91. t->single = 0;
  92. return t;
  93. }
  94. static FileToken* ftAddCopyToken(FileTokens* ft, const FileToken* t) {
  95. FileToken* added = ftAddToken(ft, t->type);
  96. added->literal = t->literal == NULL ? NULL : strdup(t->literal);
  97. added->single = t->single;
  98. return added;
  99. }
  100. static void ftAddPath(FileTokens* ft, const char* path) {
  101. ftAddToken(ft, FT_PATH)->literal = strdup(path);
  102. }
  103. static void ftAddEndPath(FileTokens* ft) {
  104. ftAddToken(ft, FT_END_PATH);
  105. }
  106. static void ftAddNewline(FileTokens* ft) {
  107. ftAddToken(ft, FT_NEWLINE);
  108. }
  109. static void ftAddLiteral(FileTokens* ft, const char* path, int length) {
  110. ftAddToken(ft, FT_LITERAL)->literal = strndup(path, (size_t)length);
  111. }
  112. static void ftAddSingle(FileTokens* ft, int single) {
  113. ftAddToken(ft, FT_SINGLE)->single = single;
  114. }
  115. static void ftAddSpace(FileTokens* ft) {
  116. ftAddToken(ft, FT_SPACE);
  117. }
  118. void ftPrint(FileTokens* ft) {
  119. for(int i = 0; i < ft->length; i++) {
  120. switch(ft->tokens[i].type) {
  121. case FT_PATH:
  122. printf("\033[0;34mPath: %s\n", ft->tokens[i].literal);
  123. break;
  124. case FT_END_PATH: puts("\033[0;34mEnd Path"); break;
  125. case FT_NEWLINE: putchar('\n'); break;
  126. case FT_LITERAL:
  127. printf("\033[0;33m%s", ft->tokens[i].literal);
  128. break;
  129. case FT_SINGLE:
  130. printf("\033[0;31m%c", (char)ft->tokens[i].single);
  131. break;
  132. case FT_SPACE: putchar(' '); break;
  133. default: putchar('?'); break;
  134. }
  135. }
  136. printf("\033[0m");
  137. }
  138. static bool ftHasError(void) {
  139. return eHasError(error);
  140. }
  141. static bool ftStringCompare(const char* a, const char* b) {
  142. return strcmp(a, b) == 0;
  143. }
  144. static void ftInitFileTokens(FileTokens* ft) {
  145. ft->tokens = NULL;
  146. ft->capacity = 0;
  147. ft->length = 0;
  148. }
  149. static void ftInitFileTokensWithPath(FileTokens* ft, const char* path) {
  150. ftInitFileTokens(ft);
  151. ftAddPath(ft, path);
  152. }
  153. static void ftTokenize(FileTokens* ft, unsigned char* data) {
  154. int index = 0;
  155. int oldIndex = 0;
  156. bool inString = false;
  157. while(true) {
  158. unsigned char c = data[index];
  159. if(isAllowedInName(c) || (inString && c != '"' && c != '\0')) {
  160. index++;
  161. continue;
  162. }
  163. if(oldIndex < index) {
  164. ftAddLiteral(ft, (const char*)(data + oldIndex), index - oldIndex);
  165. }
  166. if(c == '"') {
  167. inString = !inString;
  168. }
  169. if(c == '\0') {
  170. break;
  171. } else if(c == '\n') {
  172. ftAddNewline(ft);
  173. } else if(c == ' ') {
  174. ftAddSpace(ft);
  175. } else if(c == '/' && data[index + 1] == '/') {
  176. while(data[index] != '\0' && data[index] != '\n') {
  177. index++;
  178. }
  179. ftAddNewline(ft);
  180. } else if(c == '/' && data[index + 1] == '*') {
  181. int startIndex = ft->length - 1;
  182. index += 2;
  183. while(true) {
  184. if(data[index] == '\0') {
  185. ftError(ft, startIndex, "test");
  186. return;
  187. } else if(data[index] == '\n') {
  188. ftAddNewline(ft);
  189. } else if(data[index] == '*' && data[index + 1] == '/') {
  190. index++;
  191. break;
  192. }
  193. index++;
  194. }
  195. } else {
  196. ftAddSingle(ft, c);
  197. }
  198. index++;
  199. oldIndex = index;
  200. }
  201. }
  202. static void ftReadFull(FILE* file, FileTokens* parent, FileTokens* ft,
  203. int index) {
  204. if(fseek(file, 0, SEEK_END)) {
  205. ftSystemError("cannot seek end of file", parent, ft, index);
  206. return;
  207. }
  208. long signedLength = ftell(file);
  209. if(signedLength < 0) {
  210. ftSystemError("cannot tell end of file", parent, ft, index);
  211. return;
  212. }
  213. size_t length = (size_t)signedLength;
  214. rewind(file);
  215. unsigned char* data = (unsigned char*)malloc(length + 1);
  216. size_t readValues = fread(data, 1, length, file);
  217. if(readValues != length) {
  218. ftSystemError("cannot read file", parent, ft, index);
  219. free(data);
  220. return;
  221. }
  222. data[length] = '\0';
  223. ftTokenize(ft, data);
  224. ftAddEndPath(ft);
  225. free(data);
  226. }
  227. static void ftReadFullFile(FileTokens* parent, FileTokens* ft, int index) {
  228. FILE* file = fopen(ftGetPath(ft, 0), "r");
  229. if(file == NULL) {
  230. ftSystemError("cannot open file", parent, ft, index);
  231. return;
  232. }
  233. ftReadFull(file, parent, ft, index);
  234. if(fclose(file) < 0) {
  235. ftSystemError("cannot close file", parent, ft, index);
  236. }
  237. }
  238. static void ftReplace(int from, int toEx, FileTokens* ft,
  239. const FileTokens* replacement) {
  240. int newCapacity = ft->capacity + replacement->capacity;
  241. FileToken* newTokens =
  242. (FileToken*)malloc((size_t)newCapacity * sizeof(FileToken));
  243. // these will be overwritten
  244. for(int i = from; i < toEx; i++) {
  245. free(ft->tokens[i].literal);
  246. ft->tokens[i].literal = NULL;
  247. }
  248. memcpy(newTokens, ft->tokens, (size_t)from * sizeof(FileToken));
  249. for(int i = 0; i < replacement->length; i++) {
  250. FileToken* src = replacement->tokens + i;
  251. FileToken* dst = newTokens + from + i;
  252. dst->literal = src->literal == NULL ? NULL : strdup(src->literal);
  253. dst->single = src->single;
  254. dst->type = src->type;
  255. }
  256. memcpy(newTokens + from + replacement->length, ft->tokens + toEx,
  257. (size_t)(ft->length - toEx) * sizeof(FileToken));
  258. free(ft->tokens);
  259. ft->tokens = newTokens;
  260. ft->capacity = newCapacity;
  261. ft->length = from + replacement->length + ft->length - toEx;
  262. }
  263. static void ftSkipSpaces(int* index, FileTokens* ft) {
  264. while(*index < ft->length) {
  265. FileToken* t = ft->tokens + *index;
  266. if(t->type != FT_SPACE) {
  267. return;
  268. }
  269. (*index)++;
  270. }
  271. }
  272. const char* INCLUDE_ERROR = "include path must be a string";
  273. static bool ftIncludeDoubleQuote(int* index, FileTokens* ft) {
  274. if(*index >= ft->length || ft->tokens[*index].type != FT_SINGLE ||
  275. ft->tokens[*index].single != '"') {
  276. ftError(ft, *index - 1, INCLUDE_ERROR);
  277. return true;
  278. }
  279. (*index)++;
  280. return false;
  281. }
  282. static void ftHandleInclude(int* index, int start, FileTokens* ft) {
  283. ftSkipSpaces(index, ft);
  284. if(ftIncludeDoubleQuote(index, ft)) {
  285. return;
  286. }
  287. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  288. ftError(ft, *index - 1, INCLUDE_ERROR);
  289. return;
  290. }
  291. const char* path = ft->tokens[(*index)++].literal;
  292. if(ftIncludeDoubleQuote(index, ft)) {
  293. return;
  294. }
  295. const char* parentPath = ftGetPath(ft, *index - 1);
  296. size_t afterLastSlash = 0;
  297. for(size_t i = 0; parentPath[i] != '\0'; i++) {
  298. afterLastSlash = parentPath[i] == '/' ? i + 1 : afterLastSlash;
  299. }
  300. size_t pathLength = strlen(path) + 1;
  301. char* fullPath = (char*)malloc(afterLastSlash + pathLength);
  302. memcpy(fullPath, parentPath, afterLastSlash);
  303. memcpy(fullPath + afterLastSlash, path, pathLength);
  304. FileTokens include;
  305. ftInitFileTokensWithPath(&include, fullPath);
  306. free(fullPath);
  307. ftReadFullFile(ft, &include, *index - 1);
  308. if(!ftHasError()) {
  309. ftReplace(start, *index, ft, &include);
  310. *index = start;
  311. }
  312. ftDelete(&include);
  313. }
  314. static int ftGetDefineIndex(const char* s) {
  315. for(int i = 0; i < defineIndex; i++) {
  316. if(ftStringCompare(defines[i].name, s)) {
  317. return i;
  318. }
  319. }
  320. return -1;
  321. }
  322. static bool ftIsDefined(const char* s) {
  323. return ftGetDefineIndex(s) != -1;
  324. }
  325. static int ftFindEndIf(int from, FileTokens* ft) {
  326. int layer = 0;
  327. for(int i = from; i < ft->length - 1; i++) {
  328. if(ft->tokens[i].type != FT_SINGLE || ft->tokens[i].single != '#') {
  329. continue;
  330. } else if(ft->tokens[i + 1].type != FT_LITERAL) {
  331. continue;
  332. }
  333. if(ftStringCompare(ft->tokens[i + 1].literal, "ifndef")) {
  334. layer++;
  335. } else if(ftStringCompare(ft->tokens[i + 1].literal, "endif")) {
  336. if(layer == 0) {
  337. return i;
  338. }
  339. layer--;
  340. }
  341. }
  342. return -1;
  343. }
  344. static void ftHandleIfNotDefined(int* index, int start, FileTokens* ft) {
  345. ftSkipSpaces(index, ft);
  346. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  347. ftError(ft, start, "ifndef expects a literal");
  348. return;
  349. }
  350. const char* check = ft->tokens[(*index)++].literal;
  351. int endIf = ftFindEndIf(*index, ft);
  352. if(endIf == -1) {
  353. ftError(ft, start, "cannot find #endif");
  354. return;
  355. }
  356. if(ftIsDefined(check)) {
  357. for(int i = start; i < endIf + 2; i++) {
  358. ftToSpace(ft->tokens + i);
  359. }
  360. } else {
  361. ftToSpace(ft->tokens + start); // #
  362. ftToSpace(ft->tokens + start + 1); // ifndef
  363. ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
  364. ftToSpace(ft->tokens + endIf); // #
  365. ftToSpace(ft->tokens + endIf + 1); // endif
  366. }
  367. }
  368. static void ftHandleDefine(int* index, int start, FileTokens* ft) {
  369. if(defineIndex >= DEFINES) {
  370. ftError(ft, *index - 1, "too much defines");
  371. return;
  372. }
  373. ftSkipSpaces(index, ft);
  374. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  375. ftError(ft, *index - 1, "define expects a literal");
  376. return;
  377. }
  378. const char* defineName = ft->tokens[(*index)++].literal;
  379. ftSkipSpaces(index, ft);
  380. int defineStart = *index;
  381. bool skip = false;
  382. while(*index < ft->length) {
  383. FileToken* t = ft->tokens + *index;
  384. if(!skip && t->type == FT_SINGLE && t->single == '\\') {
  385. skip = true;
  386. ftToSpace(t);
  387. } else if(t->type == FT_NEWLINE) {
  388. if(skip) {
  389. skip = false;
  390. } else {
  391. break;
  392. }
  393. } else if(skip) {
  394. ftError(ft, *index - 1, "expected newline after \\");
  395. return;
  396. }
  397. (*index)++;
  398. }
  399. int foundIndex = ftGetDefineIndex(defineName);
  400. if(foundIndex != -1) {
  401. ftError(ft, *index - 1, "'%s' redefined", defineName);
  402. return;
  403. }
  404. Define* define = NULL;
  405. if(foundIndex == -1) {
  406. define = defines + defineIndex++;
  407. strncpy(define->name, defineName, STRING_LENGTH - 1);
  408. define->name[STRING_LENGTH - 1] = '\0';
  409. } else {
  410. define = defines + foundIndex;
  411. ftDelete(&define->tokens);
  412. }
  413. ftInitFileTokens(&define->tokens);
  414. for(int i = defineStart; i < *index; i++) {
  415. ftAddCopyToken(&define->tokens, ft->tokens + i);
  416. if(ft->tokens[i].type == FT_NEWLINE) {
  417. FileToken* t = define->tokens.tokens + (define->tokens.length - 1);
  418. ftToSpace(t);
  419. t->type = FT_SPACE;
  420. }
  421. }
  422. for(int i = start; i < *index; i++) {
  423. ftToSpace(ft->tokens + i);
  424. }
  425. }
  426. static void ftHandleUndefine(int* index, int start, FileTokens* ft) {
  427. ftSkipSpaces(index, ft);
  428. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  429. ftError(ft, *index - 1, "undefine expects a literal");
  430. return;
  431. }
  432. const char* defineName = ft->tokens[(*index)++].literal;
  433. int foundIndex = ftGetDefineIndex(defineName);
  434. if(foundIndex != -1) {
  435. Define* define = defines + foundIndex;
  436. ftDelete(&define->tokens);
  437. defineIndex--;
  438. if(defineIndex != foundIndex) {
  439. memcpy(define, defines + defineIndex, sizeof(Define));
  440. }
  441. }
  442. for(int i = start; i < *index; i++) {
  443. ftToSpace(ft->tokens + i);
  444. }
  445. }
  446. static void ftParseInstruction(int* index, FileTokens* ft) {
  447. int start = *index;
  448. (*index)++;
  449. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  450. ftError(ft, *index - 1, "expected literal after #");
  451. return;
  452. }
  453. const char* name = ft->tokens[(*index)++].literal;
  454. if(ftStringCompare(name, "include")) {
  455. ftHandleInclude(index, start, ft);
  456. } else if(ftStringCompare(name, "ifndef")) {
  457. ftHandleIfNotDefined(index, start, ft);
  458. } else if(ftStringCompare(name, "define")) {
  459. ftHandleDefine(index, start, ft);
  460. } else if(ftStringCompare(name, "undef")) {
  461. ftHandleUndefine(index, start, ft);
  462. } else if(ftStringCompare(name, "endif")) {
  463. ftError(ft, *index - 1, "endif without if");
  464. } else {
  465. ftError(ft, *index - 1, "unknown preprocessor literal '%s'", name);
  466. }
  467. }
  468. static bool ftReplaceMakros(FileTokens* ft, int index) {
  469. FileToken* t = ft->tokens + index;
  470. int foundIndex = ftGetDefineIndex(t->literal);
  471. if(foundIndex != -1) {
  472. ftReplace(index, index + 1, ft, &(defines[foundIndex].tokens));
  473. return true;
  474. }
  475. return false;
  476. }
  477. static void ftSearchInstruction(FileTokens* ft) {
  478. for(int i = 0; i < ft->length && !ftHasError(); i++) {
  479. if(ft->tokens[i].type == FT_SINGLE && ft->tokens[i].single == '#') {
  480. ftParseInstruction(&i, ft);
  481. } else if(ft->tokens[i].type == FT_LITERAL) {
  482. if(ftReplaceMakros(ft, i)) {
  483. i--;
  484. }
  485. }
  486. }
  487. }
  488. void ftInit(const char* path, FileTokens* ft, Error* e) {
  489. defineIndex = 0;
  490. error = e;
  491. eInitSuccess(error);
  492. ftInitFileTokensWithPath(ft, path);
  493. ftReadFullFile(ft, ft, 0);
  494. if(!ftHasError()) {
  495. ftSearchInstruction(ft);
  496. }
  497. for(int i = 0; i < defineIndex; i++) {
  498. ftDelete(&(defines[i].tokens));
  499. }
  500. }
  501. void ftDelete(FileTokens* ft) {
  502. for(int i = 0; i < ft->length; i++) {
  503. free(ft->tokens[i].literal);
  504. }
  505. free(ft->tokens);
  506. ft->tokens = NULL;
  507. ft->capacity = 0;
  508. ft->length = 0;
  509. }