// FileTokens.c
  1. #include <errno.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer/FileTokens.h"
  6. #include "utils/SnuviUtils.h"
// Error object all helpers in this file report into; ftInit() points it at
// the Error supplied by the caller.
static Error* error = NULL;
// Fixed-size character buffer used for the name of a define.
typedef char String[64];
// Size of a String buffer in bytes, as an int.
#define STRING_LENGTH ((int)sizeof(String))
// One preprocessor define: its name and the tokens it stands for.
typedef struct {
    String name;
    FileTokens tokens;
} Define;
// Maximum number of defines that can exist at the same time.
#define DEFINES 256
// Table of defines; only the first defineIndex entries are in use.
static Define defines[DEFINES];
static int defineIndex = 0;
  17. static int ftGetLine(const FileTokens* ft, int index) {
  18. int fileLayer = 0;
  19. int lines = 1;
  20. for(int i = index; i >= 0; i--) {
  21. FileToken* t = ft->tokens + i;
  22. if(t->type == FT_PATH) {
  23. if(fileLayer == 0) {
  24. return lines;
  25. }
  26. fileLayer--;
  27. } else if(t->type == FT_END_PATH) {
  28. fileLayer++;
  29. } else if(t->type == FT_NEWLINE) {
  30. lines += fileLayer == 0;
  31. }
  32. }
  33. return 0;
  34. }
  35. static const char* ftGetPath(const FileTokens* ft, int index) {
  36. int fileLayer = 0;
  37. for(int i = index; i >= 0; i--) {
  38. FileToken* t = ft->tokens + i;
  39. if(t->type == FT_PATH) {
  40. if(fileLayer == 0) {
  41. return t->literal;
  42. }
  43. fileLayer--;
  44. } else if(t->type == FT_END_PATH) {
  45. fileLayer++;
  46. }
  47. }
  48. return "unknown path";
  49. }
  50. static void ftError(const FileTokens* ft, int codeLine, int index,
  51. const char* format, ...) {
  52. va_list args;
  53. va_start(args, format);
  54. eInitErrorV(error, "", ftGetLine(ft, index), codeLine, format, args);
  55. int fileLayer = 0;
  56. for(int i = index; i >= 0; i--) {
  57. FileToken* t = ft->tokens + i;
  58. if(t->type == FT_PATH) {
  59. if(fileLayer == 0) {
  60. eAddPath(error, t->literal);
  61. } else {
  62. fileLayer--;
  63. }
  64. } else if(t->type == FT_END_PATH) {
  65. fileLayer++;
  66. }
  67. }
  68. va_end(args);
  69. }
  70. static void ftSystemError(const char* msg, const FileTokens* parent,
  71. const FileTokens* ft, int codeLine, int index) {
  72. ftError(parent, codeLine, index, "%s '%s': %s", msg, ftGetPath(ft, 0),
  73. strerror(errno));
  74. }
  75. static void ftToSpace(FileToken* t) {
  76. free(t->literal);
  77. if(t->type != FT_NEWLINE) {
  78. t->type = FT_SPACE;
  79. }
  80. t->literal = NULL;
  81. t->single = 0;
  82. }
  83. static FileToken* ftAddToken(FileTokens* ft, FileTokenType type) {
  84. if(ft->length >= ft->capacity) {
  85. ft->capacity = (ft->capacity * 5) / 4 + 8;
  86. ft->tokens = (FileToken*)realloc(ft->tokens, (size_t)ft->capacity *
  87. sizeof(FileToken));
  88. }
  89. FileToken* t = ft->tokens + ft->length++;
  90. t->type = type;
  91. t->literal = NULL;
  92. t->single = 0;
  93. return t;
  94. }
  95. static FileToken* ftAddCopyToken(FileTokens* ft, const FileToken* t) {
  96. FileToken* added = ftAddToken(ft, t->type);
  97. added->literal = t->literal == NULL ? NULL : strdup(t->literal);
  98. added->single = t->single;
  99. return added;
  100. }
  101. static void ftAddPath(FileTokens* ft, const char* path) {
  102. ftAddToken(ft, FT_PATH)->literal = strdup(path);
  103. }
  104. static void ftAddEndPath(FileTokens* ft) {
  105. ftAddToken(ft, FT_END_PATH);
  106. }
  107. static void ftAddNewline(FileTokens* ft) {
  108. ftAddToken(ft, FT_NEWLINE);
  109. }
  110. static void ftAddLiteral(FileTokens* ft, const char* path, int length) {
  111. ftAddToken(ft, FT_LITERAL)->literal = strndup(path, (size_t)length);
  112. }
  113. static void ftAddSingle(FileTokens* ft, int single) {
  114. ftAddToken(ft, FT_SINGLE)->single = single;
  115. }
  116. static void ftAddSpace(FileTokens* ft) {
  117. ftAddToken(ft, FT_SPACE);
  118. }
  119. void ftPrint(FileTokens* ft) {
  120. for(int i = 0; i < ft->length; i++) {
  121. switch(ft->tokens[i].type) {
  122. case FT_PATH:
  123. printf("\033[0;34mPath: %s\n", ft->tokens[i].literal);
  124. break;
  125. case FT_END_PATH: puts("\033[0;34mEnd Path"); break;
  126. case FT_NEWLINE: putchar('\n'); break;
  127. case FT_LITERAL:
  128. printf("\033[0;33m%s", ft->tokens[i].literal);
  129. break;
  130. case FT_SINGLE:
  131. printf("\033[0;31m%c", (char)ft->tokens[i].single);
  132. break;
  133. case FT_SPACE: putchar(' '); break;
  134. default: putchar('?'); break;
  135. }
  136. }
  137. printf("\033[0m");
  138. }
  139. static bool ftHasError(void) {
  140. return eHasError(error);
  141. }
  142. static bool ftStringCompare(const char* a, const char* b) {
  143. return strcmp(a, b) == 0;
  144. }
  145. static void ftInitFileTokens(FileTokens* ft) {
  146. ft->tokens = NULL;
  147. ft->capacity = 0;
  148. ft->length = 0;
  149. }
  150. static void ftInitFileTokensWithPath(FileTokens* ft, const char* path) {
  151. ftInitFileTokens(ft);
  152. ftAddPath(ft, path);
  153. }
  154. static void ftTokenize(FileTokens* ft, unsigned char* data) {
  155. int index = 0;
  156. int oldIndex = 0;
  157. bool inString = false;
  158. while(true) {
  159. unsigned char c = data[index];
  160. if(isAllowedInName(c) || (inString && c != '"' && c != '\0')) {
  161. index++;
  162. continue;
  163. }
  164. if(oldIndex < index) {
  165. ftAddLiteral(ft, (const char*)(data + oldIndex), index - oldIndex);
  166. }
  167. if(c == '"') {
  168. inString = !inString;
  169. }
  170. if(c == '\0') {
  171. break;
  172. } else if(c == '\n') {
  173. ftAddNewline(ft);
  174. } else if(c == ' ') {
  175. ftAddSpace(ft);
  176. } else if(c == '/' && data[index + 1] == '/') {
  177. while(data[index] != '\0' && data[index] != '\n') {
  178. index++;
  179. }
  180. ftAddNewline(ft);
  181. } else if(c == '/' && data[index + 1] == '*') {
  182. int startIndex = ft->length - 1;
  183. index += 2;
  184. while(true) {
  185. if(data[index] == '\0') {
  186. ftError(ft, __LINE__, startIndex, "test");
  187. return;
  188. } else if(data[index] == '\n') {
  189. ftAddNewline(ft);
  190. } else if(data[index] == '*' && data[index + 1] == '/') {
  191. index++;
  192. break;
  193. }
  194. index++;
  195. }
  196. } else {
  197. ftAddSingle(ft, c);
  198. }
  199. index++;
  200. oldIndex = index;
  201. }
  202. }
  203. static void ftReadFull(FILE* file, FileTokens* parent, FileTokens* ft,
  204. int index) {
  205. if(fseek(file, 0, SEEK_END)) {
  206. ftSystemError("cannot seek end of file", parent, ft, __LINE__, index);
  207. return;
  208. }
  209. long signedLength = ftell(file);
  210. if(signedLength < 0) {
  211. ftSystemError("cannot tell end of file", parent, ft, __LINE__, index);
  212. return;
  213. }
  214. size_t length = (size_t)signedLength;
  215. rewind(file);
  216. unsigned char* data = (unsigned char*)malloc(length + 1);
  217. size_t readValues = fread(data, 1, length, file);
  218. if(readValues != length) {
  219. ftSystemError("cannot read file", parent, ft, __LINE__, index);
  220. free(data);
  221. return;
  222. }
  223. data[length] = '\0';
  224. ftTokenize(ft, data);
  225. ftAddEndPath(ft);
  226. free(data);
  227. }
  228. static void ftReadFullFile(FileTokens* parent, FileTokens* ft, int index) {
  229. FILE* file = fopen(ftGetPath(ft, 0), "r");
  230. if(file == NULL) {
  231. ftSystemError("cannot open file", parent, ft, __LINE__, index);
  232. return;
  233. }
  234. ftReadFull(file, parent, ft, index);
  235. if(fclose(file) < 0) {
  236. ftSystemError("cannot close file", parent, ft, __LINE__, index);
  237. }
  238. }
  239. static void ftReplace(int from, int toEx, FileTokens* ft,
  240. const FileTokens* replacement) {
  241. int newCapacity = ft->capacity + replacement->capacity;
  242. FileToken* newTokens =
  243. (FileToken*)malloc((size_t)newCapacity * sizeof(FileToken));
  244. // these will be overwritten
  245. for(int i = from; i < toEx; i++) {
  246. free(ft->tokens[i].literal);
  247. ft->tokens[i].literal = NULL;
  248. }
  249. memcpy(newTokens, ft->tokens, (size_t)from * sizeof(FileToken));
  250. for(int i = 0; i < replacement->length; i++) {
  251. FileToken* src = replacement->tokens + i;
  252. FileToken* dst = newTokens + from + i;
  253. dst->literal = src->literal == NULL ? NULL : strdup(src->literal);
  254. dst->single = src->single;
  255. dst->type = src->type;
  256. }
  257. memcpy(newTokens + from + replacement->length, ft->tokens + toEx,
  258. (size_t)(ft->length - toEx) * sizeof(FileToken));
  259. free(ft->tokens);
  260. ft->tokens = newTokens;
  261. ft->capacity = newCapacity;
  262. ft->length = from + replacement->length + ft->length - toEx;
  263. }
  264. static void ftSkipSpaces(int* index, FileTokens* ft) {
  265. while(*index < ft->length) {
  266. FileToken* t = ft->tokens + *index;
  267. if(t->type != FT_SPACE) {
  268. return;
  269. }
  270. (*index)++;
  271. }
  272. }
  273. const char* INCLUDE_ERROR = "include path must be a string";
  274. static bool ftIncludeDoubleQuote(int* index, FileTokens* ft) {
  275. if(*index >= ft->length || ft->tokens[*index].type != FT_SINGLE ||
  276. ft->tokens[*index].single != '"') {
  277. ftError(ft, __LINE__, *index - 1, INCLUDE_ERROR);
  278. return true;
  279. }
  280. (*index)++;
  281. return false;
  282. }
  283. static void ftHandleInclude(int* index, int start, FileTokens* ft) {
  284. ftSkipSpaces(index, ft);
  285. if(ftIncludeDoubleQuote(index, ft)) {
  286. return;
  287. }
  288. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  289. ftError(ft, __LINE__, *index - 1, INCLUDE_ERROR);
  290. return;
  291. }
  292. const char* path = ft->tokens[(*index)++].literal;
  293. if(ftIncludeDoubleQuote(index, ft)) {
  294. return;
  295. }
  296. const char* parentPath = ftGetPath(ft, *index - 1);
  297. size_t afterLastSlash = 0;
  298. for(size_t i = 0; parentPath[i] != '\0'; i++) {
  299. afterLastSlash = parentPath[i] == '/' ? i + 1 : afterLastSlash;
  300. }
  301. size_t pathLength = strlen(path) + 1;
  302. char* fullPath = (char*)malloc(afterLastSlash + pathLength);
  303. memcpy(fullPath, parentPath, afterLastSlash);
  304. memcpy(fullPath + afterLastSlash, path, pathLength);
  305. FileTokens include;
  306. ftInitFileTokensWithPath(&include, fullPath);
  307. free(fullPath);
  308. ftReadFullFile(ft, &include, *index - 1);
  309. if(!ftHasError()) {
  310. ftReplace(start, *index, ft, &include);
  311. *index = start;
  312. }
  313. ftDelete(&include);
  314. }
  315. static int ftGetDefineIndex(const char* s) {
  316. for(int i = 0; i < defineIndex; i++) {
  317. if(ftStringCompare(defines[i].name, s)) {
  318. return i;
  319. }
  320. }
  321. return -1;
  322. }
  323. static bool ftIsDefined(const char* s) {
  324. return ftGetDefineIndex(s) != -1;
  325. }
  326. static int ftFindEndIf(int from, FileTokens* ft) {
  327. int layer = 0;
  328. for(int i = from; i < ft->length - 1; i++) {
  329. if(ft->tokens[i].type != FT_SINGLE || ft->tokens[i].single != '#') {
  330. continue;
  331. } else if(ft->tokens[i + 1].type != FT_LITERAL) {
  332. continue;
  333. }
  334. if(ftStringCompare(ft->tokens[i + 1].literal, "ifndef")) {
  335. layer++;
  336. } else if(ftStringCompare(ft->tokens[i + 1].literal, "endif")) {
  337. if(layer == 0) {
  338. return i;
  339. }
  340. layer--;
  341. }
  342. }
  343. return -1;
  344. }
  345. static void ftHandleIfNotDefined(int* index, int start, FileTokens* ft) {
  346. ftSkipSpaces(index, ft);
  347. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  348. ftError(ft, __LINE__, start, "ifndef expects a literal");
  349. return;
  350. }
  351. const char* check = ft->tokens[(*index)++].literal;
  352. int endIf = ftFindEndIf(*index, ft);
  353. if(endIf == -1) {
  354. ftError(ft, __LINE__, start, "cannot find #endif");
  355. return;
  356. }
  357. if(ftIsDefined(check)) {
  358. for(int i = start; i < endIf + 2; i++) {
  359. ftToSpace(ft->tokens + i);
  360. }
  361. } else {
  362. ftToSpace(ft->tokens + start); // #
  363. ftToSpace(ft->tokens + start + 1); // ifndef
  364. ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
  365. ftToSpace(ft->tokens + endIf); // #
  366. ftToSpace(ft->tokens + endIf + 1); // endif
  367. }
  368. }
  369. static void ftHandleDefine(int* index, int start, FileTokens* ft) {
  370. if(defineIndex >= DEFINES) {
  371. ftError(ft, __LINE__, *index - 1, "too much defines");
  372. return;
  373. }
  374. ftSkipSpaces(index, ft);
  375. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  376. ftError(ft, __LINE__, *index - 1, "define expects a literal");
  377. return;
  378. }
  379. const char* defineName = ft->tokens[(*index)++].literal;
  380. ftSkipSpaces(index, ft);
  381. int defineStart = *index;
  382. bool skip = false;
  383. while(*index < ft->length) {
  384. FileToken* t = ft->tokens + *index;
  385. if(!skip && t->type == FT_SINGLE && t->single == '\\') {
  386. skip = true;
  387. ftToSpace(t);
  388. } else if(t->type == FT_NEWLINE) {
  389. if(skip) {
  390. skip = false;
  391. } else {
  392. break;
  393. }
  394. } else if(skip) {
  395. ftError(ft, __LINE__, *index - 1, "expected newline after \\");
  396. return;
  397. }
  398. (*index)++;
  399. }
  400. int foundIndex = ftGetDefineIndex(defineName);
  401. if(foundIndex != -1) {
  402. ftError(ft, __LINE__, *index - 1, "'%s' redefined", defineName);
  403. return;
  404. }
  405. Define* define = NULL;
  406. if(foundIndex == -1) {
  407. define = defines + defineIndex++;
  408. strncpy(define->name, defineName, STRING_LENGTH - 1);
  409. define->name[STRING_LENGTH - 1] = '\0';
  410. } else {
  411. define = defines + foundIndex;
  412. ftDelete(&define->tokens);
  413. }
  414. ftInitFileTokens(&define->tokens);
  415. for(int i = defineStart; i < *index; i++) {
  416. ftAddCopyToken(&define->tokens, ft->tokens + i);
  417. if(ft->tokens[i].type == FT_NEWLINE) {
  418. FileToken* t = define->tokens.tokens + (define->tokens.length - 1);
  419. ftToSpace(t);
  420. t->type = FT_SPACE;
  421. }
  422. }
  423. for(int i = start; i < *index; i++) {
  424. ftToSpace(ft->tokens + i);
  425. }
  426. }
  427. static void ftHandleUndefine(int* index, int start, FileTokens* ft) {
  428. ftSkipSpaces(index, ft);
  429. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  430. ftError(ft, __LINE__, *index - 1, "undefine expects a literal");
  431. return;
  432. }
  433. const char* defineName = ft->tokens[(*index)++].literal;
  434. int foundIndex = ftGetDefineIndex(defineName);
  435. if(foundIndex != -1) {
  436. Define* define = defines + foundIndex;
  437. ftDelete(&define->tokens);
  438. defineIndex--;
  439. if(defineIndex != foundIndex) {
  440. memcpy(define, defines + defineIndex, sizeof(Define));
  441. }
  442. }
  443. for(int i = start; i < *index; i++) {
  444. ftToSpace(ft->tokens + i);
  445. }
  446. }
  447. static void ftParseInstruction(int* index, FileTokens* ft) {
  448. int start = *index;
  449. (*index)++;
  450. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  451. ftError(ft, __LINE__, *index - 1, "expected literal after #");
  452. return;
  453. }
  454. const char* name = ft->tokens[(*index)++].literal;
  455. if(ftStringCompare(name, "include")) {
  456. ftHandleInclude(index, start, ft);
  457. } else if(ftStringCompare(name, "ifndef")) {
  458. ftHandleIfNotDefined(index, start, ft);
  459. } else if(ftStringCompare(name, "define")) {
  460. ftHandleDefine(index, start, ft);
  461. } else if(ftStringCompare(name, "undef")) {
  462. ftHandleUndefine(index, start, ft);
  463. } else if(ftStringCompare(name, "endif")) {
  464. ftError(ft, __LINE__, *index - 1, "endif without if");
  465. } else {
  466. ftError(ft, __LINE__, *index - 1, "unknown preprocessor literal '%s'",
  467. name);
  468. }
  469. }
  470. static bool ftReplaceMakros(FileTokens* ft, int index) {
  471. FileToken* t = ft->tokens + index;
  472. int foundIndex = ftGetDefineIndex(t->literal);
  473. if(foundIndex != -1) {
  474. ftReplace(index, index + 1, ft, &(defines[foundIndex].tokens));
  475. return true;
  476. }
  477. return false;
  478. }
  479. static void ftSearchInstruction(FileTokens* ft) {
  480. for(int i = 0; i < ft->length && !ftHasError(); i++) {
  481. if(ft->tokens[i].type == FT_SINGLE && ft->tokens[i].single == '#') {
  482. ftParseInstruction(&i, ft);
  483. } else if(ft->tokens[i].type == FT_LITERAL) {
  484. if(ftReplaceMakros(ft, i)) {
  485. i--;
  486. }
  487. }
  488. }
  489. }
  490. void ftInit(const char* path, FileTokens* ft, Error* e) {
  491. defineIndex = 0;
  492. error = e;
  493. eInitSuccess(error);
  494. ftInitFileTokensWithPath(ft, path);
  495. ftReadFullFile(ft, ft, 0);
  496. if(!ftHasError()) {
  497. ftSearchInstruction(ft);
  498. }
  499. for(int i = 0; i < defineIndex; i++) {
  500. ftDelete(&(defines[i].tokens));
  501. }
  502. }
  503. void ftDelete(FileTokens* ft) {
  504. for(int i = 0; i < ft->length; i++) {
  505. free(ft->tokens[i].literal);
  506. }
  507. free(ft->tokens);
  508. ft->tokens = NULL;
  509. ft->capacity = 0;
  510. ft->length = 0;
  511. }