FileTokens.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. #include <errno.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer/FileTokens.h"
  6. #include "utils/SnuviUtils.h"
  7. static Error* error;
  8. static int line = 0;
  9. typedef char String[64];
  10. #define STRING_LENGTH ((int)sizeof(String))
  11. typedef struct {
  12. String name;
  13. } Define;
  14. #define DEFINES 256
  15. static Define defines[DEFINES];
  16. static int defineIndex = 0;
  17. static const char* ftGetPath(const FileTokens* ft) {
  18. if(ft->length < 1 || ft->tokens[0].type != FT_PATH) {
  19. eInitError(error, line, "path not set");
  20. return "";
  21. }
  22. return ft->tokens[0].literal;
  23. }
  24. static void ftSystemError(const char* msg, const FileTokens* ft) {
  25. eInitError(error, line, "%s '%s': %s", msg, ftGetPath(ft), strerror(errno));
  26. }
  27. static void ftToSpace(FileToken* t) {
  28. free(t->literal);
  29. if(t->type != FT_NEWLINE) {
  30. t->type = FT_SPACE;
  31. }
  32. t->literal = NULL;
  33. t->single = 0;
  34. }
  35. static FileToken* ftAddToken(FileTokens* ft, FileTokenType type) {
  36. if(ft->length >= ft->capacity) {
  37. ft->capacity = (ft->capacity * 5) / 4 + 8;
  38. ft->tokens = realloc(ft->tokens, ft->capacity * sizeof(FileToken));
  39. }
  40. FileToken* t = ft->tokens + ft->length++;
  41. t->type = type;
  42. t->literal = NULL;
  43. t->single = 0;
  44. return t;
  45. }
  46. static void ftAddPath(FileTokens* ft, const char* path) {
  47. ftAddToken(ft, FT_PATH)->literal = strdup(path);
  48. }
  49. static void ftAddEndPath(FileTokens* ft) {
  50. ftAddToken(ft, FT_END_PATH);
  51. }
  52. static void ftAddNewline(FileTokens* ft) {
  53. ftAddToken(ft, FT_NEWLINE);
  54. }
  55. static void ftAddLiteral(FileTokens* ft, const char* path, int length) {
  56. ftAddToken(ft, FT_LITERAL)->literal = strndup(path, length);
  57. }
  58. static void ftAddSingle(FileTokens* ft, int single) {
  59. ftAddToken(ft, FT_SINGLE)->single = single;
  60. }
  61. static void ftAddSpace(FileTokens* ft) {
  62. ftAddToken(ft, FT_SPACE);
  63. }
  64. static void ftPrint(FileTokens* ft) {
  65. for(int i = 0; i < ft->length; i++) {
  66. switch(ft->tokens[i].type) {
  67. case FT_PATH:
  68. printf("\033[0;34mPath: %s\n", ft->tokens[i].literal);
  69. break;
  70. case FT_END_PATH: puts("\033[0;34mEnd Path"); break;
  71. case FT_NEWLINE: putchar('\n'); break;
  72. case FT_LITERAL:
  73. printf("\033[0;33m%s", ft->tokens[i].literal);
  74. break;
  75. case FT_SINGLE:
  76. printf("\033[0;31m%c", (char)ft->tokens[i].single);
  77. break;
  78. case FT_SPACE: putchar(' '); break;
  79. }
  80. }
  81. printf("\033[0m");
  82. }
  83. static bool ftHasError() {
  84. return eHasError(error);
  85. }
  // Returns true when the two strings have identical content.
  static bool ftStringCompare(const char* a, const char* b) {
      const int difference = strcmp(a, b);
      return !difference;
  }
  89. static void ftInitFileTokens(FileTokens* ft, const char* path) {
  90. ft->tokens = NULL;
  91. ft->capacity = 0;
  92. ft->length = 0;
  93. ftAddPath(ft, path);
  94. }
  95. static void ftTokenize(FileTokens* ft, unsigned char* data) {
  96. int index = 0;
  97. int oldIndex = 0;
  98. bool inString = false;
  99. int line = 1;
  100. while(true) {
  101. unsigned char c = data[index];
  102. if(isAllowedInName(c) || (inString && c != '"' && c != '\0')) {
  103. index++;
  104. continue;
  105. }
  106. if(oldIndex < index) {
  107. ftAddLiteral(ft, (const char*)(data + oldIndex), index - oldIndex);
  108. }
  109. if(c == '"') {
  110. inString = !inString;
  111. }
  112. if(c == '\0') {
  113. break;
  114. } else if(c == '\n') {
  115. line++;
  116. ftAddNewline(ft);
  117. } else if(c == ' ') {
  118. ftAddSpace(ft);
  119. } else if(c == '/' && data[index + 1] == '/') {
  120. while(data[index] != '\0' && data[index] != '\n') {
  121. index++;
  122. }
  123. ftAddNewline(ft);
  124. } else if(c == '/' && data[index + 1] == '*') {
  125. int startLine = line;
  126. index += 2;
  127. while(true) {
  128. if(data[index] == '\0') {
  129. eInitError(error, startLine, "unclosed multiline comment");
  130. return;
  131. } else if(data[index] == '\n') {
  132. ftAddNewline(ft);
  133. line++;
  134. } else if(data[index] == '*' && data[index + 1] == '/') {
  135. index++;
  136. break;
  137. }
  138. index++;
  139. }
  140. } else {
  141. ftAddSingle(ft, c);
  142. }
  143. index++;
  144. oldIndex = index;
  145. }
  146. }
  147. static void ftReadFull(FILE* file, FileTokens* ft) {
  148. if(fseek(file, 0, SEEK_END)) {
  149. ftSystemError("cannot seek end of file", ft);
  150. return;
  151. }
  152. long length = ftell(file);
  153. if(length < 0) {
  154. ftSystemError("cannot tell end of file", ft);
  155. return;
  156. }
  157. rewind(file);
  158. unsigned char* data = malloc(length + 1);
  159. int readValues = fread(data, 1, length, file);
  160. if(readValues != length) {
  161. ftSystemError("cannot read file", ft);
  162. free(data);
  163. return;
  164. }
  165. data[length] = '\0';
  166. ftTokenize(ft, data);
  167. ftAddEndPath(ft);
  168. free(data);
  169. }
  170. static void ftReadFullFile(FileTokens* ft) {
  171. FILE* file = fopen(ftGetPath(ft), "r");
  172. if(file == NULL) {
  173. ftSystemError("cannot open file", ft);
  174. return;
  175. }
  176. ftReadFull(file, ft);
  177. if(fclose(file) < 0) {
  178. ftSystemError("cannot close file", ft);
  179. }
  180. }
  181. static void ftReplace(int from, int to, FileTokens* ft,
  182. FileTokens* replacement) {
  183. int newCapacity = ft->capacity + replacement->capacity;
  184. FileToken* newTokens = malloc(newCapacity * sizeof(FileToken));
  185. // these will be overwritten
  186. for(int i = from; i < to; i++) {
  187. free(ft->tokens[i].literal);
  188. ft->tokens[i].literal = NULL;
  189. }
  190. memcpy(newTokens, ft->tokens, from * sizeof(FileToken));
  191. memcpy(newTokens + from, replacement->tokens,
  192. replacement->length * sizeof(FileToken));
  193. memcpy(newTokens + from + replacement->length, ft->tokens + to,
  194. (ft->length - to) * sizeof(FileToken));
  195. free(ft->tokens);
  196. ft->tokens = newTokens;
  197. ft->capacity = newCapacity;
  198. ft->length = from + replacement->length + ft->length - to;
  199. // ownership moved
  200. for(int i = 0; i < replacement->length; i++) {
  201. replacement->tokens[i].literal = NULL;
  202. }
  203. }
  204. static void ftSkipSpaces(int* index, FileTokens* ft) {
  205. while(*index < ft->length) {
  206. FileToken* t = ft->tokens + *index;
  207. if(t->type != FT_SPACE) {
  208. return;
  209. }
  210. (*index)++;
  211. }
  212. }
  213. const char* INCLUDE_ERROR = "include path must be a string";
  214. static bool ftIncludeDoubleQuote(int* index, FileTokens* ft) {
  215. if(*index >= ft->length || ft->tokens[*index].type != FT_SINGLE ||
  216. ft->tokens[*index].single != '"') {
  217. eInitError(error, line, INCLUDE_ERROR);
  218. return true;
  219. }
  220. (*index)++;
  221. return false;
  222. }
  223. static void ftHandleInclude(int* index, int start, FileTokens* ft) {
  224. ftSkipSpaces(index, ft);
  225. if(ftIncludeDoubleQuote(index, ft)) {
  226. return;
  227. }
  228. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  229. eInitError(error, line, INCLUDE_ERROR);
  230. return;
  231. }
  232. const char* path = ft->tokens[(*index)++].literal;
  233. if(ftIncludeDoubleQuote(index, ft)) {
  234. return;
  235. }
  236. const char* parentPath = ftGetPath(ft);
  237. int afterLastSlash = 0;
  238. for(int i = 0; parentPath[i] != '\0'; i++) {
  239. afterLastSlash = parentPath[i] == '/' ? i + 1 : afterLastSlash;
  240. }
  241. int pathLength = strlen(path) + 1;
  242. char* fullPath = malloc(afterLastSlash + pathLength);
  243. memcpy(fullPath, parentPath, afterLastSlash);
  244. memcpy(fullPath + afterLastSlash, path, pathLength);
  245. FileTokens include;
  246. ftInitFileTokens(&include, fullPath);
  247. free(fullPath);
  248. ftReadFullFile(&include);
  249. if(!ftHasError()) {
  250. ftReplace(start, *index, ft, &include);
  251. *index = start;
  252. }
  253. ftDelete(&include);
  254. }
  255. static bool ftIsDefined(const char* s) {
  256. for(int i = 0; i < defineIndex; i++) {
  257. if(ftStringCompare(defines[i].name, s)) {
  258. return true;
  259. }
  260. }
  261. return false;
  262. }
  263. static int ftFindEndIf(int from, FileTokens* ft) {
  264. int layer = 0;
  265. for(int i = from; i < ft->length - 1; i++) {
  266. if(ft->tokens[i].type != FT_SINGLE || ft->tokens[i].single != '#') {
  267. continue;
  268. } else if(ft->tokens[i + 1].type != FT_LITERAL) {
  269. continue;
  270. }
  271. if(ftStringCompare(ft->tokens[i + 1].literal, "ifndef")) {
  272. layer++;
  273. } else if(ftStringCompare(ft->tokens[i + 1].literal, "endif")) {
  274. if(layer == 0) {
  275. return i;
  276. }
  277. layer--;
  278. }
  279. }
  280. return -1;
  281. }
  282. static void ftHandleIfNotDefined(int* index, int start, FileTokens* ft) {
  283. (void)start;
  284. ftSkipSpaces(index, ft);
  285. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  286. eInitError(error, line, "ifndef expects a literal");
  287. return;
  288. }
  289. const char* check = ft->tokens[(*index)++].literal;
  290. int endIf = ftFindEndIf(*index, ft);
  291. if(endIf == -1) {
  292. eInitError(error, line, "cannot find #endif");
  293. return;
  294. }
  295. if(ftIsDefined(check)) {
  296. for(int i = start; i < endIf + 2; i++) {
  297. ftToSpace(ft->tokens + i);
  298. }
  299. } else {
  300. ftToSpace(ft->tokens + start); // #
  301. ftToSpace(ft->tokens + start + 1); // ifndef
  302. ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
  303. ftToSpace(ft->tokens + endIf); // #
  304. ftToSpace(ft->tokens + endIf + 1); // endif
  305. }
  306. }
  307. static void frHandleDefine(int* index, int start, FileTokens* ft) {
  308. if(defineIndex >= DEFINES) {
  309. eInitError(error, line, "too much defines");
  310. return;
  311. }
  312. ftSkipSpaces(index, ft);
  313. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  314. eInitError(error, line, "define expects a literal");
  315. return;
  316. }
  317. const char* define = ft->tokens[(*index)++].literal;
  318. if(ftIsDefined(define)) {
  319. return;
  320. }
  321. strncpy(defines[defineIndex].name, define, STRING_LENGTH);
  322. defineIndex++;
  323. ftToSpace(ft->tokens + start); // #
  324. ftToSpace(ft->tokens + start + 1); // define
  325. ftToSpace(ft->tokens + (*index - 1)); // DEFINITION
  326. }
  327. static void ftParseInstruction(int* index, FileTokens* ft) {
  328. int start = *index;
  329. (*index)++;
  330. if(*index >= ft->length || ft->tokens[*index].type != FT_LITERAL) {
  331. eInitError(error, line, "expected literal after #");
  332. return;
  333. }
  334. const char* name = ft->tokens[(*index)++].literal;
  335. if(ftStringCompare(name, "include")) {
  336. ftHandleInclude(index, start, ft);
  337. } else if(ftStringCompare(name, "ifndef")) {
  338. ftHandleIfNotDefined(index, start, ft);
  339. } else if(ftStringCompare(name, "define")) {
  340. frHandleDefine(index, start, ft);
  341. } else if(ftStringCompare(name, "endif")) {
  342. eInitError(error, line, "endif without if");
  343. } else {
  344. eInitError(error, line, "unknown preprocessor literal '%s'", name);
  345. }
  346. }
  347. static void ftSearchInstruction(FileTokens* ft) {
  348. for(int i = 0; i < ft->length && !ftHasError(); i++) {
  349. if(ft->tokens[i].type == FT_SINGLE && ft->tokens[i].single == '#') {
  350. ftParseInstruction(&i, ft);
  351. }
  352. }
  353. }
  354. void ftInit(const char* path, FileTokens* ft, Error* e) {
  355. defineIndex = 0;
  356. error = e;
  357. eInitSuccess(error);
  358. ftInitFileTokens(ft, path);
  359. ftReadFullFile(ft);
  360. if(!ftHasError()) {
  361. ftSearchInstruction(ft);
  362. }
  363. (void)ftPrint;
  364. // ftPrint(ft);
  365. }
  366. void ftDelete(FileTokens* ft) {
  367. for(int i = 0; i < ft->length; i++) {
  368. free(ft->tokens[i].literal);
  369. }
  370. free(ft->tokens);
  371. ft->tokens = NULL;
  372. ft->capacity = 0;
  373. ft->length = 0;
  374. }