Explorar o código

unicode constants

Kajetan Johannes Hammerle hai 3 anos
pai
achega
3a316ab3f1
Modificáronse 6 ficheiros con 37 adicións e 0 borrados
  1. 1 0
      Main.c
  2. 1 0
      Test.c
  3. 6 0
      tests/strings/chars
  4. 4 0
      tests/strings/chars.out
  5. 24 0
      tokenizer/Tokenizer.c
  6. 1 0
      tokenizer/Tokenizer.h

+ 1 - 0
Main.c

@@ -21,6 +21,7 @@ static void start(int argAmount, const char** args) {
     } else if(argAmount >= 2) {
         if(tTokenize(args[1])) {
             puts(tGetError());
+            printf("line: %d\n", tGetLine());
             return;
         }
         ByteCode* code = cCompile();

+ 1 - 0
Test.c

@@ -122,6 +122,7 @@ static void tsCheckFile() {
     if(tTokenize(path)) {
         puts(path);
         puts(tGetError());
+        printf("line: %d\n", tGetLine());
         return;
     }
     ByteCode* bc = cCompile();

+ 6 - 0
tests/strings/chars

@@ -0,0 +1,6 @@
+void main() {
+    test('!');
+    test('¡');
+    test('ࠠ');
+    test('𐀠');
+}

+ 4 - 0
tests/strings/chars.out

@@ -0,0 +1,4 @@
+33
+161
+2080
+65568

+ 24 - 0
tokenizer/Tokenizer.c

@@ -140,6 +140,25 @@ static void tAddString() {
     tAdd(&c, 1);
 }
 
+static void tAddUnicode() { // tokenizes a character literal as an integer constant; invoked by tParseToken for the ' case
+    int c = fRead(); // first byte of the character, interpreted as a UTF-8 lead byte
+    if((c & 0xE0) == 0xC0) { // 110xxxxx: two-byte sequence
+        c = ((c & 0x1F) << 6) | (fRead() & 0x3F); // 5 payload bits from the lead byte, 6 from the next byte
+    } else if((c & 0xF0) == 0xE0) { // 1110xxxx: three-byte sequence
+        c = ((c & 0xF) << 12) | ((fRead() & 0x3F) << 6); // 4 payload bits from the lead byte, then 6 more
+        c |= fRead() & 0x3F; // lowest 6 bits
+    } else if((c & 0xF8) == 0xF0) { // 11110xxx: four-byte sequence
+        c = ((c & 0x7) << 18) | ((fRead() & 0x3F) << 12); // 3 payload bits from the lead byte, then 6 more
+        c |= (fRead() & 0x3F) << 6; // next 6 bits
+        c |= fRead() & 0x3F; // lowest 6 bits
+    } // any other first byte is kept unchanged; following bytes are masked but not checked for the 10xxxxxx form
+    tAddToken(T_CONST_INT); // the literal is emitted as an integer constant token ...
+    tAdd(&c, sizeof(int)); // ... followed by the decoded value as its payload
+    if(fRead() != '\'') { // exactly one character is allowed before the closing quote
+        tError("expecting unicode end"); // NOTE(review): what fRead returns at end of input is not visible here - confirm
+    }
+}
+
 static void tAddToken2(Token te, Token t) {
     if(fReadIf('=')) {
         tAddToken(te);
@@ -238,6 +257,7 @@ static void tParseToken(int c) {
         case '{': tAddToken(T_OPEN_CURVED_BRACKET); return;
         case '}': tAddToken(T_CLOSE_CURVED_BRACKET); return;
         case '"': tAddString(); return;
+        case '\'': tAddUnicode(); return;
         case '.': tAddToken(T_POINT); return;
         case '[': tAddToken(T_OPEN_SQUARE_BRACKET); return;
         case ']': tAddToken(T_CLOSE_SQUARE_BRACKET); return;
@@ -272,6 +292,10 @@ const char* tGetError() {
     return error;
 }
 
+int tGetLine() { // returns the tokenizer's `line` variable; Main.c and Test.c print it after tTokenize reports an error
+    return line;
+}
+
 void tResetReader() {
     readIndex = 0;
 }

+ 1 - 0
tokenizer/Tokenizer.h

@@ -10,6 +10,7 @@ typedef int16_t int16;
 
 bool tTokenize(const char* path);
 const char* tGetError();
+int tGetLine();
 
 void tResetReader();
 Token tPeekToken();