Browse Source

Print i32 arrays as string, UTF8 character support

Kajetan Johannes Hammerle 6 days ago
parent
commit
bb009a78e7

+ 1 - 0
CMakeLists.txt

@@ -12,6 +12,7 @@ set(SRC
     "src/Buffer.c"
     "src/Memory.c"
     "src/SystemFunctions.c"
+    "src/Utils.c"
 )
 
 set(COMPILER_ARGUMENTS

+ 26 - 1
src/SystemFunctions.c

@@ -8,6 +8,7 @@
 #include <unistd.h>
 
 #include "Memory.h"
+#include "Utils.h"
 
 #define RETURN_INT(value)  \
     *r = INT_VALUE(value); \
@@ -17,12 +18,36 @@ static u32 seed = 0;
 static bool rawMode = false;
 static struct termios originalTerminal;
 
+static void printArray(Code* c, const Value* v) { // Writes the elements of the array referenced by v->data to stdout, without a trailing newline.
+    if(v->data == -1) { // a data value of -1 is reported as "null array" without looking up any allocation
+        printf("null array");
+        return;
+    }
+    Allocation* a = memoryConvertToPointer(v->data);
+    Value* end = a->values + a->values[0].data + 1; // values[0].data is read as the element count; the elements start at values[1]
+    for(Value* i = a->values + 1; i != end; i++) {
+        switch(i->type) {
+            case VT_INT32: { // an i32 element is treated as a Unicode code point and emitted as its UTF-8 bytes
+                UTF8 u = convertUnicodeToUTF8(i->data);
+                for(u32 l = 0; l < u.length; l++) {
+                    putchar(u.data[l]);
+                }
+                break;
+            }
+            case VT_ARRAY: printf("array"); break; // nested arrays are not expanded; only the word "array" is printed
+            case VT_CONSTANT_STRING:
+                printf("%s", c->code.data + i->data); // i->data is used as an offset into c->code.data
+                break;
+        }
+    }
+}
+
 static bool sfPrint(Code* c, Value* r, Value* vs, i32 n) {
     for(i32 i = 0; i < n; i++) {
         Value* v = vs + i;
         switch(v->type) {
             case VT_INT32: printf("%d", v->data); break;
-            case VT_ARRAY: printf("array"); break;
+            case VT_ARRAY: printArray(c, v); break;
             case VT_CONSTANT_STRING:
                 printf("%s", c->code.data + v->data);
                 break;

+ 34 - 3
src/Tokenizer.c

@@ -9,6 +9,7 @@
 #include <string.h>
 
 #include "Constants.h"
+#include "Utils.h"
 
 typedef struct {
     int line;
@@ -121,8 +122,9 @@ static const char* tokenizerAddNumber(TState* t, const char* s) {
 }
 
 static char mapEscapeSequence(TState* t, const char* s) {
-    static char map[][2] = {
-        {'n', '\n'}, {'t', '\t'}, {'r', '\r'}, {'\\', '\\'}, {'#', '\0'}};
+    static char map[][2] = {{'n', '\n'},  {'t', '\t'}, {'r', '\r'},
+                            {'\\', '\\'}, {'"', '"'},  {'\'', '\''},
+                            {'#', '\0'}};
     for(size_t i = 0; map[i][0] != '#'; i++) {
         if(s[1] == map[i][0]) {
             return map[i][1];
@@ -156,6 +158,33 @@ static const char* tokenizerAddString(TState* t, const char* s) {
     return s;
 }
 
+static const char* tokenizerAddChar(TState* t, const char* s) { // Reads a '...' character literal, emits it as a TT_INT32 token holding the decoded value, and returns the position after the closing quote. Presumably s points at the opening quote on entry - confirm against the caller.
+    UTF8 buffer = {}; // zero-initialized, so an empty literal '' decodes to 0 below
+    while(true) {
+        char c = *(++s); // advance first, then read: the character s pointed at on entry is never inspected
+        if(c == '\0') {
+            THROW_ERROR("Unclosed character");
+        } else if(c == '\'') {
+            s++; // step past the closing quote
+            break;
+        } else if(buffer.length >= sizeof(buffer.data)) {
+            THROW_ERROR("Too long character");
+        } else if(buffer.length > 0 && !isUTF8Remainder(c)) { // every byte after the first must be a UTF-8 continuation byte
+            THROW_ERROR("Invalid character");
+        } else if(c == '\\') { // a backslash is not a continuation byte, so the check above only lets it through as the first byte
+            c = mapEscapeSequence(t, s);
+            s++; // skip the character that followed the backslash
+        }
+        buffer.data[buffer.length++] = c; // NOTE(review): the first byte is never checked against the number of continuation bytes that follow
+    }
+    i32 i = convertUTF8toUnicode(buffer);
+    tAddToken(t, TT_INT32);
+    if(bufferWriteI32(&t->tokenizer->buffer, i)) { // a non-zero result from the write is treated as failure
+        tTooMuchTokens(t);
+    }
+    return s;
+}
+
 #define SIMPLE_TOKEN(ch, token) \
     else if(c == ch) {          \
         tAddToken(t, token);    \
@@ -181,10 +210,12 @@ static void tParseLineString(TState* t, const char* s) {
             s = tokenizerAddNumber(t, s);
         } else if(c == '"') {
             s = tokenizerAddString(t, s);
+        } else if(c == '\'') {
+            s = tokenizerAddChar(t, s);
         } else if(c == '\n' || c == '#') {
             tAddToken(t, TT_NEWLINE);
             break;
-        } else if(c == ' ') {
+        } else if(c == ' ' || c == '\r') {
             s++;
         }
         SIMPLE_TOKEN(',', TT_COMMA)

+ 43 - 0
src/Utils.c

@@ -0,0 +1,43 @@
+#include "Utils.h"
+
+UTF8 convertUnicodeToUTF8(i32 c) { // Encodes the code point c as 1 to 4 UTF-8 bytes; the byte count is returned in the length field.
+    UTF8 u = {0};
+    if(c >= 0x10000) { // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        u.length = 4;
+        u.data[0] = (i8)0b1111'0000 | ((c >> 18) & 0b0000'0111); // only 3 bits are kept here; values above 21 bits are truncated, not rejected
+        u.data[1] = (i8)0b1000'0000 | ((c >> 12) & 0b0011'1111);
+        u.data[2] = (i8)0b1000'0000 | ((c >> 6) & 0b0011'1111);
+        u.data[3] = (i8)0b1000'0000 | (c & 0b0011'1111);
+    } else if(c >= 0x800) { // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+        u.length = 3;
+        u.data[0] = (i8)0b1110'0000 | ((c >> 12) & 0b0000'1111);
+        u.data[1] = (i8)0b1000'0000 | ((c >> 6) & 0b0011'1111);
+        u.data[2] = (i8)0b1000'0000 | (c & 0b0011'1111);
+    } else if(c >= 0x80) { // two bytes: 110xxxxx 10xxxxxx
+        u.length = 2;
+        u.data[0] = (i8)0b1100'0000 | ((c >> 6) & 0b0001'1111);
+        u.data[1] = (i8)0b1000'0000 | (c & 0b0011'1111);
+    } else { // everything below 0x80 lands here, including negative values, and is masked to its low 7 bits
+        u.length = 1;
+        u.data[0] = c & 0b0111'1111;
+    }
+    return u;
+}
+
+i32 convertUTF8toUnicode(UTF8 c) { // Decodes the bytes in c into a code point, choosing the layout from c.length only; marker bits are masked off without being validated.
+    if(c.length == 4) {
+        return ((c.data[0] & 0b0000'0111) << 18) | // 3 payload bits from the first byte, 6 from each following byte
+               ((c.data[1] & 0b0011'1111) << 12) |
+               ((c.data[2] & 0b0011'1111) << 6) | (c.data[3] & 0b0011'1111);
+    } else if(c.length == 3) {
+        return ((c.data[0] & 0b0000'1111) << 12) | // 4 payload bits from the first byte
+               ((c.data[1] & 0b0011'1111) << 6) | (c.data[2] & 0b0011'1111);
+    } else if(c.length == 2) {
+        return ((c.data[0] & 0b0001'1111) << 6) | (c.data[1] & 0b0011'1111); // 5 payload bits from the first byte
+    }
+    return c.data[0]; // any other length, including 0, returns the first byte unmasked. NOTE(review): i8 is presumably signed, so a byte >= 0x80 would come back negative - confirm
+}
+
+bool isUTF8Remainder(i8 c) { // Reports whether c is a UTF-8 continuation byte.
+    return (c & 0b1100'0000) == 0b1000'0000; // true exactly when the top two bits are 10
+}

+ 11 - 0
src/Utils.h

@@ -1,6 +1,8 @@
 #ifndef BASIC_UTILS_H
 #define BASIC_UTILS_H
 
+#include "Types.h"
+
 #define CLEAN_LIST(var, Type, field) \
     do {                             \
         Type* v = var->field;        \
@@ -12,4 +14,13 @@
         var->field = nullptr;        \
     } while(false)
 
+typedef struct { // One character as UTF-8 bytes, passed and returned by value by the conversion functions.
+    i8 data[4]; // encoded bytes; only the first `length` entries are meaningful
+    u32 length; // number of bytes used in data (the encoder sets 1 to 4)
+} UTF8;
+
+UTF8 convertUnicodeToUTF8(i32 c);
+i32 convertUTF8toUnicode(UTF8 c);
+bool isUTF8Remainder(i8 c);
+
 #endif

+ 0 - 1
test/Array.basic

@@ -25,7 +25,6 @@ printLine(getAllocations())
 d[0] = array(3)
 d[1] = array(4)
 d[2] = d
-printLine(d)
 printLine(getAllocations())
 
 d = 5

+ 0 - 1
test/Array.basic_result

@@ -9,6 +9,5 @@
 3
 3
 4
-array
 6
 3

+ 22 - 0
test/ArrayString.basic

@@ -0,0 +1,22 @@
+a = array(4)
+
+a[0] = 'ä'
+a[1] = 'b'
+a[2] = 'c'
+a[3] = '\''
+
+printLine(a)
+printLine(a[0])
+printLine(a[1])
+printLine(a[2])
+printLine(a[3])
+
+a[2] = a[2] + 1
+
+printLine(a)
+printLine(a[0])
+printLine(a[1])
+printLine(a[2])
+printLine(a[3])
+
+printLine(array(0))

+ 11 - 0
test/ArrayString.basic_result

@@ -0,0 +1,11 @@
+äbc'
+228
+98
+99
+39
+äbd'
+228
+98
+100
+39
+null array

+ 1 - 0
test/Escape.basic

@@ -1,3 +1,4 @@
 print("\\\n")
 print("\r\n")
 print("\tx\n")
+print("\"\n")

+ 1 - 0
test/Escape.basic_result

@@ -1,3 +1,4 @@
 \
 
 	x
+"