Explorar o código

bytecode unicode text

Kajetan Johannes Hammerle hai 3 anos
pai
achega
6b62743bd2
Modificáronse 12 ficheiros con 187 adicións e 38 borrados
  1. 14 0
      Compiler.c
  2. 5 0
      DataType.c
  3. 1 0
      DataType.h
  4. 17 0
      Test.c
  5. 14 0
      tests/strings/string
  6. 9 0
      tests/strings/string.out
  7. 3 3
      tests/struct_ref/int_reference
  8. 20 14
      tokenizer/Tokenizer.c
  9. 19 0
      utils/ByteCodePrinter.c
  10. 1 0
      vm/Operation.h
  11. 79 21
      vm/Script.c
  12. 5 0
      vm/Script.h

+ 14 - 0
Compiler.c

@@ -333,6 +333,19 @@ static DataType cLiteral() {
     return dtToVariable(v.type);
 }
 
+// Compiles a text literal: emits OP_PUSH_TEXT, a 32-bit element count and then
+// every value read from the token stream until a read fails or a 0 is read.
+// Returns the data type of a text literal.
+static DataType cText() {
+    cAddOperation(OP_PUSH_TEXT);
+    // the count is only known after all values were consumed, so its slot is
+    // reserved first and patched once the loop is done
+    int32 countSlot = cReserveInt32();
+    int32 count = 0;
+    for(;;) {
+        int32 character;
+        if(!tReadInt32(&character) || character == 0) {
+            break;
+        }
+        cAddInt32(character);
+        count++;
+    }
+    cSetInt32(countSlot, count);
+    return dtText();
+}
+
 static DataType cBracketPrimary() {
     DataType result = cExpression();
     cConsumeToken(T_CLOSE_BRACKET);
@@ -372,6 +385,7 @@ static DataType cPrimary() {
         case T_TRUE: cAddOperation(OP_PUSH_TRUE); return dtBool();
         case T_FALSE: cAddOperation(OP_PUSH_FALSE); return dtBool();
         case T_NULLPTR: cAddOperation(OP_PUSH_NULLPTR); return dtNull();
+        case T_TEXT: return cText();
         case T_OPEN_BRACKET: return cBracketPrimary();
         case T_LITERAL: return cLiteral();
         case T_NEW: return cAllocArray();

+ 5 - 0
DataType.c

@@ -86,6 +86,11 @@ DataType dtNull() {
     return dt;
 }
 
+// Data type used for text literals, built from DT_INT32.
+// NOTE(review): the 1 is presumably the pointer level, since the string test
+// assigns a text literal to an int* - confirm against the DataType struct.
+DataType dtText() {
+    return (DataType){DT_INT32, 1, 0};
+}
+
 DataType dtVoid() {
     DataType dt = {DT_VOID, 0, 0};
     return dt;

+ 1 - 0
DataType.h

@@ -49,6 +49,7 @@ DataType dtInt64();
 DataType dtFloat();
 DataType dtBool();
 DataType dtNull();
+DataType dtText();
 DataType dtVoid();
 DataType dtStruct(Struct* st);
 

+ 17 - 0
Test.c

@@ -61,6 +61,22 @@ static void tsBoolPrinter(Script* sc) {
     }
 }
 
+// Test printer for text values: pops a pointer from the script stack and
+// writes each of its 32-bit elements as one char, followed by a newline.
+// Elements are narrowed with a plain cast to char before printing.
+// Nothing more is printed once the pointer, its length or one of its
+// element addresses turns out to be invalid.
+static void tsTextPrinter(Script* sc) {
+    Pointer p;
+    if(!sPopPointer(sc, &p)) {
+        return;
+    }
+    int32 length;
+    // sGetPointerLength returns true on failure
+    if(sGetPointerLength(sc, &p, &length)) {
+        return;
+    }
+    for(int i = 0; i < length; i++) {
+        int32* ip = sCheckAddress(sc, &p, sizeof(int32));
+        // sCheckAddress yields NULL for an invalid address; stop instead of
+        // dereferencing it
+        if(ip == NULL) {
+            return;
+        }
+        tsPrintToBuffer("%c", (char)*ip);
+        p.offset += sizeof(int32);
+    }
+    tsPrintToBuffer("\n");
+}
+
 static void tsAppend(const char* s) {
     for(int i = 0; pathIndex < (PATH_MAX - 1) && s[i] != '\0'; i++) {
         path[pathIndex++] = s[i];
@@ -178,6 +194,7 @@ void tsStart(const char* path) {
     tsAddPrinter(&sts, dtInt64(), tsInt64Printer);
     tsAddPrinter(&sts, dtFloat(), tsFloatPrinter);
     tsAddPrinter(&sts, dtBool(), tsBoolPrinter);
+    tsAddPrinter(&sts, dtText(), tsTextPrinter);
 
     doneTests = 0;
     allTests = 0;

+ 14 - 0
tests/strings/string

@@ -0,0 +1,14 @@
+void main() {
+    int* a = "öallo";
+    a[0] = 'H';
+    int** b = &a; 
+    test(a == *b);
+    test(*a);
+    test(a[0]);
+    test(a[1]);
+    test(a[2]);
+    test(a[3]);
+    test(a[4]);
+    test(a);
+    test(length(a));
+}

+ 9 - 0
tests/strings/string.out

@@ -0,0 +1,9 @@
+true
+72
+72
+97
+108
+108
+111
+Hallo
+5

+ 3 - 3
tests/struct_ref/int_reference

@@ -1,4 +1,4 @@
-void test(int* t) {
+void test2(int* t) {
     *t = 8;
     *t += 1;
 }
@@ -9,8 +9,8 @@ void main() {
     a = 6;
     test(a);
     test(*(b));
-    test(&a);
-    test(b);
+    test2(&a);
+    test2(b);
     test(a);
     test(*b);
 }

+ 20 - 14
tokenizer/Tokenizer.c

@@ -134,10 +134,24 @@ static void tParseNumber(int c) {
     }
 }
 
+// Decodes a UTF-8 sequence whose first byte is c into one code point, pulling
+// the continuation bytes from fRead(). A value that matches none of the
+// multi byte lead patterns is returned unchanged. Continuation bytes are only
+// masked with 0x3F, they are not validated.
+static int32 tUnicode(int32 c) {
+    if((c & 0xE0) == 0xC0) {
+        // two byte sequence: 110xxxxx 10xxxxxx
+        int32 low = fRead() & 0x3F;
+        return ((c & 0x1F) << 6) | low;
+    }
+    if((c & 0xF0) == 0xE0) {
+        // three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
+        int32 middle = fRead() & 0x3F;
+        int32 low = fRead() & 0x3F;
+        return ((c & 0xF) << 12) | (middle << 6) | low;
+    }
+    if((c & 0xF8) == 0xF0) {
+        // four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        int32 high = fRead() & 0x3F;
+        int32 middle = fRead() & 0x3F;
+        int32 low = fRead() & 0x3F;
+        return ((c & 0x7) << 18) | (high << 12) | (middle << 6) | low;
+    }
+    return c;
+}
+
 static void tAddString() {
     tAddToken(T_TEXT);
     while(true) {
-        int c = fRead();
+        int32 c = fRead();
         if(c == '"') {
             break;
         } else if(c == '\\') {
@@ -149,24 +163,16 @@ static void tAddString() {
         } else if(c == EOF) {
             tError("unclosed string starting at line %d", line);
         }
-        tAdd(&c, 1);
+        c = tUnicode(c);
+        tAdd(&c, sizeof(int32));
     }
-    char c = '\0';
-    tAdd(&c, 1);
+    int32 c = 0;
+    tAdd(&c, sizeof(int32));
 }
 
 static void tAddUnicode() {
     int32 c = fRead();
-    if((c & 0xE0) == 0xC0) {
-        c = ((c & 0x1F) << 6) | (fRead() & 0x3F);
-    } else if((c & 0xF0) == 0xE0) {
-        c = ((c & 0xF) << 12) | ((fRead() & 0x3F) << 6);
-        c |= fRead() & 0x3F;
-    } else if((c & 0xF8) == 0xF0) {
-        c = ((c & 0x7) << 18) | ((fRead() & 0x3F) << 12);
-        c |= (fRead() & 0x3F) << 6;
-        c |= fRead() & 0x3F;
-    }
+    c = tUnicode(c);
     tAddToken(T_CONST_INT32);
     tAdd(&c, sizeof(int32));
     if(fRead() != '\'') {

+ 19 - 0
utils/ByteCodePrinter.c

@@ -70,6 +70,16 @@ static void btAddFloat() {
     btAdd(" %10.2f |", value);
 }
 
+// Consumes a text operand (a 32-bit length followed by that many 32-bit
+// values) and appends a "Text" column. The values themselves are not printed.
+static void btAddText() {
+    int32 count = 0;
+    btRead(&count, sizeof(int32));
+    // the values are read only to move past them
+    for(int32 left = count; left > 0; left--) {
+        int32 skipped;
+        btRead(&skipped, sizeof(int32));
+    }
+    btAdd(" %10s |", "Text");
+}
+
 static void btAddFiller() {
     btAdd("            |");
 }
@@ -128,6 +138,14 @@ static void btPrintFloat(const char* op) {
     puts(buffer);
 }
 
+static void btPrintText(const char* op) { // prints one row for an operation with a text operand
+    btFillBase();
+    btAddOperation(op); // op is the operation name, PRINT_OP_BASE passes it as #op
+    btAddText(); // consumes the length and the values of the text operand
+    btAddFiller(); // appends an empty column
+    puts(buffer);
+}
+
 #define PRINT_OP_BASE(op, name)                                                \
     case op:                                                                   \
         btPrint##name(#op);                                                    \
@@ -161,6 +179,7 @@ static void btConsumeOperation() {
         PRINT_OP(OP_PUSH_TRUE);
         PRINT_OP(OP_PUSH_FALSE);
         PRINT_OP(OP_PUSH_NULLPTR);
+        PRINT_OP_BASE(OP_PUSH_TEXT, Text);
         PRINT_NUMBER_OP(ADD);
         PRINT_NUMBER_OP(SUB);
         PRINT_NUMBER_OP(MUL);

+ 1 - 0
vm/Operation.h

@@ -14,6 +14,7 @@ typedef enum Operation {
     OP_PUSH_TRUE,
     OP_PUSH_FALSE,
     OP_PUSH_NULLPTR,
+    OP_PUSH_TEXT,
     OP_NUMBER(ADD),
     OP_NUMBER(SUB),
     OP_NUMBER(MUL),

+ 79 - 21
vm/Script.c

@@ -130,17 +130,17 @@ READ_POP_PUSH(int64, Int64)
 READ_POP_PUSH(float, Float)
 POP_PUSH(bool, Bool)
 
-static bool sPopPointer(Script* sc, Pointer* value) {
+bool sPopPointer(Script* sc, Pointer* value) {
     return sPop(sc, value, sizeof(Pointer));
 }
 
-static bool sPushPointer(Script* sc, Pointer* value) {
+bool sPushPointer(Script* sc, Pointer* value) {
     return sPush(sc, value, sizeof(Pointer));
 }
 
-static void sPushNullPointer(Script* sc) {
+bool sPushNullPointer(Script* sc) {
     Pointer p = {-1, -1};
-    sPushPointer(sc, &p);
+    return sPushPointer(sc, &p);
 }
 
 #define INVERT_SIGN(type, Type)                                                \
@@ -162,7 +162,39 @@ static void sReserveBytes(Script* sc) {
     }
 }
 
-static void* sCheckAddress(Script* sc, Pointer* p, int length) {
+// Reads the 32-bit length stored in the code at the address that is encoded
+// negated in p->array. Reports an error and returns -1 if that address is not
+// accepted.
+static int32 sGetTextLength(Script* sc, Pointer* p) {
+    const int WORD = sizeof(int32);
+    int lengthAddress = -p->array;
+    bool insideCode =
+        lengthAddress >= 0 && lengthAddress + WORD < sc->code->length;
+    if(!insideCode) {
+        sError(sc, "invalid code array length address %d", lengthAddress);
+        return -1;
+    }
+    int32 length;
+    // copied bytewise into a local instead of being read through a cast pointer
+    memcpy(&length, sc->code->code + lengthAddress, WORD);
+    return length;
+}
+
+// Validates an access of length bytes through a text pointer and returns the
+// matching address inside the code, or NULL after reporting an error.
+// The access must lie inside the code and inside the values of the text that
+// p->array refers to.
+static void* sCheckTextAddress(Script* sc, Pointer* p, int length) {
+    const int WORD = sizeof(int32);
+    // the accessed address is one word behind p->offset
+    int target = p->offset + WORD;
+    if(target + length > sc->code->length) {
+        sError(sc, "address %d is out of code bounds", target);
+        return NULL;
+    }
+    int32 count = sGetTextLength(sc, p);
+    if(count == -1) {
+        return NULL;
+    }
+    // the values start right behind the length word at -p->array
+    int first = -p->array + WORD;
+    int end = first + count * WORD;
+    if(target < first || target + length > end) {
+        sError(sc, "invalid text address %d", (target - first) / WORD);
+        return NULL;
+    }
+    return sc->code->code + target;
+}
+
+void* sCheckAddress(Script* sc, Pointer* p, int length) {
     if(p->array >= 0) {
         Array* a = asGet(&sc->arrays, p->array);
         if(a == NULL) {
@@ -173,12 +205,14 @@ static void* sCheckAddress(Script* sc, Pointer* p, int length) {
             return NULL;
         }
         return ((char*)a->data) + p->offset;
+    } else if(p->array == -1) {
+        if(p->offset < 0 || p->offset + length > sc->stackIndex) {
+            sError(sc, "address %d is out of stack bounds", p->offset);
+            return NULL;
+        }
+        return sc->stack + p->offset;
     }
-    if(p->offset < 0 || p->offset + length > sc->stackIndex) {
-        sError(sc, "address %d is out of stack bounds", p->offset);
-        return NULL;
-    }
-    return sc->stack + p->offset;
+    return sCheckTextAddress(sc, p, length);
 }
 
 static void sNot(Script* sc) {
@@ -313,19 +347,32 @@ static void sDeleteArray(Script* sc) {
     }
 }
 
+// Stores the number of elements p refers to in *length.
+// Returns true on failure and false on success.
+//  - p->array >= 0: length of the heap array with that index
+//  - p->array == -1: 1 if p->offset is not negative, otherwise 0
+//  - p->array < -1: length of the text stored in the code
+bool sGetPointerLength(Script* sc, Pointer* p, int32* length) {
+    if(p->array >= 0) {
+        Array* heapArray = asGet(&sc->arrays, p->array);
+        if(heapArray == NULL) {
+            sError(sc, "invalid heap pointer %d", p->array);
+            return true;
+        }
+        *length = heapArray->length;
+        return false;
+    }
+    if(p->array == -1) {
+        *length = p->offset >= 0;
+        return false;
+    }
+    int32 textLength = sGetTextLength(sc, p);
+    if(textLength == -1) {
+        return true;
+    }
+    *length = textLength;
+    return false;
+}
+
 static void sLength(Script* sc) {
     Pointer p;
-    if(sPopPointer(sc, &p)) {
-        if(p.array == -1) {
-            sPushInt32(sc, p.offset >= 0);
-            return;
-        }
-        Array* a = asGet(&sc->arrays, p.array);
-        if(a == NULL) {
-            sError(sc, "invalid heap pointer");
-            return;
-        }
-        sPushInt32(sc, a->length);
+    int32 length;
+    if(sPopPointer(sc, &p) && !sGetPointerLength(sc, &p, &length)) {
+        sPushInt32(sc, length);
     }
 }
 
@@ -405,6 +452,16 @@ static void sCall(Script* sc) {
     }
 }
 
+// Executes OP_PUSH_TEXT: reads the 32-bit length that follows the operation,
+// pushes a pointer to the text and moves the read index past its values.
+// The pointer stores the address of the length word negated in array and
+// unchanged in offset.
+// NOTE(review): an address of 1 would give array == -1, which sCheckAddress
+// and sGetPointerLength do not treat as text - confirm that the compiler
+// never emits a text that early in the code.
+static void sPushText(Script* sc) {
+    int address = sc->readIndex;
+    int32 length;
+    if(!sReadInt32(sc, &length)) {
+        return;
+    }
+    // the length comes straight from the code; reject values that would move
+    // the read index backwards or past the end of the code
+    int remaining = sc->code->length - sc->readIndex;
+    if(length < 0 || length > remaining / (int)sizeof(int32)) {
+        sError(sc, "invalid text length %d", length);
+        return;
+    }
+    Pointer p = {.array = -address, .offset = address};
+    sPushPointer(sc, &p);
+    sc->readIndex += sizeof(int32) * length;
+}
+
 #define CHANGE_OP(type, op)                                                    \
     {                                                                          \
         char c = 0;                                                            \
@@ -497,6 +554,7 @@ static void sConsumeInstruction(Script* sc) {
         case OP_PUSH_TRUE: sPushBool(sc, true); break;
         case OP_PUSH_FALSE: sPushBool(sc, false); break;
         case OP_PUSH_NULLPTR: sPushNullPointer(sc); break;
+        case OP_PUSH_TEXT: sPushText(sc); break;
         case OP_DIV_INT32: DIVISION(int32, Int32); break;
         case OP_DIV_INT64: DIVISION(int64, Int64); break;
         case OP_DIV_FLOAT: DIVISION(float, Float); break;

+ 5 - 0
vm/Script.h

@@ -34,5 +34,10 @@ bool sPopFloat(Script* sc, float* f);
 bool sPushFloat(Script* sc, float f);
 bool sPopBool(Script* sc, bool* b);
 bool sPushBool(Script* sc, bool b);
+bool sPopPointer(Script* sc, Pointer* p);
+bool sPushPointer(Script* sc, Pointer* p);
+bool sPushNullPointer(Script* sc);
+void* sCheckAddress(Script* sc, Pointer* p, int length);
+bool sGetPointerLength(Script* sc, Pointer* p, int32* length);
 
 #endif