Browse Source

Use standard string functions

Kajetan Johannes Hammerle 1 week ago
parent
commit
267af23382
5 changed files with 39 additions and 90 deletions
  1. 1 0
      include/core/utils/Utility.hpp
  2. 29 87
      src/ArrayString.cpp
  3. 5 0
      src/Utility.cpp
  4. 2 1
      test/Main.cpp
  5. 2 2
      test/modules/ArrayStringTests.cpp

+ 1 - 0
include/core/utils/Utility.hpp

@@ -38,6 +38,7 @@ namespace Core {
     check_return Error toString(long double ld, char* buffer, int size);
 
     check_return Error putChar(int c);
+    check_return Error putChars(const char* s);
 
     void memorySet(void* p, int c, i64 n);
     void memoryCopy(void* dest, const void* src, i64 n);

+ 29 - 87
src/ArrayString.cpp

@@ -1,90 +1,45 @@
 #include "core/utils/ArrayString.hpp"
 
+#include <limits.h>
+#include <stdlib.h>
 #include <string.h>
+#include <uchar.h>
 
+#include "core/data/Array.hpp"
 #include "core/math/Math.hpp"
 #include "core/utils/Error.hpp"
+#include "core/utils/Utility.hpp"
 
 using CharString = Core::CharString;
 using Char32String = Core::Char32String;
 using Error = Core::Error;
 namespace ErrorCode = Core::ErrorCode;
 
-template<typename T>
-constexpr int stringLength(const T* c) {
-    const T* i = c + 1;
+constexpr int stringLength(const c32* c) {
+    const c32* i = c + 1;
     while(*(c++) != '\0') {}
     return static_cast<int>(c - i);
 }
 
-static c32 read(const char*& s) {
-    if(*s == '\0') {
-        return 0;
-    }
-    return static_cast<c32>(*(s++));
-}
-
 static Error readUnicode(c32& u, const char*& s) {
-    u = read(s);
-    if((u & 0x80) == 0) {
-        return ErrorCode::NONE;
-    }
-    if((u & 0xE0) == 0xC0) {
-        c32 u2 = read(s);
-        if(u2 == 0) {
-            return ErrorCode::INVALID_CHAR;
-        }
-        u = ((u & 0x1F) << 6) | (u2 & 0x3F);
-        return ErrorCode::NONE;
-    } else if((u & 0xF0) == 0xE0) {
-        c32 u2 = read(s);
-        c32 u3 = read(s);
-        if(u2 == 0 || u3 == 0) {
-            return ErrorCode::INVALID_CHAR;
-        }
-        u = ((u & 0xF) << 12) | ((u2 & 0x3F) << 6) | (u3 & 0x3F);
-        return ErrorCode::NONE;
-    } else if((u & 0xF8) == 0xF0) {
-        c32 u2 = read(s);
-        c32 u3 = read(s);
-        c32 u4 = read(s);
-        if(u2 == 0 || u3 == 0 || u4 == 0) {
-            return ErrorCode::INVALID_CHAR;
-        }
-        u = ((u & 0x07) << 18) | ((u2 & 0x3F) << 12) | ((u3 & 0x3F) << 6) |
-            (u4 & 0x3F);
-        return ErrorCode::NONE;
-    }
-    return ErrorCode::INVALID_CHAR;
-}
-
-template<unsigned int L>
-static void unicodeToChar(c32 c, char (&buffer)[L]) {
-    static_assert(L >= 5, "to small char buffer");
-    buffer[0] = '\0';
-    if(c < (1 << 7)) {
-        buffer[0] = static_cast<char>(((c >> 0) & 0x7F) | 0x0);
-        buffer[1] = '\0';
-    } else if(c < (1 << 11)) {
-        buffer[0] = static_cast<char>(((c >> 6) & 0x1F) | 0xC0);
-        buffer[1] = static_cast<char>(((c >> 0) & 0x3F) | 0x80);
-        buffer[2] = '\0';
-    } else if(c < (1 << 16)) {
-        buffer[0] = static_cast<char>(((c >> 12) & 0x0F) | 0xE0);
-        buffer[1] = static_cast<char>(((c >> 6) & 0x3F) | 0x80);
-        buffer[2] = static_cast<char>(((c >> 0) & 0x3F) | 0x80);
-        buffer[3] = '\0';
-    } else if(c < (1 << 21)) {
-        buffer[0] = static_cast<char>(((c >> 18) & 0x07) | 0xF0);
-        buffer[1] = static_cast<char>(((c >> 12) & 0x3F) | 0x80);
-        buffer[2] = static_cast<char>(((c >> 6) & 0x3F) | 0x80);
-        buffer[3] = static_cast<char>(((c >> 0) & 0x3F) | 0x80);
-        buffer[4] = '\0';
+    size_t limit = MB_CUR_MAX;
+    size_t n = mbrtoc32(&u, s, limit, nullptr);
+    if(n > limit) {
+        return ErrorCode::INVALID_CHAR;
     }
+    s += n;
+    return ErrorCode::NONE;
 }
 
-static Error printChar(c32 u, u32 shift, u32 a, u32 o) {
-    return Core::putChar(static_cast<int>(((u >> shift) & a) | o));
+using C32Buffer = Core::Array<char, MB_LEN_MAX + 1>;
+
+static Error convertC32(C32Buffer& buffer, c32 c) {
+    size_t n = c32rtomb(buffer.begin(), c, nullptr);
+    if(n >= static_cast<size_t>(buffer.getLength())) {
+        return ErrorCode::INVALID_CHAR;
+    }
+    buffer[static_cast<i64>(n)] = '\0';
+    return ErrorCode::NONE;
 }
 
 CharString::CharString(char* buffer, i32 bufferSize)
@@ -147,14 +102,14 @@ Error CharString::append(wchar_t c) {
 }
 
 Error CharString::append(c32 c) {
-    char buffer[5];
-    unicodeToChar(c, buffer);
-    return append(static_cast<const char*>(buffer));
+    C32Buffer buffer;
+    CORE_RETURN_ERROR(convertC32(buffer, c));
+    return append(static_cast<const char*>(buffer.begin()));
 }
 
 Error CharString::append(const char* s) {
     // determine the length up front as s could be some part of data
-    for(int i = stringLength(s); i > 0; i--) {
+    for(size_t i = strlen(s); i > 0; i--) {
         CORE_RETURN_ERROR(append(*(s++)));
     }
     return ErrorCode::NONE;
@@ -418,22 +373,9 @@ void Char32String::clear() {
 
 Error Char32String::print() const {
     for(int i = 0; i < length; i++) {
-        c32 c = data[i];
-        if(c < (1 << 7)) {
-            CORE_RETURN_ERROR(printChar(c, 0, 0x7F, 0x0));
-        } else if(c < (1 << 11)) {
-            CORE_RETURN_ERROR(printChar(c, 6, 0x1F, 0xC0));
-            CORE_RETURN_ERROR(printChar(c, 0, 0x3F, 0x80));
-        } else if(c < (1 << 16)) {
-            CORE_RETURN_ERROR(printChar(c, 12, 0x0F, 0xE0));
-            CORE_RETURN_ERROR(printChar(c, 6, 0x3F, 0x80));
-            CORE_RETURN_ERROR(printChar(c, 0, 0x3F, 0x80));
-        } else if(c < (1 << 21)) {
-            CORE_RETURN_ERROR(printChar(c, 18, 0x07, 0xF0));
-            CORE_RETURN_ERROR(printChar(c, 12, 0x3F, 0x80));
-            CORE_RETURN_ERROR(printChar(c, 6, 0x3F, 0x80));
-            CORE_RETURN_ERROR(printChar(c, 0, 0x3F, 0x80));
-        }
+        C32Buffer buffer;
+        CORE_RETURN_ERROR(convertC32(buffer, data[i]));
+        CORE_RETURN_ERROR(putChars(buffer.begin()));
     }
     return ErrorCode::NONE;
 }

+ 5 - 0
src/Utility.cpp

@@ -54,6 +54,11 @@ Core::Error Core::putChar(int c) {
     return putchar(c) == EOF ? ErrorCode::BLOCKED_STDOUT : ErrorCode::NONE;
 }
 
+Core::Error Core::putChars(const char* s) {
+    return fputs(s, stdout) == EOF ? ErrorCode::BLOCKED_STDOUT
+                                   : ErrorCode::NONE;
+}
+
 void Core::memorySet(void* p, int c, i64 n) {
     if(n <= 0) {
         return;

+ 2 - 1
test/Main.cpp

@@ -1,3 +1,4 @@
+#include <locale.h>
 #include <string.h>
 
 #include "../src/ErrorSimulator.hpp"
@@ -5,7 +6,6 @@
 #include "Tests.hpp"
 #include "core/utils/ArrayString.hpp"
 #include "core/utils/Utility.hpp"
-
 static void onExit(int code, void* data) {
     unsigned int i = *static_cast<unsigned int*>(data);
     Core::String32<1024> s;
@@ -19,6 +19,7 @@ static void onExit(int code, void* data) {
 }
 
 int main(int argAmount, const char** args) {
+    setlocale(LC_ALL, "en_US.utf8");
     bool light = false;
     bool outOfMemoryTest = true;
     for(int i = 0; i < argAmount; i++) {

+ 2 - 2
test/modules/ArrayStringTests.cpp

@@ -904,9 +904,9 @@ static void testPrint32() {
 static void testVariousUnicode32() {
     const unsigned char buffer[] = {0xC0, 0};
     const unsigned char buffer2[] = {0xE0, 0};
-    const unsigned char buffer3[] = {0xE0, 1, 2, 0};
+    const unsigned char buffer3[] = {0xC3, 0xA4, 0};
     const unsigned char buffer4[] = {0xF0, 0};
-    const unsigned char buffer5[] = {0xF0, 1, 2, 3, 0};
+    const unsigned char buffer5[] = {0xF0, 0x9F, 0x8F, 0xA3, 0};
     const unsigned char buffer6[] = {0xFF, 0};
     String32 s;
     CORE_TEST_EQUAL(Core::ErrorCode::INVALID_CHAR, s.append(buffer));