#include "core/Unicode.h"

// Encodes the value `c` into a UTF8 record (type declared in core/Unicode.h),
// filling its `length` and `data` members.
//
// Byte count is chosen purely from the magnitude of `c`:
//   c <  0x80     -> 1 byte   0xxxxxxx
//   c <  0x800    -> 2 bytes  110xxxxx 10xxxxxx
//   c <  0x10000  -> 3 bytes  1110xxxx 10xxxxxx 10xxxxxx
//   otherwise     -> 4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// No validation is performed: surrogate values and values above 0x10FFFF are
// encoded like any other input, and bits above bit 20 are silently dropped.
UTF8 convertUnicodeToUTF8(u32 c) {
    UTF8 u = {0};

    u32 byteCount;
    u32 leadPrefix; // fixed high bits of the first byte
    u32 leadMask;   // payload bits available in the first byte
    if(c < 0x80) {
        byteCount = 1;
        leadPrefix = 0x00u;
        leadMask = 0x7Fu;
    } else if(c < 0x800) {
        byteCount = 2;
        leadPrefix = 0xC0u;
        leadMask = 0x1Fu;
    } else if(c < 0x10000) {
        byteCount = 3;
        leadPrefix = 0xE0u;
        leadMask = 0x0Fu;
    } else {
        byteCount = 4;
        leadPrefix = 0xF0u;
        leadMask = 0x07u;
    }
    u.length = byteCount;

    // Fill the continuation bytes back to front, consuming 6 payload bits
    // per byte; whatever remains afterwards belongs to the lead byte.
    u32 remaining = c;
    for(u32 i = byteCount - 1; i > 0; i--) {
        u.data[i] = 0x80u | (remaining & 0x3Fu);
        remaining >>= 6;
    }
    u.data[0] = leadPrefix | (remaining & leadMask);
    return u;
}

// Decodes the bytes stored in `c` back into a single value.
//
// `c.length` selects how many bytes are combined (2, 3 or 4). Any other
// length returns `c.data[0]` unchanged. The marker bits of every byte are
// masked off without being checked, so malformed input is not detected.
u32 convertUTF8toUnicode(UTF8 c) {
    u32 leadBits;
    u32 tailBytes;
    switch(c.length) {
        case 4:
            leadBits = c.data[0] & 0x07u;
            tailBytes = 3;
            break;
        case 3:
            leadBits = c.data[0] & 0x0Fu;
            tailBytes = 2;
            break;
        case 2:
            leadBits = c.data[0] & 0x1Fu;
            tailBytes = 1;
            break;
        default:
            return c.data[0];
    }

    // Each continuation byte contributes its low 6 bits, most significant
    // group first.
    u32 codePoint = leadBits;
    for(u32 i = 1; i <= tailBytes; i++) {
        codePoint = (codePoint << 6) | (c.data[i] & 0x3Fu);
    }
    return codePoint;
}

// Returns true when `c` has the bit pattern 10xxxxxx, i.e. the form used for
// the second and later bytes of a multi-byte sequence.
bool isUTF8Remainder(u8 c) {
    const u32 topTwoBits = 0xC0;
    const u32 remainderTag = 0x80;
    return (c & topTwoBits) == remainderTag;
}

// Returns the sequence length announced by the lead byte `c`:
//   110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
// Every other byte value yields 1; that includes plain 0xxxxxxx bytes as
// well as 10xxxxxx bytes and 11111xxx bytes, which are not distinguished.
u32 getUTF8Length(u8 c) {
    if((c & 0xE0) == 0xC0) {
        return 2;
    }
    if((c & 0xF0) == 0xE0) {
        return 3;
    }
    if((c & 0xF8) == 0xF0) {
        return 4;
    }
    return 1;
}