| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- #include "core/Unicode.h"
- UTF8 convertUnicodeToUTF8(u32 c) {
- UTF8 u = {0};
- if(c >= 0x10000) {
- u.length = 4;
- u.data[0] = 0b1111'0000 | ((c >> 18) & 0b0000'0111);
- u.data[1] = 0b1000'0000 | ((c >> 12) & 0b0011'1111);
- u.data[2] = 0b1000'0000 | ((c >> 6) & 0b0011'1111);
- u.data[3] = 0b1000'0000 | (c & 0b0011'1111);
- } else if(c >= 0x800) {
- u.length = 3;
- u.data[0] = 0b1110'0000 | ((c >> 12) & 0b0000'1111);
- u.data[1] = 0b1000'0000 | ((c >> 6) & 0b0011'1111);
- u.data[2] = 0b1000'0000 | (c & 0b0011'1111);
- } else if(c >= 0x80) {
- u.length = 2;
- u.data[0] = 0b1100'0000 | ((c >> 6) & 0b0001'1111);
- u.data[1] = 0b1000'0000 | (c & 0b0011'1111);
- } else {
- u.length = 1;
- u.data[0] = c & 0b0111'1111;
- }
- return u;
- }
- u32 convertUTF8toUnicode(UTF8 c) {
- if(c.length == 4) {
- return ((c.data[0] & 0b0000'0111u) << 18) |
- ((c.data[1] & 0b0011'1111u) << 12) |
- ((c.data[2] & 0b0011'1111u) << 6) | (c.data[3] & 0b0011'1111u);
- } else if(c.length == 3) {
- return ((c.data[0] & 0b0000'1111u) << 12) |
- ((c.data[1] & 0b0011'1111u) << 6) | (c.data[2] & 0b0011'1111u);
- } else if(c.length == 2) {
- return ((c.data[0] & 0b0001'1111u) << 6) | (c.data[1] & 0b0011'1111u);
- }
- return c.data[0];
- }
- bool isUTF8Remainder(u8 c) {
- return (c & 0b1100'0000) == 0b1000'0000;
- }
- u32 getUTF8Length(u8 c) {
- if((c & 0b1111'1000) == 0b1111'0000) {
- return 4;
- } else if((c & 0b1111'0000) == 0b1110'0000) {
- return 3;
- } else if((c & 0b1110'0000) == 0b1100'0000) {
- return 2;
- }
- return 1;
- }
|