Unicode.c 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. #include "core/Unicode.h"
  2. UTF8 convertUnicodeToUTF8(u32 c) {
  3. UTF8 u = {0};
  4. if(c >= 0x10000) {
  5. u.length = 4;
  6. u.data[0] = 0b1111'0000 | ((c >> 18) & 0b0000'0111);
  7. u.data[1] = 0b1000'0000 | ((c >> 12) & 0b0011'1111);
  8. u.data[2] = 0b1000'0000 | ((c >> 6) & 0b0011'1111);
  9. u.data[3] = 0b1000'0000 | (c & 0b0011'1111);
  10. } else if(c >= 0x800) {
  11. u.length = 3;
  12. u.data[0] = 0b1110'0000 | ((c >> 12) & 0b0000'1111);
  13. u.data[1] = 0b1000'0000 | ((c >> 6) & 0b0011'1111);
  14. u.data[2] = 0b1000'0000 | (c & 0b0011'1111);
  15. } else if(c >= 0x80) {
  16. u.length = 2;
  17. u.data[0] = 0b1100'0000 | ((c >> 6) & 0b0001'1111);
  18. u.data[1] = 0b1000'0000 | (c & 0b0011'1111);
  19. } else {
  20. u.length = 1;
  21. u.data[0] = c & 0b0111'1111;
  22. }
  23. return u;
  24. }
  25. u32 convertUTF8toUnicode(UTF8 c) {
  26. if(c.length == 4) {
  27. return ((c.data[0] & 0b0000'0111u) << 18) |
  28. ((c.data[1] & 0b0011'1111u) << 12) |
  29. ((c.data[2] & 0b0011'1111u) << 6) | (c.data[3] & 0b0011'1111u);
  30. } else if(c.length == 3) {
  31. return ((c.data[0] & 0b0000'1111u) << 12) |
  32. ((c.data[1] & 0b0011'1111u) << 6) | (c.data[2] & 0b0011'1111u);
  33. } else if(c.length == 2) {
  34. return ((c.data[0] & 0b0001'1111u) << 6) | (c.data[1] & 0b0011'1111u);
  35. }
  36. return c.data[0];
  37. }
  38. bool isUTF8Remainder(u8 c) {
  39. return (c & 0b1100'0000) == 0b1000'0000;
  40. }
  41. u32 getUTF8Length(u8 c) {
  42. if((c & 0b1111'1000) == 0b1111'0000) {
  43. return 4;
  44. } else if((c & 0b1111'0000) == 0b1110'0000) {
  45. return 3;
  46. } else if((c & 0b1110'0000) == 0b1100'0000) {
  47. return 2;
  48. }
  49. return 1;
  50. }