ArrayString.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. #include "core/utils/ArrayString.hpp"
  2. #include "core/utils/Error.hpp"
  3. using CharString = Core::CharString;
  4. using Char32String = Core::Char32String;
  5. using Error = Core::Error;
  6. namespace ErrorCode = Core::ErrorCode;
  7. template<typename T>
  8. constexpr int stringLength(const T* c) {
  9. const T* i = c + 1;
  10. while(*(c++) != '\0') {}
  11. return static_cast<int>(c - i);
  12. }
  13. static c32 read(const char*& s) {
  14. if(*s == '\0') {
  15. return 0;
  16. }
  17. return static_cast<c32>(*(s++));
  18. }
  19. static Error readUnicode(c32& u, const char*& s) {
  20. u = read(s);
  21. if((u & 0x80) == 0) {
  22. return ErrorCode::NONE;
  23. }
  24. if((u & 0xE0) == 0xC0) {
  25. c32 u2 = read(s);
  26. if(u2 == 0) {
  27. return ErrorCode::INVALID_CHAR;
  28. }
  29. u = ((u & 0x1F) << 6) | (u2 & 0x3F);
  30. return ErrorCode::NONE;
  31. } else if((u & 0xF0) == 0xE0) {
  32. c32 u2 = read(s);
  33. c32 u3 = read(s);
  34. if(u2 == 0 || u3 == 0) {
  35. return ErrorCode::INVALID_CHAR;
  36. }
  37. u = ((u & 0xF) << 12) | ((u2 & 0x3F) << 6) | (u3 & 0x3F);
  38. return ErrorCode::NONE;
  39. } else if((u & 0xF8) == 0xF0) {
  40. c32 u2 = read(s);
  41. c32 u3 = read(s);
  42. c32 u4 = read(s);
  43. if(u2 == 0 || u3 == 0 || u4 == 0) {
  44. return ErrorCode::INVALID_CHAR;
  45. }
  46. u = ((u & 0x07) << 18) | ((u2 & 0x3F) << 12) | ((u3 & 0x3F) << 6) |
  47. (u4 & 0x3F);
  48. return ErrorCode::NONE;
  49. }
  50. return ErrorCode::INVALID_CHAR;
  51. }
  52. template<unsigned int L>
  53. static void unicodeToChar(c32 c, char (&buffer)[L]) {
  54. static_assert(L >= 5, "to small char buffer");
  55. buffer[0] = '\0';
  56. if(c < (1 << 7)) {
  57. buffer[0] = static_cast<char>(((c >> 0) & 0x7F) | 0x0);
  58. buffer[1] = '\0';
  59. } else if(c < (1 << 11)) {
  60. buffer[0] = static_cast<char>(((c >> 6) & 0x1F) | 0xC0);
  61. buffer[1] = static_cast<char>(((c >> 0) & 0x3F) | 0x80);
  62. buffer[2] = '\0';
  63. } else if(c < (1 << 16)) {
  64. buffer[0] = static_cast<char>(((c >> 12) & 0x0F) | 0xE0);
  65. buffer[1] = static_cast<char>(((c >> 6) & 0x3F) | 0x80);
  66. buffer[2] = static_cast<char>(((c >> 0) & 0x3F) | 0x80);
  67. buffer[3] = '\0';
  68. } else if(c < (1 << 21)) {
  69. buffer[0] = static_cast<char>(((c >> 18) & 0x07) | 0xF0);
  70. buffer[1] = static_cast<char>(((c >> 12) & 0x3F) | 0x80);
  71. buffer[2] = static_cast<char>(((c >> 6) & 0x3F) | 0x80);
  72. buffer[3] = static_cast<char>(((c >> 0) & 0x3F) | 0x80);
  73. buffer[4] = '\0';
  74. }
  75. }
  76. static Error printChar(c32 u, u32 shift, u32 a, u32 o) {
  77. return Core::putChar(static_cast<int>(((u >> shift) & a) | o));
  78. }
  79. CharString::CharString(char* buffer, i32 bufferSize)
  80. : length(0), capacity(bufferSize), hash(0), data(buffer) {
  81. data[0] = '\0';
  82. }
  83. Error CharString::copyFrom(const CharString& s) {
  84. clear();
  85. return s.toString(*this);
  86. }
  87. bool CharString::operator==(const char* s) const {
  88. const char* p = data;
  89. while(*s == *p && *s != '\0') {
  90. s++;
  91. p++;
  92. }
  93. return *s == *p;
  94. }
  95. bool CharString::operator==(const CharString& other) const {
  96. if(length != other.getLength()) {
  97. return false;
  98. }
  99. for(int i = 0; i < length; i++) {
  100. if(data[i] != other[i]) {
  101. return false;
  102. }
  103. }
  104. return true;
  105. }
  106. bool CharString::operator!=(const char* s) const {
  107. return !((*this) == s);
  108. }
  109. bool CharString::operator!=(const CharString& other) const {
  110. return !((*this) == other);
  111. }
  112. char CharString::operator[](int index) const {
  113. return data[index];
  114. }
  115. int CharString::getLength() const {
  116. return length;
  117. }
  118. int CharString::getCapacity() const {
  119. return capacity - 1;
  120. }
  121. Error CharString::append(char c) {
  122. if(length >= capacity - 1) {
  123. return ErrorCode::CAPACITY_REACHED;
  124. }
  125. data[length++] = c;
  126. data[length] = '\0';
  127. addToHash(static_cast<c32>(c));
  128. return ErrorCode::NONE;
  129. }
  130. Error CharString::append(signed char c) {
  131. return append(static_cast<char>(c));
  132. }
  133. Error CharString::append(unsigned char c) {
  134. return append(static_cast<char>(c));
  135. }
  136. Error CharString::append(wchar_t c) {
  137. return append(static_cast<c32>(c));
  138. }
  139. Error CharString::append(c32 c) {
  140. char buffer[5];
  141. unicodeToChar(c, buffer);
  142. return append(static_cast<const char*>(buffer));
  143. }
  144. Error CharString::append(const char* s) {
  145. // stringLength as s could be some part of data
  146. for(int i = stringLength(s); i > 0; i--) {
  147. CORE_RETURN_ERROR(append(*(s++)));
  148. }
  149. return ErrorCode::NONE;
  150. }
  151. Error CharString::append(const c32* s) {
  152. // stringLength as s could be some part of data
  153. for(int i = stringLength(s); i > 0; i--) {
  154. CORE_RETURN_ERROR(append(*(s++)));
  155. }
  156. return ErrorCode::NONE;
  157. }
  158. Error CharString::append(const signed char* s) {
  159. return append(reinterpret_cast<const char*>(s));
  160. }
  161. Error CharString::append(const unsigned char* s) {
  162. return append(reinterpret_cast<const char*>(s));
  163. }
  164. Error CharString::append(bool b) {
  165. return b ? append("true") : append("false");
  166. }
  167. Error CharString::toString(CharString& s) const {
  168. int l = length; // length changes if &s == this
  169. for(int i = 0; i < l; i++) {
  170. CORE_RETURN_ERROR(s.append(data[i]));
  171. }
  172. return ErrorCode::NONE;
  173. }
  174. Error CharString::toString(Char32String& s) const {
  175. return s.append(static_cast<const char*>(data));
  176. }
  177. void CharString::clear() {
  178. length = 0;
  179. hash = 0;
  180. data[0] = '\0';
  181. }
  182. u32 CharString::hashCode() const {
  183. return hash;
  184. }
  185. Error CharString::print() const {
  186. for(int i = 0; i < length; i++) {
  187. CORE_RETURN_ERROR(Core::putChar(data[i]));
  188. }
  189. return ErrorCode::NONE;
  190. }
  191. Error CharString::printLine() const {
  192. CORE_RETURN_ERROR(print());
  193. CORE_RETURN_ERROR(Core::putChar('\n'));
  194. return ErrorCode::NONE;
  195. }
  196. bool CharString::startsWidth(const CharString& other, int from) const {
  197. if(from > length - other.getLength()) {
  198. return false;
  199. }
  200. for(int i = 0; i < other.getLength(); i++) {
  201. if(data[from + i] != other[i]) {
  202. return false;
  203. }
  204. }
  205. return true;
  206. }
  207. int CharString::search(const CharString& other, int from) const {
  208. for(int i = from; i < length; i++) {
  209. if(startsWidth(other, i)) {
  210. return i;
  211. }
  212. }
  213. return -1;
  214. }
  215. bool CharString::contains(const CharString& other, int from) const {
  216. return search(other, from) >= 0;
  217. }
  218. int CharString::search(char u, int from) const {
  219. for(int i = from; i < length; i++) {
  220. if(data[i] == u) {
  221. return i;
  222. }
  223. }
  224. return -1;
  225. }
  226. bool CharString::contains(char u, int from) const {
  227. return search(u, from) >= 0;
  228. }
  229. Error CharString::substring(CharString& s, int from, int to) const {
  230. s.clear();
  231. from = Math::max(from, 0);
  232. to = Math::min(to, length - 1);
  233. for(int i = from; i <= to; i++) {
  234. CORE_RETURN_ERROR(s.append(data[i]));
  235. }
  236. return ErrorCode::NONE;
  237. }
  238. Error CharString::substring(CharString& s, int from) const {
  239. return substring(s, from, length - 1);
  240. }
  241. Error CharString::replace(CharString& s, const CharString& search,
  242. const CharString& replace) {
  243. int i = 0;
  244. while(i < length) {
  245. if(startsWidth(search, i)) {
  246. CORE_RETURN_ERROR(s.append(replace));
  247. i += search.getLength();
  248. } else {
  249. CORE_RETURN_ERROR(s.append(data[i]));
  250. i++;
  251. }
  252. }
  253. return copyFrom(s);
  254. }
  255. void CharString::replace(char search, char replace) {
  256. hash = 0;
  257. for(int i = 0; i < length; i++) {
  258. if(data[i] == search) {
  259. data[i] = replace;
  260. }
  261. addToHash(static_cast<c32>(data[i]));
  262. }
  263. }
  264. CharString::operator const char*() const {
  265. return data;
  266. }
  267. void CharString::addToHash(c32 u) {
  268. hash = static_cast<u32>(2120251889) * hash + static_cast<u32>(u);
  269. }
  270. Char32String::Char32String(c32* buffer, i32 bufferSize)
  271. : length(0), capacity(bufferSize), hash(0), data(buffer) {
  272. data[0] = '\0';
  273. }
  274. Error Char32String::copyFrom(const Char32String& s) {
  275. clear();
  276. return s.toString(*this);
  277. }
  278. bool Char32String::operator==(const c32* s) const {
  279. const c32* p = data;
  280. while(*s == *p && *s != '\0') {
  281. s++;
  282. p++;
  283. }
  284. return *s == *p;
  285. }
  286. bool Char32String::operator==(const Char32String& other) const {
  287. if(length != other.getLength()) {
  288. return false;
  289. }
  290. for(int i = 0; i < length; i++) {
  291. if(data[i] != other[i]) {
  292. return false;
  293. }
  294. }
  295. return true;
  296. }
  297. bool Char32String::operator!=(const c32* s) const {
  298. return !((*this) == s);
  299. }
  300. bool Char32String::operator!=(const Char32String& other) const {
  301. return !((*this) == other);
  302. }
  303. c32 Char32String::operator[](int index) const {
  304. return data[index];
  305. }
  306. int Char32String::getLength() const {
  307. return length;
  308. }
  309. int Char32String::getCapacity() const {
  310. return capacity - 1;
  311. }
  312. Error Char32String::append(char c) {
  313. return append(static_cast<c32>(c));
  314. }
  315. Error Char32String::append(signed char c) {
  316. return append(static_cast<char>(c));
  317. }
  318. Error Char32String::append(unsigned char c) {
  319. return append(static_cast<char>(c));
  320. }
  321. Error Char32String::append(wchar_t c) {
  322. return append(static_cast<c32>(c));
  323. }
  324. Error Char32String::append(c32 c) {
  325. if(length >= capacity - 1) {
  326. return ErrorCode::CAPACITY_REACHED;
  327. }
  328. data[length++] = c;
  329. data[length] = '\0';
  330. addToHash(static_cast<c32>(c));
  331. return ErrorCode::NONE;
  332. }
  333. Error Char32String::append(const char* s) {
  334. while(true) {
  335. c32 u = 0;
  336. CORE_RETURN_ERROR(readUnicode(u, s));
  337. if(u == 0) {
  338. return ErrorCode::NONE;
  339. }
  340. CORE_RETURN_ERROR(append(u));
  341. }
  342. }
  343. Error Char32String::append(const c32* s) {
  344. // stringLength as s could be some part of data
  345. for(int i = stringLength(s); i > 0; i--) {
  346. CORE_RETURN_ERROR(append(*(s++)));
  347. }
  348. return ErrorCode::NONE;
  349. }
  350. Error Char32String::append(const signed char* s) {
  351. return append(reinterpret_cast<const char*>(s));
  352. }
  353. Error Char32String::append(const unsigned char* s) {
  354. return append(reinterpret_cast<const char*>(s));
  355. }
  356. Error Char32String::append(bool b) {
  357. return b ? append("true") : append("false");
  358. }
  359. Error Char32String::toString(CharString& s) const {
  360. int l = length; // length changes if &s == this
  361. for(int i = 0; i < l; i++) {
  362. CORE_RETURN_ERROR(s.append(data[i]));
  363. }
  364. return ErrorCode::NONE;
  365. }
  366. Error Char32String::toString(Char32String& s) const {
  367. int l = length; // length changes if &s == this
  368. for(int i = 0; i < l; i++) {
  369. CORE_RETURN_ERROR(s.append(data[i]));
  370. }
  371. return ErrorCode::NONE;
  372. }
  373. void Char32String::clear() {
  374. length = 0;
  375. hash = 0;
  376. data[0] = '\0';
  377. }
  378. u32 Char32String::hashCode() const {
  379. return hash;
  380. }
  381. Error Char32String::print() const {
  382. for(int i = 0; i < length; i++) {
  383. c32 c = data[i];
  384. if(c < (1 << 7)) {
  385. CORE_RETURN_ERROR(printChar(c, 0, 0x7F, 0x0));
  386. } else if(c < (1 << 11)) {
  387. CORE_RETURN_ERROR(printChar(c, 6, 0x1F, 0xC0));
  388. CORE_RETURN_ERROR(printChar(c, 0, 0x3F, 0x80));
  389. } else if(c < (1 << 16)) {
  390. CORE_RETURN_ERROR(printChar(c, 12, 0x0F, 0xE0));
  391. CORE_RETURN_ERROR(printChar(c, 6, 0x3F, 0x80));
  392. CORE_RETURN_ERROR(printChar(c, 0, 0x3F, 0x80));
  393. } else if(c < (1 << 21)) {
  394. CORE_RETURN_ERROR(printChar(c, 18, 0x07, 0xF0));
  395. CORE_RETURN_ERROR(printChar(c, 12, 0x3F, 0x80));
  396. CORE_RETURN_ERROR(printChar(c, 6, 0x3F, 0x80));
  397. CORE_RETURN_ERROR(printChar(c, 0, 0x3F, 0x80));
  398. }
  399. }
  400. return ErrorCode::NONE;
  401. }
  402. Error Char32String::printLine() const {
  403. CORE_RETURN_ERROR(print());
  404. CORE_RETURN_ERROR(Core::putChar('\n'));
  405. return ErrorCode::NONE;
  406. }
  407. bool Char32String::startsWidth(const Char32String& other, int from) const {
  408. if(from > length - other.getLength()) {
  409. return false;
  410. }
  411. for(int i = 0; i < other.getLength(); i++) {
  412. if(data[from + i] != other[i]) {
  413. return false;
  414. }
  415. }
  416. return true;
  417. }
  418. int Char32String::search(const Char32String& other, int from) const {
  419. for(int i = from; i < length; i++) {
  420. if(startsWidth(other, i)) {
  421. return i;
  422. }
  423. }
  424. return -1;
  425. }
  426. bool Char32String::contains(const Char32String& other, int from) const {
  427. return search(other, from) >= 0;
  428. }
  429. int Char32String::search(c32 u, int from) const {
  430. for(int i = from; i < length; i++) {
  431. if(data[i] == u) {
  432. return i;
  433. }
  434. }
  435. return -1;
  436. }
  437. bool Char32String::contains(c32 u, int from) const {
  438. return search(u, from) >= 0;
  439. }
  440. Error Char32String::substring(Char32String& s, int from, int to) const {
  441. s.clear();
  442. from = Math::max(from, 0);
  443. to = Math::min(to, length - 1);
  444. for(int i = from; i <= to; i++) {
  445. CORE_RETURN_ERROR(s.append(data[i]));
  446. }
  447. return ErrorCode::NONE;
  448. }
  449. Error Char32String::substring(Char32String& s, int from) const {
  450. return substring(s, from, length - 1);
  451. }
  452. Error Char32String::replace(Char32String& s, const Char32String& search,
  453. const Char32String& replace) {
  454. int i = 0;
  455. while(i < length) {
  456. if(startsWidth(search, i)) {
  457. CORE_RETURN_ERROR(s.append(replace));
  458. i += search.getLength();
  459. } else {
  460. CORE_RETURN_ERROR(s.append(data[i]));
  461. i++;
  462. }
  463. }
  464. return copyFrom(s);
  465. }
  466. void Char32String::replace(c32 search, c32 replace) {
  467. hash = 0;
  468. for(int i = 0; i < length; i++) {
  469. if(data[i] == search) {
  470. data[i] = replace;
  471. }
  472. addToHash(static_cast<c32>(data[i]));
  473. }
  474. }
  475. Char32String::operator const c32*() const {
  476. return data;
  477. }
  478. void Char32String::addToHash(c32 u) {
  479. hash = static_cast<u32>(2120251889) * hash + static_cast<u32>(u);
  480. }