Quellcode durchsuchen

everything back to cpp

Kajetan Johannes Hammerle vor 5 Jahren
Ursprung
Commit
ffac8ecd7c
5 geänderte Dateien mit 342 neuen und 338 gelöschten Zeilen
  1. 14 9
      test/Test.cpp
  2. 265 317
      tokenizer/Tokenizer.cpp
  3. 3 3
      tokenizer/Tokenizer.h
  4. 52 8
      utils/String.cpp
  5. 8 1
      utils/String.h

+ 14 - 9
test/Test.cpp

@@ -65,24 +65,29 @@ static bool testTokenizer(const String& input, const String& output) {
         return true;
     }
     TokenStream tokenStream;
-    bool b = tokenize(&tokenStream, input);
-    if(!b) {
-        done++;
+    if(Tokenizer::tokenize(tokenStream, input)) {
+        return true;
     }
-    while(tokenStream.hasToken()) {
-        String buffer = tokenStream.nextTokenString();
+    while(true) {
         String expected = readLine(oStream);
-        if(strchr(buffer, '\n') != NULL) {
+        if(expected.getLength() == 0) {
+            break;
+        } else if(!tokenStream.hasToken()) {
+            std::cout << "error in '" << input << "'\nout of tokens\n";
+            return false;
+        }
+        String buffer = tokenStream.nextTokenString();
+        if(strchr(buffer, '\n') != nullptr) {
             expected += '\n';
             expected += readLine(oStream);
         }
         if(strcmp(buffer, expected) != 0) {
             std::cout << "error in '" << input << "\n'" << buffer << "' should be '" << expected << "'\n";
-            done--;
-            break;
+            return false;
         }
     }
-    return b;
+    done++;
+    return false;
 }
 
 static void test_testTokenizer(const char* path) {

+ 265 - 317
tokenizer/Tokenizer.cpp

@@ -1,380 +1,328 @@
-#include <stdio.h>
-#include <uchar.h>
-#include <string.h>
+#include <iostream>
+#include <fstream>
 
 #include "tokenizer/Tokenizer.h"
+#include "utils/String.h"
 
-static FILE* input = NULL;
-static TokenStream* tokens = NULL;
-static unsigned int line = 1;
-static char32_t buffer = 0;
-
-static void tokenizer_onError(const char* message, unsigned int line) {
-    printf("%s Line: %u\n", message, line);
-}
-
-static size_t tokenizer_printChar(char32_t c, char* buffer) {
-    if(c <= 0x7F) {
-        buffer[0] = (char) c;
-        return 1;
-    } else if(c < 0xE00000) {
-        buffer[0] = (char) ((c >> 8) & 0xFF);
-        buffer[1] = (char) ((c >> 0) & 0xFF);
-        return 2;
-    } else if(c <= 0xF0000000) {
-        buffer[0] = (char) ((c >> 16) & 0xFF);
-        buffer[1] = (char) ((c >> 8) & 0xFF);
-        buffer[2] = (char) ((c >> 0) & 0xFF);
-        return 3;
-    }
-    buffer[0] = (char) ((c >> 24) & 0xFF);
-    buffer[1] = (char) ((c >> 16) & 0xFF);
-    buffer[2] = (char) ((c >> 8) & 0xFF);
-    buffer[3] = (char) ((c >> 0) & 0xFF);
-    return 4;
+static void onError(const String& message, unsigned int line) {
+    std::cout << message << " Line: " << line << "\n";
 }
 
-static bool tokenizer_isLetter(char32_t c) {
+static bool isLetter(char32_t c) {
     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
 }
 
-static bool tokenizer_isDigit(char32_t c) {
+static bool isDigit(char32_t c) {
     return c >= '0' && c <= '9';
 }
 
-static bool tokenizer_isValidNameStart(char32_t c) {
-    return tokenizer_isLetter(c) || c == '.' || c == '_';
+static bool isValidNameStart(char32_t c) {
+    return isLetter(c) || c == '.' || c == '_';
 }
 
-static bool tokenizer_isValidNamePart(char32_t c) {
-    return tokenizer_isDigit(c) || tokenizer_isValidNameStart(c);
+static bool isValidNamePart(char32_t c) {
+    return isDigit(c) || isValidNameStart(c);
 }
 
-static bool tokenizer_next(char32_t* c) {
-    if(buffer != 0) {
-        *c = buffer;
-        buffer = 0;
-        return true;
-    }
-    int in = fgetc(input);
-    if(in == EOF) {
-        return false;
-    }
-    if((in & 0x80) == 0) {
-        *c = in;
-        return true;
-    }
-    if((in >> 5) == 0x6) {
-        *c = (in << 8) | fgetc(input);
-        return true;
-    }
-    if((in >> 4) == 0xE) {
-        *c = (in << 16) | (fgetc(input) << 8) | fgetc(input);
-        return true;
-    }
-    if((in >> 3) == 0x1E) {
-        *c = (in << 24) | (fgetc(input) << 16) | (fgetc(input) << 8) | fgetc(input);
-        return true;
-    }
-    return true;
-}
-
-static bool tokenizer_peek(char32_t* c) {
-    if(buffer != 0 || tokenizer_next(&buffer)) {
-        *c = buffer;
+class Data {
+public:
 
-        return true;
+    Data(const char* inputPath, TokenStream& tokens) : tokens(tokens) {
+        stream.open(inputPath);
     }
-    return false;
-}
-
-static bool tokenizer_nextIf(char32_t c) {
-    char32_t nextChar;
-    if(tokenizer_peek(&nextChar) && c == nextChar) {
-        tokenizer_next(&nextChar);
 
-        return true;
+    bool hasFileError() {
+        return !stream.good();
     }
-    return false;
-}
-
-static void tokenizer_addToken(Token token) {
-    tokens->add(token, line);
-}
 
-static void tokenizer_addStringToken(Token token, const char* text) {
-    tokens->add(token, line, text);
-}
-
-static Token tokenizer_chooseToken(char c, Token aCharEqual, Token aChar, Token aEqual, Token other) {
-    if(tokenizer_nextIf(c)) {
-        if(tokenizer_nextIf('=')) {
-            return aCharEqual;
+    bool next(char32_t& c) {
+        if(buffer != 0) {
+            c = buffer;
+            buffer = 0;
+            return true;
         }
-        return aChar;
-    } else if(tokenizer_nextIf('=')) {
-
-        return aEqual;
+        c = stream.get();
+        return stream.good();
     }
-    return other;
-}
 
-static bool tokenizer_handleLiteral(char32_t c, Token token) {
-    const size_t bufferSize = 1024;
-    char buffer[bufferSize];
-    size_t index = 1;
-    buffer[0] = c;
+    bool peek(char32_t& c) {
+        if(buffer != 0 || next(buffer)) {
+            c = buffer;
+            return true;
+        }
+        return false;
+    }
 
-    while(index < bufferSize - 1) {
-        char32_t data;
-        if(!tokenizer_peek(&data) || !tokenizer_isValidNamePart(data)) {
-            break;
+    bool nextIf(char32_t c) {
+        char32_t nextChar;
+        if(peek(nextChar) && c == nextChar) {
+            next(nextChar);
+            return true;
         }
-        buffer[index++] = data;
-        tokenizer_next(&data);
+        return false;
     }
-    buffer[index] = '\0';
 
-    if(strcmp(buffer, "if") == 0) {
-        tokenizer_addToken(Token::IF);
-    } else if(strcmp(buffer, "else") == 0) {
-        tokenizer_addToken(Token::ELSE);
-    } else if(strcmp(buffer, "elseif") == 0) {
-        tokenizer_addToken(Token::ELSEIF);
-    } else if(strcmp(buffer, "while") == 0) {
-        tokenizer_addToken(Token::WHILE);
-    } else if(strcmp(buffer, "try") == 0) {
-        tokenizer_addToken(Token::TRY);
-    } else if(strcmp(buffer, "catch") == 0) {
-        tokenizer_addToken(Token::CATCH);
-    } else if(strcmp(buffer, "for") == 0) {
-        tokenizer_addToken(Token::FOR);
-    } else if(strcmp(buffer, "function") == 0) {
-        tokenizer_addToken(Token::FUNCTION);
-    } else if(strcmp(buffer, "break") == 0) {
-        tokenizer_addToken(Token::BREAK);
-    } else if(strcmp(buffer, "continue") == 0) {
-        tokenizer_addToken(Token::CONTINUE);
-    } else if(strcmp(buffer, "return") == 0) {
-        tokenizer_addToken(Token::RETURN);
-    } else if(strcmp(buffer, "true") == 0) {
-        tokenizer_addToken(Token::TRUE);
-    } else if(strcmp(buffer, "false") == 0) {
-        tokenizer_addToken(Token::FALSE);
-    } else if(strcmp(buffer, "null") == 0) {
-        tokenizer_addToken(Token::NULL_TOKEN);
-    } else {
+    void addToken(Token token) {
+        tokens.add(token, line);
+    }
 
-        tokenizer_addStringToken(token, buffer);
+    void addToken(Token token, const char* text) {
+        tokens.add(token, line, text);
     }
-    return false;
-}
 
-static bool tokenizer_handleNumber(char32_t c) {
-    double number = c - '0';
-    char32_t data;
-    while(tokenizer_peek(&data)) {
-        if(!tokenizer_isDigit(data)) {
-            if(data != '.') {
-                break;
+    Token chooseToken(char c, Token aCharEqual, Token aChar, Token aEqual, Token other) {
+        if(nextIf(c)) {
+            if(nextIf('=')) {
+                return aCharEqual;
             }
-            tokenizer_next(&data);
-            double factor = 10;
-            while(tokenizer_peek(&data) && tokenizer_isDigit(data)) {
-                number += (data - '0') / factor;
-                factor *= 10;
-                tokenizer_next(&data);
-            }
-            break;
+            return aChar;
+        } else if(nextIf('=')) {
+            return aEqual;
         }
-        number = (number * 10) + (data - '0');
-        tokenizer_next(&data);
+        return other;
     }
-    tokens->add(Token::NUMBER, line, number);
-    return false;
-}
 
-static bool tokenizer_handleString() {
-    const size_t bufferSize = 1024;
-    char buffer[bufferSize];
-    size_t index = 0;
-
-    unsigned int oldLine = line;
-    while(index + 4 < bufferSize) {
-        char32_t data;
-        if(!tokenizer_next(&data)) {
-            tokenizer_onError("non closed string literal", oldLine);
-            return true;
+    bool handleLiteral(char32_t c, Token token) {
+        String s;
+        s += (char) c;
+        while(true) {
+            if(s.isFull()) {
+                onError("string buffer too small", line);
+                return true;
+            }
+            char32_t data;
+            if(!peek(data) || !isValidNamePart(data)) {
+                break;
+            }
+            s += (char) data;
+            next(data);
         }
-        if(data == '"') {
-            buffer[index] = '\0';
-            tokenizer_addStringToken(Token::STRING, buffer);
-            return false;
+        if(s == "if") {
+            addToken(Token::IF);
+        } else if(s == "else") {
+            addToken(Token::ELSE);
+        } else if(s == "elseif") {
+            addToken(Token::ELSEIF);
+        } else if(s == "while") {
+            addToken(Token::WHILE);
+        } else if(s == "try") {
+            addToken(Token::TRY);
+        } else if(s == "catch") {
+            addToken(Token::CATCH);
+        } else if(s == "for") {
+            addToken(Token::FOR);
+        } else if(s == "function") {
+            addToken(Token::FUNCTION);
+        } else if(s == "break") {
+            addToken(Token::BREAK);
+        } else if(s == "continue") {
+            addToken(Token::CONTINUE);
+        } else if(s == "return") {
+            addToken(Token::RETURN);
+        } else if(s == "true") {
+            addToken(Token::TRUE);
+        } else if(s == "false") {
+            addToken(Token::FALSE);
+        } else if(s == "null") {
+            addToken(Token::NULL_TOKEN);
+        } else {
+            addToken(token, s);
         }
-        if(data == '\n') {
-            line++;
+        return false;
+    }
+
+    bool handleNumber(char32_t c) {
+        double number = c - '0';
+        char32_t data;
+        while(peek(data)) {
+            if(!isDigit(data)) {
+                if(data != '.') {
+                    break;
+                }
+                next(data);
+                double factor = 10;
+                while(peek(data) && isDigit(data)) {
+                    number += (data - '0') / factor;
+                    factor *= 10;
+                    next(data);
+                }
+                break;
+            }
+            number = (number * 10) + (data - '0');
+            next(data);
         }
-        if(data == '\\') {
-            char32_t escape;
-            if(!tokenizer_next(&escape)) {
-                tokenizer_onError("missing escaped character", line);
+        tokens.add(Token::NUMBER, line, number);
+        return false;
+    }
+
+    bool handleString() {
+        String s;
+        unsigned int oldLine = line;
+        while(!s.isFull()) {
+            char32_t data;
+            if(!next(data)) {
+                onError("non closed string literal", oldLine);
                 return true;
             }
-            switch(escape) {
-                case 'n': data = '\n';
-                    break;
-                case '\\': data = '\\';
-                    break;
-                case '"': data = '"';
-                    break;
-                default:
-                    tokenizer_onError("invalid escaped character", line);
+            if(data == '"') {
+                addToken(Token::STRING, s);
+                return false;
+            }
+            if(data == '\n') {
+                line++;
+            }
+            if(data == '\\') {
+                char32_t escape;
+                if(!next(escape)) {
+                    onError("missing escaped character", line);
                     return true;
+                }
+                switch(escape) {
+                    case 'n': data = '\n';
+                        break;
+                    case '\\': data = '\\';
+                        break;
+                    case '"': data = '"';
+                        break;
+                    default:
+                        onError("invalid escaped character", line);
+                        return true;
+                }
             }
+            s += data;
         }
-        index += tokenizer_printChar(data, buffer + index);
+        onError("string buffer too small", line);
+        return true;
     }
-    tokenizer_onError("string buffer to small", line);
-
-    return true;
-}
-
-static bool tokenizer_handleOneLineComment() {
-    char32_t data;
-    while(tokenizer_next(&data) && data != '\n');
-    line++;
 
-    return false;
-}
+    bool handleOneLineComment() {
+        char32_t data;
+        while(next(data) && data != '\n');
+        line++;
+        return false;
+    }
 
-static bool tokenizer_handleMultiLineComment() {
-    char32_t first;
-    char32_t sec = 0;
-    unsigned int oldLine = line;
-    while(true) {
-        first = sec;
-        if(!tokenizer_next(&sec)) {
-            tokenizer_onError("unclosed multiline comment", oldLine);
-            return true;
+    bool handleMultiLineComment() {
+        char32_t first;
+        char32_t sec = 0;
+        unsigned int oldLine = line;
+        while(true) {
+            first = sec;
+            if(!next(sec)) {
+                onError("unclosed multiline comment", oldLine);
+                return true;
+            }
+            if(first == '*' && sec == '/') {
+                return false;
+            }
+            line += (sec == '\n');
         }
-        if(first == '*' && sec == '/') {
+    }
 
+    bool handleSlash() {
+        if(nextIf('/')) {
+            return handleOneLineComment();
+        } else if(nextIf('*')) {
+            return handleMultiLineComment();
+        } else if(nextIf('=')) {
+            addToken(Token::DIV_SET);
             return false;
         }
-        line += (sec == '\n');
-    }
-}
-
-static bool tokenizer_handleSlash() {
-    if(tokenizer_nextIf('/')) {
-        return tokenizer_handleOneLineComment();
-    } else if(tokenizer_nextIf('*')) {
-        return tokenizer_handleMultiLineComment();
-    } else if(tokenizer_nextIf('=')) {
-        tokenizer_addToken(Token::DIV_SET);
+        addToken(Token::DIV);
         return false;
     }
-    tokenizer_addToken(Token::DIV);
-
-    return false;
-}
 
-static bool tokenizer_handleSpecial(char32_t c) {
-    switch(c) {
-        case ' ':
-        case '\t':
-        case '\r':
-            return false;
-        case '\n': line++;
-            return false;
-        case '"':
-            return tokenizer_handleString();
-        case '(': tokenizer_addToken(Token::OPEN_BRACKET);
-            return false;
-        case ')': tokenizer_addToken(Token::CLOSE_BRACKET);
-            return false;
-        case '[': tokenizer_addToken(Token::OPEN_SQUARE_BRACKET);
-            return false;
-        case ']': tokenizer_addToken(Token::CLOSE_SQUARE_BRACKET);
-            return false;
-        case '{': tokenizer_addToken(Token::OPEN_CURVED_BRACKET);
-            return false;
-        case '}': tokenizer_addToken(Token::CLOSE_CURVED_BRACKET);
-            return false;
-        case '$':
-            return tokenizer_handleLiteral(c, Token::LITERAL);
-        case '@':
-            return tokenizer_handleLiteral(c, Token::LABEL);
-        case ';': tokenizer_addToken(Token::SEMICOLON);
-            return false;
-        case ',': tokenizer_addToken(Token::COMMA);
-            return false;
-        case '~': tokenizer_addToken(Token::BIT_INVERT);
-            return false;
-        case '+': tokenizer_addToken(tokenizer_nextIf('=') ? Token::ADD_SET: (tokenizer_nextIf('+') ? Token::INC: Token::ADD));
-            return false;
-        case '-': tokenizer_addToken(tokenizer_nextIf('=') ? Token::SUB_SET: (tokenizer_nextIf('-') ? Token::DEC: Token::SUB));
-            return false;
-        case '!': tokenizer_addToken(tokenizer_nextIf('=') ? Token::NOT_EQUAL: Token::INVERT);
-            break;
-        case '=': tokenizer_addToken(tokenizer_nextIf('=') ? Token::EQUAL: Token::SET);
-            return false;
-        case '*': tokenizer_addToken(tokenizer_nextIf('=') ? Token::MUL_SET: Token::MUL);
-            return false;
-        case '/':
-            return tokenizer_handleSlash();
-        case '%': tokenizer_addToken(tokenizer_nextIf('=') ? Token::MOD_SET: Token::MOD);
-            return false;
-        case '&': tokenizer_addToken(tokenizer_nextIf('=') ? Token::BIT_AND_SET: (tokenizer_nextIf('&') ? Token::AND: Token::BIT_AND));
-            return false;
-        case '|': tokenizer_addToken(tokenizer_nextIf('=') ? Token::BIT_OR_SET: (tokenizer_nextIf('|') ? Token::OR: Token::BIT_OR));
-            return false;
-        case '^': tokenizer_addToken(tokenizer_nextIf('=') ? Token::BIT_XOR_SET: Token::BIT_XOR);
-            return false;
-        case '<': tokenizer_addToken(tokenizer_chooseToken('<', Token::LEFT_SHIFT_SET, Token::LEFT_SHIFT, Token::LESS_EQUAL, Token::LESS));
-            return false;
-        case '>': tokenizer_addToken(tokenizer_chooseToken('>', Token::RIGHT_SHIFT_SET, Token::RIGHT_SHIFT, Token::GREATER_EQUAL, Token::GREATER));
-            return false;
+    bool handleSpecial(char32_t c) {
+        switch(c) {
+            case ' ':
+            case '\t':
+            case '\r':
+                return false;
+            case '\n': line++;
+                return false;
+            case '"':
+                return handleString();
+            case '(': addToken(Token::OPEN_BRACKET);
+                return false;
+            case ')': addToken(Token::CLOSE_BRACKET);
+                return false;
+            case '[': addToken(Token::OPEN_SQUARE_BRACKET);
+                return false;
+            case ']': addToken(Token::CLOSE_SQUARE_BRACKET);
+                return false;
+            case '{': addToken(Token::OPEN_CURVED_BRACKET);
+                return false;
+            case '}': addToken(Token::CLOSE_CURVED_BRACKET);
+                return false;
+            case '$':
+                return handleLiteral(c, Token::LITERAL);
+            case '@':
+                return handleLiteral(c, Token::LABEL);
+            case ';': addToken(Token::SEMICOLON);
+                return false;
+            case ',': addToken(Token::COMMA);
+                return false;
+            case '~': addToken(Token::BIT_INVERT);
+                return false;
+            case '+': addToken(nextIf('=') ? Token::ADD_SET: (nextIf('+') ? Token::INC: Token::ADD));
+                return false;
+            case '-': addToken(nextIf('=') ? Token::SUB_SET: (nextIf('-') ? Token::DEC: Token::SUB));
+                return false;
+            case '!': addToken(nextIf('=') ? Token::NOT_EQUAL: Token::INVERT);
+                break;
+            case '=': addToken(nextIf('=') ? Token::EQUAL: Token::SET);
+                return false;
+            case '*': addToken(nextIf('=') ? Token::MUL_SET: Token::MUL);
+                return false;
+            case '/':
+                return handleSlash();
+            case '%': addToken(nextIf('=') ? Token::MOD_SET: Token::MOD);
+                return false;
+            case '&': addToken(nextIf('=') ? Token::BIT_AND_SET: (nextIf('&') ? Token::AND: Token::BIT_AND));
+                return false;
+            case '|': addToken(nextIf('=') ? Token::BIT_OR_SET: (nextIf('|') ? Token::OR: Token::BIT_OR));
+                return false;
+            case '^': addToken(nextIf('=') ? Token::BIT_XOR_SET: Token::BIT_XOR);
+                return false;
+            case '<': addToken(chooseToken('<', Token::LEFT_SHIFT_SET, Token::LEFT_SHIFT, Token::LESS_EQUAL, Token::LESS));
+                return false;
+            case '>': addToken(chooseToken('>', Token::RIGHT_SHIFT_SET, Token::RIGHT_SHIFT, Token::GREATER_EQUAL, Token::GREATER));
+                return false;
+        }
+        String s("unknown token '");
+        s += c;
+        s += '\'';
+        onError(s, line);
+        return true;
     }
-    char buffer[32];
-    strncpy(buffer, "unknown token '", 32);
-    size_t index = strlen(buffer);
-    index += tokenizer_printChar(c, buffer + index);
-    buffer[index] = '\'';
-    buffer[index + 1] = '\0';
-    tokenizer_onError(buffer, line);
-
-    return true;
-}
 
-static bool tokenizer_handleChar(char32_t c) {
-    if(tokenizer_isValidNameStart(c)) {
-        return tokenizer_handleLiteral(c, Token::LITERAL);
-    } else if(tokenizer_isDigit(c)) {
-
-        return tokenizer_handleNumber(c);
+    bool handleChar(char32_t c) {
+        if(isValidNameStart(c)) {
+            return handleLiteral(c, Token::LITERAL);
+        } else if(isDigit(c)) {
+            return handleNumber(c);
+        }
+        return handleSpecial(c);
     }
-    return tokenizer_handleSpecial(c);
-}
 
-bool tokenize(TokenStream* tokenStream, const char* inputPath) {
-    input = fopen(inputPath, "r");
-    if(input == NULL) {
+private:
+    std::basic_ifstream<char32_t> stream;
+    TokenStream& tokens;
+    unsigned int line = 1;
+    char32_t buffer = 0;
+};
+
+bool Tokenizer::tokenize(TokenStream& tokenStream, const char* inputPath) {
+    Data d(inputPath, tokenStream);
+    if(d.hasFileError()) {
         return true;
     }
-    tokens = tokenStream;
-    line = 1;
-    buffer = 0;
-
     char32_t c;
-    while(tokenizer_next(&c)) {
-        if(tokenizer_handleChar(c)) {
+    while(d.next(c)) {
+        if(d.handleChar(c)) {
             return true;
         }
     }
-    tokenizer_addToken(Token::EOF_TOKEN);
-
-    fclose(input);
-    input = NULL;
+    d.addToken(Token::EOF_TOKEN);
     return false;
-}
+}

+ 3 - 3
tokenizer/Tokenizer.h

@@ -1,10 +1,10 @@
 #ifndef TOKENIZER_H
 #define TOKENIZER_H
 
-#include <stdbool.h>
-
 #include "tokenizer/TokenStream.h"
 
-bool tokenize(TokenStream* tokenStream, const char* inputPath);
+namespace Tokenizer {
+    bool tokenize(TokenStream& tokenStream, const char* inputPath);
+}
 
 #endif

+ 52 - 8
utils/String.cpp

@@ -1,4 +1,5 @@
 #include <cstdio>
+#include <cstring>
 
 #include "utils/String.h"
 
@@ -13,38 +14,81 @@ String& String::operator+=(const char* str) {
     usedCapacity--;
     size_t start = usedCapacity;
     while(usedCapacity + 1 < capacity && str[usedCapacity - start] != '\0') {
-        path[usedCapacity] = str[usedCapacity - start];
+        data[usedCapacity] = str[usedCapacity - start];
         usedCapacity++;
     }
-    path[usedCapacity++] = '\0';
+    data[usedCapacity++] = '\0';
     return *this;
 }
 
 String& String::operator+=(char c) {
     if(usedCapacity + 1 < capacity) {
-        path[usedCapacity - 1] = c;
-        path[usedCapacity] = '\0';
+        data[usedCapacity - 1] = c;
+        data[usedCapacity] = '\0';
         usedCapacity++;
     }
     return *this;
 }
 
+String& String::operator+=(char32_t c) {
+    if(c <= 0x7F) {
+        *this += (char) c;
+    } else if(c <= 0x7FF) {
+        *this += (char) (0xC0 | ((c >> 6) & 0x1F));
+        *this += (char) (0x80 | ((c >> 0) & 0x3F));
+    } else if(c <= 0xFFFF) {
+        *this += (char) (0xE0 | ((c >> 12) & 0xF));
+        *this += (char) (0x80 | ((c >> 6) & 0x3F));
+        *this += (char) (0x80 | ((c >> 0) & 0x3F));
+    } else {
+        *this += (char) (0xF0 | ((c >> 18) & 0x7));
+        *this += (char) (0x80 | ((c >> 12) & 0x3F));
+        *this += (char) (0x80 | ((c >> 6) & 0x3F));
+        *this += (char) (0x80 | ((c >> 0) & 0x3F));
+    }
+    return *this;
+}
+
 String& String::operator+=(unsigned int i) {
-    usedCapacity += snprintf(path + usedCapacity - 1, capacity - usedCapacity, "%u", i);
+    usedCapacity += snprintf(data + usedCapacity - 1, capacity - usedCapacity, "%u", i);
     return *this;
 }
 
 String& String::operator+=(double d) {
-    usedCapacity += snprintf(path + usedCapacity - 1, capacity - usedCapacity, (d == (long) d) ? "%lg.0" : "%lg", d);
+    usedCapacity += snprintf(data + usedCapacity - 1, capacity - usedCapacity, (d == (long) d) ? "%lg.0" : "%lg", d);
     return *this;
 }
 
 String String::operator+(const char* str) const {
-    String s(this->path);
+    String s(this->data);
     s += str;
     return s;
 }
 
 String::operator const char*() const {
-    return path;
+    return data;
+}
+
+size_t String::getLength() const {
+    return usedCapacity - 1;
+}
+
+bool String::isFull() const {
+    return usedCapacity >= capacity;
+}
+
+bool String::operator==(const String& other) const {
+    return usedCapacity == other.usedCapacity && *this == other.data;
+}
+
+bool String::operator!=(const String& other) const {
+    return !(*this == other);
+}
+
+bool String::operator==(const char* other) const {
+    return strcmp(data, other) == 0;
+}
+
+bool String::operator!=(const char* other) const {
+    return !(*this == other);
 }

+ 8 - 1
utils/String.h

@@ -9,15 +9,22 @@ public:
     String(const char* str);
     String& operator+=(const char* str);
     String& operator+=(char c);
+    String& operator+=(char32_t c);
     String& operator+=(unsigned int i);
     String& operator+=(double d);
     String operator+(const char* str) const;
     operator const char*() const;
+    size_t getLength() const;
+    bool isFull() const;
+    bool operator==(const String& other) const;
+    bool operator!=(const String& other) const;
+    bool operator==(const char* other) const;
+    bool operator!=(const char* other) const;
     
 private:
     static constexpr size_t capacity = 4096;
     size_t usedCapacity;
-    char path[capacity];
+    char data[capacity];
 };
 
 #endif