Browse Source

New formatting strategy

Kajetan Johannes Hammerle 10 months ago
parent
commit
9ff955ee89
3 changed files with 215 additions and 82 deletions
  1. 126 28
      Formatter.c
  2. 3 3
      Makefile
  3. 86 51
      input_cpp

+ 126 - 28
Formatter.c

@@ -7,24 +7,56 @@
 static int lineWidth = 0;
 static FILE* file = NULL;
 
-static char** words = NULL;
+typedef struct { // one input word together with its cached printed width
+    char* data; // word text; released with free() in clean()
+    int length; // beforeWordLength + strlen(data) + afterWordLength, computed in addWord
+} Word;
+
+static Word* words = NULL;
 static int wordAmount = 0;
 static int wordCapacity = 0;
 
 static int randA = 0;
 static int randB = 0;
 
+static const char* beforeWord = "";
+static int beforeWordLength = 0;
 static const char* afterWord = "";
 static int afterWordLength = 0;
 static const char* afterLine = "";
 static int afterLineLength = 0;
 
+typedef struct { // one recorded exchange of two positions in words[]
+    int a;
+    int b;
+} Swap;
+
+static Swap swaps[50]; // fixed-size log filled by addSwap and replayed by analyse
+static int swapIndex = 0; // number of pairs currently stored in swaps[]
+// Appends the index pair (a, b) to swaps[]; once every slot is used the pair is silently dropped.
+static void addSwap(int a, int b) {
+    if(swapIndex >= (int)(sizeof(swaps) / sizeof(swaps[0]))) { // log is full
+        return; // NOTE(review): a dropped pair is never replayed by analyse - confirm 50 slots suffice
+    }
+    swaps[swapIndex].a = a;
+    swaps[swapIndex++].b = b; // post-increment: finish this slot, then advance to the next
+}
+
 static void addWord(char* word) {
     if(wordAmount >= wordCapacity) {
         wordCapacity = wordCapacity * 2 + 1;
-        words = (char**)realloc(words, (size_t)wordCapacity * sizeof(char*));
+        words = (Word*)realloc(words, (size_t)wordCapacity * sizeof(Word));
     }
-    words[wordAmount++] = word;
+    int index = wordAmount++;
+    words[index].data = word;
+    words[index].length =
+        beforeWordLength + (int)strlen(word) + afterWordLength;
+}
+// qsort comparator over Word.length that places longer words before shorter ones.
+static int compareWords(const void* a, const void* b) {
+    const Word* ra = (const Word*)a;
+    const Word* rb = (const Word*)b;
+    return ra->length == rb->length ? 0 : ra->length < rb->length ? 1 : -1; // shorter sorts later
 }
 
 static void parseFile(void) {
@@ -59,7 +91,7 @@ static void parseFile(void) {
 static void clean(void) {
     if(words != NULL) {
         for(int i = 0; i < wordAmount; i++) {
-            free(words[i]);
+            free(words[i].data);
         }
         free(words);
     }
@@ -69,7 +101,7 @@ static int getScore(void) {
     int score = 0;
     int length = 0;
     for(int i = 0; i < wordAmount; i++) {
-        int l = (int)strlen(words[i]) + afterWordLength;
+        int l = words[i].length;
         if(length + l <= lineWidth - afterLineLength) {
             length += l;
         } else {
@@ -81,21 +113,23 @@ static int getScore(void) {
 }
 
 static void print(void) {
-    for(int i = 0; i < lineWidth; i++) {
+    for(int i = 1; i <= lineWidth; i++) {
         putchar('0' + (i % 10));
     }
     putchar('\n');
     int length = 0;
     for(int i = 0; i < wordAmount; i++) {
-        int l = (int)strlen(words[i]) + afterWordLength;
+        int l = (int)words[i].length;
         if(length + l <= lineWidth - afterLineLength) {
-            printf("%s%s", words[i], afterWord);
+            printf("%s%s%s", beforeWord, words[i].data, afterWord);
             length += l;
         } else {
             length = l;
-            printf("%s\n%s%s", afterLine, words[i], afterWord);
+            printf("%s\n%s%s%s", afterLine, beforeWord, words[i].data,
+                   afterWord);
         }
     }
+    putchar('\n');
 }
 
 static void generateRandom(void) {
@@ -103,10 +137,10 @@ static void generateRandom(void) {
     randB = rand() % wordAmount;
 }
 
-static void swap(void) {
-    char* tmp = words[randA];
-    words[randA] = words[randB];
-    words[randB] = tmp;
+static void swap(int indexA, int indexB) {
+    Word tmp = words[indexA];
+    words[indexA] = words[indexB];
+    words[indexB] = tmp;
 }
 
 static void optimize(void) {
@@ -115,12 +149,12 @@ static void optimize(void) {
         int minScore = getScore();
         for(int i = 0; i < 100000; i++) {
             generateRandom();
-            swap();
+            swap(randA, randB);
             int score = getScore();
             if(score < minScore) {
                 minScore = score;
             } else {
-                swap();
+                swap(randA, randB);
             }
         }
         if(minScore < globalMin) {
@@ -130,35 +164,99 @@ static void optimize(void) {
         }
         for(int i = 0; i < 50; i++) {
             generateRandom();
-            swap();
+            swap(randA, randB);
+        }
+    }
+}
+// Returns the first index in [start, end) whose word still fits on a line already holding sum columns, or -1.
+static int findIndexOfLargestMatch(int sum, int start, int end) {
+    for(int i = start; i < end; i++) {
+        if(sum + words[i].length <= lineWidth - afterLineLength) { // room is reserved for the line suffix
+            return i; // first fit; this is the longest fit only when the range is sorted longest-first
         }
     }
+    return -1; // no word in the range fits
+}
+// Sorts words[start, end) with compareWords; an empty or inverted range is left untouched.
+static void sort(int start, int end) {
+    if(start >= end) {
+        return;
+    }
+    qsort(words + start, (size_t)(end - start), sizeof(words[0]), compareWords);
+}
+// Searches words[start, end) for words that bring sum to exactly lineWidth - afterLineLength; returns how many were picked, or -1.
+static int analyseRecursion(int sum, int start, int globalIndex, int end) {
+    if(start >= end) { // NOTE(review): tested before the exact-fit check, so a line completed by the last candidate reports -1 - confirm intended
+        return -1;
+    } else if(sum == lineWidth - afterLineLength) {
+        return 0; // line is exactly full, nothing more to pick
+    }
+    int index = findIndexOfLargestMatch(sum, start, end);
+    if(index < 0) {
+        return -1; // nothing left in the range fits
+    }
+    int r = analyseRecursion(sum + words[index].length, index + 1,
+                             globalIndex + 1, end); // take words[index] and continue behind it
+    if(r < 0) {
+        // no match with this index, try starting from next
+        return analyseRecursion(sum, start + 1, globalIndex, end);
+    }
+    addSwap(index, globalIndex); // logged only on success, so the innermost pick is recorded first
+    return r + 1; // picks made deeper in the recursion plus this one
+}
+// "think" mode entry point: repeatedly looks for an exactly-full line among the remaining words, then prints the arrangement.
+static void analyse(void) {
+    int i = 0; // start of the range that still has to be arranged
+    int end = wordAmount; // words[end, wordAmount) have been set aside
+    while(i < end) {
+        sort(i, end);
+        int r = analyseRecursion(0, i, i, end);
+        if(r > 0) {
+            i += r;
+            while(--swapIndex >= 0) { // replay the log, most recently recorded pair first
+                swap(swaps[swapIndex].a, swaps[swapIndex].b);
+            }
+            swapIndex = 0; // the loop above exits with swapIndex == -1
+        } else {
+            end--;
+            swap(i, end); // no exact fill found: move words[i] behind the shrunken range
+        }
+    }
+    print();
 }
 
 int main(int argAmount, const char* const* args) {
     if(argAmount <= 0) {
-        puts("... <width> <file> <after> <line>");
+        puts("... <mode> <width> <file> <before> <after> <line>");
         return 0;
-    } else if(argAmount < 5) {
-        printf("%s <width> <file> <after> <line>\n", args[0]);
+    } else if(argAmount < 7) {
+        printf("%s <mode> <width> <file> <before> <after> <line>\n", args[0]);
         return 0;
     }
-    lineWidth = atoi(args[1]);
+    lineWidth = atoi(args[2]);
     if(lineWidth <= 0) {
-        printf("%s is an invalid line width\n", args[1]);
+        printf("%s is an invalid line width\n", args[2]);
         return 0;
     }
-    file = fopen(args[2], "rb");
+    file = fopen(args[3], "rb");
     if(file == NULL) {
-        printf("%s is an invalid file: %s\n", args[2], strerror(errno));
+        printf("%s is an invalid file: %s\n", args[3], strerror(errno));
         return 0;
     }
-    afterWord = args[3];
-    afterWordLength = (int)strlen(args[3]);
-    afterLine = args[4];
-    afterLineLength = (int)strlen(args[4]);
+    beforeWord = args[4];
+    beforeWordLength = (int)strlen(args[4]);
+    afterWord = args[5];
+    afterWordLength = (int)strlen(args[5]);
+    afterLine = args[6];
+    afterLineLength = (int)strlen(args[6]);
+
     parseFile();
-    optimize();
+    if(strcmp(args[1], "random") == 0) {
+        optimize();
+    } else if(strcmp(args[1], "think") == 0) {
+        analyse();
+    }
+
     clean();
     if(fclose(file)) {
         printf("close error: %s\n", strerror(errno));

+ 3 - 3
Makefile

@@ -1,17 +1,17 @@
 ARGS = -Warith-conversion -Wshift-overflow=2 -Wc++-compat -Wmissing-prototypes \
        -Wdisabled-optimization -pedantic-errors -Wcast-qual -Wjump-misses-init \
        -Wmissing-include-dirs -Wformat-signedness -Wstrict-prototypes -Wshadow \
-       -Wpadded -Wall -Wfloat-equal -Wformat=2 -Wcast-align=strict -Winit-self \
+       -Walloca -Wconversion -Wwrite-strings -Wundef -Wredundant-decls -Wextra \
        -Wbad-function-cast -Wmissing-declarations -Wunsuffixed-float-constants \
        -Wnull-dereference -pedantic -Wmultichar -Wlogical-op -Wduplicated-cond \
        -Woverlength-strings -Walloc-zero -Werror -Wsign-conversion -Wdate-time \
        -Wdouble-promotion -Winvalid-pch -Wnested-externs -Wduplicated-branches \
-       -Walloca -Wconversion -Wwrite-strings -Wundef -Wredundant-decls -Wextra
+       -Wall -Wfloat-equal -Wformat=2 -Wcast-align=strict -Winit-self
 
 SRC = Formatter.c
 
 run: build
-	./formatter 77 input_cpp ", " ""
+	./formatter think 77 input_cpp "'" "', " ""
 
 build: ${SRC}
 	gcc ${SRC} ${ARGS} -o formatter -O3

+ 86 - 51
input_cpp

@@ -1,51 +1,86 @@
-'-Werror'
-'-pedantic'
-'-pedantic-errors'
-'-Wall'
-'-Wextra'
-'-Wdouble-promotion'
-'-Wformat=2'
-'-Wformat-signedness'
-'-Wnull-dereference'
-'-Winfinite-recursion'
-'-Winit-self'
-'-Wshift-overflow=2'
-'-Wswitch-enum'
-'-Wtrivial-auto-var-init'
-'-Wduplicated-branches'
-'-Wduplicated-cond'
-'-Wfloat-equal'
-'-Wshadow'
-'-Wundef'
-'-Wcast-qual'
-'-Wcast-align=strict'
-'-Wconversion'
-'-Warith-conversion'
-'-Wsign-conversion'
-'-Wlogical-op'
-'-Wmissing-declarations'
-'-Woverlength-strings'
-'-Wwrite-strings'
-'-Walloca'
-'-Walloc-zero'
-'-Wdate-time'
-'-Wdisabled-optimization'
-'-Winvalid-pch'
-'-Wmultichar'
-'-Wbidi-chars=any'
-'-Wredundant-decls'
-'-Wold-style-cast'
-'-Wvolatile'
-'-Wenum-conversion'
-'-Wsign-promo'
-'-Wdeprecated-enum-enum-conversion'
-'-Wdeprecated-enum-float-conversion'
-'-Wcomma-subscript'
-'-Wnon-virtual-dtor'
-'-Wctor-dtor-privacy'
-'-Winvalid-imported-macros'
-'-Wsuggest-override'
-'-Wdeprecated-copy-dtor'
-'-Wmissing-braces'
-'-Woverloaded-virtual'
-'-Wzero-as-null-pointer-constant'
+-Werror
+-pedantic
+-pedantic-errors
+-Wall
+-Wextra
+-Wdouble-promotion
+-Wformat=2
+-Wformat-signedness
+-Wnull-dereference
+-Winfinite-recursion
+-Winit-self
+-Wmissing-include-dirs
+-Wshift-overflow=2
+-Wswitch-enum
+-Wtrivial-auto-var-init
+-Wduplicated-branches
+-Wduplicated-cond
+-Wfloat-equal
+-Wshadow
+-Wundef
+-Wcast-qual
+-Wcast-align=strict
+-Wconversion
+-Warith-conversion
+-Wsign-conversion
+-Wlogical-op
+-Wmissing-declarations
+-Woverlength-strings
+-Wwrite-strings
+-Walloca
+-Walloc-zero
+-Wdate-time
+-Wdisabled-optimization
+-Winvalid-pch
+-Wmultichar
+-Wbidi-chars=any
+-Wredundant-decls
+-Wold-style-cast
+-Wvolatile
+-Wenum-conversion
+-Wsign-promo
+-Wdeprecated-enum-enum-conversion
+-Wdeprecated-enum-float-conversion
+-Wcomma-subscript
+-Wnon-virtual-dtor
+-Wctor-dtor-privacy
+-Winvalid-imported-macros
+-Wsuggest-override
+-Wdeprecated-copy-dtor
+-Wmissing-braces
+-Woverloaded-virtual
+-Wzero-as-null-pointer-constant
+-Wanalyzer-too-complex
+-Warray-bounds=2
+-Warray-parameter
+-Wattribute-alias=2
+-Wbidi-chars
+-Waligned-new=all
+-Wcatch-value=3
+-Wconditionally-supported
+-Wctad-maybe-unsupported
+-Wformat-truncation=2
+-Wformat-overflow=2
+-Wframe-larger-than=8388608
+-Wimplicit-fallthrough=5
+-Wlarger-than=1073741824
+-Wmismatched-tags
+-Wmultiple-inheritance
+-Wnoexcept
+-Wplacement-new=2
+-Wredundant-tags
+-Wregister
+-Wstack-protector
+-Wstack-usage=8388608
+-Wstrict-null-sentinel
+-Wstrict-overflow=5
+-Wstringop-overflow=4
+-Wsuggest-attribute=const
+-Wsuggest-final-methods
+-Wsuggest-final-types
+-Wsynth
+-Wtrampolines
+-Wunused-const-variable=2
+-Wunused-macros
+-Wuse-after-free=3
+-Wvirtual-inheritance