summaryrefslogtreecommitdiff
path: root/app/bin/levenshtein.c
diff options
context:
space:
mode:
Diffstat (limited to 'app/bin/levenshtein.c')
-rw-r--r--app/bin/levenshtein.c75
1 files changed, 75 insertions, 0 deletions
diff --git a/app/bin/levenshtein.c b/app/bin/levenshtein.c
new file mode 100644
index 0000000..d51af01
--- /dev/null
+++ b/app/bin/levenshtein.c
@@ -0,0 +1,75 @@
+// `levenshtein.c` - levenshtein
+// MIT licensed.
+// Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "include/levenshtein.h"
+
+// Returns a size_t, depicting the difference between `a` and `b`.
+// See <https://en.wikipedia.org/wiki/Levenshtein_distance> for more information.
+size_t
+levenshtein_n(const char *a, const size_t length, const char *b,
+ const size_t bLength)
+{
+ // Shortcut optimizations / degenerate cases.
+ if (a == b) {
+ return 0;
+ }
+
+ if (length == 0) {
+ return bLength;
+ }
+
+ if (bLength == 0) {
+ return length;
+ }
+
+ size_t *cache = calloc(length, sizeof(size_t));
+ size_t index = 0;
+ size_t bIndex = 0;
+ size_t distance;
+ size_t bDistance;
+ size_t result;
+ char code;
+
+ // initialize the vector.
+ while (index < length) {
+ cache[index] = index + 1;
+ index++;
+ }
+
+ // Loop.
+ while (bIndex < bLength) {
+ code = b[bIndex];
+ result = distance = bIndex++;
+ index = SIZE_MAX;
+
+ while (++index < length) {
+ bDistance = code == a[index] ? distance : distance + 1;
+ distance = cache[index];
+
+ cache[index] = result = distance > result
+ ? bDistance > result
+ ? result + 1
+ : bDistance
+ : bDistance > distance
+ ? distance + 1
+ : bDistance;
+ }
+ }
+
+ free(cache);
+
+ return result;
+}
+
+size_t
+levenshtein(const char *a, const char *b)
+{
+ const size_t length = strlen(a);
+ const size_t bLength = strlen(b);
+
+ return levenshtein_n(a, length, b, bLength);
+}