blob: d51af015ed140953ff121ee98ede46b74524b081 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
// `levenshtein.c` - levenshtein
// MIT licensed.
// Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "include/levenshtein.h"
// Computes the Levenshtein (edit) distance between the first `length` bytes
// of `a` and the first `bLength` bytes of `b`.
//
// Both inputs are read strictly by index up to the given lengths, so they do
// not need to be NUL-terminated. Comparison is byte-wise (`char` equality).
//
// Returns the distance as a `size_t`. If the internal working buffer cannot
// be allocated, `SIZE_MAX` is returned instead.
// See <https://en.wikipedia.org/wiki/Levenshtein_distance> for more information.
size_t
levenshtein_n(const char *a, const size_t length, const char *b,
              const size_t bLength)
{
  // Shortcut optimizations / degenerate cases.
  // Identical pointers only imply "no difference" when the lengths agree too;
  // with different lengths one view is a prefix of the other and the general
  // algorithm below has to measure it.
  if (a == b && length == bLength) {
    return 0;
  }
  if (length == 0) {
    return bLength;
  }
  if (bLength == 0) {
    return length;
  }

  // One row of the distance table, one slot per byte of `a`.
  size_t *cache = calloc(length, sizeof *cache);
  if (cache == NULL) {
    return SIZE_MAX;
  }

  // Initialize the vector: slot `i` starts at `i + 1`.
  for (size_t index = 0; index < length; index++) {
    cache[index] = index + 1;
  }

  // `length` and `bLength` are both non-zero here, so the loops below always
  // assign `result`; the initializer only keeps the declaration well-defined.
  size_t result = 0;

  // Loop: one pass over the row per byte of `b`.
  for (size_t bIndex = 0; bIndex < bLength; bIndex++) {
    const char code = b[bIndex];
    // `distance` holds the value from the previous pass for the preceding
    // slot; `result` holds the value just computed for the preceding slot in
    // this pass. Both start at `bIndex` for the first slot.
    size_t distance = bIndex;
    result = bIndex;

    for (size_t index = 0; index < length; index++) {
      // Cost of reaching this slot diagonally: free when the bytes match.
      const size_t bDistance = code == a[index] ? distance : distance + 1;

      // Fetch the previous pass's value for this slot before overwriting it.
      distance = cache[index];

      // Pick the cheapest of the three candidate moves.
      if (distance > result) {
        result = bDistance > result ? result + 1 : bDistance;
      } else {
        result = bDistance > distance ? distance + 1 : bDistance;
      }
      cache[index] = result;
    }
  }

  free(cache);
  return result;
}
// Convenience wrapper for NUL-terminated strings: measures `a` and `b` with
// `strlen` and forwards both pointers and lengths to `levenshtein_n`.
// Returns whatever `levenshtein_n` returns for those inputs.
size_t
levenshtein(const char *a, const char *b)
{
  return levenshtein_n(a, strlen(a), b, strlen(b));
}
|