diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2026-03-08 17:28:33 +0100 |
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2026-03-08 17:28:33 +0100 |
| commit | 5f59a34ab747dde8ede7357f3431bf06bd6002fe (patch) | |
| tree | 056a4477fd870d454d5be5868cddab829a47f4d2 /lib/wcs-two-way.h | |
| parent | 27dae84ed92f1ef0300263091972338d12e78348 (diff) | |
New upstream version 1.4.2upstream/1.4.2upstream
Diffstat (limited to 'lib/wcs-two-way.h')
| -rw-r--r-- | lib/wcs-two-way.h | 170 |
1 files changed, 86 insertions, 84 deletions
diff --git a/lib/wcs-two-way.h b/lib/wcs-two-way.h index 4cd384aa..9d07736a 100644 --- a/lib/wcs-two-way.h +++ b/lib/wcs-two-way.h @@ -1,5 +1,5 @@ /* Wide character substring search, using the Two-Way algorithm. - Copyright (C) 2008-2025 Free Software Foundation, Inc. + Copyright (C) 2008-2026 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Eric Blake <ebb9@byu.net>, 2008. @@ -91,13 +91,6 @@ static size_t critical_factorization (const UNIT *needle, size_t needle_len, size_t *period) { - /* Index of last character of left half, or SIZE_MAX. */ - size_t max_suffix, max_suffix_rev; - size_t j; /* Index into NEEDLE for current candidate suffix. */ - size_t k; /* Offset into current period. */ - size_t p; /* Intermediate period. */ - UNIT a, b; /* Current comparison characters. */ - /* Special case NEEDLE_LEN of 1 or 2 (all callers already filtered out 0-length needles. */ if (needle_len < 3) @@ -116,73 +109,83 @@ critical_factorization (const UNIT *needle, size_t needle_len, */ /* Perform lexicographic search. */ - max_suffix = SIZE_MAX; - j = 0; - k = p = 1; - while (j + k < needle_len) - { - a = CANON_ELEMENT (needle[j + k]); - b = CANON_ELEMENT (needle[max_suffix + k]); - if (a < b) - { - /* Suffix is smaller, period is entire prefix so far. */ - j += k; - k = 1; - p = j - max_suffix; - } - else if (a == b) - { - /* Advance through repetition of the current period. */ - if (k != p) - ++k; - else - { - j += p; - k = 1; - } - } - else /* b < a */ - { - /* Suffix is larger, start over from current location. */ - max_suffix = j++; - k = p = 1; - } - } - *period = p; + size_t max_suffix = /* Index of last character of left half, or SIZE_MAX. */ + SIZE_MAX; + { + size_t j = 0; /* Index into NEEDLE for current candidate suffix. */ + size_t k = 1; /* Offset into current period. */ + size_t p = 1; /* Intermediate period. */ + while (j + k < needle_len) + { + UNIT a = CANON_ELEMENT (needle[j + k]); + UNIT b = CANON_ELEMENT (needle[max_suffix + k]); + if (a < b) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* b < a */ + { + /* Suffix is larger, start over from current location. */ + max_suffix = j++; + k = p = 1; + } + } + *period = p; + } /* Perform reverse lexicographic search. */ - max_suffix_rev = SIZE_MAX; - j = 0; - k = p = 1; - while (j + k < needle_len) - { - a = CANON_ELEMENT (needle[j + k]); - b = CANON_ELEMENT (needle[max_suffix_rev + k]); - if (b < a) - { - /* Suffix is smaller, period is entire prefix so far. */ - j += k; - k = 1; - p = j - max_suffix_rev; - } - else if (a == b) - { - /* Advance through repetition of the current period. */ - if (k != p) - ++k; - else - { - j += p; - k = 1; - } - } - else /* a < b */ - { - /* Suffix is larger, start over from current location. */ - max_suffix_rev = j++; - k = p = 1; - } - } + size_t max_suffix_rev = /* Index of last character of left half, or SIZE_MAX. */ + SIZE_MAX; + size_t p_rev; + { + size_t j = 0; /* Index into NEEDLE for current candidate suffix. */ + size_t k = 1; /* Offset into current period. */ + size_t p = 1; /* Intermediate period. */ + while (j + k < needle_len) + { + UNIT a = CANON_ELEMENT (needle[j + k]); + UNIT b = CANON_ELEMENT (needle[max_suffix_rev + k]); + if (b < a) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix_rev; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* a < b */ + { + /* Suffix is larger, start over from current location. */ + max_suffix_rev = j++; + k = p = 1; + } + } + p_rev = p; + } /* Choose the shorter suffix. Return the index of the first character of the right half, rather than the last character of the left half. @@ -200,7 +203,7 @@ critical_factorization (const UNIT *needle, size_t needle_len, factorization. */ if (max_suffix_rev + 1 < max_suffix + 1) return max_suffix + 1; - *period = p; + *period = p_rev; return max_suffix_rev + 1; } @@ -218,15 +221,12 @@ static RETURN_TYPE _GL_ATTRIBUTE_PURE two_way_short_needle (const UNIT *haystack, size_t haystack_len, const UNIT *needle, size_t needle_len) { - size_t i; /* Index into current character of NEEDLE. */ - size_t j; /* Index into current window of HAYSTACK. */ - size_t period; /* The period of the right half of needle. */ - size_t suffix; /* The index of the right half of needle. */ - /* Factor the needle into two halves, such that the left half is smaller than the global period, and the right half is periodic (with a period as large as NEEDLE_LEN - suffix). */ - suffix = critical_factorization (needle, needle_len, &period); + size_t period; /* The period of the right half of needle. */ + size_t suffix = /* The index of the right half of needle. */ + critical_factorization (needle, needle_len, &period); /* Perform the search. Each iteration compares the right half first. */ @@ -236,11 +236,12 @@ two_way_short_needle (const UNIT *haystack, size_t haystack_len, only advance by the period, so use memory to avoid rescanning known occurrences of the period in the right half. */ size_t memory = 0; - j = 0; + size_t j = 0; /* Index into current window of HAYSTACK. */ while (AVAILABLE (haystack, haystack_len, j, needle_len)) { /* Scan for matches in right half. */ - i = MAX (suffix, memory); + size_t i = /* Index into current character of NEEDLE. */ + MAX (suffix, memory); while (i < needle_len && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) ++i; @@ -270,11 +271,12 @@ two_way_short_needle (const UNIT *haystack, size_t haystack_len, /* The two halves of needle are distinct; no extra memory is required, and any mismatch results in a maximal shift. */ period = MAX (suffix, needle_len - suffix) + 1; - j = 0; + size_t j = 0; /* Index into current window of HAYSTACK. */ while (AVAILABLE (haystack, haystack_len, j, needle_len)) { /* Scan for matches in right half. */ - i = suffix; + size_t i = /* Index into current character of NEEDLE. */ + suffix; while (i < needle_len && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) ++i; |
