diff options
Diffstat (limited to 'lib/unistr/u-strstr.h')
-rw-r--r-- | lib/unistr/u-strstr.h | 116 |
1 files changed, 17 insertions, 99 deletions
diff --git a/lib/unistr/u-strstr.h b/lib/unistr/u-strstr.h index 459215ec..df32be8b 100644 --- a/lib/unistr/u-strstr.h +++ b/lib/unistr/u-strstr.h @@ -1,6 +1,6 @@ /* Substring test for UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2002, 2005. + Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -24,108 +24,26 @@ FUNC (const UNIT *haystack, const UNIT *needle) if (first == 0) return (UNIT *) haystack; - /* Is needle nearly empty (only one unit)? */ + /* Is needle nearly empty? */ if (needle[1] == 0) return U_STRCHR (haystack, first); -#ifdef U_STRMBTOUC - /* Is needle nearly empty (only one character)? */ - { - ucs4_t first_uc; - int count = U_STRMBTOUC (&first_uc, needle); - if (count > 0 && needle[count] == 0) - return U_STRCHR (haystack, first_uc); - } -#endif - -#if UNIT_IS_UINT8_T - return (uint8_t *) strstr ((const char *) haystack, (const char *) needle); -#else - { - /* Minimizing the worst-case complexity: - Let n = U_STRLEN(haystack), m = U_STRLEN(needle). - The naïve algorithm is O(n*m) worst-case. - The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a - memory allocation. - To achieve linear complexity and yet amortize the cost of the - memory allocation, we activate the Knuth-Morris-Pratt algorithm - only once the naïve algorithm has already run for some time; more - precisely, when - - the outer loop count is >= 10, - - the average number of comparisons per outer loop is >= 5, - - the total number of comparisons is >= m. - But we try it only once. If the memory allocation attempt failed, - we don't retry it. */ - bool try_kmp = true; - size_t outer_loop_count = 0; - size_t comparison_count = 0; - size_t last_ccount = 0; /* last comparison count */ - const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */ - - /* Speed up the following searches of needle by caching its first - character. */ - UNIT b = *needle++; - - for (;; haystack++) + /* Search for needle's first unit. */ + for (; *haystack != 0; haystack++) + if (*haystack == first) { - if (*haystack == 0) - /* No match. */ - return NULL; - - /* See whether it's advisable to use an asymptotically faster - algorithm. */ - if (try_kmp - && outer_loop_count >= 10 - && comparison_count >= 5 * outer_loop_count) + /* Compare with needle's remaining units. */ + const UNIT *hptr = haystack + 1; + const UNIT *nptr = needle + 1; + for (;;) { - /* See if needle + comparison_count now reaches the end of - needle. */ - if (needle_last_ccount != NULL) - { - needle_last_ccount += - U_STRNLEN (needle_last_ccount, - comparison_count - last_ccount); - if (*needle_last_ccount == 0) - needle_last_ccount = NULL; - last_ccount = comparison_count; - } - if (needle_last_ccount == NULL) - { - /* Try the Knuth-Morris-Pratt algorithm. */ - const UNIT *result; - bool success = - knuth_morris_pratt (haystack, - needle - 1, U_STRLEN (needle - 1), - &result); - if (success) - return (UNIT *) result; - try_kmp = false; - } - } - - outer_loop_count++; - comparison_count++; - if (*haystack == b) - /* The first character matches. */ - { - const UNIT *rhaystack = haystack + 1; - const UNIT *rneedle = needle; - - for (;; rhaystack++, rneedle++) - { - if (*rneedle == 0) - /* Found a match. */ - return (UNIT *) haystack; - if (*rhaystack == 0) - /* No match. */ - return NULL; - comparison_count++; - if (*rhaystack != *rneedle) - /* Nothing in this round. */ - break; - } + if (*hptr != *nptr) + break; + hptr++; nptr++; + if (*nptr == 0) + return (UNIT *) haystack; } } - } -#endif + + return NULL; } |