1 files changed, 99 insertions, 17 deletions
diff --git a/lib/unistr/u-strstr.h b/lib/unistr/u-strstr.h
index df32be8b..459215ec 100644
--- a/lib/unistr/u-strstr.h
+++ b/lib/unistr/u-strstr.h
@@ -1,6 +1,6 @@
 /* Substring test for UTF-8/UTF-16/UTF-32 strings.
-   Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
-   Written by Bruno Haible <bruno@clisp.org>, 2002.
+   Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
 
    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published
@@ -24,26 +24,108 @@ FUNC (const UNIT *haystack, const UNIT *needle)
   if (first == 0)
     return (UNIT *) haystack;
 
-  /* Is needle nearly empty?  */
+  /* Is needle nearly empty (only one unit)?  */
   if (needle[1] == 0)
     return U_STRCHR (haystack, first);
 
-  /* Search for needle's first unit.  */
-  for (; *haystack != 0; haystack++)
-    if (*haystack == first)
+#ifdef U_STRMBTOUC
+  /* Is needle nearly empty (only one character)?  */
+  {
+    ucs4_t first_uc;
+    int count = U_STRMBTOUC (&first_uc, needle);
+    if (count > 0 && needle[count] == 0)
+      return U_STRCHR (haystack, first_uc);
+  }
+#endif
+
+#if UNIT_IS_UINT8_T
+  return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
+#else
+  {
+    /* Minimizing the worst-case complexity:
+       Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
+       The naïve algorithm is O(n*m) worst-case.
+       The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
+       memory allocation.
+       To achieve linear complexity and yet amortize the cost of the
+       memory allocation, we activate the Knuth-Morris-Pratt algorithm
+       only once the naïve algorithm has already run for some time; more
+       precisely, when
+         - the outer loop count is >= 10,
+         - the average number of comparisons per outer loop is >= 5,
+         - the total number of comparisons is >= m.
+       But we try it only once.  If the memory allocation attempt failed,
+       we don't retry it.  */
+    bool try_kmp = true;
+    size_t outer_loop_count = 0;
+    size_t comparison_count = 0;
+    size_t last_ccount = 0;                  /* last comparison count */
+    const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
+
+    /* Speed up the following searches of needle by caching its first
+       character.  */
+    UNIT b = *needle++;
+
+    for (;; haystack++)
       {
-        /* Compare with needle's remaining units.  */
-        const UNIT *hptr = haystack + 1;
-        const UNIT *nptr = needle + 1;
-        for (;;)
+        if (*haystack == 0)
+          /* No match.  */
+          return NULL;
+
+        /* See whether it's advisable to use an asymptotically faster
+           algorithm.  */
+        if (try_kmp
+            && outer_loop_count >= 10
+            && comparison_count >= 5 * outer_loop_count)
           {
-            if (*hptr != *nptr)
-              break;
-            hptr++; nptr++;
-            if (*nptr == 0)
-              return (UNIT *) haystack;
+            /* See if needle + comparison_count now reaches the end of
+               needle.  */
+            if (needle_last_ccount != NULL)
+              {
+                needle_last_ccount +=
+                  U_STRNLEN (needle_last_ccount,
+                             comparison_count - last_ccount);
+                if (*needle_last_ccount == 0)
+                  needle_last_ccount = NULL;
+                last_ccount = comparison_count;
+              }
+            if (needle_last_ccount == NULL)
+              {
+                /* Try the Knuth-Morris-Pratt algorithm.  */
+                const UNIT *result;
+                bool success =
+                  knuth_morris_pratt (haystack,
+                                      needle - 1, U_STRLEN (needle - 1),
+                                      &result);
+                if (success)
+                  return (UNIT *) result;
+                try_kmp = false;
+              }
           }
-      }
 
-  return NULL;
+        outer_loop_count++;
+        comparison_count++;
+        if (*haystack == b)
+          /* The first character matches.  */
+          {
+            const UNIT *rhaystack = haystack + 1;
+            const UNIT *rneedle = needle;
+
+            for (;; rhaystack++, rneedle++)
+              {
+                if (*rneedle == 0)
+                  /* Found a match.  */
+                  return (UNIT *) haystack;
+                if (*rhaystack == 0)
+                  /* No match.  */
+                  return NULL;
+                comparison_count++;
+                if (*rhaystack != *rneedle)
+                  /* Nothing in this round.  */
+                  break;
+              }
+          }
+      }
+  }
+#endif
 }