summary | refs | log | tree | commit | diff
path: root/lib/unistr
diff options
context:
space:
mode:
author: Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> 2016-05-26 16:48:39 +0100
committer: Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> 2016-05-26 16:48:39 +0100
commit: 8dfc46115527afe3706e9e4225e9ad019c97d695 (patch)
tree: 372d5192b218455834781a0037c57e919a06b488 /lib/unistr
parent: 2291cf138eb72ad38d8c59b8b6f762875c8c4ff2 (diff)
parent: 5f2b09982312c98863eb9a8dfe2c608b81f58259 (diff)
Merge tag 'upstream/0.9.6'
Upstream version 0.9.6
Diffstat (limited to 'lib/unistr')
-rw-r--r--lib/unistr/u-cmp2.h2
-rw-r--r--lib/unistr/u-cpy-alloc.h2
-rw-r--r--lib/unistr/u-cpy.h2
-rw-r--r--lib/unistr/u-endswith.h2
-rw-r--r--lib/unistr/u-move.h2
-rw-r--r--lib/unistr/u-set.h2
-rw-r--r--lib/unistr/u-startswith.h2
-rw-r--r--lib/unistr/u-stpcpy.h2
-rw-r--r--lib/unistr/u-stpncpy.h2
-rw-r--r--lib/unistr/u-strcat.h2
-rw-r--r--lib/unistr/u-strcoll.h21
-rw-r--r--lib/unistr/u-strcpy.h2
-rw-r--r--lib/unistr/u-strcspn.h2
-rw-r--r--lib/unistr/u-strdup.h2
-rw-r--r--lib/unistr/u-strlen.h2
-rw-r--r--lib/unistr/u-strncat.h2
-rw-r--r--lib/unistr/u-strncpy.h2
-rw-r--r--lib/unistr/u-strnlen.h2
-rw-r--r--lib/unistr/u-strpbrk.h2
-rw-r--r--lib/unistr/u-strspn.h2
-rw-r--r--lib/unistr/u-strstr.h116
-rw-r--r--lib/unistr/u-strtok.h4
-rw-r--r--lib/unistr/u16-check.c2
-rw-r--r--lib/unistr/u16-chr.c2
-rw-r--r--lib/unistr/u16-cmp.c2
-rw-r--r--lib/unistr/u16-cmp2.c2
-rw-r--r--lib/unistr/u16-cpy-alloc.c2
-rw-r--r--lib/unistr/u16-cpy.c2
-rw-r--r--lib/unistr/u16-endswith.c2
-rw-r--r--lib/unistr/u16-mblen.c2
-rw-r--r--lib/unistr/u16-mbsnlen.c2
-rw-r--r--lib/unistr/u16-mbtouc-aux.c2
-rw-r--r--lib/unistr/u16-mbtouc-unsafe-aux.c2
-rw-r--r--lib/unistr/u16-mbtouc-unsafe.c2
-rw-r--r--lib/unistr/u16-mbtouc.c2
-rw-r--r--lib/unistr/u16-mbtoucr.c2
-rw-r--r--lib/unistr/u16-move.c2
-rw-r--r--lib/unistr/u16-next.c2
-rw-r--r--lib/unistr/u16-prev.c2
-rw-r--r--lib/unistr/u16-set.c2
-rw-r--r--lib/unistr/u16-startswith.c2
-rw-r--r--lib/unistr/u16-stpcpy.c2
-rw-r--r--lib/unistr/u16-stpncpy.c2
-rw-r--r--lib/unistr/u16-strcat.c2
-rw-r--r--lib/unistr/u16-strchr.c2
-rw-r--r--lib/unistr/u16-strcmp.c2
-rw-r--r--lib/unistr/u16-strcoll.c4
-rw-r--r--lib/unistr/u16-strcpy.c2
-rw-r--r--lib/unistr/u16-strcspn.c2
-rw-r--r--lib/unistr/u16-strdup.c2
-rw-r--r--lib/unistr/u16-strlen.c2
-rw-r--r--lib/unistr/u16-strmblen.c2
-rw-r--r--lib/unistr/u16-strmbtouc.c2
-rw-r--r--lib/unistr/u16-strncat.c2
-rw-r--r--lib/unistr/u16-strncmp.c2
-rw-r--r--lib/unistr/u16-strncpy.c2
-rw-r--r--lib/unistr/u16-strnlen.c2
-rw-r--r--lib/unistr/u16-strpbrk.c2
-rw-r--r--lib/unistr/u16-strrchr.c2
-rw-r--r--lib/unistr/u16-strspn.c2
-rw-r--r--lib/unistr/u16-strstr.c13
-rw-r--r--lib/unistr/u16-strtok.c3
-rw-r--r--lib/unistr/u16-to-u32.c2
-rw-r--r--lib/unistr/u16-to-u8.c2
-rw-r--r--lib/unistr/u16-uctomb-aux.c2
-rw-r--r--lib/unistr/u16-uctomb.c2
-rw-r--r--lib/unistr/u32-check.c2
-rw-r--r--lib/unistr/u32-chr.c2
-rw-r--r--lib/unistr/u32-cmp.c2
-rw-r--r--lib/unistr/u32-cmp2.c2
-rw-r--r--lib/unistr/u32-cpy-alloc.c2
-rw-r--r--lib/unistr/u32-cpy.c2
-rw-r--r--lib/unistr/u32-endswith.c2
-rw-r--r--lib/unistr/u32-mblen.c2
-rw-r--r--lib/unistr/u32-mbsnlen.c2
-rw-r--r--lib/unistr/u32-mbtouc-unsafe.c2
-rw-r--r--lib/unistr/u32-mbtouc.c2
-rw-r--r--lib/unistr/u32-mbtoucr.c2
-rw-r--r--lib/unistr/u32-move.c2
-rw-r--r--lib/unistr/u32-next.c2
-rw-r--r--lib/unistr/u32-prev.c2
-rw-r--r--lib/unistr/u32-set.c2
-rw-r--r--lib/unistr/u32-startswith.c2
-rw-r--r--lib/unistr/u32-stpcpy.c2
-rw-r--r--lib/unistr/u32-stpncpy.c2
-rw-r--r--lib/unistr/u32-strcat.c2
-rw-r--r--lib/unistr/u32-strchr.c2
-rw-r--r--lib/unistr/u32-strcmp.c2
-rw-r--r--lib/unistr/u32-strcoll.c4
-rw-r--r--lib/unistr/u32-strcpy.c2
-rw-r--r--lib/unistr/u32-strcspn.c2
-rw-r--r--lib/unistr/u32-strdup.c2
-rw-r--r--lib/unistr/u32-strlen.c2
-rw-r--r--lib/unistr/u32-strmblen.c2
-rw-r--r--lib/unistr/u32-strmbtouc.c2
-rw-r--r--lib/unistr/u32-strncat.c2
-rw-r--r--lib/unistr/u32-strncmp.c2
-rw-r--r--lib/unistr/u32-strncpy.c2
-rw-r--r--lib/unistr/u32-strnlen.c2
-rw-r--r--lib/unistr/u32-strpbrk.c2
-rw-r--r--lib/unistr/u32-strrchr.c2
-rw-r--r--lib/unistr/u32-strspn.c2
-rw-r--r--lib/unistr/u32-strstr.c12
-rw-r--r--lib/unistr/u32-strtok.c3
-rw-r--r--lib/unistr/u32-to-u16.c2
-rw-r--r--lib/unistr/u32-to-u8.c2
-rw-r--r--lib/unistr/u32-uctomb.c2
-rw-r--r--lib/unistr/u8-check.c4
-rw-r--r--lib/unistr/u8-chr.c213
-rw-r--r--lib/unistr/u8-cmp.c2
-rw-r--r--lib/unistr/u8-cmp2.c2
-rw-r--r--lib/unistr/u8-cpy-alloc.c2
-rw-r--r--lib/unistr/u8-cpy.c2
-rw-r--r--lib/unistr/u8-endswith.c2
-rw-r--r--lib/unistr/u8-mblen.c4
-rw-r--r--lib/unistr/u8-mbsnlen.c6
-rw-r--r--lib/unistr/u8-mbtouc-aux.c166
-rw-r--r--lib/unistr/u8-mbtouc-unsafe-aux.c182
-rw-r--r--lib/unistr/u8-mbtouc-unsafe.c182
-rw-r--r--lib/unistr/u8-mbtouc.c166
-rw-r--r--lib/unistr/u8-mbtoucr.c2
-rw-r--r--lib/unistr/u8-move.c2
-rw-r--r--lib/unistr/u8-next.c2
-rw-r--r--lib/unistr/u8-prev.c4
-rw-r--r--lib/unistr/u8-set.c2
-rw-r--r--lib/unistr/u8-startswith.c2
-rw-r--r--lib/unistr/u8-stpcpy.c2
-rw-r--r--lib/unistr/u8-stpncpy.c4
-rw-r--r--lib/unistr/u8-strcat.c2
-rw-r--r--lib/unistr/u8-strchr.c191
-rw-r--r--lib/unistr/u8-strcmp.c2
-rw-r--r--lib/unistr/u8-strcoll.c4
-rw-r--r--lib/unistr/u8-strcpy.c2
-rw-r--r--lib/unistr/u8-strcspn.c2
-rw-r--r--lib/unistr/u8-strdup.c2
-rw-r--r--lib/unistr/u8-strlen.c2
-rw-r--r--lib/unistr/u8-strmblen.c4
-rw-r--r--lib/unistr/u8-strmbtouc.c4
-rw-r--r--lib/unistr/u8-strncat.c2
-rw-r--r--lib/unistr/u8-strncmp.c2
-rw-r--r--lib/unistr/u8-strncpy.c2
-rw-r--r--lib/unistr/u8-strnlen.c4
-rw-r--r--lib/unistr/u8-strpbrk.c2
-rw-r--r--lib/unistr/u8-strrchr.c2
-rw-r--r--lib/unistr/u8-strspn.c2
-rw-r--r--lib/unistr/u8-strstr.c6
-rw-r--r--lib/unistr/u8-strtok.c3
-rw-r--r--lib/unistr/u8-to-u16.c2
-rw-r--r--lib/unistr/u8-to-u32.c2
-rw-r--r--lib/unistr/u8-uctomb-aux.c2
-rw-r--r--lib/unistr/u8-uctomb.c2
151 files changed, 1145 insertions, 432 deletions
diff --git a/lib/unistr/u-cmp2.h b/lib/unistr/u-cmp2.h
index 6058c4a5..6ee062d7 100644
--- a/lib/unistr/u-cmp2.h
+++ b/lib/unistr/u-cmp2.h
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-cpy-alloc.h b/lib/unistr/u-cpy-alloc.h
index f36a8d03..5e3ad56d 100644
--- a/lib/unistr/u-cpy-alloc.h
+++ b/lib/unistr/u-cpy-alloc.h
@@ -1,5 +1,5 @@
/* Copy piece of UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u-cpy.h b/lib/unistr/u-cpy.h
index 6dad9525..2f9f9974 100644
--- a/lib/unistr/u-cpy.h
+++ b/lib/unistr/u-cpy.h
@@ -1,5 +1,5 @@
/* Copy piece of UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-endswith.h b/lib/unistr/u-endswith.h
index 094a87fb..cd366c1e 100644
--- a/lib/unistr/u-endswith.h
+++ b/lib/unistr/u-endswith.h
@@ -1,5 +1,5 @@
/* Substring test for UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-move.h b/lib/unistr/u-move.h
index d8d58b0a..13332a0f 100644
--- a/lib/unistr/u-move.h
+++ b/lib/unistr/u-move.h
@@ -1,5 +1,5 @@
/* Copy piece of UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-set.h b/lib/unistr/u-set.h
index de78a8eb..8cb27373 100644
--- a/lib/unistr/u-set.h
+++ b/lib/unistr/u-set.h
@@ -1,5 +1,5 @@
/* Fill UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-startswith.h b/lib/unistr/u-startswith.h
index 16966512..393dad54 100644
--- a/lib/unistr/u-startswith.h
+++ b/lib/unistr/u-startswith.h
@@ -1,5 +1,5 @@
/* Substring test for UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-stpcpy.h b/lib/unistr/u-stpcpy.h
index 483f3c2a..bbb36237 100644
--- a/lib/unistr/u-stpcpy.h
+++ b/lib/unistr/u-stpcpy.h
@@ -1,5 +1,5 @@
/* Copy UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-stpncpy.h b/lib/unistr/u-stpncpy.h
index 4d6dd3c7..545e1025 100644
--- a/lib/unistr/u-stpncpy.h
+++ b/lib/unistr/u-stpncpy.h
@@ -1,5 +1,5 @@
/* Copy UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strcat.h b/lib/unistr/u-strcat.h
index 84430fc9..4119d391 100644
--- a/lib/unistr/u-strcat.h
+++ b/lib/unistr/u-strcat.h
@@ -1,5 +1,5 @@
/* Concatenate UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strcoll.h b/lib/unistr/u-strcoll.h
index 9ec5c601..014e11bf 100644
--- a/lib/unistr/u-strcoll.h
+++ b/lib/unistr/u-strcoll.h
@@ -1,6 +1,6 @@
/* Compare UTF-8/UTF-16/UTF-32 strings using the collation rules of the current
locale.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -23,14 +23,19 @@ FUNC (const UNIT *s1, const UNIT *s2)
When it fails, it sets errno, but also returns a meaningful return value,
for the sake of callers which ignore errno. */
int final_errno = errno;
+ const char *encoding = locale_charset ();
char *sl1;
char *sl2;
int result;
- sl1 = U_STRCONV_TO_LOCALE (s1);
+ /* Pass iconveh_error here, not iconveh_question_mark. Otherwise the
+ conversion to locale encoding can do transliteration or map some
+ characters to question marks, leading to results that depend on the
+ iconv() implementation and are not obvious. */
+ sl1 = U_STRCONV_TO_ENCODING (s1, encoding, iconveh_error);
if (sl1 != NULL)
{
- sl2 = U_STRCONV_TO_LOCALE (s2);
+ sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error);
if (sl2 != NULL)
{
/* Compare sl1 and sl2. */
@@ -41,10 +46,10 @@ FUNC (const UNIT *s1, const UNIT *s2)
/* strcoll succeeded. */
free (sl1);
free (sl2);
- /* The conversion to locale encoding can do transliteration or
- map some characters to question marks. Therefore sl1 and sl2
- may be equal when s1 and s2 were in fact different. Return a
- nonzero result in this case. */
+ /* The conversion to locale encoding can drop Unicode TAG
+ characters. Therefore sl1 and sl2 may be equal when s1
+ and s2 were in fact different. Return a nonzero result
+ in this case. */
if (result == 0)
result = U_STRCMP (s1, s2);
}
@@ -68,7 +73,7 @@ FUNC (const UNIT *s1, const UNIT *s2)
else
{
final_errno = errno;
- sl2 = U_STRCONV_TO_LOCALE (s2);
+ sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error);
if (sl2 != NULL)
{
/* s2 could be converted to locale encoding, s1 not. */
diff --git a/lib/unistr/u-strcpy.h b/lib/unistr/u-strcpy.h
index b059aea4..aca68477 100644
--- a/lib/unistr/u-strcpy.h
+++ b/lib/unistr/u-strcpy.h
@@ -1,5 +1,5 @@
/* Copy UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strcspn.h b/lib/unistr/u-strcspn.h
index 77b95508..bb287d86 100644
--- a/lib/unistr/u-strcspn.h
+++ b/lib/unistr/u-strcspn.h
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strdup.h b/lib/unistr/u-strdup.h
index a92e1efa..80cb4549 100644
--- a/lib/unistr/u-strdup.h
+++ b/lib/unistr/u-strdup.h
@@ -1,5 +1,5 @@
/* Copy UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u-strlen.h b/lib/unistr/u-strlen.h
index a54d614c..11d9831d 100644
--- a/lib/unistr/u-strlen.h
+++ b/lib/unistr/u-strlen.h
@@ -1,5 +1,5 @@
/* Determine length of UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strncat.h b/lib/unistr/u-strncat.h
index e5c9a045..f202d326 100644
--- a/lib/unistr/u-strncat.h
+++ b/lib/unistr/u-strncat.h
@@ -1,5 +1,5 @@
/* Concatenate UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strncpy.h b/lib/unistr/u-strncpy.h
index 55a4f672..8354d0c7 100644
--- a/lib/unistr/u-strncpy.h
+++ b/lib/unistr/u-strncpy.h
@@ -1,5 +1,5 @@
/* Copy UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strnlen.h b/lib/unistr/u-strnlen.h
index 9bcc3da3..bcf1e032 100644
--- a/lib/unistr/u-strnlen.h
+++ b/lib/unistr/u-strnlen.h
@@ -1,5 +1,5 @@
/* Determine bounded length of UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strpbrk.h b/lib/unistr/u-strpbrk.h
index 34aabc53..60653abf 100644
--- a/lib/unistr/u-strpbrk.h
+++ b/lib/unistr/u-strpbrk.h
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strspn.h b/lib/unistr/u-strspn.h
index d6669af8..d15c46e6 100644
--- a/lib/unistr/u-strspn.h
+++ b/lib/unistr/u-strspn.h
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u-strstr.h b/lib/unistr/u-strstr.h
index df32be8b..459215ec 100644
--- a/lib/unistr/u-strstr.h
+++ b/lib/unistr/u-strstr.h
@@ -1,6 +1,6 @@
/* Substring test for UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
- Written by Bruno Haible <bruno@clisp.org>, 2002.
+ Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
@@ -24,26 +24,108 @@ FUNC (const UNIT *haystack, const UNIT *needle)
if (first == 0)
return (UNIT *) haystack;
- /* Is needle nearly empty? */
+ /* Is needle nearly empty (only one unit)? */
if (needle[1] == 0)
return U_STRCHR (haystack, first);
- /* Search for needle's first unit. */
- for (; *haystack != 0; haystack++)
- if (*haystack == first)
+#ifdef U_STRMBTOUC
+ /* Is needle nearly empty (only one character)? */
+ {
+ ucs4_t first_uc;
+ int count = U_STRMBTOUC (&first_uc, needle);
+ if (count > 0 && needle[count] == 0)
+ return U_STRCHR (haystack, first_uc);
+ }
+#endif
+
+#if UNIT_IS_UINT8_T
+ return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
+#else
+ {
+ /* Minimizing the worst-case complexity:
+ Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
+ The naïve algorithm is O(n*m) worst-case.
+ The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
+ memory allocation.
+ To achieve linear complexity and yet amortize the cost of the
+ memory allocation, we activate the Knuth-Morris-Pratt algorithm
+ only once the naïve algorithm has already run for some time; more
+ precisely, when
+ - the outer loop count is >= 10,
+ - the average number of comparisons per outer loop is >= 5,
+ - the total number of comparisons is >= m.
+ But we try it only once. If the memory allocation attempt failed,
+ we don't retry it. */
+ bool try_kmp = true;
+ size_t outer_loop_count = 0;
+ size_t comparison_count = 0;
+ size_t last_ccount = 0; /* last comparison count */
+ const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
+
+ /* Speed up the following searches of needle by caching its first
+ character. */
+ UNIT b = *needle++;
+
+ for (;; haystack++)
{
- /* Compare with needle's remaining units. */
- const UNIT *hptr = haystack + 1;
- const UNIT *nptr = needle + 1;
- for (;;)
+ if (*haystack == 0)
+ /* No match. */
+ return NULL;
+
+ /* See whether it's advisable to use an asymptotically faster
+ algorithm. */
+ if (try_kmp
+ && outer_loop_count >= 10
+ && comparison_count >= 5 * outer_loop_count)
{
- if (*hptr != *nptr)
- break;
- hptr++; nptr++;
- if (*nptr == 0)
- return (UNIT *) haystack;
+ /* See if needle + comparison_count now reaches the end of
+ needle. */
+ if (needle_last_ccount != NULL)
+ {
+ needle_last_ccount +=
+ U_STRNLEN (needle_last_ccount,
+ comparison_count - last_ccount);
+ if (*needle_last_ccount == 0)
+ needle_last_ccount = NULL;
+ last_ccount = comparison_count;
+ }
+ if (needle_last_ccount == NULL)
+ {
+ /* Try the Knuth-Morris-Pratt algorithm. */
+ const UNIT *result;
+ bool success =
+ knuth_morris_pratt (haystack,
+ needle - 1, U_STRLEN (needle - 1),
+ &result);
+ if (success)
+ return (UNIT *) result;
+ try_kmp = false;
+ }
}
- }
- return NULL;
+ outer_loop_count++;
+ comparison_count++;
+ if (*haystack == b)
+ /* The first character matches. */
+ {
+ const UNIT *rhaystack = haystack + 1;
+ const UNIT *rneedle = needle;
+
+ for (;; rhaystack++, rneedle++)
+ {
+ if (*rneedle == 0)
+ /* Found a match. */
+ return (UNIT *) haystack;
+ if (*rhaystack == 0)
+ /* No match. */
+ return NULL;
+ comparison_count++;
+ if (*rhaystack != *rneedle)
+ /* Nothing in this round. */
+ break;
+ }
+ }
+ }
+ }
+#endif
}
diff --git a/lib/unistr/u-strtok.h b/lib/unistr/u-strtok.h
index 7fdef578..edafa1ba 100644
--- a/lib/unistr/u-strtok.h
+++ b/lib/unistr/u-strtok.h
@@ -1,5 +1,5 @@
/* Tokenize UTF-8/UTF-16/UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -40,9 +40,9 @@ FUNC (UNIT *str, const UNIT *delim, UNIT **ptr)
UNIT *token_end = U_STRPBRK (str, delim);
if (token_end)
{
+ *ptr = token_end + U_STRMBLEN (token_end);
/* NUL-terminate the token. */
*token_end = 0;
- *ptr = token_end + 1;
}
else
*ptr = NULL;
diff --git a/lib/unistr/u16-check.c b/lib/unistr/u16-check.c
index 4f18383b..ef6e3340 100644
--- a/lib/unistr/u16-check.c
+++ b/lib/unistr/u16-check.c
@@ -1,5 +1,5 @@
/* Check UTF-16 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-chr.c b/lib/unistr/u16-chr.c
index 13deef4d..cbf8dd27 100644
--- a/lib/unistr/u16-chr.c
+++ b/lib/unistr/u16-chr.c
@@ -1,5 +1,5 @@
/* Search character in piece of UTF-16 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-cmp.c b/lib/unistr/u16-cmp.c
index b71c1c34..01059f84 100644
--- a/lib/unistr/u16-cmp.c
+++ b/lib/unistr/u16-cmp.c
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-16 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-cmp2.c b/lib/unistr/u16-cmp2.c
index 8c2a8d6e..687356fd 100644
--- a/lib/unistr/u16-cmp2.c
+++ b/lib/unistr/u16-cmp2.c
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-16 strings.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-cpy-alloc.c b/lib/unistr/u16-cpy-alloc.c
index 33984f9c..567855d6 100644
--- a/lib/unistr/u16-cpy-alloc.c
+++ b/lib/unistr/u16-cpy-alloc.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-cpy.c b/lib/unistr/u16-cpy.c
index 614d24c4..cdcf6af8 100644
--- a/lib/unistr/u16-cpy.c
+++ b/lib/unistr/u16-cpy.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-endswith.c b/lib/unistr/u16-endswith.c
index 27dcd5aa..ee7c392b 100644
--- a/lib/unistr/u16-endswith.c
+++ b/lib/unistr/u16-endswith.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-16 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-mblen.c b/lib/unistr/u16-mblen.c
index 9e7a93a1..6fab7e88 100644
--- a/lib/unistr/u16-mblen.c
+++ b/lib/unistr/u16-mblen.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-16 string.
- Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-mbsnlen.c b/lib/unistr/u16-mbsnlen.c
index 601d81d6..68f8d67b 100644
--- a/lib/unistr/u16-mbsnlen.c
+++ b/lib/unistr/u16-mbsnlen.c
@@ -1,5 +1,5 @@
/* Count characters in UTF-16 string.
- Copyright (C) 2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2007.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-mbtouc-aux.c b/lib/unistr/u16-mbtouc-aux.c
index bee77fcf..e4fb94b8 100644
--- a/lib/unistr/u16-mbtouc-aux.c
+++ b/lib/unistr/u16-mbtouc-aux.c
@@ -1,5 +1,5 @@
/* Conversion UTF-16 to UCS-4.
- Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-mbtouc-unsafe-aux.c b/lib/unistr/u16-mbtouc-unsafe-aux.c
index f2d72259..d616096d 100644
--- a/lib/unistr/u16-mbtouc-unsafe-aux.c
+++ b/lib/unistr/u16-mbtouc-unsafe-aux.c
@@ -1,5 +1,5 @@
/* Conversion UTF-16 to UCS-4.
- Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-mbtouc-unsafe.c b/lib/unistr/u16-mbtouc-unsafe.c
index a5a3638b..fef89114 100644
--- a/lib/unistr/u16-mbtouc-unsafe.c
+++ b/lib/unistr/u16-mbtouc-unsafe.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-16 string.
- Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-mbtouc.c b/lib/unistr/u16-mbtouc.c
index 26b60890..7855ed4c 100644
--- a/lib/unistr/u16-mbtouc.c
+++ b/lib/unistr/u16-mbtouc.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-16 string.
- Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-mbtoucr.c b/lib/unistr/u16-mbtoucr.c
index 00b7b708..65b33b79 100644
--- a/lib/unistr/u16-mbtoucr.c
+++ b/lib/unistr/u16-mbtoucr.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-16 string, returning an error code.
- Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-move.c b/lib/unistr/u16-move.c
index f6efb888..39a6fdd3 100644
--- a/lib/unistr/u16-move.c
+++ b/lib/unistr/u16-move.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-next.c b/lib/unistr/u16-next.c
index 8245f11a..1621b685 100644
--- a/lib/unistr/u16-next.c
+++ b/lib/unistr/u16-next.c
@@ -1,5 +1,5 @@
/* Iterate over next character in UTF-16 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-prev.c b/lib/unistr/u16-prev.c
index 4c902ad2..e1a68549 100644
--- a/lib/unistr/u16-prev.c
+++ b/lib/unistr/u16-prev.c
@@ -1,5 +1,5 @@
/* Iterate over previous character in UTF-16 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-set.c b/lib/unistr/u16-set.c
index 7bad3d61..58644b4f 100644
--- a/lib/unistr/u16-set.c
+++ b/lib/unistr/u16-set.c
@@ -1,5 +1,5 @@
/* Fill UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-startswith.c b/lib/unistr/u16-startswith.c
index 22a5d5fe..feb1ebda 100644
--- a/lib/unistr/u16-startswith.c
+++ b/lib/unistr/u16-startswith.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-16 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-stpcpy.c b/lib/unistr/u16-stpcpy.c
index 59467f7a..d49737fa 100644
--- a/lib/unistr/u16-stpcpy.c
+++ b/lib/unistr/u16-stpcpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-16 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-stpncpy.c b/lib/unistr/u16-stpncpy.c
index 49c616ef..315dad73 100644
--- a/lib/unistr/u16-stpncpy.c
+++ b/lib/unistr/u16-stpncpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strcat.c b/lib/unistr/u16-strcat.c
index 8b358683..9d933e12 100644
--- a/lib/unistr/u16-strcat.c
+++ b/lib/unistr/u16-strcat.c
@@ -1,5 +1,5 @@
/* Concatenate UTF-16 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strchr.c b/lib/unistr/u16-strchr.c
index 1f8719fa..c89c1744 100644
--- a/lib/unistr/u16-strchr.c
+++ b/lib/unistr/u16-strchr.c
@@ -1,5 +1,5 @@
/* Search character in UTF-16 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-strcmp.c b/lib/unistr/u16-strcmp.c
index b781211a..f9033efa 100644
--- a/lib/unistr/u16-strcmp.c
+++ b/lib/unistr/u16-strcmp.c
@@ -1,5 +1,5 @@
/* Compare UTF-16 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strcoll.c b/lib/unistr/u16-strcoll.c
index 5a504bf4..2a6d757b 100644
--- a/lib/unistr/u16-strcoll.c
+++ b/lib/unistr/u16-strcoll.c
@@ -1,5 +1,5 @@
/* Compare UTF-16 strings using the collation rules of the current locale.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -29,5 +29,5 @@
#define FUNC u16_strcoll
#define UNIT uint16_t
#define U_STRCMP u16_strcmp
-#define U_STRCONV_TO_LOCALE u16_strconv_to_locale
+#define U_STRCONV_TO_ENCODING u16_strconv_to_encoding
#include "u-strcoll.h"
diff --git a/lib/unistr/u16-strcpy.c b/lib/unistr/u16-strcpy.c
index ecde75da..5deb7660 100644
--- a/lib/unistr/u16-strcpy.c
+++ b/lib/unistr/u16-strcpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strcspn.c b/lib/unistr/u16-strcspn.c
index b18bb374..52309b31 100644
--- a/lib/unistr/u16-strcspn.c
+++ b/lib/unistr/u16-strcspn.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strdup.c b/lib/unistr/u16-strdup.c
index 4a944513..61144d70 100644
--- a/lib/unistr/u16-strdup.c
+++ b/lib/unistr/u16-strdup.c
@@ -1,5 +1,5 @@
/* Copy UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strlen.c b/lib/unistr/u16-strlen.c
index da613c2e..6a96c91e 100644
--- a/lib/unistr/u16-strlen.c
+++ b/lib/unistr/u16-strlen.c
@@ -1,5 +1,5 @@
/* Determine length of UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strmblen.c b/lib/unistr/u16-strmblen.c
index f07ca0b9..1004ef1d 100644
--- a/lib/unistr/u16-strmblen.c
+++ b/lib/unistr/u16-strmblen.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-16 string.
- Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-strmbtouc.c b/lib/unistr/u16-strmbtouc.c
index 9aa5d43c..89661e7b 100644
--- a/lib/unistr/u16-strmbtouc.c
+++ b/lib/unistr/u16-strmbtouc.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-16 string.
- Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-strncat.c b/lib/unistr/u16-strncat.c
index 70827160..cf1c4f9d 100644
--- a/lib/unistr/u16-strncat.c
+++ b/lib/unistr/u16-strncat.c
@@ -1,5 +1,5 @@
/* Concatenate UTF-16 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strncmp.c b/lib/unistr/u16-strncmp.c
index 3ed59c37..c76e7b72 100644
--- a/lib/unistr/u16-strncmp.c
+++ b/lib/unistr/u16-strncmp.c
@@ -1,5 +1,5 @@
/* Compare UTF-16 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strncpy.c b/lib/unistr/u16-strncpy.c
index bd2eb6f6..da7ebb82 100644
--- a/lib/unistr/u16-strncpy.c
+++ b/lib/unistr/u16-strncpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strnlen.c b/lib/unistr/u16-strnlen.c
index 49e6d58c..4940e026 100644
--- a/lib/unistr/u16-strnlen.c
+++ b/lib/unistr/u16-strnlen.c
@@ -1,5 +1,5 @@
/* Determine bounded length of UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strpbrk.c b/lib/unistr/u16-strpbrk.c
index 45353fa0..60ec1227 100644
--- a/lib/unistr/u16-strpbrk.c
+++ b/lib/unistr/u16-strpbrk.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strrchr.c b/lib/unistr/u16-strrchr.c
index 3cf3de2a..3179a8d9 100644
--- a/lib/unistr/u16-strrchr.c
+++ b/lib/unistr/u16-strrchr.c
@@ -1,5 +1,5 @@
/* Search character in UTF-16 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u16-strspn.c b/lib/unistr/u16-strspn.c
index ab812ebc..4a0194fc 100644
--- a/lib/unistr/u16-strspn.c
+++ b/lib/unistr/u16-strspn.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-strstr.c b/lib/unistr/u16-strstr.c
index 54a74d62..5b87e81a 100644
--- a/lib/unistr/u16-strstr.c
+++ b/lib/unistr/u16-strstr.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-16 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -20,9 +20,18 @@
/* Specification. */
#include "unistr.h"
+#include "malloca.h"
+
/* FIXME: Maybe walking the string via u16_mblen is a win? */
-#define FUNC u16_strstr
#define UNIT uint16_t
+
+#define CANON_ELEMENT(c) c
+#include "str-kmp.h"
+
+#define FUNC u16_strstr
#define U_STRCHR u16_strchr
+#define U_STRMBTOUC u16_strmbtouc
+#define U_STRLEN u16_strlen
+#define U_STRNLEN u16_strnlen
#include "u-strstr.h"
diff --git a/lib/unistr/u16-strtok.c b/lib/unistr/u16-strtok.c
index f84c4655..df36cf76 100644
--- a/lib/unistr/u16-strtok.c
+++ b/lib/unistr/u16-strtok.c
@@ -1,5 +1,5 @@
/* Tokenize UTF-16 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -24,4 +24,5 @@
#define UNIT uint16_t
#define U_STRSPN u16_strspn
#define U_STRPBRK u16_strpbrk
+#define U_STRMBLEN u16_strmblen
#include "u-strtok.h"
diff --git a/lib/unistr/u16-to-u32.c b/lib/unistr/u16-to-u32.c
index 3544cde3..8acee502 100644
--- a/lib/unistr/u16-to-u32.c
+++ b/lib/unistr/u16-to-u32.c
@@ -1,5 +1,5 @@
/* Convert UTF-16 string to UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-to-u8.c b/lib/unistr/u16-to-u8.c
index f92cc930..3e93a948 100644
--- a/lib/unistr/u16-to-u8.c
+++ b/lib/unistr/u16-to-u8.c
@@ -1,5 +1,5 @@
/* Convert UTF-16 string to UTF-8 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-uctomb-aux.c b/lib/unistr/u16-uctomb-aux.c
index e2acc9ee..55d7a14e 100644
--- a/lib/unistr/u16-uctomb-aux.c
+++ b/lib/unistr/u16-uctomb-aux.c
@@ -1,5 +1,5 @@
/* Conversion UCS-4 to UTF-16.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u16-uctomb.c b/lib/unistr/u16-uctomb.c
index cbc1abcf..b93fa6f9 100644
--- a/lib/unistr/u16-uctomb.c
+++ b/lib/unistr/u16-uctomb.c
@@ -1,5 +1,5 @@
/* Store a character in UTF-16 string.
- Copyright (C) 2002, 2005-2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-check.c b/lib/unistr/u32-check.c
index 8c5f5180..53b722bc 100644
--- a/lib/unistr/u32-check.c
+++ b/lib/unistr/u32-check.c
@@ -1,5 +1,5 @@
/* Check UTF-32 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-chr.c b/lib/unistr/u32-chr.c
index 19002889..1470c22f 100644
--- a/lib/unistr/u32-chr.c
+++ b/lib/unistr/u32-chr.c
@@ -1,5 +1,5 @@
/* Search character in piece of UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-cmp.c b/lib/unistr/u32-cmp.c
index 36496f74..7799e380 100644
--- a/lib/unistr/u32-cmp.c
+++ b/lib/unistr/u32-cmp.c
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-cmp2.c b/lib/unistr/u32-cmp2.c
index 32476025..b4e0a1b2 100644
--- a/lib/unistr/u32-cmp2.c
+++ b/lib/unistr/u32-cmp2.c
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-32 strings.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-cpy-alloc.c b/lib/unistr/u32-cpy-alloc.c
index b9ebefe0..3e94298a 100644
--- a/lib/unistr/u32-cpy-alloc.c
+++ b/lib/unistr/u32-cpy-alloc.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-cpy.c b/lib/unistr/u32-cpy.c
index 747430be..ab5a451a 100644
--- a/lib/unistr/u32-cpy.c
+++ b/lib/unistr/u32-cpy.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-endswith.c b/lib/unistr/u32-endswith.c
index cee0d939..3960c8a9 100644
--- a/lib/unistr/u32-endswith.c
+++ b/lib/unistr/u32-endswith.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-32 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-mblen.c b/lib/unistr/u32-mblen.c
index 4530fe42..45a9f49f 100644
--- a/lib/unistr/u32-mblen.c
+++ b/lib/unistr/u32-mblen.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-mbsnlen.c b/lib/unistr/u32-mbsnlen.c
index 0a71293e..371959e5 100644
--- a/lib/unistr/u32-mbsnlen.c
+++ b/lib/unistr/u32-mbsnlen.c
@@ -1,5 +1,5 @@
/* Count characters in UTF-32 string.
- Copyright (C) 2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2007.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-mbtouc-unsafe.c b/lib/unistr/u32-mbtouc-unsafe.c
index 8dbbfe45..32448118 100644
--- a/lib/unistr/u32-mbtouc-unsafe.c
+++ b/lib/unistr/u32-mbtouc-unsafe.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-mbtouc.c b/lib/unistr/u32-mbtouc.c
index 05f7c774..5d4de518 100644
--- a/lib/unistr/u32-mbtouc.c
+++ b/lib/unistr/u32-mbtouc.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-mbtoucr.c b/lib/unistr/u32-mbtoucr.c
index 83f77820..faa5695a 100644
--- a/lib/unistr/u32-mbtoucr.c
+++ b/lib/unistr/u32-mbtoucr.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-32 string, returning an error code.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-move.c b/lib/unistr/u32-move.c
index 6362d853..fc2aab1f 100644
--- a/lib/unistr/u32-move.c
+++ b/lib/unistr/u32-move.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-next.c b/lib/unistr/u32-next.c
index 987fb027..8be330ed 100644
--- a/lib/unistr/u32-next.c
+++ b/lib/unistr/u32-next.c
@@ -1,5 +1,5 @@
/* Iterate over next character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-prev.c b/lib/unistr/u32-prev.c
index f20b1b79..127029d8 100644
--- a/lib/unistr/u32-prev.c
+++ b/lib/unistr/u32-prev.c
@@ -1,5 +1,5 @@
/* Iterate over previous character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-set.c b/lib/unistr/u32-set.c
index 5c517cd1..a60f5a80 100644
--- a/lib/unistr/u32-set.c
+++ b/lib/unistr/u32-set.c
@@ -1,5 +1,5 @@
/* Fill UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-startswith.c b/lib/unistr/u32-startswith.c
index 7fcb5979..817310ff 100644
--- a/lib/unistr/u32-startswith.c
+++ b/lib/unistr/u32-startswith.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-32 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-stpcpy.c b/lib/unistr/u32-stpcpy.c
index 3cbfa924..fdd5f5ed 100644
--- a/lib/unistr/u32-stpcpy.c
+++ b/lib/unistr/u32-stpcpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-stpncpy.c b/lib/unistr/u32-stpncpy.c
index e55891e7..124101ec 100644
--- a/lib/unistr/u32-stpncpy.c
+++ b/lib/unistr/u32-stpncpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strcat.c b/lib/unistr/u32-strcat.c
index 4e26bb48..5e51c584 100644
--- a/lib/unistr/u32-strcat.c
+++ b/lib/unistr/u32-strcat.c
@@ -1,5 +1,5 @@
/* Concatenate UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strchr.c b/lib/unistr/u32-strchr.c
index b5df7b7d..355ad27f 100644
--- a/lib/unistr/u32-strchr.c
+++ b/lib/unistr/u32-strchr.c
@@ -1,5 +1,5 @@
/* Search character in UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strcmp.c b/lib/unistr/u32-strcmp.c
index 5f336ce9..16f9b1e0 100644
--- a/lib/unistr/u32-strcmp.c
+++ b/lib/unistr/u32-strcmp.c
@@ -1,5 +1,5 @@
/* Compare UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strcoll.c b/lib/unistr/u32-strcoll.c
index 97488553..7865f4cf 100644
--- a/lib/unistr/u32-strcoll.c
+++ b/lib/unistr/u32-strcoll.c
@@ -1,5 +1,5 @@
/* Compare UTF-32 strings using the collation rules of the current locale.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -29,5 +29,5 @@
#define FUNC u32_strcoll
#define UNIT uint32_t
#define U_STRCMP u32_strcmp
-#define U_STRCONV_TO_LOCALE u32_strconv_to_locale
+#define U_STRCONV_TO_ENCODING u32_strconv_to_encoding
#include "u-strcoll.h"
diff --git a/lib/unistr/u32-strcpy.c b/lib/unistr/u32-strcpy.c
index f3c70e6c..9f325bcb 100644
--- a/lib/unistr/u32-strcpy.c
+++ b/lib/unistr/u32-strcpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strcspn.c b/lib/unistr/u32-strcspn.c
index 595d32e9..963c7326 100644
--- a/lib/unistr/u32-strcspn.c
+++ b/lib/unistr/u32-strcspn.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strdup.c b/lib/unistr/u32-strdup.c
index da4cc5aa..3af46228 100644
--- a/lib/unistr/u32-strdup.c
+++ b/lib/unistr/u32-strdup.c
@@ -1,5 +1,5 @@
/* Copy UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strlen.c b/lib/unistr/u32-strlen.c
index 95d32abd..b87f55df 100644
--- a/lib/unistr/u32-strlen.c
+++ b/lib/unistr/u32-strlen.c
@@ -1,5 +1,5 @@
/* Determine length of UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strmblen.c b/lib/unistr/u32-strmblen.c
index 54b78d7f..362ea48f 100644
--- a/lib/unistr/u32-strmblen.c
+++ b/lib/unistr/u32-strmblen.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strmbtouc.c b/lib/unistr/u32-strmbtouc.c
index 4c067b9d..cfa89d02 100644
--- a/lib/unistr/u32-strmbtouc.c
+++ b/lib/unistr/u32-strmbtouc.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strncat.c b/lib/unistr/u32-strncat.c
index c9260b0f..4758f4d7 100644
--- a/lib/unistr/u32-strncat.c
+++ b/lib/unistr/u32-strncat.c
@@ -1,5 +1,5 @@
/* Concatenate UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strncmp.c b/lib/unistr/u32-strncmp.c
index dcbb9352..f4222f6a 100644
--- a/lib/unistr/u32-strncmp.c
+++ b/lib/unistr/u32-strncmp.c
@@ -1,5 +1,5 @@
/* Compare UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strncpy.c b/lib/unistr/u32-strncpy.c
index 9a54f97e..1f4c31bc 100644
--- a/lib/unistr/u32-strncpy.c
+++ b/lib/unistr/u32-strncpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strnlen.c b/lib/unistr/u32-strnlen.c
index 8102ac7f..df3601fa 100644
--- a/lib/unistr/u32-strnlen.c
+++ b/lib/unistr/u32-strnlen.c
@@ -1,5 +1,5 @@
/* Determine bounded length of UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strpbrk.c b/lib/unistr/u32-strpbrk.c
index 2c4b6bc8..c9cca79c 100644
--- a/lib/unistr/u32-strpbrk.c
+++ b/lib/unistr/u32-strpbrk.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strrchr.c b/lib/unistr/u32-strrchr.c
index f0030e5c..242d4b8d 100644
--- a/lib/unistr/u32-strrchr.c
+++ b/lib/unistr/u32-strrchr.c
@@ -1,5 +1,5 @@
/* Search character in UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strspn.c b/lib/unistr/u32-strspn.c
index e2571d5b..2541e790 100644
--- a/lib/unistr/u32-strspn.c
+++ b/lib/unistr/u32-strspn.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-strstr.c b/lib/unistr/u32-strstr.c
index cad06fcc..c8abe5ba 100644
--- a/lib/unistr/u32-strstr.c
+++ b/lib/unistr/u32-strstr.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-32 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -20,7 +20,15 @@
/* Specification. */
#include "unistr.h"
-#define FUNC u32_strstr
+#include "malloca.h"
+
#define UNIT uint32_t
+
+#define CANON_ELEMENT(c) c
+#include "str-kmp.h"
+
+#define FUNC u32_strstr
#define U_STRCHR u32_strchr
+#define U_STRLEN u32_strlen
+#define U_STRNLEN u32_strnlen
#include "u-strstr.h"
diff --git a/lib/unistr/u32-strtok.c b/lib/unistr/u32-strtok.c
index 067122f0..f8ef999f 100644
--- a/lib/unistr/u32-strtok.c
+++ b/lib/unistr/u32-strtok.c
@@ -1,5 +1,5 @@
/* Tokenize UTF-32 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -24,4 +24,5 @@
#define UNIT uint32_t
#define U_STRSPN u32_strspn
#define U_STRPBRK u32_strpbrk
+#define U_STRMBLEN u32_strmblen
#include "u-strtok.h"
diff --git a/lib/unistr/u32-to-u16.c b/lib/unistr/u32-to-u16.c
index be327703..e02e7825 100644
--- a/lib/unistr/u32-to-u16.c
+++ b/lib/unistr/u32-to-u16.c
@@ -1,5 +1,5 @@
/* Convert UTF-32 string to UTF-16 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-to-u8.c b/lib/unistr/u32-to-u8.c
index 36710d3c..0c4f4cb2 100644
--- a/lib/unistr/u32-to-u8.c
+++ b/lib/unistr/u32-to-u8.c
@@ -1,5 +1,5 @@
/* Convert UTF-32 string to UTF-8 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u32-uctomb.c b/lib/unistr/u32-uctomb.c
index 3ac58b43..18b2f0da 100644
--- a/lib/unistr/u32-uctomb.c
+++ b/lib/unistr/u32-uctomb.c
@@ -1,5 +1,5 @@
/* Store a character in UTF-32 string.
- Copyright (C) 2002, 2005-2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-check.c b/lib/unistr/u8-check.c
index 368042b0..53ece761 100644
--- a/lib/unistr/u8-check.c
+++ b/lib/unistr/u8-check.c
@@ -1,5 +1,5 @@
/* Check UTF-8 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -27,7 +27,7 @@ u8_check (const uint8_t *s, size_t n)
while (s < s_end)
{
- /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
uint8_t c = *s;
if (c < 0x80)
diff --git a/lib/unistr/u8-chr.c b/lib/unistr/u8-chr.c
index 435d1be6..c7779d2a 100644
--- a/lib/unistr/u8-chr.c
+++ b/lib/unistr/u8-chr.c
@@ -1,5 +1,5 @@
/* Search character in piece of UTF-8 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
@@ -21,68 +21,181 @@
/* Specification. */
#include "unistr.h"
+#include <string.h>
+
uint8_t *
u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
{
- uint8_t c[6];
-
if (uc < 0x80)
{
uint8_t c0 = uc;
- for (; n > 0; s++, n--)
- {
- if (*s == c0)
- return (uint8_t *) s;
- }
+ return (uint8_t *) memchr ((const char *) s, c0, n);
}
- else
- switch (u8_uctomb_aux (c, uc, 6))
+
+ {
+ uint8_t c[6];
+ size_t uc_size;
+ uc_size = u8_uctomb_aux (c, uc, 6);
+
+ if (n < uc_size)
+ return NULL;
+
+ /* For multibyte character matching we use a Boyer-Moore like
+ algorithm that searches for the last byte, skipping multi-byte
+ jumps, and matches back from there.
+
+ Instead of using a table as is usual for Boyer-Moore, we compare
+ the candidate last byte s[UC_SIZE-1] with each of the possible
+ bytes in the UTF-8 representation of UC. If the final byte does
+ not match, we will perform up to UC_SIZE comparisons per memory
+ load---but each comparison lets us skip one byte in the input!
+
+ If the final byte matches but the whole character does not, the
+ "real" Boyer-Moore algorithm is only approximated: u8_chr just looks
+ for other cN equal to the final byte and uses those to try realigning to
+ another possible match. For example, when searching for 0xF0
+ 0xAA 0xBB 0xAA it will always skip forward by two bytes, even if
+ the character in the string was for example 0xF1 0xAA 0xBB 0xAA.
+ The advantage of this scheme is that the skip count after a failed
+ match can be computed outside the loop, and that it keeps the
+ complexity low for a pretty rare case. In particular, since c[0]
+ is never between 0x80 and 0xBF, c[0] is never equal to c[UC_SIZE-1]
+ and this is optimal for two-byte UTF-8 characters. */
+ switch (uc_size)
{
case 2:
- if (n > 1)
- {
- uint8_t c0 = c[0];
- uint8_t c1 = c[1];
-
- for (n--; n > 0; s++, n--)
- {
- if (*s == c0 && s[1] == c1)
- return (uint8_t *) s;
- }
- }
- break;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ const uint8_t *end = s + n - 1;
+
+ do
+ {
+ /* Here s < end.
+ Test whether s[0..1] == { c0, c1 }. */
+ uint8_t s1 = s[1];
+ if (s1 == c1)
+ {
+ if (*s == c0)
+ return (uint8_t *) s;
+ else
+ /* Skip the search at s + 1, because s[1] == c1 < c0. */
+ s += 2;
+ }
+ else
+ {
+ if (s1 == c0)
+ s++;
+ else
+ /* Skip the search at s + 1, because s[1] != c0. */
+ s += 2;
+ }
+ }
+ while (s < end);
+ break;
+ }
case 3:
- if (n > 2)
- {
- uint8_t c0 = c[0];
- uint8_t c1 = c[1];
- uint8_t c2 = c[2];
-
- for (n -= 2; n > 0; s++, n--)
- {
- if (*s == c0 && s[1] == c1 && s[2] == c2)
- return (uint8_t *) s;
- }
- }
- break;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ const uint8_t *end = s + n - 2;
+ size_t skip;
+
+ if (c2 == c1)
+ skip = 1;
+ else
+ skip = 3;
+
+ do
+ {
+ /* Here s < end.
+ Test whether s[0..2] == { c0, c1, c2 }. */
+ uint8_t s2 = s[2];
+ if (s2 == c2)
+ {
+ if (s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c2 != c1:
+ Skip the search at s + 1, because s[2] == c2 != c1.
+ Skip the search at s + 2, because s[2] == c2 < c0. */
+ s += skip;
+ }
+ else
+ {
+ if (s2 == c1)
+ s++;
+ else if (s2 == c0)
+ /* Skip the search at s + 1, because s[2] != c1. */
+ s += 2;
+ else
+ /* Skip the search at s + 1, because s[2] != c1.
+ Skip the search at s + 2, because s[2] != c0. */
+ s += 3;
+ }
+ }
+ while (s < end);
+ break;
+ }
case 4:
- if (n > 3)
- {
- uint8_t c0 = c[0];
- uint8_t c1 = c[1];
- uint8_t c2 = c[2];
- uint8_t c3 = c[3];
-
- for (n -= 3; n > 0; s++, n--)
- {
- if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3)
- return (uint8_t *) s;
- }
- }
- break;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ uint8_t c3 = c[3];
+ const uint8_t *end = s + n - 3;
+ size_t skip;
+
+ if (c3 == c2)
+ skip = 1;
+ else if (c3 == c1)
+ skip = 2;
+ else
+ skip = 4;
+
+ do
+ {
+ /* Here s < end.
+ Test whether s[0..3] == { c0, c1, c2, c3 }. */
+ uint8_t s3 = s[3];
+ if (s3 == c3)
+ {
+ if (s[2] == c2 && s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c3 != c2:
+ Skip the search at s + 1, because s[3] == c3 != c2.
+ If c3 != c1:
+ Skip the search at s + 2, because s[3] == c3 != c1.
+ Skip the search at s + 3, because s[3] == c3 < c0. */
+ s += skip;
+ }
+ else
+ {
+ if (s3 == c2)
+ s++;
+ else if (s3 == c1)
+ /* Skip the search at s + 1, because s[3] != c2. */
+ s += 2;
+ else if (s3 == c0)
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1. */
+ s += 3;
+ else
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1.
+ Skip the search at s + 3, because s[3] != c0. */
+ s += 4;
+ }
+ }
+ while (s < end);
+ break;
+ }
}
- return NULL;
+ return NULL;
+ }
}
diff --git a/lib/unistr/u8-cmp.c b/lib/unistr/u8-cmp.c
index d021b1ab..402de810 100644
--- a/lib/unistr/u8-cmp.c
+++ b/lib/unistr/u8-cmp.c
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-cmp2.c b/lib/unistr/u8-cmp2.c
index 8a97db40..dbbc8ede 100644
--- a/lib/unistr/u8-cmp2.c
+++ b/lib/unistr/u8-cmp2.c
@@ -1,5 +1,5 @@
/* Compare pieces of UTF-8 strings.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-cpy-alloc.c b/lib/unistr/u8-cpy-alloc.c
index 18845165..1e3910a3 100644
--- a/lib/unistr/u8-cpy-alloc.c
+++ b/lib/unistr/u8-cpy-alloc.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-cpy.c b/lib/unistr/u8-cpy.c
index 2f29200e..683200c4 100644
--- a/lib/unistr/u8-cpy.c
+++ b/lib/unistr/u8-cpy.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-endswith.c b/lib/unistr/u8-endswith.c
index 833b3e18..a835dc48 100644
--- a/lib/unistr/u8-endswith.c
+++ b/lib/unistr/u8-endswith.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-mblen.c b/lib/unistr/u8-mblen.c
index f6a20271..131149b8 100644
--- a/lib/unistr/u8-mblen.c
+++ b/lib/unistr/u8-mblen.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-8 string.
- Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
@@ -26,7 +26,7 @@ u8_mblen (const uint8_t *s, size_t n)
{
if (n > 0)
{
- /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
uint8_t c = *s;
if (c < 0x80)
diff --git a/lib/unistr/u8-mbsnlen.c b/lib/unistr/u8-mbsnlen.c
index 9ddc42ea..ce13c387 100644
--- a/lib/unistr/u8-mbsnlen.c
+++ b/lib/unistr/u8-mbsnlen.c
@@ -1,5 +1,5 @@
/* Count characters in UTF-8 string.
- Copyright (C) 2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2007.
This program is free software: you can redistribute it and/or modify it
@@ -33,7 +33,9 @@ u8_mbsnlen (const uint8_t *s, size_t n)
characters++;
if (count == -2)
break;
- if (count <= 0)
+ if (count < 0)
+ count = u8_mbtouc (&uc, s, n);
+ else if (count == 0)
count = 1;
s += count;
n -= count;
diff --git a/lib/unistr/u8-mbtouc-aux.c b/lib/unistr/u8-mbtouc-aux.c
index c9975896..834725fe 100644
--- a/lib/unistr/u8-mbtouc-aux.c
+++ b/lib/unistr/u8-mbtouc-aux.c
@@ -1,5 +1,5 @@
/* Conversion UTF-8 to UCS-4.
- Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
@@ -45,21 +45,32 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ return 1;
}
}
else if (c < 0xf0)
{
if (n >= 3)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (c >= 0xe1 || s[1] >= 0xa0)
- && (c != 0xed || s[1] < 0xa0))
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x0f) << 12)
- | ((unsigned int) (s[1] ^ 0x80) << 6)
- | (unsigned int) (s[2] ^ 0x80);
- return 3;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
}
@@ -67,26 +78,45 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
}
}
else if (c < 0xf8)
{
if (n >= 4)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40
- && (c >= 0xf1 || s[1] >= 0x90)
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
#if 1
- && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
- )
- {
- *puc = ((unsigned int) (c & 0x07) << 18)
- | ((unsigned int) (s[1] ^ 0x80) << 12)
- | ((unsigned int) (s[2] ^ 0x80) << 6)
- | (unsigned int) (s[3] ^ 0x80);
- return 4;
+ )
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
}
@@ -94,7 +124,12 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
}
}
#if 0
@@ -102,16 +137,37 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
if (n >= 5)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (c >= 0xf9 || s[1] >= 0x88))
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x03) << 24)
- | ((unsigned int) (s[1] ^ 0x80) << 18)
- | ((unsigned int) (s[2] ^ 0x80) << 12)
- | ((unsigned int) (s[3] ^ 0x80) << 6)
- | (unsigned int) (s[4] ^ 0x80);
- return 5;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
}
@@ -126,18 +182,44 @@ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
if (n >= 6)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (s[5] ^ 0x80) < 0x40
- && (c >= 0xfd || s[1] >= 0x84))
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x01) << 30)
- | ((unsigned int) (s[1] ^ 0x80) << 24)
- | ((unsigned int) (s[2] ^ 0x80) << 18)
- | ((unsigned int) (s[3] ^ 0x80) << 12)
- | ((unsigned int) (s[4] ^ 0x80) << 6)
- | (unsigned int) (s[5] ^ 0x80);
- return 6;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
}
diff --git a/lib/unistr/u8-mbtouc-unsafe-aux.c b/lib/unistr/u8-mbtouc-unsafe-aux.c
index 47590e39..b406d3eb 100644
--- a/lib/unistr/u8-mbtouc-unsafe-aux.c
+++ b/lib/unistr/u8-mbtouc-unsafe-aux.c
@@ -1,5 +1,5 @@
/* Conversion UTF-8 to UCS-4.
- Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
@@ -41,13 +41,15 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
| (unsigned int) (s[1] ^ 0x80);
return 2;
}
+#if CONFIG_UNICODE_SAFETY
/* invalid multibyte character */
+#endif
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ return 1;
}
}
else if (c < 0xf0)
@@ -55,23 +57,39 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 3)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (c >= 0xe1 || s[1] >= 0xa0)
- && (c != 0xed || s[1] < 0xa0))
-#endif
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x0f) << 12)
- | ((unsigned int) (s[1] ^ 0x80) << 6)
- | (unsigned int) (s[2] ^ 0x80);
- return 3;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
}
}
else if (c < 0xf8)
@@ -79,28 +97,51 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 4)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40
- && (c >= 0xf1 || s[1] >= 0x90)
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
#if 1
- && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
- )
+ )
#endif
- {
- *puc = ((unsigned int) (c & 0x07) << 18)
- | ((unsigned int) (s[1] ^ 0x80) << 12)
- | ((unsigned int) (s[2] ^ 0x80) << 6)
- | (unsigned int) (s[3] ^ 0x80);
- return 4;
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
}
}
#if 0
@@ -109,19 +150,42 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 5)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (c >= 0xf9 || s[1] >= 0x88))
-#endif
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x03) << 24)
- | ((unsigned int) (s[1] ^ 0x80) << 18)
- | ((unsigned int) (s[2] ^ 0x80) << 12)
- | ((unsigned int) (s[3] ^ 0x80) << 6)
- | (unsigned int) (s[4] ^ 0x80);
- return 5;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
@@ -135,21 +199,49 @@ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 6)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (s[5] ^ 0x80) < 0x40
- && (c >= 0xfd || s[1] >= 0x84))
-#endif
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x01) << 30)
- | ((unsigned int) (s[1] ^ 0x80) << 24)
- | ((unsigned int) (s[2] ^ 0x80) << 18)
- | ((unsigned int) (s[3] ^ 0x80) << 12)
- | ((unsigned int) (s[4] ^ 0x80) << 6)
- | (unsigned int) (s[5] ^ 0x80);
- return 6;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
diff --git a/lib/unistr/u8-mbtouc-unsafe.c b/lib/unistr/u8-mbtouc-unsafe.c
index 41583f96..01d12dc4 100644
--- a/lib/unistr/u8-mbtouc-unsafe.c
+++ b/lib/unistr/u8-mbtouc-unsafe.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-8 string.
- Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
@@ -52,13 +52,15 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
| (unsigned int) (s[1] ^ 0x80);
return 2;
}
+#if CONFIG_UNICODE_SAFETY
/* invalid multibyte character */
+#endif
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ return 1;
}
}
else if (c < 0xf0)
@@ -66,23 +68,39 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 3)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (c >= 0xe1 || s[1] >= 0xa0)
- && (c != 0xed || s[1] < 0xa0))
-#endif
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x0f) << 12)
- | ((unsigned int) (s[1] ^ 0x80) << 6)
- | (unsigned int) (s[2] ^ 0x80);
- return 3;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
}
}
else if (c < 0xf8)
@@ -90,28 +108,51 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 4)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40
- && (c >= 0xf1 || s[1] >= 0x90)
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
#if 1
- && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
- )
+ )
#endif
- {
- *puc = ((unsigned int) (c & 0x07) << 18)
- | ((unsigned int) (s[1] ^ 0x80) << 12)
- | ((unsigned int) (s[2] ^ 0x80) << 6)
- | (unsigned int) (s[3] ^ 0x80);
- return 4;
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
}
}
#if 0
@@ -120,19 +161,42 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 5)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (c >= 0xf9 || s[1] >= 0x88))
-#endif
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x03) << 24)
- | ((unsigned int) (s[1] ^ 0x80) << 18)
- | ((unsigned int) (s[2] ^ 0x80) << 12)
- | ((unsigned int) (s[3] ^ 0x80) << 6)
- | (unsigned int) (s[4] ^ 0x80);
- return 5;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
@@ -146,21 +210,49 @@ u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
if (n >= 6)
{
#if CONFIG_UNICODE_SAFETY
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (s[5] ^ 0x80) < 0x40
- && (c >= 0xfd || s[1] >= 0x84))
-#endif
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x01) << 30)
- | ((unsigned int) (s[1] ^ 0x80) << 24)
- | ((unsigned int) (s[2] ^ 0x80) << 18)
- | ((unsigned int) (s[3] ^ 0x80) << 12)
- | ((unsigned int) (s[4] ^ 0x80) << 6)
- | (unsigned int) (s[5] ^ 0x80);
- return 6;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
+#endif
}
else
{
diff --git a/lib/unistr/u8-mbtouc.c b/lib/unistr/u8-mbtouc.c
index 96cd5b74..dc4607f6 100644
--- a/lib/unistr/u8-mbtouc.c
+++ b/lib/unistr/u8-mbtouc.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-8 string.
- Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
@@ -55,21 +55,32 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ return 1;
}
}
else if (c < 0xf0)
{
if (n >= 3)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (c >= 0xe1 || s[1] >= 0xa0)
- && (c != 0xed || s[1] < 0xa0))
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x0f) << 12)
- | ((unsigned int) (s[1] ^ 0x80) << 6)
- | (unsigned int) (s[2] ^ 0x80);
- return 3;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
}
@@ -77,26 +88,45 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
}
}
else if (c < 0xf8)
{
if (n >= 4)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40
- && (c >= 0xf1 || s[1] >= 0x90)
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
#if 1
- && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
- )
- {
- *puc = ((unsigned int) (c & 0x07) << 18)
- | ((unsigned int) (s[1] ^ 0x80) << 12)
- | ((unsigned int) (s[2] ^ 0x80) << 6)
- | (unsigned int) (s[3] ^ 0x80);
- return 4;
+ )
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
}
/* invalid multibyte character */
}
@@ -104,7 +134,12 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
/* incomplete multibyte character */
*puc = 0xfffd;
- return n;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
}
}
#if 0
@@ -112,16 +147,37 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
if (n >= 5)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (c >= 0xf9 || s[1] >= 0x88))
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x03) << 24)
- | ((unsigned int) (s[1] ^ 0x80) << 18)
- | ((unsigned int) (s[2] ^ 0x80) << 12)
- | ((unsigned int) (s[3] ^ 0x80) << 6)
- | (unsigned int) (s[4] ^ 0x80);
- return 5;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
}
@@ -136,18 +192,44 @@ u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
if (n >= 6)
{
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (s[5] ^ 0x80) < 0x40
- && (c >= 0xfd || s[1] >= 0x84))
+ if ((s[1] ^ 0x80) < 0x40)
{
- *puc = ((unsigned int) (c & 0x01) << 30)
- | ((unsigned int) (s[1] ^ 0x80) << 24)
- | ((unsigned int) (s[2] ^ 0x80) << 18)
- | ((unsigned int) (s[3] ^ 0x80) << 12)
- | ((unsigned int) (s[4] ^ 0x80) << 6)
- | (unsigned int) (s[5] ^ 0x80);
- return 6;
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
}
/* invalid multibyte character */
}
diff --git a/lib/unistr/u8-mbtoucr.c b/lib/unistr/u8-mbtoucr.c
index a749c48c..3d8c05f7 100644
--- a/lib/unistr/u8-mbtoucr.c
+++ b/lib/unistr/u8-mbtoucr.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-8 string, returning an error code.
- Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-move.c b/lib/unistr/u8-move.c
index 5c48411b..5c872ca7 100644
--- a/lib/unistr/u8-move.c
+++ b/lib/unistr/u8-move.c
@@ -1,5 +1,5 @@
/* Copy piece of UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-next.c b/lib/unistr/u8-next.c
index 34249aa5..8c218a20 100644
--- a/lib/unistr/u8-next.c
+++ b/lib/unistr/u8-next.c
@@ -1,5 +1,5 @@
/* Iterate over next character in UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-prev.c b/lib/unistr/u8-prev.c
index 97a27f55..e01551e5 100644
--- a/lib/unistr/u8-prev.c
+++ b/lib/unistr/u8-prev.c
@@ -1,5 +1,5 @@
/* Iterate over previous character in UTF-8 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
const uint8_t *
u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start)
{
- /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
if (s != start)
{
uint8_t c_1 = s[-1];
diff --git a/lib/unistr/u8-set.c b/lib/unistr/u8-set.c
index 3cca23b2..5f9e5e62 100644
--- a/lib/unistr/u8-set.c
+++ b/lib/unistr/u8-set.c
@@ -1,5 +1,5 @@
/* Fill UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-startswith.c b/lib/unistr/u8-startswith.c
index 4cc436a2..b22fed41 100644
--- a/lib/unistr/u8-startswith.c
+++ b/lib/unistr/u8-startswith.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-stpcpy.c b/lib/unistr/u8-stpcpy.c
index 0faba413..8fb3ca56 100644
--- a/lib/unistr/u8-stpcpy.c
+++ b/lib/unistr/u8-stpcpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-stpncpy.c b/lib/unistr/u8-stpncpy.c
index a456beac..62d4972e 100644
--- a/lib/unistr/u8-stpncpy.c
+++ b/lib/unistr/u8-stpncpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
/* Specification. */
#include "unistr.h"
-#if __GLIBC__ >= 2
+#if __GLIBC__ >= 2 && !defined __UCLIBC__
# include <string.h>
diff --git a/lib/unistr/u8-strcat.c b/lib/unistr/u8-strcat.c
index 2b54badf..b6892017 100644
--- a/lib/unistr/u8-strcat.c
+++ b/lib/unistr/u8-strcat.c
@@ -1,5 +1,5 @@
/* Concatenate UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strchr.c b/lib/unistr/u8-strchr.c
index 3be14c79..dd1cb84c 100644
--- a/lib/unistr/u8-strchr.c
+++ b/lib/unistr/u8-strchr.c
@@ -1,5 +1,5 @@
/* Search character in UTF-8 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
@@ -21,6 +21,8 @@
/* Specification. */
#include "unistr.h"
+#include <string.h>
+
uint8_t *
u8_strchr (const uint8_t *s, ucs4_t uc)
{
@@ -30,72 +32,209 @@ u8_strchr (const uint8_t *s, ucs4_t uc)
{
uint8_t c0 = uc;
- for (;; s++)
+ if (false)
+ {
+ /* Unoptimized code. */
+ for (;;)
+ {
+ uint8_t s0 = *s;
+ if (s0 == c0)
+ return (uint8_t *) s;
+ s++;
+ if (s0 == 0)
+ break;
+ }
+ }
+ else
{
- if (*s == c0)
- break;
- if (*s == 0)
- goto notfound;
+ /* Optimized code.
+ strchr() is often so well optimized that it's worth the
+ added function call. */
+ return (uint8_t *) strchr ((const char *) s, c0);
}
- return (uint8_t *) s;
}
else
+ /* Loops equivalent to strstr, optimized for a specific length (2, 3, 4)
+ of the needle. We use an algorithm similar to Boyer-Moore which
+ is documented in lib/unistr/u8-chr.c. There is additional
+ complication because we need to check after every byte for
+ a NUL byte, but the idea is the same. */
switch (u8_uctomb_aux (c, uc, 6))
{
case 2:
- if (*s == 0)
- goto notfound;
+ if (*s == 0 || s[1] == 0)
+ break;
{
uint8_t c0 = c[0];
uint8_t c1 = c[1];
+ /* Search for { c0, c1 }. */
+ uint8_t s1 = s[1];
- for (;; s++)
+ for (;;)
{
+ /* Here s[0] != 0, s[1] != 0.
+ Test whether s[0..1] == { c0, c1 }. */
+ if (s1 == c1)
+ {
+ if (*s == c0)
+ return (uint8_t *) s;
+ else
+ /* Skip the search at s + 1, because s[1] == c1 < c0. */
+ goto case2_skip2;
+ }
+ else
+ {
+ if (s1 == c0)
+ goto case2_skip1;
+ else
+ /* Skip the search at s + 1, because s[1] != c0. */
+ goto case2_skip2;
+ }
+ case2_skip2:
+ s++;
+ s1 = s[1];
+ if (s[1] == 0)
+ break;
+ case2_skip1:
+ s++;
+ s1 = s[1];
if (s[1] == 0)
- goto notfound;
- if (*s == c0 && s[1] == c1)
break;
}
- return (uint8_t *) s;
}
+ break;
case 3:
- if (*s == 0 || s[1] == 0)
- goto notfound;
+ if (*s == 0 || s[1] == 0 || s[2] == 0)
+ break;
{
uint8_t c0 = c[0];
uint8_t c1 = c[1];
uint8_t c2 = c[2];
+ /* Search for { c0, c1, c2 }. */
+ uint8_t s2 = s[2];
- for (;; s++)
+ for (;;)
{
+ /* Here s[0] != 0, s[1] != 0, s[2] != 0.
+ Test whether s[0..2] == { c0, c1, c2 }. */
+ if (s2 == c2)
+ {
+ if (s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c2 != c1:
+ Skip the search at s + 1, because s[2] == c2 != c1.
+ Skip the search at s + 2, because s[2] == c2 < c0. */
+ if (c2 == c1)
+ goto case3_skip1;
+ else
+ goto case3_skip3;
+ }
+ else
+ {
+ if (s2 == c1)
+ goto case3_skip1;
+ else if (s2 == c0)
+ /* Skip the search at s + 1, because s[2] != c1. */
+ goto case3_skip2;
+ else
+ /* Skip the search at s + 1, because s[2] != c1.
+ Skip the search at s + 2, because s[2] != c0. */
+ goto case3_skip3;
+ }
+ case3_skip3:
+ s++;
+ s2 = s[2];
+ if (s[2] == 0)
+ break;
+ case3_skip2:
+ s++;
+ s2 = s[2];
+ if (s[2] == 0)
+ break;
+ case3_skip1:
+ s++;
+ s2 = s[2];
if (s[2] == 0)
- goto notfound;
- if (*s == c0 && s[1] == c1 && s[2] == c2)
break;
}
- return (uint8_t *) s;
}
+ break;
case 4:
- if (*s == 0 || s[1] == 0 || s[2] == 0)
- goto notfound;
+ if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
+ break;
{
uint8_t c0 = c[0];
uint8_t c1 = c[1];
uint8_t c2 = c[2];
uint8_t c3 = c[3];
+ /* Search for { c0, c1, c2, c3 }. */
+ uint8_t s3 = s[3];
- for (;; s++)
+ for (;;)
{
+ /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0.
+ Test whether s[0..3] == { c0, c1, c2, c3 }. */
+ if (s3 == c3)
+ {
+ if (s[2] == c2 && s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c3 != c2:
+ Skip the search at s + 1, because s[3] == c3 != c2.
+ If c3 != c1:
+ Skip the search at s + 2, because s[3] == c3 != c1.
+ Skip the search at s + 3, because s[3] == c3 < c0. */
+ if (c3 == c2)
+ goto case4_skip1;
+ else if (c3 == c1)
+ goto case4_skip2;
+ else
+ goto case4_skip4;
+ }
+ else
+ {
+ if (s3 == c2)
+ goto case4_skip1;
+ else if (s3 == c1)
+ /* Skip the search at s + 1, because s[3] != c2. */
+ goto case4_skip2;
+ else if (s3 == c0)
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1. */
+ goto case4_skip3;
+ else
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1.
+ Skip the search at s + 3, because s[3] != c0. */
+ goto case4_skip4;
+ }
+ case4_skip4:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ case4_skip3:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ case4_skip2:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ case4_skip1:
+ s++;
+ s3 = s[3];
if (s[3] == 0)
- goto notfound;
- if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3)
break;
}
- return (uint8_t *) s;
}
+ break;
}
-notfound:
+
return NULL;
}
diff --git a/lib/unistr/u8-strcmp.c b/lib/unistr/u8-strcmp.c
index 82a44147..93f06ad1 100644
--- a/lib/unistr/u8-strcmp.c
+++ b/lib/unistr/u8-strcmp.c
@@ -1,5 +1,5 @@
/* Compare UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strcoll.c b/lib/unistr/u8-strcoll.c
index 9ffa135e..8a2a3075 100644
--- a/lib/unistr/u8-strcoll.c
+++ b/lib/unistr/u8-strcoll.c
@@ -1,5 +1,5 @@
/* Compare UTF-8 strings using the collation rules of the current locale.
- Copyright (C) 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -29,5 +29,5 @@
#define FUNC u8_strcoll
#define UNIT uint8_t
#define U_STRCMP u8_strcmp
-#define U_STRCONV_TO_LOCALE u8_strconv_to_locale
+#define U_STRCONV_TO_ENCODING u8_strconv_to_encoding
#include "u-strcoll.h"
diff --git a/lib/unistr/u8-strcpy.c b/lib/unistr/u8-strcpy.c
index 9662de58..40d544e3 100644
--- a/lib/unistr/u8-strcpy.c
+++ b/lib/unistr/u8-strcpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strcspn.c b/lib/unistr/u8-strcspn.c
index 4b5b8e07..357f480d 100644
--- a/lib/unistr/u8-strcspn.c
+++ b/lib/unistr/u8-strcspn.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strdup.c b/lib/unistr/u8-strdup.c
index 58a30772..1ac590f4 100644
--- a/lib/unistr/u8-strdup.c
+++ b/lib/unistr/u8-strdup.c
@@ -1,5 +1,5 @@
/* Copy UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strlen.c b/lib/unistr/u8-strlen.c
index b8bebf0e..08f011ef 100644
--- a/lib/unistr/u8-strlen.c
+++ b/lib/unistr/u8-strlen.c
@@ -1,5 +1,5 @@
/* Determine length of UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strmblen.c b/lib/unistr/u8-strmblen.c
index 52242c58..09b876b8 100644
--- a/lib/unistr/u8-strmblen.c
+++ b/lib/unistr/u8-strmblen.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-8 string.
- Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
@@ -24,7 +24,7 @@
int
u8_strmblen (const uint8_t *s)
{
- /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
uint8_t c = *s;
if (c < 0x80)
diff --git a/lib/unistr/u8-strmbtouc.c b/lib/unistr/u8-strmbtouc.c
index 67016c65..abfb179f 100644
--- a/lib/unistr/u8-strmbtouc.c
+++ b/lib/unistr/u8-strmbtouc.c
@@ -1,5 +1,5 @@
/* Look at first character in UTF-8 string.
- Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2010 Free Software
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2015 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
@@ -24,7 +24,7 @@
int
u8_strmbtouc (ucs4_t *puc, const uint8_t *s)
{
- /* Keep in sync with unistr.h and utf8-ucs4.c. */
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
uint8_t c = *s;
if (c < 0x80)
diff --git a/lib/unistr/u8-strncat.c b/lib/unistr/u8-strncat.c
index 47803505..533355b0 100644
--- a/lib/unistr/u8-strncat.c
+++ b/lib/unistr/u8-strncat.c
@@ -1,5 +1,5 @@
/* Concatenate UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strncmp.c b/lib/unistr/u8-strncmp.c
index 286450be..1b0d0f44 100644
--- a/lib/unistr/u8-strncmp.c
+++ b/lib/unistr/u8-strncmp.c
@@ -1,5 +1,5 @@
/* Compare UTF-8 strings.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strncpy.c b/lib/unistr/u8-strncpy.c
index 5ef757bd..306b02f4 100644
--- a/lib/unistr/u8-strncpy.c
+++ b/lib/unistr/u8-strncpy.c
@@ -1,5 +1,5 @@
/* Copy UTF-8 string.
- Copyright (C) 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strnlen.c b/lib/unistr/u8-strnlen.c
index e732ea2e..5ae8b71f 100644
--- a/lib/unistr/u8-strnlen.c
+++ b/lib/unistr/u8-strnlen.c
@@ -1,5 +1,5 @@
/* Determine bounded length of UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
/* Specification. */
#include "unistr.h"
-#if __GLIBC__ >= 2
+#if __GLIBC__ >= 2 || defined __UCLIBC__
# include <string.h>
diff --git a/lib/unistr/u8-strpbrk.c b/lib/unistr/u8-strpbrk.c
index ec6dc278..e7a8ad28 100644
--- a/lib/unistr/u8-strpbrk.c
+++ b/lib/unistr/u8-strpbrk.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strrchr.c b/lib/unistr/u8-strrchr.c
index 6d8e2971..4efd42dc 100644
--- a/lib/unistr/u8-strrchr.c
+++ b/lib/unistr/u8-strrchr.c
@@ -1,5 +1,5 @@
/* Search character in UTF-8 string.
- Copyright (C) 1999, 2002, 2006-2007, 2009-2010 Free Software Foundation,
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2015 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
diff --git a/lib/unistr/u8-strspn.c b/lib/unistr/u8-strspn.c
index 7747815b..23ab7b4f 100644
--- a/lib/unistr/u8-strspn.c
+++ b/lib/unistr/u8-strspn.c
@@ -1,5 +1,5 @@
/* Search for some characters in UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-strstr.c b/lib/unistr/u8-strstr.c
index cce37adc..59d35e77 100644
--- a/lib/unistr/u8-strstr.c
+++ b/lib/unistr/u8-strstr.c
@@ -1,5 +1,5 @@
/* Substring test for UTF-8 strings.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2010-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -20,9 +20,13 @@
/* Specification. */
#include "unistr.h"
+#include <string.h>
+
/* FIXME: Maybe walking the string via u8_mblen is a win? */
#define FUNC u8_strstr
#define UNIT uint8_t
#define U_STRCHR u8_strchr
+#define U_STRMBTOUC u8_strmbtouc
+#define UNIT_IS_UINT8_T 1
#include "u-strstr.h"
diff --git a/lib/unistr/u8-strtok.c b/lib/unistr/u8-strtok.c
index e5c72037..1e4e6efb 100644
--- a/lib/unistr/u8-strtok.c
+++ b/lib/unistr/u8-strtok.c
@@ -1,5 +1,5 @@
/* Tokenize UTF-8 string.
- Copyright (C) 1999, 2002, 2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
@@ -24,4 +24,5 @@
#define UNIT uint8_t
#define U_STRSPN u8_strspn
#define U_STRPBRK u8_strpbrk
+#define U_STRMBLEN u8_strmblen
#include "u-strtok.h"
diff --git a/lib/unistr/u8-to-u16.c b/lib/unistr/u8-to-u16.c
index 3745c2bd..cb0f2982 100644
--- a/lib/unistr/u8-to-u16.c
+++ b/lib/unistr/u8-to-u16.c
@@ -1,5 +1,5 @@
/* Convert UTF-8 string to UTF-16 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-to-u32.c b/lib/unistr/u8-to-u32.c
index de2a35e6..1b3cbc4e 100644
--- a/lib/unistr/u8-to-u32.c
+++ b/lib/unistr/u8-to-u32.c
@@ -1,5 +1,5 @@
/* Convert UTF-8 string to UTF-32 string.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-uctomb-aux.c b/lib/unistr/u8-uctomb-aux.c
index 695921d6..cc9c5441 100644
--- a/lib/unistr/u8-uctomb-aux.c
+++ b/lib/unistr/u8-uctomb-aux.c
@@ -1,5 +1,5 @@
/* Conversion UCS-4 to UTF-8.
- Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
diff --git a/lib/unistr/u8-uctomb.c b/lib/unistr/u8-uctomb.c
index fd33c05c..43ef23d8 100644
--- a/lib/unistr/u8-uctomb.c
+++ b/lib/unistr/u8-uctomb.c
@@ -1,5 +1,5 @@
/* Store a character in UTF-8 string.
- Copyright (C) 2002, 2005-2006, 2009-2010 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005-2006, 2009-2015 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it