diff options
Diffstat (limited to 'lib/unistr/u16-cmp.c')
-rw-r--r-- | lib/unistr/u16-cmp.c | 54 |
1 files changed, 54 insertions, 0 deletions
/* Compare the first N units of the UTF-16 arrays S1 and S2.

   Return 0 if all N units are pairwise equal (in particular when N is 0).
   Otherwise the result is decided by the first position at which the two
   units differ: it is negative if S1 orders before S2 and positive if S1
   orders after S2.

   Plain numeric comparison of UTF-16 units does NOT give code point order:
   a character encoded as a surrogate pair has a larger code point than any
   character encoded as a single unit, yet its leading unit (0xD800..0xDBFF)
   is numerically smaller than single units in 0xE000..0xFFFF.  Therefore a
   surrogate unit (0xD800..0xDFFF) is always ordered after a non-surrogate
   unit, yielding exactly -1 or 1.  When both units are surrogates, or
   neither is, the result is their numeric difference.

   A zero unit is not treated as a terminator; only N bounds the comparison.
   Units located after the first difference are not read.  */
int
u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
{
  for (; n > 0; n--)
    {
      uint16_t c1 = *s1++;
      uint16_t c2 = *s2++;

      if (c1 != c2)
        {
          int c1_is_surrogate = (c1 >= 0xd800 && c1 < 0xe000);
          int c2_is_surrogate = (c2 >= 0xd800 && c2 < 0xe000);

          /* Exactly one side is a surrogate: that side is part of a pair
             and must order after the single-unit character.  */
          if (c1_is_surrogate != c2_is_surrogate)
            return c1_is_surrogate ? 1 : -1;

          /* Same class on both sides: numeric order is code point order.
             > 0 if c1 > c2, < 0 if c1 < c2.  */
          return (int) c1 - (int) c2;
        }
    }
  return 0;
}