diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2026-03-08 17:28:33 +0100 |
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2026-03-08 17:28:33 +0100 |
| commit | 5f59a34ab747dde8ede7357f3431bf06bd6002fe (patch) | |
| tree | 056a4477fd870d454d5be5868cddab829a47f4d2 /lib/uninorm | |
| parent | 27dae84ed92f1ef0300263091972338d12e78348 (diff) | |
New upstream version 1.4.2 (refs: upstream/1.4.2, upstream)
Diffstat (limited to 'lib/uninorm')
31 files changed, 158 insertions, 237 deletions
diff --git a/lib/uninorm/canonical-decomposition.c b/lib/uninorm/canonical-decomposition.c index 628d2f12..93169bf2 100644 --- a/lib/uninorm/canonical-decomposition.c +++ b/lib/uninorm/canonical-decomposition.c @@ -1,5 +1,5 @@ /* Canonical decomposition of Unicode characters. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify @@ -33,18 +33,14 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition) "Hangul Syllable Decomposition", See also the clarification at <https://www.unicode.org/versions/Unicode5.1.0/>, section "Clarification of Hangul Jamo Handling". */ - unsigned int t; - uc -= 0xAC00; - t = uc % 28; + unsigned int t = uc % 28; if (t == 0) { - unsigned int v, l; - uc = uc / 28; - v = uc % 21; - l = uc / 21; + unsigned int v = uc % 21; + unsigned int l = uc / 21; decomposition[0] = 0x1100 + l; decomposition[1] = 0x1161 + v; @@ -57,11 +53,9 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition) decomposition[1] = 0x11A7 + t; return 2; #else - unsigned int v, l; - uc = uc / 28; - v = uc % 21; - l = uc / 21; + unsigned int v = uc % 21; + unsigned int l = uc / 21; decomposition[0] = 0x1100 + l; decomposition[1] = 0x1161 + v; @@ -78,16 +72,12 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition) is a canonical one. */ if (entry < 0x8000) { - const unsigned char *p; - unsigned int element; - unsigned int length; - - p = &gl_uninorm_decomp_chars_table[3 * entry]; - element = (p[0] << 16) | (p[1] << 8) | p[2]; + const unsigned char *p = &gl_uninorm_decomp_chars_table[3 * entry]; + unsigned int element = (p[0] << 16) | (p[1] << 8) | p[2]; /* The first element has 5 bits for the decomposition type. 
*/ if (((element >> 18) & 0x1f) != UC_DECOMP_CANONICAL) abort (); - length = 1; + unsigned int length = 1; for (;;) { /* Every element has an 18 bits wide Unicode code point. */ diff --git a/lib/uninorm/compat-decomposition.c b/lib/uninorm/compat-decomposition.c index 479482b4..a2ed7e97 100644 --- a/lib/uninorm/compat-decomposition.c +++ b/lib/uninorm/compat-decomposition.c @@ -1,5 +1,5 @@ /* Compatibility decomposition of Unicode characters. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/composition.c b/lib/uninorm/composition.c index d9a302be..c5d088bc 100644 --- a/lib/uninorm/composition.c +++ b/lib/uninorm/composition.c @@ -1,5 +1,5 @@ /* Canonical composition of Unicode characters. - Copyright (C) 2002, 2006, 2009, 2011-2025 Free Software Foundation, Inc. + Copyright (C) 2002, 2006, 2009, 2011-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify @@ -69,8 +69,6 @@ uc_composition (ucs4_t uc1, ucs4_t uc2) } #else char codes[6]; - const struct composition_rule *rule; - codes[0] = (uc1 >> 16) & 0xff; codes[1] = (uc1 >> 8) & 0xff; codes[2] = uc1 & 0xff; @@ -78,7 +76,7 @@ uc_composition (ucs4_t uc1, ucs4_t uc2) codes[4] = (uc2 >> 8) & 0xff; codes[5] = uc2 & 0xff; - rule = gl_uninorm_compose_lookup (codes, 6); + const struct composition_rule *rule = gl_uninorm_compose_lookup (codes, 6); if (rule != NULL) return rule->combined; #endif diff --git a/lib/uninorm/decompose-internal.c b/lib/uninorm/decompose-internal.c index 215f9b16..1a5c0f21 100644 --- a/lib/uninorm/decompose-internal.c +++ b/lib/uninorm/decompose-internal.c @@ -1,5 +1,5 @@ /* Decomposition of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. 
Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/decompose-internal.h b/lib/uninorm/decompose-internal.h index 10a88a1b..6273b4b0 100644 --- a/lib/uninorm/decompose-internal.h +++ b/lib/uninorm/decompose-internal.h @@ -1,5 +1,5 @@ /* Decomposition of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/decomposing-form.c b/lib/uninorm/decomposing-form.c index e8166d93..99c28586 100644 --- a/lib/uninorm/decomposing-form.c +++ b/lib/uninorm/decomposing-form.c @@ -1,5 +1,5 @@ /* Decomposing variant of a normalization form. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/decomposition-table.c b/lib/uninorm/decomposition-table.c index 66e4aded..d397a443 100644 --- a/lib/uninorm/decomposition-table.c +++ b/lib/uninorm/decomposition-table.c @@ -1,5 +1,5 @@ /* Decomposition of Unicode characters. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/decomposition-table.h b/lib/uninorm/decomposition-table.h index 5ff87c53..f93209e4 100644 --- a/lib/uninorm/decomposition-table.h +++ b/lib/uninorm/decomposition-table.h @@ -1,5 +1,5 @@ /* Decomposition of Unicode characters. - Copyright (C) 2001-2003, 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. 
This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/decomposition.c b/lib/uninorm/decomposition.c index b8deeed4..d850a612 100644 --- a/lib/uninorm/decomposition.c +++ b/lib/uninorm/decomposition.c @@ -1,5 +1,5 @@ /* Decomposition of Unicode characters. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. @@ -39,19 +39,15 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition) "Hangul Syllable Decomposition", See also the clarification at <https://www.unicode.org/versions/Unicode5.1.0/>, section "Clarification of Hangul Jamo Handling". */ - unsigned int t; - uc -= 0xAC00; - t = uc % 28; + unsigned int t = uc % 28; *decomp_tag = UC_DECOMP_CANONICAL; if (t == 0) { - unsigned int v, l; - uc = uc / 28; - v = uc % 21; - l = uc / 21; + unsigned int v = uc % 21; + unsigned int l = uc / 21; decomposition[0] = 0x1100 + l; decomposition[1] = 0x1161 + v; @@ -64,11 +60,9 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition) decomposition[1] = 0x11A7 + t; return 2; #else - unsigned int v, l; - uc = uc / 28; - v = uc % 21; - l = uc / 21; + unsigned int v = uc % 21; + unsigned int l = uc / 21; decomposition[0] = 0x1100 + l; decomposition[1] = 0x1161 + v; @@ -82,15 +76,11 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition) unsigned short entry = decomp_index (uc); if (entry != (unsigned short)(-1)) { - const unsigned char *p; - unsigned int element; - unsigned int length; - - p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)]; - element = (p[0] << 16) | (p[1] << 8) | p[2]; + const unsigned char *p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)]; + unsigned int element = (p[0] << 16) | (p[1] << 8) | p[2]; /* The first element has 5 bits for the decomposition type. 
*/ *decomp_tag = (element >> 18) & 0x1f; - length = 1; + unsigned int length = 1; for (;;) { /* Every element has an 18 bits wide Unicode code point. */ diff --git a/lib/uninorm/nfc.c b/lib/uninorm/nfc.c index 156dfea8..42aa47fb 100644 --- a/lib/uninorm/nfc.c +++ b/lib/uninorm/nfc.c @@ -1,5 +1,5 @@ /* Unicode Normalization Form C. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/nfd.c b/lib/uninorm/nfd.c index 83739df9..d9760fd1 100644 --- a/lib/uninorm/nfd.c +++ b/lib/uninorm/nfd.c @@ -1,5 +1,5 @@ /* Unicode Normalization Form D. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/nfkc.c b/lib/uninorm/nfkc.c index c036535d..045b21ea 100644 --- a/lib/uninorm/nfkc.c +++ b/lib/uninorm/nfkc.c @@ -1,5 +1,5 @@ /* Unicode Normalization Form KC. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/nfkd.c b/lib/uninorm/nfkd.c index 70df2fd8..65bbdde5 100644 --- a/lib/uninorm/nfkd.c +++ b/lib/uninorm/nfkd.c @@ -1,5 +1,5 @@ /* Unicode Normalization Form KD. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/normalize-internal.h b/lib/uninorm/normalize-internal.h index 430eb1d7..0c2f32e2 100644 --- a/lib/uninorm/normalize-internal.h +++ b/lib/uninorm/normalize-internal.h @@ -1,5 +1,5 @@ /* Normalization of Unicode strings. 
- Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/u-normalize-internal.h b/lib/uninorm/u-normalize-internal.h index f910f52f..826e1ec3 100644 --- a/lib/uninorm/u-normalize-internal.h +++ b/lib/uninorm/u-normalize-internal.h @@ -1,5 +1,5 @@ /* Decomposition and composition of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify @@ -24,16 +24,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, /* The result being accumulated. */ UNIT *result; - size_t length; size_t allocated; - /* The buffer for sorting. */ - #define SORTBUF_PREALLOCATED 64 - struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED]; - struct ucs4_with_ccc *sortbuf; /* array of size 2 * sortbuf_allocated */ - size_t sortbuf_allocated; - size_t sortbuf_count; - - /* Initialize the accumulator. */ if (resultbuf == NULL) { result = NULL; @@ -44,12 +35,15 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, result = resultbuf; allocated = *lengthp; } - length = 0; + size_t length = 0; - /* Initialize the buffer for sorting. */ - sortbuf = sortbuf_preallocated; - sortbuf_allocated = SORTBUF_PREALLOCATED; - sortbuf_count = 0; + /* The buffer for sorting. 
*/ + #define SORTBUF_PREALLOCATED 64 + struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED]; + struct ucs4_with_ccc *sortbuf = /* array of size 2 * sortbuf_allocated */ + sortbuf_preallocated; + size_t sortbuf_allocated = SORTBUF_PREALLOCATED; + size_t sortbuf_count = 0; { const UNIT *s_end = s + n; @@ -59,7 +53,6 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, int count; ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH]; int decomposed_count; - int i; if (s < s_end) { @@ -73,46 +66,40 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, the size of the decomposition tables, because for example for U+1FC1 the recursive canonical decomposition and the recursive compatibility decomposition are different. */ - { - int curr; - - for (curr = 0; curr < decomposed_count; ) - { - /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e. - all elements are atomic. */ - ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH]; - int curr_decomposed_count; - - curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed); - if (curr_decomposed_count >= 0) - { - /* Move curr_decomposed[0..curr_decomposed_count-1] over - decomposed[curr], making room. It's not worth using - memcpy() here, since the counts are so small. */ - int shift = curr_decomposed_count - 1; - - if (shift < 0) - abort (); - if (shift > 0) - { - int j; + for (int curr = 0; curr < decomposed_count; ) + { + /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e. + all elements are atomic. */ + ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH]; + int curr_decomposed_count; - decomposed_count += shift; - if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH) - abort (); - for (j = decomposed_count - 1 - shift; j > curr; j--) - decomposed[j + shift] = decomposed[j]; - } - for (; shift >= 0; shift--) - decomposed[curr + shift] = curr_decomposed[shift]; - } - else - { - /* decomposed[curr] is atomic. 
*/ - curr++; - } - } - } + curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed); + if (curr_decomposed_count >= 0) + { + /* Move curr_decomposed[0..curr_decomposed_count-1] over + decomposed[curr], making room. It's not worth using + memcpy() here, since the counts are so small. */ + int shift = curr_decomposed_count - 1; + + if (shift < 0) + abort (); + if (shift > 0) + { + decomposed_count += shift; + if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH) + abort (); + for (int j = decomposed_count - 1 - shift; j > curr; j--) + decomposed[j + shift] = decomposed[j]; + } + for (; shift >= 0; shift--) + decomposed[curr + shift] = curr_decomposed[shift]; + } + else + { + /* decomposed[curr] is atomic. */ + curr++; + } + } } else { @@ -120,7 +107,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, decomposed_count = 0; } - i = 0; + int i = 0; for (;;) { ucs4_t uc; @@ -143,8 +130,6 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, if (ccc == 0) { - size_t j; - /* Apply the canonical ordering algorithm to the accumulated sequence of characters. */ if (sortbuf_count > 1) @@ -175,7 +160,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, (also a starter). */ if (sortbuf_count > 0 && sortbuf[0].ccc == 0) { - for (j = 1; j < sortbuf_count; ) + for (size_t j = 1; j < sortbuf_count; ) { if (sortbuf[j].ccc > sortbuf[j - 1].ccc) { @@ -183,11 +168,9 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, composer (sortbuf[0].code, sortbuf[j].code); if (combined) { - size_t k; - sortbuf[0].code = combined; /* sortbuf[0].ccc = 0, still valid. */ - for (k = j + 1; k < sortbuf_count; k++) + for (size_t k = j + 1; k < sortbuf_count; k++) sortbuf[k - 1] = sortbuf[k]; sortbuf_count--; continue; @@ -212,7 +195,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, } } - for (j = 0; j < sortbuf_count; j++) + for (size_t j = 0; j < sortbuf_count; j++) { ucs4_t muc = sortbuf[j].code; @@ -303,12 +286,10 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, /* Append (uc, ccc) to sortbuf. 
*/ if (sortbuf_count == sortbuf_allocated) { - struct ucs4_with_ccc *new_sortbuf; - sortbuf_allocated = 2 * sortbuf_allocated; if (sortbuf_allocated < sortbuf_count) /* integer overflow? */ abort (); - new_sortbuf = + struct ucs4_with_ccc *new_sortbuf = (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc)); if (new_sortbuf == NULL) { @@ -352,9 +333,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n, else if (result != resultbuf && length < allocated) { /* Shrink the allocated memory if possible. */ - UNIT *memory; - - memory = (UNIT *) realloc (result, length * sizeof (UNIT)); + UNIT *memory = (UNIT *) realloc (result, length * sizeof (UNIT)); if (memory != NULL) result = memory; } diff --git a/lib/uninorm/u-normcmp.h b/lib/uninorm/u-normcmp.h index 12f89d2b..ca1b24ba 100644 --- a/lib/uninorm/u-normcmp.h +++ b/lib/uninorm/u-normcmp.h @@ -1,5 +1,5 @@ /* Normalization insensitive comparison of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. @@ -27,24 +27,18 @@ int FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, uninorm_t nf, int *resultp) { - UNIT buf1[2048 / sizeof (UNIT)]; - UNIT buf2[2048 / sizeof (UNIT)]; - UNIT *norms1; - size_t norms1_length; - UNIT *norms2; - size_t norms2_length; - int cmp; - /* Normalize S1. */ - norms1_length = sizeof (buf1) / sizeof (UNIT); - norms1 = U_NORMALIZE (nf, s1, n1, buf1, &norms1_length); + UNIT buf1[2048 / sizeof (UNIT)]; + size_t norms1_length = sizeof (buf1) / sizeof (UNIT); + UNIT *norms1 = U_NORMALIZE (nf, s1, n1, buf1, &norms1_length); if (norms1 == NULL) /* errno is set here. */ return -1; /* Normalize S2. 
*/ - norms2_length = sizeof (buf2) / sizeof (UNIT); - norms2 = U_NORMALIZE (nf, s2, n2, buf2, &norms2_length); + UNIT buf2[2048 / sizeof (UNIT)]; + size_t norms2_length = sizeof (buf2) / sizeof (UNIT); + UNIT *norms2 = U_NORMALIZE (nf, s2, n2, buf2, &norms2_length); if (norms2 == NULL) { if (norms1 != buf1) @@ -57,7 +51,7 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, } /* Compare the normalized strings. */ - cmp = U_CMP2 (norms1, norms1_length, norms2, norms2_length); + int cmp = U_CMP2 (norms1, norms1_length, norms2, norms2_length); if (cmp > 0) cmp = 1; else if (cmp < 0) diff --git a/lib/uninorm/u-normcoll.h b/lib/uninorm/u-normcoll.h index 948462c0..fd2dd71f 100644 --- a/lib/uninorm/u-normcoll.h +++ b/lib/uninorm/u-normcoll.h @@ -1,5 +1,5 @@ /* Locale dependent, normalization insensitive comparison of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. @@ -27,24 +27,18 @@ int FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, uninorm_t nf, int *resultp) { - char buf1[2048]; - char buf2[2048]; - char *transformed1; - size_t transformed1_length; - char *transformed2; - size_t transformed2_length; - int cmp; - /* Normalize and transform S1. */ - transformed1_length = sizeof (buf1); - transformed1 = U_NORMXFRM (s1, n1, nf, buf1, &transformed1_length); + char buf1[2048]; + size_t transformed1_length = sizeof (buf1); + char *transformed1 = U_NORMXFRM (s1, n1, nf, buf1, &transformed1_length); if (transformed1 == NULL) /* errno is set here. */ return -1; /* Normalize and transform S2. 
*/ - transformed2_length = sizeof (buf2); - transformed2 = U_NORMXFRM (s2, n2, nf, buf2, &transformed2_length); + char buf2[2048]; + size_t transformed2_length = sizeof (buf2); + char *transformed2 = U_NORMXFRM (s2, n2, nf, buf2, &transformed2_length); if (transformed2 == NULL) { if (transformed1 != buf1) @@ -57,8 +51,8 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, } /* Compare the transformed strings. */ - cmp = memcmp2 (transformed1, transformed1_length, - transformed2, transformed2_length); + int cmp = memcmp2 (transformed1, transformed1_length, + transformed2, transformed2_length); if (cmp < 0) cmp = -1; else if (cmp > 0) diff --git a/lib/uninorm/u-normxfrm.h b/lib/uninorm/u-normxfrm.h index 92bf051e..deee268a 100644 --- a/lib/uninorm/u-normxfrm.h +++ b/lib/uninorm/u-normxfrm.h @@ -1,5 +1,5 @@ /* Locale dependent transformation for comparison of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. @@ -27,24 +27,18 @@ char * FUNC (const UNIT *s, size_t n, uninorm_t nf, char *resultbuf, size_t *lengthp) { - UNIT normsbuf[2048 / sizeof (UNIT)]; - UNIT *norms; - size_t norms_length; - char convsbuf[2048]; - char *convs; - size_t convs_length; - char *result; - /* Normalize the Unicode string. */ - norms_length = sizeof (normsbuf) / sizeof (UNIT); - norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length); + UNIT normsbuf[2048 / sizeof (UNIT)]; + size_t norms_length = sizeof (normsbuf) / sizeof (UNIT); + UNIT *norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length); if (norms == NULL) /* errno is set here. */ return NULL; /* Convert it to locale encoding. 
*/ - convs_length = sizeof (convsbuf) - 1; - convs = U_CONV_TO_ENCODING (locale_charset (), + char convsbuf[2048]; + size_t convs_length = sizeof (convsbuf) - 1; + char *convs = U_CONV_TO_ENCODING (locale_charset (), iconveh_error, norms, norms_length, NULL, @@ -77,7 +71,7 @@ FUNC (const UNIT *s, size_t n, uninorm_t nf, } /* Apply locale dependent transformations for comparison. */ - result = amemxfrm (convs, convs_length, resultbuf, lengthp); + char *result = amemxfrm (convs, convs_length, resultbuf, lengthp); if (result == NULL) { if (convs != convsbuf) diff --git a/lib/uninorm/u16-normalize.c b/lib/uninorm/u16-normalize.c index 13651747..60dc575a 100644 --- a/lib/uninorm/u16-normalize.c +++ b/lib/uninorm/u16-normalize.c @@ -1,5 +1,5 @@ /* Normalization of UTF-16 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u16-normcmp.c b/lib/uninorm/u16-normcmp.c index 81c274b0..d715a3f8 100644 --- a/lib/uninorm/u16-normcmp.c +++ b/lib/uninorm/u16-normcmp.c @@ -1,5 +1,5 @@ /* Normalization insensitive comparison of UTF-16 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u16-normcoll.c b/lib/uninorm/u16-normcoll.c index af686bd6..9573c7d5 100644 --- a/lib/uninorm/u16-normcoll.c +++ b/lib/uninorm/u16-normcoll.c @@ -1,5 +1,5 @@ /* Locale dependent, normalization insensitive comparison of UTF-16 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. 
diff --git a/lib/uninorm/u16-normxfrm.c b/lib/uninorm/u16-normxfrm.c index 5378cc51..8230c617 100644 --- a/lib/uninorm/u16-normxfrm.c +++ b/lib/uninorm/u16-normxfrm.c @@ -1,5 +1,5 @@ /* Locale dependent transformation for comparison of UTF-16 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u32-normalize.c b/lib/uninorm/u32-normalize.c index 60f05233..e2312576 100644 --- a/lib/uninorm/u32-normalize.c +++ b/lib/uninorm/u32-normalize.c @@ -1,5 +1,5 @@ /* Normalization of UTF-32 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software: you can redistribute it and/or modify diff --git a/lib/uninorm/u32-normcmp.c b/lib/uninorm/u32-normcmp.c index 108ec242..570fae4c 100644 --- a/lib/uninorm/u32-normcmp.c +++ b/lib/uninorm/u32-normcmp.c @@ -1,5 +1,5 @@ /* Normalization insensitive comparison of UTF-32 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u32-normcoll.c b/lib/uninorm/u32-normcoll.c index f2beb177..4be11ba7 100644 --- a/lib/uninorm/u32-normcoll.c +++ b/lib/uninorm/u32-normcoll.c @@ -1,5 +1,5 @@ /* Locale dependent, normalization insensitive comparison of UTF-32 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. 
diff --git a/lib/uninorm/u32-normxfrm.c b/lib/uninorm/u32-normxfrm.c index 88471a15..a434e9fc 100644 --- a/lib/uninorm/u32-normxfrm.c +++ b/lib/uninorm/u32-normxfrm.c @@ -1,5 +1,5 @@ /* Locale dependent transformation for comparison of UTF-32 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u8-normalize.c b/lib/uninorm/u8-normalize.c index bcc97a33..aea9e2b5 100644 --- a/lib/uninorm/u8-normalize.c +++ b/lib/uninorm/u8-normalize.c @@ -1,5 +1,5 @@ /* Normalization of UTF-8 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u8-normcmp.c b/lib/uninorm/u8-normcmp.c index aabef0e5..c77bf2fc 100644 --- a/lib/uninorm/u8-normcmp.c +++ b/lib/uninorm/u8-normcmp.c @@ -1,5 +1,5 @@ /* Normalization insensitive comparison of UTF-8 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u8-normcoll.c b/lib/uninorm/u8-normcoll.c index ce919f5f..31439dce 100644 --- a/lib/uninorm/u8-normcoll.c +++ b/lib/uninorm/u8-normcoll.c @@ -1,5 +1,5 @@ /* Locale dependent, normalization insensitive comparison of UTF-8 strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/u8-normxfrm.c b/lib/uninorm/u8-normxfrm.c index aec7f2a5..7a4cc833 100644 --- a/lib/uninorm/u8-normxfrm.c +++ b/lib/uninorm/u8-normxfrm.c @@ -1,5 +1,5 @@ /* Locale dependent transformation for comparison of UTF-8 strings. 
- Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. diff --git a/lib/uninorm/uninorm-filter.c b/lib/uninorm/uninorm-filter.c index 542013d6..c343a16a 100644 --- a/lib/uninorm/uninorm-filter.c +++ b/lib/uninorm/uninorm-filter.c @@ -1,5 +1,5 @@ /* Stream-based normalization of Unicode strings. - Copyright (C) 2009-2025 Free Software Foundation, Inc. + Copyright (C) 2009-2026 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This file is free software. @@ -95,55 +95,46 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg) the size of the decomposition tables, because for example for U+1FC1 the recursive canonical decomposition and the recursive compatibility decomposition are different. */ - { - int curr; - - for (curr = 0; curr < decomposed_count; ) - { - /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e. - all elements are atomic. */ - ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH]; - int curr_decomposed_count; - - curr_decomposed_count = - filter->decomposer (decomposed[curr], curr_decomposed); - if (curr_decomposed_count >= 0) - { - /* Move curr_decomposed[0..curr_decomposed_count-1] over - decomposed[curr], making room. It's not worth using - memcpy() here, since the counts are so small. */ - int shift = curr_decomposed_count - 1; - - if (shift < 0) - abort (); - if (shift > 0) - { - int j; - - decomposed_count += shift; - if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH) - abort (); - for (j = decomposed_count - 1 - shift; j > curr; j--) - decomposed[j + shift] = decomposed[j]; - } - for (; shift >= 0; shift--) - decomposed[curr + shift] = curr_decomposed[shift]; - } - else - { - /* decomposed[curr] is atomic. */ - curr++; - } - } - } + for (int curr = 0; curr < decomposed_count; ) + { + /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e. 
+ all elements are atomic. */ + ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH]; + int curr_decomposed_count = + filter->decomposer (decomposed[curr], curr_decomposed); + if (curr_decomposed_count >= 0) + { + /* Move curr_decomposed[0..curr_decomposed_count-1] over + decomposed[curr], making room. It's not worth using + memcpy() here, since the counts are so small. */ + int shift = curr_decomposed_count - 1; + + if (shift < 0) + abort (); + if (shift > 0) + { + decomposed_count += shift; + if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH) + abort (); + for (int j = decomposed_count - 1 - shift; j > curr; j--) + decomposed[j + shift] = decomposed[j]; + } + for (; shift >= 0; shift--) + decomposed[curr + shift] = curr_decomposed[shift]; + } + else + { + /* decomposed[curr] is atomic. */ + curr++; + } + } { /* Cache sortbuf and sortbuf_count in local register variables. */ struct ucs4_with_ccc *sortbuf = filter->sortbuf; size_t sortbuf_count = filter->sortbuf_count; - int i; - for (i = 0; i < decomposed_count; i++) + for (int i = 0; i < decomposed_count; i++) { /* Fetch the next character from the decomposition. */ ucs4_t uc = decomposed[i]; @@ -151,8 +142,6 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg) if (ccc == 0) { - size_t j; - /* Apply the canonical ordering algorithm to the accumulated sequence of characters. */ if (sortbuf_count > 1) @@ -183,7 +172,7 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg) (also a starter). */ if (sortbuf_count > 0 && sortbuf[0].ccc == 0) { - for (j = 1; j < sortbuf_count; ) + for (size_t j = 1; j < sortbuf_count; ) { if (sortbuf[j].ccc > sortbuf[j - 1].ccc) { @@ -191,11 +180,9 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg) filter->composer (sortbuf[0].code, sortbuf[j].code); if (combined) { - size_t k; - sortbuf[0].code = combined; /* sortbuf[0].ccc = 0, still valid. 
*/ - for (k = j + 1; k < sortbuf_count; k++) + for (size_t k = j + 1; k < sortbuf_count; k++) sortbuf[k - 1] = sortbuf[k]; sortbuf_count--; continue; @@ -220,7 +207,7 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg) } } - for (j = 0; j < sortbuf_count; j++) + for (size_t j = 0; j < sortbuf_count; j++) { ucs4_t muc = sortbuf[j].code; @@ -241,12 +228,10 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg) /* Append (uc, ccc) to sortbuf. */ if (sortbuf_count == filter->sortbuf_allocated) { - struct ucs4_with_ccc *new_sortbuf; - filter->sortbuf_allocated = 2 * filter->sortbuf_allocated; if (filter->sortbuf_allocated < sortbuf_count) /* integer overflow? */ abort (); - new_sortbuf = + struct ucs4_with_ccc *new_sortbuf = (struct ucs4_with_ccc *) malloc (2 * filter->sortbuf_allocated * sizeof (struct ucs4_with_ccc)); if (new_sortbuf == NULL) @@ -286,7 +271,6 @@ uninorm_filter_flush (struct uninorm_filter *filter) /* Cache sortbuf and sortbuf_count in local register variables. */ struct ucs4_with_ccc * const sortbuf = filter->sortbuf; size_t sortbuf_count = filter->sortbuf_count; - size_t j; /* Apply the canonical ordering algorithm to the accumulated sequence of characters. */ @@ -318,7 +302,7 @@ uninorm_filter_flush (struct uninorm_filter *filter) (also a starter). */ if (sortbuf_count > 0 && sortbuf[0].ccc == 0) { - for (j = 1; j < sortbuf_count; ) + for (size_t j = 1; j < sortbuf_count; ) { if (sortbuf[j].ccc > sortbuf[j - 1].ccc) { @@ -326,11 +310,9 @@ uninorm_filter_flush (struct uninorm_filter *filter) filter->composer (sortbuf[0].code, sortbuf[j].code); if (combined) { - size_t k; - sortbuf[0].code = combined; /* sortbuf[0].ccc = 0, still valid. 
*/ - for (k = j + 1; k < sortbuf_count; k++) + for (size_t k = j + 1; k < sortbuf_count; k++) sortbuf[k - 1] = sortbuf[k]; sortbuf_count--; continue; @@ -341,7 +323,7 @@ uninorm_filter_flush (struct uninorm_filter *filter) } } - for (j = 0; j < sortbuf_count; j++) + for (size_t j = 0; j < sortbuf_count; j++) { ucs4_t muc = sortbuf[j].code; |
