summary | refs | log | tree | commit | diff
path: root/lib/uninorm
diff options
context:
space:
mode:
author Jörg Frings-Fürst <debian@jff.email> 2026-03-08 17:28:33 +0100
committer Jörg Frings-Fürst <debian@jff.email> 2026-03-08 17:28:33 +0100
commit 5f59a34ab747dde8ede7357f3431bf06bd6002fe (patch)
tree 056a4477fd870d454d5be5868cddab829a47f4d2 /lib/uninorm
parent 27dae84ed92f1ef0300263091972338d12e78348 (diff)
New upstream version 1.4.2 (refs: upstream/1.4.2, upstream)
Diffstat (limited to 'lib/uninorm')
-rw-r--r-- lib/uninorm/canonical-decomposition.c 28
-rw-r--r-- lib/uninorm/compat-decomposition.c 2
-rw-r--r-- lib/uninorm/composition.c 6
-rw-r--r-- lib/uninorm/decompose-internal.c 2
-rw-r--r-- lib/uninorm/decompose-internal.h 2
-rw-r--r-- lib/uninorm/decomposing-form.c 2
-rw-r--r-- lib/uninorm/decomposition-table.c 2
-rw-r--r-- lib/uninorm/decomposition-table.h 2
-rw-r--r-- lib/uninorm/decomposition.c 28
-rw-r--r-- lib/uninorm/nfc.c 2
-rw-r--r-- lib/uninorm/nfd.c 2
-rw-r--r-- lib/uninorm/nfkc.c 2
-rw-r--r-- lib/uninorm/nfkd.c 2
-rw-r--r-- lib/uninorm/normalize-internal.h 2
-rw-r--r-- lib/uninorm/u-normalize-internal.h 117
-rw-r--r-- lib/uninorm/u-normcmp.h 22
-rw-r--r-- lib/uninorm/u-normcoll.h 24
-rw-r--r-- lib/uninorm/u-normxfrm.h 22
-rw-r--r-- lib/uninorm/u16-normalize.c 2
-rw-r--r-- lib/uninorm/u16-normcmp.c 2
-rw-r--r-- lib/uninorm/u16-normcoll.c 2
-rw-r--r-- lib/uninorm/u16-normxfrm.c 2
-rw-r--r-- lib/uninorm/u32-normalize.c 2
-rw-r--r-- lib/uninorm/u32-normcmp.c 2
-rw-r--r-- lib/uninorm/u32-normcoll.c 2
-rw-r--r-- lib/uninorm/u32-normxfrm.c 2
-rw-r--r-- lib/uninorm/u8-normalize.c 2
-rw-r--r-- lib/uninorm/u8-normcmp.c 2
-rw-r--r-- lib/uninorm/u8-normcoll.c 2
-rw-r--r-- lib/uninorm/u8-normxfrm.c 2
-rw-r--r-- lib/uninorm/uninorm-filter.c 102
31 files changed, 158 insertions, 237 deletions
diff --git a/lib/uninorm/canonical-decomposition.c b/lib/uninorm/canonical-decomposition.c
index 628d2f12..93169bf2 100644
--- a/lib/uninorm/canonical-decomposition.c
+++ b/lib/uninorm/canonical-decomposition.c
@@ -1,5 +1,5 @@
/* Canonical decomposition of Unicode characters.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
@@ -33,18 +33,14 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition)
"Hangul Syllable Decomposition", See also the clarification at
<https://www.unicode.org/versions/Unicode5.1.0/>, section
"Clarification of Hangul Jamo Handling". */
- unsigned int t;
-
uc -= 0xAC00;
- t = uc % 28;
+ unsigned int t = uc % 28;
if (t == 0)
{
- unsigned int v, l;
-
uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ unsigned int v = uc % 21;
+ unsigned int l = uc / 21;
decomposition[0] = 0x1100 + l;
decomposition[1] = 0x1161 + v;
@@ -57,11 +53,9 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition)
decomposition[1] = 0x11A7 + t;
return 2;
#else
- unsigned int v, l;
-
uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ unsigned int v = uc % 21;
+ unsigned int l = uc / 21;
decomposition[0] = 0x1100 + l;
decomposition[1] = 0x1161 + v;
@@ -78,16 +72,12 @@ uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition)
is a canonical one. */
if (entry < 0x8000)
{
- const unsigned char *p;
- unsigned int element;
- unsigned int length;
-
- p = &gl_uninorm_decomp_chars_table[3 * entry];
- element = (p[0] << 16) | (p[1] << 8) | p[2];
+ const unsigned char *p = &gl_uninorm_decomp_chars_table[3 * entry];
+ unsigned int element = (p[0] << 16) | (p[1] << 8) | p[2];
/* The first element has 5 bits for the decomposition type. */
if (((element >> 18) & 0x1f) != UC_DECOMP_CANONICAL)
abort ();
- length = 1;
+ unsigned int length = 1;
for (;;)
{
/* Every element has an 18 bits wide Unicode code point. */
diff --git a/lib/uninorm/compat-decomposition.c b/lib/uninorm/compat-decomposition.c
index 479482b4..a2ed7e97 100644
--- a/lib/uninorm/compat-decomposition.c
+++ b/lib/uninorm/compat-decomposition.c
@@ -1,5 +1,5 @@
/* Compatibility decomposition of Unicode characters.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/composition.c b/lib/uninorm/composition.c
index d9a302be..c5d088bc 100644
--- a/lib/uninorm/composition.c
+++ b/lib/uninorm/composition.c
@@ -1,5 +1,5 @@
/* Canonical composition of Unicode characters.
- Copyright (C) 2002, 2006, 2009, 2011-2025 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006, 2009, 2011-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
@@ -69,8 +69,6 @@ uc_composition (ucs4_t uc1, ucs4_t uc2)
}
#else
char codes[6];
- const struct composition_rule *rule;
-
codes[0] = (uc1 >> 16) & 0xff;
codes[1] = (uc1 >> 8) & 0xff;
codes[2] = uc1 & 0xff;
@@ -78,7 +76,7 @@ uc_composition (ucs4_t uc1, ucs4_t uc2)
codes[4] = (uc2 >> 8) & 0xff;
codes[5] = uc2 & 0xff;
- rule = gl_uninorm_compose_lookup (codes, 6);
+ const struct composition_rule *rule = gl_uninorm_compose_lookup (codes, 6);
if (rule != NULL)
return rule->combined;
#endif
diff --git a/lib/uninorm/decompose-internal.c b/lib/uninorm/decompose-internal.c
index 215f9b16..1a5c0f21 100644
--- a/lib/uninorm/decompose-internal.c
+++ b/lib/uninorm/decompose-internal.c
@@ -1,5 +1,5 @@
/* Decomposition of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/decompose-internal.h b/lib/uninorm/decompose-internal.h
index 10a88a1b..6273b4b0 100644
--- a/lib/uninorm/decompose-internal.h
+++ b/lib/uninorm/decompose-internal.h
@@ -1,5 +1,5 @@
/* Decomposition of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/decomposing-form.c b/lib/uninorm/decomposing-form.c
index e8166d93..99c28586 100644
--- a/lib/uninorm/decomposing-form.c
+++ b/lib/uninorm/decomposing-form.c
@@ -1,5 +1,5 @@
/* Decomposing variant of a normalization form.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/decomposition-table.c b/lib/uninorm/decomposition-table.c
index 66e4aded..d397a443 100644
--- a/lib/uninorm/decomposition-table.c
+++ b/lib/uninorm/decomposition-table.c
@@ -1,5 +1,5 @@
/* Decomposition of Unicode characters.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/decomposition-table.h b/lib/uninorm/decomposition-table.h
index 5ff87c53..f93209e4 100644
--- a/lib/uninorm/decomposition-table.h
+++ b/lib/uninorm/decomposition-table.h
@@ -1,5 +1,5 @@
/* Decomposition of Unicode characters.
- Copyright (C) 2001-2003, 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/decomposition.c b/lib/uninorm/decomposition.c
index b8deeed4..d850a612 100644
--- a/lib/uninorm/decomposition.c
+++ b/lib/uninorm/decomposition.c
@@ -1,5 +1,5 @@
/* Decomposition of Unicode characters.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
@@ -39,19 +39,15 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
"Hangul Syllable Decomposition", See also the clarification at
<https://www.unicode.org/versions/Unicode5.1.0/>, section
"Clarification of Hangul Jamo Handling". */
- unsigned int t;
-
uc -= 0xAC00;
- t = uc % 28;
+ unsigned int t = uc % 28;
*decomp_tag = UC_DECOMP_CANONICAL;
if (t == 0)
{
- unsigned int v, l;
-
uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ unsigned int v = uc % 21;
+ unsigned int l = uc / 21;
decomposition[0] = 0x1100 + l;
decomposition[1] = 0x1161 + v;
@@ -64,11 +60,9 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
decomposition[1] = 0x11A7 + t;
return 2;
#else
- unsigned int v, l;
-
uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
+ unsigned int v = uc % 21;
+ unsigned int l = uc / 21;
decomposition[0] = 0x1100 + l;
decomposition[1] = 0x1161 + v;
@@ -82,15 +76,11 @@ uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
unsigned short entry = decomp_index (uc);
if (entry != (unsigned short)(-1))
{
- const unsigned char *p;
- unsigned int element;
- unsigned int length;
-
- p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
- element = (p[0] << 16) | (p[1] << 8) | p[2];
+ const unsigned char *p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
+ unsigned int element = (p[0] << 16) | (p[1] << 8) | p[2];
/* The first element has 5 bits for the decomposition type. */
*decomp_tag = (element >> 18) & 0x1f;
- length = 1;
+ unsigned int length = 1;
for (;;)
{
/* Every element has an 18 bits wide Unicode code point. */
diff --git a/lib/uninorm/nfc.c b/lib/uninorm/nfc.c
index 156dfea8..42aa47fb 100644
--- a/lib/uninorm/nfc.c
+++ b/lib/uninorm/nfc.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form C.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/nfd.c b/lib/uninorm/nfd.c
index 83739df9..d9760fd1 100644
--- a/lib/uninorm/nfd.c
+++ b/lib/uninorm/nfd.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form D.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/nfkc.c b/lib/uninorm/nfkc.c
index c036535d..045b21ea 100644
--- a/lib/uninorm/nfkc.c
+++ b/lib/uninorm/nfkc.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form KC.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/nfkd.c b/lib/uninorm/nfkd.c
index 70df2fd8..65bbdde5 100644
--- a/lib/uninorm/nfkd.c
+++ b/lib/uninorm/nfkd.c
@@ -1,5 +1,5 @@
/* Unicode Normalization Form KD.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/normalize-internal.h b/lib/uninorm/normalize-internal.h
index 430eb1d7..0c2f32e2 100644
--- a/lib/uninorm/normalize-internal.h
+++ b/lib/uninorm/normalize-internal.h
@@ -1,5 +1,5 @@
/* Normalization of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/u-normalize-internal.h b/lib/uninorm/u-normalize-internal.h
index f910f52f..826e1ec3 100644
--- a/lib/uninorm/u-normalize-internal.h
+++ b/lib/uninorm/u-normalize-internal.h
@@ -1,5 +1,5 @@
/* Decomposition and composition of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
@@ -24,16 +24,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
/* The result being accumulated. */
UNIT *result;
- size_t length;
size_t allocated;
- /* The buffer for sorting. */
- #define SORTBUF_PREALLOCATED 64
- struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED];
- struct ucs4_with_ccc *sortbuf; /* array of size 2 * sortbuf_allocated */
- size_t sortbuf_allocated;
- size_t sortbuf_count;
-
- /* Initialize the accumulator. */
if (resultbuf == NULL)
{
result = NULL;
@@ -44,12 +35,15 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
result = resultbuf;
allocated = *lengthp;
}
- length = 0;
+ size_t length = 0;
- /* Initialize the buffer for sorting. */
- sortbuf = sortbuf_preallocated;
- sortbuf_allocated = SORTBUF_PREALLOCATED;
- sortbuf_count = 0;
+ /* The buffer for sorting. */
+ #define SORTBUF_PREALLOCATED 64
+ struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED];
+ struct ucs4_with_ccc *sortbuf = /* array of size 2 * sortbuf_allocated */
+ sortbuf_preallocated;
+ size_t sortbuf_allocated = SORTBUF_PREALLOCATED;
+ size_t sortbuf_count = 0;
{
const UNIT *s_end = s + n;
@@ -59,7 +53,6 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
int count;
ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
int decomposed_count;
- int i;
if (s < s_end)
{
@@ -73,46 +66,40 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
the size of the decomposition tables, because for example for
U+1FC1 the recursive canonical decomposition and the recursive
compatibility decomposition are different. */
- {
- int curr;
-
- for (curr = 0; curr < decomposed_count; )
- {
- /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
- all elements are atomic. */
- ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
- int curr_decomposed_count;
-
- curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
- if (curr_decomposed_count >= 0)
- {
- /* Move curr_decomposed[0..curr_decomposed_count-1] over
- decomposed[curr], making room. It's not worth using
- memcpy() here, since the counts are so small. */
- int shift = curr_decomposed_count - 1;
-
- if (shift < 0)
- abort ();
- if (shift > 0)
- {
- int j;
+ for (int curr = 0; curr < decomposed_count; )
+ {
+ /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
+ all elements are atomic. */
+ ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+ int curr_decomposed_count;
- decomposed_count += shift;
- if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
- abort ();
- for (j = decomposed_count - 1 - shift; j > curr; j--)
- decomposed[j + shift] = decomposed[j];
- }
- for (; shift >= 0; shift--)
- decomposed[curr + shift] = curr_decomposed[shift];
- }
- else
- {
- /* decomposed[curr] is atomic. */
- curr++;
- }
- }
- }
+ curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
+ if (curr_decomposed_count >= 0)
+ {
+ /* Move curr_decomposed[0..curr_decomposed_count-1] over
+ decomposed[curr], making room. It's not worth using
+ memcpy() here, since the counts are so small. */
+ int shift = curr_decomposed_count - 1;
+
+ if (shift < 0)
+ abort ();
+ if (shift > 0)
+ {
+ decomposed_count += shift;
+ if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
+ abort ();
+ for (int j = decomposed_count - 1 - shift; j > curr; j--)
+ decomposed[j + shift] = decomposed[j];
+ }
+ for (; shift >= 0; shift--)
+ decomposed[curr + shift] = curr_decomposed[shift];
+ }
+ else
+ {
+ /* decomposed[curr] is atomic. */
+ curr++;
+ }
+ }
}
else
{
@@ -120,7 +107,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
decomposed_count = 0;
}
- i = 0;
+ int i = 0;
for (;;)
{
ucs4_t uc;
@@ -143,8 +130,6 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
if (ccc == 0)
{
- size_t j;
-
/* Apply the canonical ordering algorithm to the accumulated
sequence of characters. */
if (sortbuf_count > 1)
@@ -175,7 +160,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
(also a starter). */
if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
{
- for (j = 1; j < sortbuf_count; )
+ for (size_t j = 1; j < sortbuf_count; )
{
if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
{
@@ -183,11 +168,9 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
composer (sortbuf[0].code, sortbuf[j].code);
if (combined)
{
- size_t k;
-
sortbuf[0].code = combined;
/* sortbuf[0].ccc = 0, still valid. */
- for (k = j + 1; k < sortbuf_count; k++)
+ for (size_t k = j + 1; k < sortbuf_count; k++)
sortbuf[k - 1] = sortbuf[k];
sortbuf_count--;
continue;
@@ -212,7 +195,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
}
}
- for (j = 0; j < sortbuf_count; j++)
+ for (size_t j = 0; j < sortbuf_count; j++)
{
ucs4_t muc = sortbuf[j].code;
@@ -303,12 +286,10 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
/* Append (uc, ccc) to sortbuf. */
if (sortbuf_count == sortbuf_allocated)
{
- struct ucs4_with_ccc *new_sortbuf;
-
sortbuf_allocated = 2 * sortbuf_allocated;
if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
abort ();
- new_sortbuf =
+ struct ucs4_with_ccc *new_sortbuf =
(struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
if (new_sortbuf == NULL)
{
@@ -352,9 +333,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
else if (result != resultbuf && length < allocated)
{
/* Shrink the allocated memory if possible. */
- UNIT *memory;
-
- memory = (UNIT *) realloc (result, length * sizeof (UNIT));
+ UNIT *memory = (UNIT *) realloc (result, length * sizeof (UNIT));
if (memory != NULL)
result = memory;
}
diff --git a/lib/uninorm/u-normcmp.h b/lib/uninorm/u-normcmp.h
index 12f89d2b..ca1b24ba 100644
--- a/lib/uninorm/u-normcmp.h
+++ b/lib/uninorm/u-normcmp.h
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
@@ -27,24 +27,18 @@ int
FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2,
uninorm_t nf, int *resultp)
{
- UNIT buf1[2048 / sizeof (UNIT)];
- UNIT buf2[2048 / sizeof (UNIT)];
- UNIT *norms1;
- size_t norms1_length;
- UNIT *norms2;
- size_t norms2_length;
- int cmp;
-
/* Normalize S1. */
- norms1_length = sizeof (buf1) / sizeof (UNIT);
- norms1 = U_NORMALIZE (nf, s1, n1, buf1, &norms1_length);
+ UNIT buf1[2048 / sizeof (UNIT)];
+ size_t norms1_length = sizeof (buf1) / sizeof (UNIT);
+ UNIT *norms1 = U_NORMALIZE (nf, s1, n1, buf1, &norms1_length);
if (norms1 == NULL)
/* errno is set here. */
return -1;
/* Normalize S2. */
- norms2_length = sizeof (buf2) / sizeof (UNIT);
- norms2 = U_NORMALIZE (nf, s2, n2, buf2, &norms2_length);
+ UNIT buf2[2048 / sizeof (UNIT)];
+ size_t norms2_length = sizeof (buf2) / sizeof (UNIT);
+ UNIT *norms2 = U_NORMALIZE (nf, s2, n2, buf2, &norms2_length);
if (norms2 == NULL)
{
if (norms1 != buf1)
@@ -57,7 +51,7 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2,
}
/* Compare the normalized strings. */
- cmp = U_CMP2 (norms1, norms1_length, norms2, norms2_length);
+ int cmp = U_CMP2 (norms1, norms1_length, norms2, norms2_length);
if (cmp > 0)
cmp = 1;
else if (cmp < 0)
diff --git a/lib/uninorm/u-normcoll.h b/lib/uninorm/u-normcoll.h
index 948462c0..fd2dd71f 100644
--- a/lib/uninorm/u-normcoll.h
+++ b/lib/uninorm/u-normcoll.h
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
@@ -27,24 +27,18 @@ int
FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2,
uninorm_t nf, int *resultp)
{
- char buf1[2048];
- char buf2[2048];
- char *transformed1;
- size_t transformed1_length;
- char *transformed2;
- size_t transformed2_length;
- int cmp;
-
/* Normalize and transform S1. */
- transformed1_length = sizeof (buf1);
- transformed1 = U_NORMXFRM (s1, n1, nf, buf1, &transformed1_length);
+ char buf1[2048];
+ size_t transformed1_length = sizeof (buf1);
+ char *transformed1 = U_NORMXFRM (s1, n1, nf, buf1, &transformed1_length);
if (transformed1 == NULL)
/* errno is set here. */
return -1;
/* Normalize and transform S2. */
- transformed2_length = sizeof (buf2);
- transformed2 = U_NORMXFRM (s2, n2, nf, buf2, &transformed2_length);
+ char buf2[2048];
+ size_t transformed2_length = sizeof (buf2);
+ char *transformed2 = U_NORMXFRM (s2, n2, nf, buf2, &transformed2_length);
if (transformed2 == NULL)
{
if (transformed1 != buf1)
@@ -57,8 +51,8 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2,
}
/* Compare the transformed strings. */
- cmp = memcmp2 (transformed1, transformed1_length,
- transformed2, transformed2_length);
+ int cmp = memcmp2 (transformed1, transformed1_length,
+ transformed2, transformed2_length);
if (cmp < 0)
cmp = -1;
else if (cmp > 0)
diff --git a/lib/uninorm/u-normxfrm.h b/lib/uninorm/u-normxfrm.h
index 92bf051e..deee268a 100644
--- a/lib/uninorm/u-normxfrm.h
+++ b/lib/uninorm/u-normxfrm.h
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
@@ -27,24 +27,18 @@ char *
FUNC (const UNIT *s, size_t n, uninorm_t nf,
char *resultbuf, size_t *lengthp)
{
- UNIT normsbuf[2048 / sizeof (UNIT)];
- UNIT *norms;
- size_t norms_length;
- char convsbuf[2048];
- char *convs;
- size_t convs_length;
- char *result;
-
/* Normalize the Unicode string. */
- norms_length = sizeof (normsbuf) / sizeof (UNIT);
- norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length);
+ UNIT normsbuf[2048 / sizeof (UNIT)];
+ size_t norms_length = sizeof (normsbuf) / sizeof (UNIT);
+ UNIT *norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length);
if (norms == NULL)
/* errno is set here. */
return NULL;
/* Convert it to locale encoding. */
- convs_length = sizeof (convsbuf) - 1;
- convs = U_CONV_TO_ENCODING (locale_charset (),
+ char convsbuf[2048];
+ size_t convs_length = sizeof (convsbuf) - 1;
+ char *convs = U_CONV_TO_ENCODING (locale_charset (),
iconveh_error,
norms, norms_length,
NULL,
@@ -77,7 +71,7 @@ FUNC (const UNIT *s, size_t n, uninorm_t nf,
}
/* Apply locale dependent transformations for comparison. */
- result = amemxfrm (convs, convs_length, resultbuf, lengthp);
+ char *result = amemxfrm (convs, convs_length, resultbuf, lengthp);
if (result == NULL)
{
if (convs != convsbuf)
diff --git a/lib/uninorm/u16-normalize.c b/lib/uninorm/u16-normalize.c
index 13651747..60dc575a 100644
--- a/lib/uninorm/u16-normalize.c
+++ b/lib/uninorm/u16-normalize.c
@@ -1,5 +1,5 @@
/* Normalization of UTF-16 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u16-normcmp.c b/lib/uninorm/u16-normcmp.c
index 81c274b0..d715a3f8 100644
--- a/lib/uninorm/u16-normcmp.c
+++ b/lib/uninorm/u16-normcmp.c
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of UTF-16 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u16-normcoll.c b/lib/uninorm/u16-normcoll.c
index af686bd6..9573c7d5 100644
--- a/lib/uninorm/u16-normcoll.c
+++ b/lib/uninorm/u16-normcoll.c
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of UTF-16 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u16-normxfrm.c b/lib/uninorm/u16-normxfrm.c
index 5378cc51..8230c617 100644
--- a/lib/uninorm/u16-normxfrm.c
+++ b/lib/uninorm/u16-normxfrm.c
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of UTF-16 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u32-normalize.c b/lib/uninorm/u32-normalize.c
index 60f05233..e2312576 100644
--- a/lib/uninorm/u32-normalize.c
+++ b/lib/uninorm/u32-normalize.c
@@ -1,5 +1,5 @@
/* Normalization of UTF-32 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software: you can redistribute it and/or modify
diff --git a/lib/uninorm/u32-normcmp.c b/lib/uninorm/u32-normcmp.c
index 108ec242..570fae4c 100644
--- a/lib/uninorm/u32-normcmp.c
+++ b/lib/uninorm/u32-normcmp.c
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of UTF-32 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u32-normcoll.c b/lib/uninorm/u32-normcoll.c
index f2beb177..4be11ba7 100644
--- a/lib/uninorm/u32-normcoll.c
+++ b/lib/uninorm/u32-normcoll.c
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of UTF-32 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u32-normxfrm.c b/lib/uninorm/u32-normxfrm.c
index 88471a15..a434e9fc 100644
--- a/lib/uninorm/u32-normxfrm.c
+++ b/lib/uninorm/u32-normxfrm.c
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of UTF-32 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u8-normalize.c b/lib/uninorm/u8-normalize.c
index bcc97a33..aea9e2b5 100644
--- a/lib/uninorm/u8-normalize.c
+++ b/lib/uninorm/u8-normalize.c
@@ -1,5 +1,5 @@
/* Normalization of UTF-8 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u8-normcmp.c b/lib/uninorm/u8-normcmp.c
index aabef0e5..c77bf2fc 100644
--- a/lib/uninorm/u8-normcmp.c
+++ b/lib/uninorm/u8-normcmp.c
@@ -1,5 +1,5 @@
/* Normalization insensitive comparison of UTF-8 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u8-normcoll.c b/lib/uninorm/u8-normcoll.c
index ce919f5f..31439dce 100644
--- a/lib/uninorm/u8-normcoll.c
+++ b/lib/uninorm/u8-normcoll.c
@@ -1,5 +1,5 @@
/* Locale dependent, normalization insensitive comparison of UTF-8 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/u8-normxfrm.c b/lib/uninorm/u8-normxfrm.c
index aec7f2a5..7a4cc833 100644
--- a/lib/uninorm/u8-normxfrm.c
+++ b/lib/uninorm/u8-normxfrm.c
@@ -1,5 +1,5 @@
/* Locale dependent transformation for comparison of UTF-8 strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
diff --git a/lib/uninorm/uninorm-filter.c b/lib/uninorm/uninorm-filter.c
index 542013d6..c343a16a 100644
--- a/lib/uninorm/uninorm-filter.c
+++ b/lib/uninorm/uninorm-filter.c
@@ -1,5 +1,5 @@
/* Stream-based normalization of Unicode strings.
- Copyright (C) 2009-2025 Free Software Foundation, Inc.
+ Copyright (C) 2009-2026 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This file is free software.
@@ -95,55 +95,46 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
the size of the decomposition tables, because for example for
U+1FC1 the recursive canonical decomposition and the recursive
compatibility decomposition are different. */
- {
- int curr;
-
- for (curr = 0; curr < decomposed_count; )
- {
- /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
- all elements are atomic. */
- ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
- int curr_decomposed_count;
-
- curr_decomposed_count =
- filter->decomposer (decomposed[curr], curr_decomposed);
- if (curr_decomposed_count >= 0)
- {
- /* Move curr_decomposed[0..curr_decomposed_count-1] over
- decomposed[curr], making room. It's not worth using
- memcpy() here, since the counts are so small. */
- int shift = curr_decomposed_count - 1;
-
- if (shift < 0)
- abort ();
- if (shift > 0)
- {
- int j;
-
- decomposed_count += shift;
- if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
- abort ();
- for (j = decomposed_count - 1 - shift; j > curr; j--)
- decomposed[j + shift] = decomposed[j];
- }
- for (; shift >= 0; shift--)
- decomposed[curr + shift] = curr_decomposed[shift];
- }
- else
- {
- /* decomposed[curr] is atomic. */
- curr++;
- }
- }
- }
+ for (int curr = 0; curr < decomposed_count; )
+ {
+ /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
+ all elements are atomic. */
+ ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+ int curr_decomposed_count =
+ filter->decomposer (decomposed[curr], curr_decomposed);
+ if (curr_decomposed_count >= 0)
+ {
+ /* Move curr_decomposed[0..curr_decomposed_count-1] over
+ decomposed[curr], making room. It's not worth using
+ memcpy() here, since the counts are so small. */
+ int shift = curr_decomposed_count - 1;
+
+ if (shift < 0)
+ abort ();
+ if (shift > 0)
+ {
+ decomposed_count += shift;
+ if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
+ abort ();
+ for (int j = decomposed_count - 1 - shift; j > curr; j--)
+ decomposed[j + shift] = decomposed[j];
+ }
+ for (; shift >= 0; shift--)
+ decomposed[curr + shift] = curr_decomposed[shift];
+ }
+ else
+ {
+ /* decomposed[curr] is atomic. */
+ curr++;
+ }
+ }
{
/* Cache sortbuf and sortbuf_count in local register variables. */
struct ucs4_with_ccc *sortbuf = filter->sortbuf;
size_t sortbuf_count = filter->sortbuf_count;
- int i;
- for (i = 0; i < decomposed_count; i++)
+ for (int i = 0; i < decomposed_count; i++)
{
/* Fetch the next character from the decomposition. */
ucs4_t uc = decomposed[i];
@@ -151,8 +142,6 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
if (ccc == 0)
{
- size_t j;
-
/* Apply the canonical ordering algorithm to the accumulated
sequence of characters. */
if (sortbuf_count > 1)
@@ -183,7 +172,7 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
(also a starter). */
if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
{
- for (j = 1; j < sortbuf_count; )
+ for (size_t j = 1; j < sortbuf_count; )
{
if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
{
@@ -191,11 +180,9 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
filter->composer (sortbuf[0].code, sortbuf[j].code);
if (combined)
{
- size_t k;
-
sortbuf[0].code = combined;
/* sortbuf[0].ccc = 0, still valid. */
- for (k = j + 1; k < sortbuf_count; k++)
+ for (size_t k = j + 1; k < sortbuf_count; k++)
sortbuf[k - 1] = sortbuf[k];
sortbuf_count--;
continue;
@@ -220,7 +207,7 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
}
}
- for (j = 0; j < sortbuf_count; j++)
+ for (size_t j = 0; j < sortbuf_count; j++)
{
ucs4_t muc = sortbuf[j].code;
@@ -241,12 +228,10 @@ uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
/* Append (uc, ccc) to sortbuf. */
if (sortbuf_count == filter->sortbuf_allocated)
{
- struct ucs4_with_ccc *new_sortbuf;
-
filter->sortbuf_allocated = 2 * filter->sortbuf_allocated;
if (filter->sortbuf_allocated < sortbuf_count) /* integer overflow? */
abort ();
- new_sortbuf =
+ struct ucs4_with_ccc *new_sortbuf =
(struct ucs4_with_ccc *)
malloc (2 * filter->sortbuf_allocated * sizeof (struct ucs4_with_ccc));
if (new_sortbuf == NULL)
@@ -286,7 +271,6 @@ uninorm_filter_flush (struct uninorm_filter *filter)
/* Cache sortbuf and sortbuf_count in local register variables. */
struct ucs4_with_ccc * const sortbuf = filter->sortbuf;
size_t sortbuf_count = filter->sortbuf_count;
- size_t j;
/* Apply the canonical ordering algorithm to the accumulated
sequence of characters. */
@@ -318,7 +302,7 @@ uninorm_filter_flush (struct uninorm_filter *filter)
(also a starter). */
if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
{
- for (j = 1; j < sortbuf_count; )
+ for (size_t j = 1; j < sortbuf_count; )
{
if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
{
@@ -326,11 +310,9 @@ uninorm_filter_flush (struct uninorm_filter *filter)
filter->composer (sortbuf[0].code, sortbuf[j].code);
if (combined)
{
- size_t k;
-
sortbuf[0].code = combined;
/* sortbuf[0].ccc = 0, still valid. */
- for (k = j + 1; k < sortbuf_count; k++)
+ for (size_t k = j + 1; k < sortbuf_count; k++)
sortbuf[k - 1] = sortbuf[k];
sortbuf_count--;
continue;
@@ -341,7 +323,7 @@ uninorm_filter_flush (struct uninorm_filter *filter)
}
}
- for (j = 0; j < sortbuf_count; j++)
+ for (size_t j = 0; j < sortbuf_count; j++)
{
ucs4_t muc = sortbuf[j].code;