diff options
Diffstat (limited to 'lib/uninorm.in.h')
-rw-r--r-- | lib/uninorm.in.h | 251 |
1 files changed, 0 insertions, 251 deletions
diff --git a/lib/uninorm.in.h b/lib/uninorm.in.h deleted file mode 100644 index 747ec37e..00000000 --- a/lib/uninorm.in.h +++ /dev/null @@ -1,251 +0,0 @@ -/* Normalization forms (composition and decomposition) of Unicode strings. - Copyright (C) 2001-2002, 2009-2015 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2009. - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#ifndef _UNINORM_H -#define _UNINORM_H - -/* Get LIBUNISTRING_DLL_VARIABLE. */ -#include <unistring/woe32dll.h> - -/* Get size_t. */ -#include <stddef.h> - -#include "unitypes.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -/* Conventions: - - All functions prefixed with u8_ operate on UTF-8 encoded strings. - Their unit is an uint8_t (1 byte). - - All functions prefixed with u16_ operate on UTF-16 encoded strings. - Their unit is an uint16_t (a 2-byte word). - - All functions prefixed with u32_ operate on UCS-4 encoded strings. - Their unit is an uint32_t (a 4-byte word). - - All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly - n units. - - Functions returning a string result take a (resultbuf, lengthp) argument - pair. If resultbuf is not NULL and the result fits into *lengthp units, - it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly - allocated string is returned. In both cases, *lengthp is set to the - length (number of units) of the returned string. In case of error, - NULL is returned and errno is set. */ - - -enum -{ - UC_DECOMP_CANONICAL,/* Canonical decomposition. */ - UC_DECOMP_FONT, /* <font> A font variant (e.g. a blackletter form). */ - UC_DECOMP_NOBREAK, /* <noBreak> A no-break version of a space or hyphen. */ - UC_DECOMP_INITIAL, /* <initial> An initial presentation form (Arabic). */ - UC_DECOMP_MEDIAL, /* <medial> A medial presentation form (Arabic). */ - UC_DECOMP_FINAL, /* <final> A final presentation form (Arabic). */ - UC_DECOMP_ISOLATED,/* <isolated> An isolated presentation form (Arabic). */ - UC_DECOMP_CIRCLE, /* <circle> An encircled form. */ - UC_DECOMP_SUPER, /* <super> A superscript form. */ - UC_DECOMP_SUB, /* <sub> A subscript form. */ - UC_DECOMP_VERTICAL,/* <vertical> A vertical layout presentation form. */ - UC_DECOMP_WIDE, /* <wide> A wide (or zenkaku) compatibility character. */ - UC_DECOMP_NARROW, /* <narrow> A narrow (or hankaku) compatibility character. */ - UC_DECOMP_SMALL, /* <small> A small variant form (CNS compatibility). */ - UC_DECOMP_SQUARE, /* <square> A CJK squared font variant. */ - UC_DECOMP_FRACTION,/* <fraction> A vulgar fraction form. */ - UC_DECOMP_COMPAT /* <compat> Otherwise unspecified compatibility character. */ -}; - -/* Maximum size of decomposition of a single Unicode character. */ -#define UC_DECOMPOSITION_MAX_LENGTH 32 - -/* Return the character decomposition mapping of a Unicode character. - DECOMPOSITION must point to an array of at least UC_DECOMPOSITION_MAX_LENGTH - ucs_t elements. - When a decomposition exists, DECOMPOSITION[0..N-1] and *DECOMP_TAG are - filled and N is returned. Otherwise -1 is returned. */ -extern int - uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition); - -/* Return the canonical character decomposition mapping of a Unicode character. - DECOMPOSITION must point to an array of at least UC_DECOMPOSITION_MAX_LENGTH - ucs_t elements. - When a decomposition exists, DECOMPOSITION[0..N-1] is filled and N is - returned. Otherwise -1 is returned. */ -extern int - uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition); - - -/* Attempt to combine the Unicode characters uc1, uc2. - uc1 is known to have canonical combining class 0. - Return the combination of uc1 and uc2, if it exists. - Return 0 otherwise. - Not all decompositions can be recombined using this function. See the - Unicode file CompositionExclusions.txt for details. */ -extern ucs4_t - uc_composition (ucs4_t uc1, ucs4_t uc2) - _UC_ATTRIBUTE_CONST; - - -/* An object of type uninorm_t denotes a Unicode normalization form. */ -struct unicode_normalization_form; -typedef const struct unicode_normalization_form *uninorm_t; - -/* UNINORM_NFD: Normalization form D: canonical decomposition. */ -extern LIBUNISTRING_DLL_VARIABLE const struct unicode_normalization_form uninorm_nfd; -#define UNINORM_NFD (&uninorm_nfd) - -/* UNINORM_NFC: Normalization form C: canonical decomposition, then - canonical composition. */ -extern LIBUNISTRING_DLL_VARIABLE const struct unicode_normalization_form uninorm_nfc; -#define UNINORM_NFC (&uninorm_nfc) - -/* UNINORM_NFKD: Normalization form KD: compatibility decomposition. */ -extern LIBUNISTRING_DLL_VARIABLE const struct unicode_normalization_form uninorm_nfkd; -#define UNINORM_NFKD (&uninorm_nfkd) - -/* UNINORM_NFKC: Normalization form KC: compatibility decomposition, then - canonical composition. */ -extern LIBUNISTRING_DLL_VARIABLE const struct unicode_normalization_form uninorm_nfkc; -#define UNINORM_NFKC (&uninorm_nfkc) - -/* Test whether a normalization form does compatibility decomposition. */ -#define uninorm_is_compat_decomposing(nf) \ - ((* (const unsigned int *) (nf) >> 0) & 1) - -/* Test whether a normalization form includes canonical composition. */ -#define uninorm_is_composing(nf) \ - ((* (const unsigned int *) (nf) >> 1) & 1) - -/* Return the decomposing variant of a normalization form. - This maps NFC,NFD -> NFD and NFKC,NFKD -> NFKD. */ -extern uninorm_t - uninorm_decomposing_form (uninorm_t nf) - _UC_ATTRIBUTE_PURE; - - -/* Return the specified normalization form of a string. */ -extern uint8_t * - u8_normalize (uninorm_t nf, const uint8_t *s, size_t n, - uint8_t *resultbuf, size_t *lengthp); -extern uint16_t * - u16_normalize (uninorm_t nf, const uint16_t *s, size_t n, - uint16_t *resultbuf, size_t *lengthp); -extern uint32_t * - u32_normalize (uninorm_t nf, const uint32_t *s, size_t n, - uint32_t *resultbuf, size_t *lengthp); - - -/* Compare S1 and S2, ignoring differences in normalization. - NF must be either UNINORM_NFD or UNINORM_NFKD. - If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and - return 0. Upon failure, return -1 with errno set. */ -extern int - u8_normcmp (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, - uninorm_t nf, int *resultp); -extern int - u16_normcmp (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, - uninorm_t nf, int *resultp); -extern int - u32_normcmp (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, - uninorm_t nf, int *resultp); - - -/* Converts the string S of length N to a NUL-terminated byte sequence, in such - a way that comparing uN_normxfrm (S1) and uN_normxfrm (S2) with uN_cmp2() is - equivalent to comparing S1 and S2 with uN_normcoll(). - NF must be either UNINORM_NFC or UNINORM_NFKC. */ -extern char * - u8_normxfrm (const uint8_t *s, size_t n, uninorm_t nf, - char *resultbuf, size_t *lengthp); -extern char * - u16_normxfrm (const uint16_t *s, size_t n, uninorm_t nf, - char *resultbuf, size_t *lengthp); -extern char * - u32_normxfrm (const uint32_t *s, size_t n, uninorm_t nf, - char *resultbuf, size_t *lengthp); - - -/* Compare S1 and S2, ignoring differences in normalization, using the - collation rules of the current locale. - NF must be either UNINORM_NFC or UNINORM_NFKC. - If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and - return 0. Upon failure, return -1 with errno set. */ -extern int - u8_normcoll (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, - uninorm_t nf, int *resultp); -extern int - u16_normcoll (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, - uninorm_t nf, int *resultp); -extern int - u32_normcoll (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, - uninorm_t nf, int *resultp); - - -/* Normalization of a stream of Unicode characters. - - A "stream of Unicode characters" is essentially a function that accepts an - ucs4_t argument repeatedly, optionally combined with a function that - "flushes" the stream. */ - -/* Data type of a stream of Unicode characters that normalizes its input - according to a given normalization form and passes the normalized character - sequence to the encapsulated stream of Unicode characters. */ -struct uninorm_filter; - -/* Create and return a normalization filter for Unicode characters. - The pair (stream_func, stream_data) is the encapsulated stream. - stream_func (stream_data, uc) receives the Unicode character uc - and returns 0 if successful, or -1 with errno set upon failure. - Return the new filter, or NULL with errno set upon failure. */ -extern struct uninorm_filter * - uninorm_filter_create (uninorm_t nf, - int (*stream_func) (void *stream_data, ucs4_t uc), - void *stream_data); - -/* Stuff a Unicode character into a normalizing filter. - Return 0 if successful, or -1 with errno set upon failure. */ -extern int - uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc); - -/* Bring data buffered in the filter to its destination, the encapsulated - stream. - Return 0 if successful, or -1 with errno set upon failure. - Note! If after calling this function, additional characters are written - into the filter, the resulting character sequence in the encapsulated stream - will not necessarily be normalized. */ -extern int - uninorm_filter_flush (struct uninorm_filter *filter); - -/* Bring data buffered in the filter to its destination, the encapsulated - stream, then close and free the filter. - Return 0 if successful, or -1 with errno set upon failure. */ -extern int - uninorm_filter_free (struct uninorm_filter *filter); - - -#ifdef __cplusplus -} -#endif - - -#endif /* _UNINORM_H */ |