diff options
Diffstat (limited to 'doc')
42 files changed, 22375 insertions, 0 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am
new file mode 100644
index 00000000..a66c69da
--- /dev/null
+++ b/doc/Makefile.am
@@ -0,0 +1,185 @@
+## Makefile for the doc subdirectory of GNU libunistring.
+## Copyright (C) 2009 Free Software Foundation, Inc.
+##
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+## Process this file with automake to produce Makefile.in.
+
+AUTOMAKE_OPTIONS = 1.5 gnits
+EXTRA_DIST =
+MOSTLYCLEANFILES =
+
+# -I options for every directory holding texinfo sources that this directory
+# uses. At least one -I option must be present: it works around a bug in
+# texi2dvi 1.13, see
+# <http://lists.gnu.org/archive/html/bug-automake/2009-04/msg00029.html>.
+TEXINCLUDES = -I .
+
+MAKEINFO = env LANG= LC_MESSAGES= LC_ALL= LANGUAGE= @MAKEINFO@
+MAKEINFOFLAGS = $(TEXINCLUDES) --no-split
+
+info_TEXINFOS = libunistring.texi
+# Texinfo files that libunistring.texi @includes (version.texi is not listed).
+libunistring_TEXINFOS = \
+  unitypes.texi unistr.texi uniconv.texi unistdio.texi \
+  uniname.texi unictype.texi uniwidth.texi uniwbrk.texi \
+  unilbrk.texi uninorm.texi unicase.texi uniregex.texi \
+  gpl.texi lgpl.texi fdl.texi
+
+# automake generates a stamp-vti rule whose dependencies are incomplete.
+# Therefore the entire rule that automake would otherwise generate is
+# duplicated here.
+$(srcdir)/stamp-vti: $(info_TEXINFOS) $(libunistring_TEXINFOS) $(top_srcdir)/version.sh
+	(dir=.; test -f ./libunistring.texi || dir=$(srcdir); \
+	set `$(SHELL) $(top_srcdir)/build-aux/mdate-sh $$dir/libunistring.texi`; \
+	echo "@set UPDATED $$1 $$2 $$3"; \
+	echo "@set UPDATED-MONTH $$2 $$3"; \
+	echo "@set EDITION $(VERSION)"; \
+	echo "@set VERSION $(VERSION)") > vti.tmp
+	cmp -s vti.tmp $(srcdir)/version.texi \
+	  || (echo "Updating $(srcdir)/version.texi"; \
+	      cp vti.tmp $(srcdir)/version.texi)
+	rm -f vti.tmp
+	cp $(srcdir)/version.texi $@
+
+# We distribute only the split HTML documentation.
+# The user can generate the others, via
+#   make libunistring.ps
+#   make libunistring.pdf
+#   make libunistring.html
+
+all-local: html-local
+install-data-local: install-html
+installdirs-local: installdirs-html
+uninstall-local: uninstall-html
+dist-hook: dist-html
+
+html-local: html-split
+# Override of automake's definition. The HTML files we want to distribute are
+# not the ones that automake knows about, and we cannot define HTMLS to a value
+# containing wildcards.
+install-html: install-html-split
+	@:
+uninstall-html: uninstall-html-split
+dist-html: dist-html-split
+
+# CLEANFILES: libunistring.{dvi,ps,pdf,html} are already known to automake.
+MAINTAINERCLEANFILES = libunistring_*.html
+
+
+# Documentation in DVI format.
+
+# Override of automake's definition:
+#TEXI2DVI = @TEXI2DVI@
+TEXI2DVI = @TEXI2DVI@ $(TEXINCLUDES)
+
+# The install-dvi target is already defined by automake.
+# installdirs-dvi and uninstall-dvi are defined here; paths are quoted like
+# in automake's own uninstall rules, so a DESTDIR with spaces works.
+installdirs-dvi:
+	$(mkdir_p) "$(DESTDIR)$(dvidir)"
+# Plain 'rm -f' instead of $(RM): RM is a GNU make built-in, not set by configure.
+uninstall-dvi:
+	rm -f "$(DESTDIR)$(dvidir)/libunistring.dvi"
+
+
+# Documentation in Postscript format.
+
+# Override of automake's definition:
+#DVIPS = @DVIPS@
+DVIPS = @DVIPS@ -D600
+
+libunistring.ps: libunistring.dvi
+	$(DVIPS) -o $@ `if test -f libunistring.dvi; then echo libunistring.dvi; else echo $(srcdir)/libunistring.dvi; fi`
+
+# The install-ps target is already defined by automake.
+# installdirs-ps and uninstall-ps are defined here ('rm -f' instead of the GNU-only $(RM)).
+installdirs-ps:
+	$(mkdir_p) "$(DESTDIR)$(psdir)"
+
+uninstall-ps:
+	rm -f "$(DESTDIR)$(psdir)/libunistring.ps"
+
+
+# Documentation in Portable Document Format.
+
+# Override of automake's definition:
+#TEXI2PDF = @TEXI2DVI@ --pdf
+TEXI2PDF = @TEXI2DVI@ --pdf $(TEXINCLUDES)
+
+# The install-pdf target is already defined by automake.
+
+installdirs-pdf:
+	$(mkdir_p) "$(DESTDIR)$(pdfdir)"
+
+uninstall-pdf:
+	rm -f "$(DESTDIR)$(pdfdir)/libunistring.pdf"
+
+
+# Documentation in HTML format.
+
+TEXI2HTML = @PERL@ $(top_srcdir)/build-aux/texi2html
+
+html-monolithic: libunistring.html
+html-split: libunistring_toc.html
+
+# Override of automake's definition.
+# We want to use texi2html, not makeinfo --html.
+libunistring.html: libunistring.texi version.texi $(libunistring_TEXINFOS)
+	$(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -monolithic `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi`
+# Split HTML: the files are moved out of libunistring/; the moves are chained so the first failure stops the recipe.
+libunistring_toc.html: libunistring.texi version.texi $(libunistring_TEXINFOS)
+	case "@PERL@" in \
+	  *"/missing perl") \
+	     $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` || exit 0 ;; \
+	  *) rm -f libunistring_*.html ; \
+	     $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` ;; \
+	esac \
+	&& { mv libunistring/libunistring.html libunistring_toc.html \
+	     && mv libunistring/*.html . \
+	     && rmdir libunistring; \
+	   }
+
+install-html-monolithic: libunistring.html
+	$(mkdir_p) "$(DESTDIR)$(htmldir)"
+	$(INSTALL_DATA) `if test -f libunistring.html; then echo .; else echo $(srcdir); fi`/libunistring.html "$(DESTDIR)$(htmldir)/libunistring.html"
+# Installs every libunistring_*.html; stops at the first file that fails to install.
+install-html-split: libunistring_toc.html
+	$(mkdir_p) "$(DESTDIR)$(htmldir)"
+	for file in `if test -f libunistring_toc.html; then echo .; else echo $(srcdir); fi`/libunistring_*.html; do \
+	  $(INSTALL_DATA) $$file "$(DESTDIR)$(htmldir)/`basename $$file`" || exit 1; \
+	done
+
+installdirs-html:
+	$(mkdir_p) "$(DESTDIR)$(htmldir)"
+
+uninstall-html-monolithic:
+	rm -f "$(DESTDIR)$(htmldir)/libunistring.html"
+
+uninstall-html-split:
+	rm -f "$(DESTDIR)$(htmldir)"/libunistring_*.html
+
+dist-html-monolithic:
+	$(mkdir_p) $(distdir)/
+	file=libunistring.html; \
+	if test -f $$file; then d=.; else d=$(srcdir); fi; \
+	cp -p $$d/$$file $(distdir)/$$file || exit 1
+
+# We would like to put libunistring_*.html into EXTRA_DIST, but it doesn't work.
+dist-html-split:
+	$(mkdir_p) $(distdir)/
+	file=libunistring_toc.html; \
+	if test -f $$file; then d=.; else d=$(srcdir); fi; \
+	for file in `cd $$d && echo libunistring_*.html`; do \
+	  cp -p $$d/$$file $(distdir)/$$file || exit 1; \
+	done
diff --git a/doc/Makefile.in b/doc/Makefile.in
new file mode 100644
index 00000000..66a4a3bc
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,1207 @@
+# Makefile.in generated by automake 1.11 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+ +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc +DIST_COMMON = $(libunistring_TEXINFOS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/stamp-vti \ + $(srcdir)/version.texi +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/gnulib-m4/00gnulib.m4 \ + $(top_srcdir)/gnulib-m4/alloca.m4 \ + $(top_srcdir)/gnulib-m4/codeset.m4 \ + $(top_srcdir)/gnulib-m4/eealloc.m4 \ + $(top_srcdir)/gnulib-m4/environ.m4 \ + $(top_srcdir)/gnulib-m4/errno_h.m4 \ + $(top_srcdir)/gnulib-m4/error.m4 \ + $(top_srcdir)/gnulib-m4/exitfail.m4 \ + $(top_srcdir)/gnulib-m4/exponentd.m4 \ + $(top_srcdir)/gnulib-m4/exponentf.m4 \ + $(top_srcdir)/gnulib-m4/exponentl.m4 \ + $(top_srcdir)/gnulib-m4/extensions.m4 \ + $(top_srcdir)/gnulib-m4/float_h.m4 \ + $(top_srcdir)/gnulib-m4/fpieee.m4 \ + $(top_srcdir)/gnulib-m4/frexp.m4 \ + $(top_srcdir)/gnulib-m4/frexpl.m4 \ + $(top_srcdir)/gnulib-m4/getpagesize.m4 \ + $(top_srcdir)/gnulib-m4/glibc21.m4 \ + $(top_srcdir)/gnulib-m4/gnulib-common.m4 \ + $(top_srcdir)/gnulib-m4/gnulib-comp.m4 \ + $(top_srcdir)/gnulib-m4/iconv.m4 \ + $(top_srcdir)/gnulib-m4/iconv_h.m4 \ + $(top_srcdir)/gnulib-m4/iconv_open.m4 \ + $(top_srcdir)/gnulib-m4/include_next.m4 \ + $(top_srcdir)/gnulib-m4/inline.m4 \ + $(top_srcdir)/gnulib-m4/intlmacosx.m4 \ + $(top_srcdir)/gnulib-m4/intmax_t.m4 \ + $(top_srcdir)/gnulib-m4/inttypes_h.m4 \ + $(top_srcdir)/gnulib-m4/isnand.m4 \ + 
$(top_srcdir)/gnulib-m4/isnanf.m4 \ + $(top_srcdir)/gnulib-m4/isnanl.m4 \ + $(top_srcdir)/gnulib-m4/lcmessage.m4 \ + $(top_srcdir)/gnulib-m4/ldexpl.m4 \ + $(top_srcdir)/gnulib-m4/lib-ld.m4 \ + $(top_srcdir)/gnulib-m4/lib-link.m4 \ + $(top_srcdir)/gnulib-m4/lib-prefix.m4 \ + $(top_srcdir)/gnulib-m4/localcharset.m4 \ + $(top_srcdir)/gnulib-m4/locale-fr.m4 \ + $(top_srcdir)/gnulib-m4/locale-ja.m4 \ + $(top_srcdir)/gnulib-m4/locale-tr.m4 \ + $(top_srcdir)/gnulib-m4/locale-zh.m4 \ + $(top_srcdir)/gnulib-m4/locale_h.m4 \ + $(top_srcdir)/gnulib-m4/localename.m4 \ + $(top_srcdir)/gnulib-m4/longlong.m4 \ + $(top_srcdir)/gnulib-m4/malloc.m4 \ + $(top_srcdir)/gnulib-m4/malloca.m4 \ + $(top_srcdir)/gnulib-m4/math_h.m4 \ + $(top_srcdir)/gnulib-m4/mbchar.m4 \ + $(top_srcdir)/gnulib-m4/mbiter.m4 \ + $(top_srcdir)/gnulib-m4/mbrtowc.m4 \ + $(top_srcdir)/gnulib-m4/mbsinit.m4 \ + $(top_srcdir)/gnulib-m4/mbstate_t.m4 \ + $(top_srcdir)/gnulib-m4/memchr.m4 \ + $(top_srcdir)/gnulib-m4/minmax.m4 \ + $(top_srcdir)/gnulib-m4/mmap-anon.m4 \ + $(top_srcdir)/gnulib-m4/multiarch.m4 \ + $(top_srcdir)/gnulib-m4/nocrash.m4 \ + $(top_srcdir)/gnulib-m4/printf-frexp.m4 \ + $(top_srcdir)/gnulib-m4/printf-frexpl.m4 \ + $(top_srcdir)/gnulib-m4/printf.m4 \ + $(top_srcdir)/gnulib-m4/relocatable-lib.m4 \ + $(top_srcdir)/gnulib-m4/setenv.m4 \ + $(top_srcdir)/gnulib-m4/signbit.m4 \ + $(top_srcdir)/gnulib-m4/size_max.m4 \ + $(top_srcdir)/gnulib-m4/stdbool.m4 \ + $(top_srcdir)/gnulib-m4/stdint.m4 \ + $(top_srcdir)/gnulib-m4/stdint_h.m4 \ + $(top_srcdir)/gnulib-m4/stdlib_h.m4 \ + $(top_srcdir)/gnulib-m4/strerror.m4 \ + $(top_srcdir)/gnulib-m4/string_h.m4 \ + $(top_srcdir)/gnulib-m4/unistd_h.m4 \ + $(top_srcdir)/gnulib-m4/vasnprintf.m4 \ + $(top_srcdir)/gnulib-m4/wchar.m4 \ + $(top_srcdir)/gnulib-m4/wchar_t.m4 \ + $(top_srcdir)/gnulib-m4/wctob.m4 \ + $(top_srcdir)/gnulib-m4/wctype.m4 \ + $(top_srcdir)/gnulib-m4/wcwidth.m4 \ + $(top_srcdir)/gnulib-m4/wint_t.m4 \ + $(top_srcdir)/gnulib-m4/xalloc.m4 \ + 
$(top_srcdir)/gnulib-m4/xsize.m4 $(top_srcdir)/m4/exported.m4 \ + $(top_srcdir)/m4/init-package-version.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/woe32-dll.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_GEN = $(am__v_GEN_$(V)) +am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) +am__v_GEN_0 = @echo " GEN " $@; +AM_V_at = $(am__v_at_$(V)) +am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) +am__v_at_0 = @ +SOURCES = +DIST_SOURCES = +INFO_DEPS = $(srcdir)/libunistring.info +TEXINFO_TEX = $(top_srcdir)/build-aux/texinfo.tex +am__TEXINFO_TEX_DIR = $(top_srcdir)/build-aux +DVIS = libunistring.dvi +PDFS = libunistring.pdf +PSS = libunistring.ps +HTMLS = libunistring.html +TEXINFOS = libunistring.texi +MAKEINFOHTML = $(MAKEINFO) --html +AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS) +am__installdirs = "$(DESTDIR)$(infodir)" +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, 
files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALLOCA_H = @ALLOCA_H@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPLE_UNIVERSAL_BUILD = @APPLE_UNIVERSAL_BUILD@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BITSIZEOF_PTRDIFF_T = @BITSIZEOF_PTRDIFF_T@ +BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@ +BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@ +BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@ +BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLL_VARIABLE = @DLL_VARIABLE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ + +# Documentation in Postscript format. + +# Override of automake's definition: +#DVIPS = @DVIPS@ +DVIPS = @DVIPS@ -D600 +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EMULTIHOP_HIDDEN = @EMULTIHOP_HIDDEN@ +EMULTIHOP_VALUE = @EMULTIHOP_VALUE@ +ENOLINK_HIDDEN = @ENOLINK_HIDDEN@ +ENOLINK_VALUE = @ENOLINK_VALUE@ +EOVERFLOW_HIDDEN = @EOVERFLOW_HIDDEN@ +EOVERFLOW_VALUE = @EOVERFLOW_VALUE@ +ERRNO_H = @ERRNO_H@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLOAT_H = @FLOAT_H@ +GLIBC21 = @GLIBC21@ +GLOBAL_SYMBOL_PIPE = @GLOBAL_SYMBOL_PIPE@ +GNULIB_ATOLL = @GNULIB_ATOLL@ +GNULIB_BTOWC = @GNULIB_BTOWC@ +GNULIB_CALLOC_POSIX = @GNULIB_CALLOC_POSIX@ +GNULIB_CEILF = @GNULIB_CEILF@ +GNULIB_CEILL = @GNULIB_CEILL@ +GNULIB_CHOWN = @GNULIB_CHOWN@ +GNULIB_CLOSE = @GNULIB_CLOSE@ +GNULIB_DUP2 = @GNULIB_DUP2@ +GNULIB_ENVIRON = @GNULIB_ENVIRON@ +GNULIB_EUIDACCESS = @GNULIB_EUIDACCESS@ +GNULIB_FCHDIR = @GNULIB_FCHDIR@ +GNULIB_FLOORF = @GNULIB_FLOORF@ +GNULIB_FLOORL = @GNULIB_FLOORL@ +GNULIB_FREXP = @GNULIB_FREXP@ +GNULIB_FREXPL = @GNULIB_FREXPL@ 
+GNULIB_FSYNC = @GNULIB_FSYNC@ +GNULIB_FTRUNCATE = @GNULIB_FTRUNCATE@ +GNULIB_GETCWD = @GNULIB_GETCWD@ +GNULIB_GETDOMAINNAME = @GNULIB_GETDOMAINNAME@ +GNULIB_GETDTABLESIZE = @GNULIB_GETDTABLESIZE@ +GNULIB_GETHOSTNAME = @GNULIB_GETHOSTNAME@ +GNULIB_GETLOADAVG = @GNULIB_GETLOADAVG@ +GNULIB_GETLOGIN_R = @GNULIB_GETLOGIN_R@ +GNULIB_GETPAGESIZE = @GNULIB_GETPAGESIZE@ +GNULIB_GETSUBOPT = @GNULIB_GETSUBOPT@ +GNULIB_GETUSERSHELL = @GNULIB_GETUSERSHELL@ +GNULIB_ISFINITE = @GNULIB_ISFINITE@ +GNULIB_ISINF = @GNULIB_ISINF@ +GNULIB_ISNAN = @GNULIB_ISNAN@ +GNULIB_ISNAND = @GNULIB_ISNAND@ +GNULIB_ISNANF = @GNULIB_ISNANF@ +GNULIB_ISNANL = @GNULIB_ISNANL@ +GNULIB_LCHOWN = @GNULIB_LCHOWN@ +GNULIB_LDEXPL = @GNULIB_LDEXPL@ +GNULIB_LINK = @GNULIB_LINK@ +GNULIB_LSEEK = @GNULIB_LSEEK@ +GNULIB_MALLOC_POSIX = @GNULIB_MALLOC_POSIX@ +GNULIB_MATHL = @GNULIB_MATHL@ +GNULIB_MBRLEN = @GNULIB_MBRLEN@ +GNULIB_MBRTOWC = @GNULIB_MBRTOWC@ +GNULIB_MBSCASECMP = @GNULIB_MBSCASECMP@ +GNULIB_MBSCASESTR = @GNULIB_MBSCASESTR@ +GNULIB_MBSCHR = @GNULIB_MBSCHR@ +GNULIB_MBSCSPN = @GNULIB_MBSCSPN@ +GNULIB_MBSINIT = @GNULIB_MBSINIT@ +GNULIB_MBSLEN = @GNULIB_MBSLEN@ +GNULIB_MBSNCASECMP = @GNULIB_MBSNCASECMP@ +GNULIB_MBSNLEN = @GNULIB_MBSNLEN@ +GNULIB_MBSNRTOWCS = @GNULIB_MBSNRTOWCS@ +GNULIB_MBSPBRK = @GNULIB_MBSPBRK@ +GNULIB_MBSPCASECMP = @GNULIB_MBSPCASECMP@ +GNULIB_MBSRCHR = @GNULIB_MBSRCHR@ +GNULIB_MBSRTOWCS = @GNULIB_MBSRTOWCS@ +GNULIB_MBSSEP = @GNULIB_MBSSEP@ +GNULIB_MBSSPN = @GNULIB_MBSSPN@ +GNULIB_MBSSTR = @GNULIB_MBSSTR@ +GNULIB_MBSTOK_R = @GNULIB_MBSTOK_R@ +GNULIB_MEMCHR = @GNULIB_MEMCHR@ +GNULIB_MEMMEM = @GNULIB_MEMMEM@ +GNULIB_MEMPCPY = @GNULIB_MEMPCPY@ +GNULIB_MEMRCHR = @GNULIB_MEMRCHR@ +GNULIB_MKDTEMP = @GNULIB_MKDTEMP@ +GNULIB_MKSTEMP = @GNULIB_MKSTEMP@ +GNULIB_PUTENV = @GNULIB_PUTENV@ +GNULIB_RANDOM_R = @GNULIB_RANDOM_R@ +GNULIB_RAWMEMCHR = @GNULIB_RAWMEMCHR@ +GNULIB_READLINK = @GNULIB_READLINK@ +GNULIB_REALLOC_POSIX = @GNULIB_REALLOC_POSIX@ +GNULIB_ROUND = @GNULIB_ROUND@ +GNULIB_ROUNDF = 
@GNULIB_ROUNDF@ +GNULIB_ROUNDL = @GNULIB_ROUNDL@ +GNULIB_RPMATCH = @GNULIB_RPMATCH@ +GNULIB_SETENV = @GNULIB_SETENV@ +GNULIB_SIGNBIT = @GNULIB_SIGNBIT@ +GNULIB_SLEEP = @GNULIB_SLEEP@ +GNULIB_STPCPY = @GNULIB_STPCPY@ +GNULIB_STPNCPY = @GNULIB_STPNCPY@ +GNULIB_STRCASESTR = @GNULIB_STRCASESTR@ +GNULIB_STRCHRNUL = @GNULIB_STRCHRNUL@ +GNULIB_STRDUP = @GNULIB_STRDUP@ +GNULIB_STRERROR = @GNULIB_STRERROR@ +GNULIB_STRNDUP = @GNULIB_STRNDUP@ +GNULIB_STRNLEN = @GNULIB_STRNLEN@ +GNULIB_STRPBRK = @GNULIB_STRPBRK@ +GNULIB_STRSEP = @GNULIB_STRSEP@ +GNULIB_STRSIGNAL = @GNULIB_STRSIGNAL@ +GNULIB_STRSTR = @GNULIB_STRSTR@ +GNULIB_STRTOD = @GNULIB_STRTOD@ +GNULIB_STRTOK_R = @GNULIB_STRTOK_R@ +GNULIB_STRTOLL = @GNULIB_STRTOLL@ +GNULIB_STRTOULL = @GNULIB_STRTOULL@ +GNULIB_STRVERSCMP = @GNULIB_STRVERSCMP@ +GNULIB_TRUNC = @GNULIB_TRUNC@ +GNULIB_TRUNCF = @GNULIB_TRUNCF@ +GNULIB_TRUNCL = @GNULIB_TRUNCL@ +GNULIB_UNISTD_H_SIGPIPE = @GNULIB_UNISTD_H_SIGPIPE@ +GNULIB_UNSETENV = @GNULIB_UNSETENV@ +GNULIB_WCRTOMB = @GNULIB_WCRTOMB@ +GNULIB_WCSNRTOMBS = @GNULIB_WCSNRTOMBS@ +GNULIB_WCSRTOMBS = @GNULIB_WCSRTOMBS@ +GNULIB_WCTOB = @GNULIB_WCTOB@ +GNULIB_WCWIDTH = @GNULIB_WCWIDTH@ +GNULIB_WRITE = @GNULIB_WRITE@ +GREP = @GREP@ +HAVE_ATOLL = @HAVE_ATOLL@ +HAVE_BTOWC = @HAVE_BTOWC@ +HAVE_CALLOC_POSIX = @HAVE_CALLOC_POSIX@ +HAVE_DECL_ACOSL = @HAVE_DECL_ACOSL@ +HAVE_DECL_ASINL = @HAVE_DECL_ASINL@ +HAVE_DECL_ATANL = @HAVE_DECL_ATANL@ +HAVE_DECL_COSL = @HAVE_DECL_COSL@ +HAVE_DECL_ENVIRON = @HAVE_DECL_ENVIRON@ +HAVE_DECL_EXPL = @HAVE_DECL_EXPL@ +HAVE_DECL_FREXPL = @HAVE_DECL_FREXPL@ +HAVE_DECL_GETLOADAVG = @HAVE_DECL_GETLOADAVG@ +HAVE_DECL_GETLOGIN_R = @HAVE_DECL_GETLOGIN_R@ +HAVE_DECL_LDEXPL = @HAVE_DECL_LDEXPL@ +HAVE_DECL_LOGL = @HAVE_DECL_LOGL@ +HAVE_DECL_MEMMEM = @HAVE_DECL_MEMMEM@ +HAVE_DECL_MEMRCHR = @HAVE_DECL_MEMRCHR@ +HAVE_DECL_SINL = @HAVE_DECL_SINL@ +HAVE_DECL_SQRTL = @HAVE_DECL_SQRTL@ +HAVE_DECL_STRDUP = @HAVE_DECL_STRDUP@ +HAVE_DECL_STRERROR = @HAVE_DECL_STRERROR@ +HAVE_DECL_STRNDUP = 
@HAVE_DECL_STRNDUP@ +HAVE_DECL_STRNLEN = @HAVE_DECL_STRNLEN@ +HAVE_DECL_STRSIGNAL = @HAVE_DECL_STRSIGNAL@ +HAVE_DECL_STRTOK_R = @HAVE_DECL_STRTOK_R@ +HAVE_DECL_TANL = @HAVE_DECL_TANL@ +HAVE_DECL_TRUNC = @HAVE_DECL_TRUNC@ +HAVE_DECL_TRUNCF = @HAVE_DECL_TRUNCF@ +HAVE_DECL_WCTOB = @HAVE_DECL_WCTOB@ +HAVE_DECL_WCWIDTH = @HAVE_DECL_WCWIDTH@ +HAVE_DUP2 = @HAVE_DUP2@ +HAVE_EUIDACCESS = @HAVE_EUIDACCESS@ +HAVE_FSYNC = @HAVE_FSYNC@ +HAVE_FTRUNCATE = @HAVE_FTRUNCATE@ +HAVE_GETDOMAINNAME = @HAVE_GETDOMAINNAME@ +HAVE_GETDTABLESIZE = @HAVE_GETDTABLESIZE@ +HAVE_GETHOSTNAME = @HAVE_GETHOSTNAME@ +HAVE_GETPAGESIZE = @HAVE_GETPAGESIZE@ +HAVE_GETSUBOPT = @HAVE_GETSUBOPT@ +HAVE_GETUSERSHELL = @HAVE_GETUSERSHELL@ +HAVE_GLOBAL_SYMBOL_PIPE = @HAVE_GLOBAL_SYMBOL_PIPE@ +HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ +HAVE_ISNAND = @HAVE_ISNAND@ +HAVE_ISNANF = @HAVE_ISNANF@ +HAVE_ISNANL = @HAVE_ISNANL@ +HAVE_ISWCNTRL = @HAVE_ISWCNTRL@ +HAVE_LINK = @HAVE_LINK@ +HAVE_LONG_LONG_INT = @HAVE_LONG_LONG_INT@ +HAVE_MALLOC_POSIX = @HAVE_MALLOC_POSIX@ +HAVE_MBRLEN = @HAVE_MBRLEN@ +HAVE_MBRTOWC = @HAVE_MBRTOWC@ +HAVE_MBSINIT = @HAVE_MBSINIT@ +HAVE_MBSNRTOWCS = @HAVE_MBSNRTOWCS@ +HAVE_MBSRTOWCS = @HAVE_MBSRTOWCS@ +HAVE_MEMPCPY = @HAVE_MEMPCPY@ +HAVE_MKDTEMP = @HAVE_MKDTEMP@ +HAVE_OS_H = @HAVE_OS_H@ +HAVE_RANDOM_H = @HAVE_RANDOM_H@ +HAVE_RANDOM_R = @HAVE_RANDOM_R@ +HAVE_RAWMEMCHR = @HAVE_RAWMEMCHR@ +HAVE_READLINK = @HAVE_READLINK@ +HAVE_REALLOC_POSIX = @HAVE_REALLOC_POSIX@ +HAVE_RPMATCH = @HAVE_RPMATCH@ +HAVE_SETENV = @HAVE_SETENV@ +HAVE_SIGNED_SIG_ATOMIC_T = @HAVE_SIGNED_SIG_ATOMIC_T@ +HAVE_SIGNED_WCHAR_T = @HAVE_SIGNED_WCHAR_T@ +HAVE_SIGNED_WINT_T = @HAVE_SIGNED_WINT_T@ +HAVE_SLEEP = @HAVE_SLEEP@ +HAVE_STDINT_H = @HAVE_STDINT_H@ +HAVE_STPCPY = @HAVE_STPCPY@ +HAVE_STPNCPY = @HAVE_STPNCPY@ +HAVE_STRCASESTR = @HAVE_STRCASESTR@ +HAVE_STRCHRNUL = @HAVE_STRCHRNUL@ +HAVE_STRNDUP = @HAVE_STRNDUP@ +HAVE_STRPBRK = @HAVE_STRPBRK@ +HAVE_STRSEP = @HAVE_STRSEP@ +HAVE_STRTOD = @HAVE_STRTOD@ +HAVE_STRTOLL = @HAVE_STRTOLL@ 
+HAVE_STRTOULL = @HAVE_STRTOULL@ +HAVE_STRUCT_RANDOM_DATA = @HAVE_STRUCT_RANDOM_DATA@ +HAVE_STRVERSCMP = @HAVE_STRVERSCMP@ +HAVE_SYS_BITYPES_H = @HAVE_SYS_BITYPES_H@ +HAVE_SYS_INTTYPES_H = @HAVE_SYS_INTTYPES_H@ +HAVE_SYS_LOADAVG_H = @HAVE_SYS_LOADAVG_H@ +HAVE_SYS_PARAM_H = @HAVE_SYS_PARAM_H@ +HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HAVE_UNISTD_H = @HAVE_UNISTD_H@ +HAVE_UNSETENV = @HAVE_UNSETENV@ +HAVE_UNSIGNED_LONG_LONG_INT = @HAVE_UNSIGNED_LONG_LONG_INT@ +HAVE_WCHAR_H = @HAVE_WCHAR_H@ +HAVE_WCRTOMB = @HAVE_WCRTOMB@ +HAVE_WCSNRTOMBS = @HAVE_WCSNRTOMBS@ +HAVE_WCSRTOMBS = @HAVE_WCSRTOMBS@ +HAVE_WCTYPE_H = @HAVE_WCTYPE_H@ +HAVE_WINT_T = @HAVE_WINT_T@ +HAVE__BOOL = @HAVE__BOOL@ +HEXVERSION = @HEXVERSION@ +ICONV_H = @ICONV_H@ +INCLUDE_NEXT = @INCLUDE_NEXT@ +INCLUDE_NEXT_AS_FIRST_DIRECTIVE = @INCLUDE_NEXT_AS_FIRST_DIRECTIVE@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTESTS_LIBDEPS = @LIBTESTS_LIBDEPS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LOCALCHARSET_TESTS_ENVIRONMENT = @LOCALCHARSET_TESTS_ENVIRONMENT@ +LOCALE_FR = @LOCALE_FR@ +LOCALE_FR_UTF8 = @LOCALE_FR_UTF8@ +LOCALE_H = @LOCALE_H@ +LOCALE_JA = @LOCALE_JA@ +LOCALE_TR_UTF8 = @LOCALE_TR_UTF8@ +LOCALE_ZH_CN = @LOCALE_ZH_CN@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = env LANG= LC_MESSAGES= LC_ALL= LANGUAGE= @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NAMESPACING = @NAMESPACING@ +NEXT_AS_FIRST_DIRECTIVE_ERRNO_H = @NEXT_AS_FIRST_DIRECTIVE_ERRNO_H@ +NEXT_AS_FIRST_DIRECTIVE_FLOAT_H = @NEXT_AS_FIRST_DIRECTIVE_FLOAT_H@ +NEXT_AS_FIRST_DIRECTIVE_ICONV_H = @NEXT_AS_FIRST_DIRECTIVE_ICONV_H@ +NEXT_AS_FIRST_DIRECTIVE_LOCALE_H = @NEXT_AS_FIRST_DIRECTIVE_LOCALE_H@ +NEXT_AS_FIRST_DIRECTIVE_MATH_H = 
@NEXT_AS_FIRST_DIRECTIVE_MATH_H@ +NEXT_AS_FIRST_DIRECTIVE_STDINT_H = @NEXT_AS_FIRST_DIRECTIVE_STDINT_H@ +NEXT_AS_FIRST_DIRECTIVE_STDLIB_H = @NEXT_AS_FIRST_DIRECTIVE_STDLIB_H@ +NEXT_AS_FIRST_DIRECTIVE_STRING_H = @NEXT_AS_FIRST_DIRECTIVE_STRING_H@ +NEXT_AS_FIRST_DIRECTIVE_UNISTD_H = @NEXT_AS_FIRST_DIRECTIVE_UNISTD_H@ +NEXT_AS_FIRST_DIRECTIVE_WCHAR_H = @NEXT_AS_FIRST_DIRECTIVE_WCHAR_H@ +NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H = @NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H@ +NEXT_ERRNO_H = @NEXT_ERRNO_H@ +NEXT_FLOAT_H = @NEXT_FLOAT_H@ +NEXT_ICONV_H = @NEXT_ICONV_H@ +NEXT_LOCALE_H = @NEXT_LOCALE_H@ +NEXT_MATH_H = @NEXT_MATH_H@ +NEXT_STDINT_H = @NEXT_STDINT_H@ +NEXT_STDLIB_H = @NEXT_STDLIB_H@ +NEXT_STRING_H = @NEXT_STRING_H@ +NEXT_UNISTD_H = @NEXT_UNISTD_H@ +NEXT_WCHAR_H = @NEXT_WCHAR_H@ +NEXT_WCTYPE_H = @NEXT_WCTYPE_H@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PRAGMA_SYSTEM_HEADER = @PRAGMA_SYSTEM_HEADER@ +PTRDIFF_T_SUFFIX = @PTRDIFF_T_SUFFIX@ +RANLIB = @RANLIB@ +RC = @RC@ +RELOCATABLE = @RELOCATABLE@ +REPLACE_BTOWC = @REPLACE_BTOWC@ +REPLACE_CEILF = @REPLACE_CEILF@ +REPLACE_CEILL = @REPLACE_CEILL@ +REPLACE_CHOWN = @REPLACE_CHOWN@ +REPLACE_CLOSE = @REPLACE_CLOSE@ +REPLACE_FCHDIR = @REPLACE_FCHDIR@ +REPLACE_FLOORF = @REPLACE_FLOORF@ +REPLACE_FLOORL = @REPLACE_FLOORL@ +REPLACE_FREXP = @REPLACE_FREXP@ +REPLACE_FREXPL = @REPLACE_FREXPL@ +REPLACE_GETCWD = @REPLACE_GETCWD@ +REPLACE_GETPAGESIZE = @REPLACE_GETPAGESIZE@ +REPLACE_HUGE_VAL = @REPLACE_HUGE_VAL@ +REPLACE_ICONV = @REPLACE_ICONV@ +REPLACE_ICONV_OPEN = @REPLACE_ICONV_OPEN@ +REPLACE_ICONV_UTF = @REPLACE_ICONV_UTF@ +REPLACE_ISFINITE = @REPLACE_ISFINITE@ +REPLACE_ISINF = @REPLACE_ISINF@ +REPLACE_ISNAN = @REPLACE_ISNAN@ 
+REPLACE_ISWCNTRL = @REPLACE_ISWCNTRL@ +REPLACE_LCHOWN = @REPLACE_LCHOWN@ +REPLACE_LDEXPL = @REPLACE_LDEXPL@ +REPLACE_LSEEK = @REPLACE_LSEEK@ +REPLACE_MBRLEN = @REPLACE_MBRLEN@ +REPLACE_MBRTOWC = @REPLACE_MBRTOWC@ +REPLACE_MBSINIT = @REPLACE_MBSINIT@ +REPLACE_MBSNRTOWCS = @REPLACE_MBSNRTOWCS@ +REPLACE_MBSRTOWCS = @REPLACE_MBSRTOWCS@ +REPLACE_MBSTATE_T = @REPLACE_MBSTATE_T@ +REPLACE_MEMCHR = @REPLACE_MEMCHR@ +REPLACE_MEMMEM = @REPLACE_MEMMEM@ +REPLACE_MKSTEMP = @REPLACE_MKSTEMP@ +REPLACE_NAN = @REPLACE_NAN@ +REPLACE_PUTENV = @REPLACE_PUTENV@ +REPLACE_ROUND = @REPLACE_ROUND@ +REPLACE_ROUNDF = @REPLACE_ROUNDF@ +REPLACE_ROUNDL = @REPLACE_ROUNDL@ +REPLACE_SIGNBIT = @REPLACE_SIGNBIT@ +REPLACE_SIGNBIT_USING_GCC = @REPLACE_SIGNBIT_USING_GCC@ +REPLACE_STRCASESTR = @REPLACE_STRCASESTR@ +REPLACE_STRDUP = @REPLACE_STRDUP@ +REPLACE_STRERROR = @REPLACE_STRERROR@ +REPLACE_STRSIGNAL = @REPLACE_STRSIGNAL@ +REPLACE_STRSTR = @REPLACE_STRSTR@ +REPLACE_STRTOD = @REPLACE_STRTOD@ +REPLACE_TRUNCL = @REPLACE_TRUNCL@ +REPLACE_WCRTOMB = @REPLACE_WCRTOMB@ +REPLACE_WCSNRTOMBS = @REPLACE_WCSNRTOMBS@ +REPLACE_WCSRTOMBS = @REPLACE_WCSRTOMBS@ +REPLACE_WCTOB = @REPLACE_WCTOB@ +REPLACE_WCWIDTH = @REPLACE_WCWIDTH@ +REPLACE_WRITE = @REPLACE_WRITE@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@ +SIZE_T_SUFFIX = @SIZE_T_SUFFIX@ +STDBOOL_H = @STDBOOL_H@ +STDINT_H = @STDINT_H@ +STRIP = @STRIP@ + +# Documentation in DVI format. 
+ +# Override of automake's definition: +#TEXI2DVI = @TEXI2DVI@ +TEXI2DVI = @TEXI2DVI@ $(TEXINCLUDES) +UNISTD_H_HAVE_WINSOCK2_H = @UNISTD_H_HAVE_WINSOCK2_H@ +VERSION = @VERSION@ +VOID_UNSETENV = @VOID_UNSETENV@ +WCHAR_H = @WCHAR_H@ +WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@ +WCTYPE_H = @WCTYPE_H@ +WINT_T_SUFFIX = @WINT_T_SUFFIX@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +gl_LIBOBJS = @gl_LIBOBJS@ +gl_LTLIBOBJS = @gl_LTLIBOBJS@ +gltests_LIBOBJS = @gltests_LIBOBJS@ +gltests_LTLIBOBJS = @gltests_LTLIBOBJS@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = 1.5 gnits +EXTRA_DIST = +MOSTLYCLEANFILES = + +# List of -I options referring to directories that contain texinfo sources +# used by this directory. 
+# Should contain at least one -I option, to work around a bug in texi2dvi 1.13, +# see <http://lists.gnu.org/archive/html/bug-automake/2009-04/msg00029.html>. +TEXINCLUDES = -I . +MAKEINFOFLAGS = $(TEXINCLUDES) --no-split +info_TEXINFOS = libunistring.texi +# List of texinfo sources @included by libunistring.texi, excluding version.texi. +libunistring_TEXINFOS = \ + unitypes.texi unistr.texi uniconv.texi unistdio.texi uniname.texi \ + unictype.texi uniwidth.texi uniwbrk.texi unilbrk.texi uninorm.texi \ + unicase.texi uniregex.texi \ + gpl.texi lgpl.texi fdl.texi + + +# CLEANFILES: libunistring.{dvi,ps,pdf,html} are already known to automake. +MAINTAINERCLEANFILES = libunistring_*.html + +# Documentation in Portable Document Format. + +# Override of automake's definition: +#TEXI2PDF = @TEXI2DVI@ --pdf +TEXI2PDF = @TEXI2DVI@ --pdf $(TEXINCLUDES) + +# Documentation in HTML format. +TEXI2HTML = @PERL@ $(top_srcdir)/build-aux/texi2html +all: all-am + +.SUFFIXES: +.SUFFIXES: .dvi .html .info .pdf .ps .texi +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnits doc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnits doc/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +.texi.info: + restore=: && backupdir="$(am__leading_dot)am$$$$" && \ + am__cwd=`pwd` && $(am__cd) $(srcdir) && \ + rm -rf $$backupdir && mkdir $$backupdir && \ + if ($(MAKEINFO) --version) >/dev/null 2>&1; then \ + for f in $@ $@-[0-9] $@-[0-9][0-9] $(@:.info=).i[0-9] $(@:.info=).i[0-9][0-9]; do \ + if test -f $$f; then mv $$f $$backupdir; restore=mv; else :; fi; \ + done; \ + else :; fi && \ + cd "$$am__cwd"; \ + if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $@ $<; \ + then \ + rc=0; \ + $(am__cd) $(srcdir); \ + else \ + rc=$$?; \ + $(am__cd) $(srcdir) && \ + $$restore $$backupdir/* `echo "./$@" | sed 's|[^/]*$$||'`; \ + fi; \ + rm -rf $$backupdir; exit $$rc + +.texi.dvi: + TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2DVI) $< + +.texi.pdf: + TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2PDF) $< + +.texi.html: + rm -rf $(@:.html=.htp) + if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $(@:.html=.htp) $<; \ + then \ + rm -rf $@; \ + if test ! 
-d $(@:.html=.htp) && test -d $(@:.html=); then \ + mv $(@:.html=) $@; else mv $(@:.html=.htp) $@; fi; \ + else \ + if test ! -d $(@:.html=.htp) && test -d $(@:.html=); then \ + rm -rf $(@:.html=); else rm -Rf $(@:.html=.htp) $@; fi; \ + exit 1; \ + fi +$(srcdir)/libunistring.info: libunistring.texi $(srcdir)/version.texi $(libunistring_TEXINFOS) +libunistring.dvi: libunistring.texi $(srcdir)/version.texi $(libunistring_TEXINFOS) +libunistring.pdf: libunistring.texi $(srcdir)/version.texi $(libunistring_TEXINFOS) +$(srcdir)/version.texi: $(srcdir)/stamp-vti + +mostlyclean-vti: + -rm -f vti.tmp + +maintainer-clean-vti: + -rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi +.dvi.ps: + TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + $(DVIPS) -o $@ $< + +uninstall-dvi-am: + @$(NORMAL_UNINSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(dvidir)/$$f'"; \ + rm -f "$(DESTDIR)$(dvidir)/$$f"; \ + done + +uninstall-html-am: + @$(NORMAL_UNINSTALL) + @list='$(HTMLS)'; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -rf '$(DESTDIR)$(htmldir)/$$f'"; \ + rm -rf "$(DESTDIR)$(htmldir)/$$f"; \ + done + +uninstall-info-am: + @$(PRE_UNINSTALL) + @if test -d '$(DESTDIR)$(infodir)' && \ + (install-info --version && \ + install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \ + list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' --remove '$(DESTDIR)$(infodir)/$$relfile'"; \ + if install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$$relfile"; \ + then :; else test ! 
-f "$(DESTDIR)$(infodir)/$$relfile" || exit 1; fi; \ + done; \ + else :; fi + @$(NORMAL_UNINSTALL) + @list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + relfile_i=`echo "$$relfile" | sed 's|\.info$$||;s|$$|.i|'`; \ + (if test -d "$(DESTDIR)$(infodir)" && cd "$(DESTDIR)$(infodir)"; then \ + echo " cd '$(DESTDIR)$(infodir)' && rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]"; \ + rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]; \ + else :; fi); \ + done + +uninstall-pdf-am: + @$(NORMAL_UNINSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(pdfdir)/$$f'"; \ + rm -f "$(DESTDIR)$(pdfdir)/$$f"; \ + done + +uninstall-ps-am: + @$(NORMAL_UNINSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(psdir)/$$f'"; \ + rm -f "$(DESTDIR)$(psdir)/$$f"; \ + done + +dist-info: $(INFO_DEPS) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; \ + for base in $$list; do \ + case $$base in \ + $(srcdir)/*) base=`echo "$$base" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$base; then d=.; else d=$(srcdir); fi; \ + base_i=`echo "$$base" | sed 's|\.info$$||;s|$$|.i|'`; \ + for file in $$d/$$base $$d/$$base-[0-9] $$d/$$base-[0-9][0-9] $$d/$$base_i[0-9] $$d/$$base_i[0-9][0-9]; do \ + if test -f $$file; then \ + relfile=`expr "$$file" : "$$d/\(.*\)"`; \ + test -f "$(distdir)/$$relfile" || \ + cp -p $$file "$(distdir)/$$relfile"; \ + else :; fi; \ + done; \ + done + +mostlyclean-aminfo: + -rm -rf libunistring.am libunistring.aux libunistring.cp libunistring.cps \ + libunistring.fn libunistring.ky libunistring.kys \ + libunistring.log libunistring.pg libunistring.pgs \ + libunistring.tmp libunistring.toc libunistring.tp \ + libunistring.vr libunistring.vrs + +clean-aminfo: + 
-test -z "libunistring.dvi libunistring.pdf libunistring.ps libunistring.html" \ + || rm -rf libunistring.dvi libunistring.pdf libunistring.ps libunistring.html + +maintainer-clean-aminfo: + @list='$(INFO_DEPS)'; for i in $$list; do \ + i_i=`echo "$$i" | sed 's|\.info$$||;s|$$|.i|'`; \ + echo " rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]"; \ + rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]; \ + done +tags: TAGS +TAGS: + +ctags: CTAGS +CTAGS: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" distdir="$(distdir)" \ + dist-info dist-hook +check-am: all-am +check: check-am +all-am: Makefile $(INFO_DEPS) all-local +installdirs: installdirs-local + for dir in "$(DESTDIR)$(infodir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES) + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-aminfo clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: $(DVIS) + +html: html-am + +html-am: $(HTMLS) html-local + +info: info-am + +info-am: $(INFO_DEPS) + +install-data-am: install-data-local install-info-am + +install-dvi: install-dvi-am + +install-dvi-am: $(DVIS) + @$(NORMAL_INSTALL) + test -z "$(dvidir)" || $(MKDIR_P) "$(DESTDIR)$(dvidir)" + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(dvidir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(dvidir)" || exit $$?; \ + done +install-exec-am: + +install-html-am: $(HTMLS) + @$(NORMAL_INSTALL) + test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)" + @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \ + $(am__strip_dir) \ + if test -d "$$d$$p"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \ + echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \ + else \ + list2="$$list2 $$d$$p"; \ + fi; \ + done; \ + test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ + done; } +install-info: install-info-am + +install-info-am: $(INFO_DEPS) + @$(NORMAL_INSTALL) + test -z "$(infodir)" || $(MKDIR_P) "$(DESTDIR)$(infodir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || 
list=; \ + for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + file_i=`echo "$$file" | sed 's|\.info$$||;s|$$|.i|'`; \ + for ifile in $$d/$$file $$d/$$file-[0-9] $$d/$$file-[0-9][0-9] \ + $$d/$$file_i[0-9] $$d/$$file_i[0-9][0-9] ; do \ + if test -f $$ifile; then \ + echo "$$ifile"; \ + else : ; fi; \ + done; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done + @$(POST_INSTALL) + @if (install-info --version && \ + install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' '$(DESTDIR)$(infodir)/$$relfile'";\ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$$relfile" || :;\ + done; \ + else : ; fi +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: $(PDFS) + @$(NORMAL_INSTALL) + test -z "$(pdfdir)" || $(MKDIR_P) "$(DESTDIR)$(pdfdir)" + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pdfdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pdfdir)" || exit $$?; done +install-ps: install-ps-am + +install-ps-am: $(PSS) + @$(NORMAL_INSTALL) + test -z "$(psdir)" || $(MKDIR_P) "$(DESTDIR)$(psdir)" + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(psdir)'"; \ + $(INSTALL_DATA) $$files 
"$(DESTDIR)$(psdir)" || exit $$?; done +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-vti + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-aminfo mostlyclean-generic \ + mostlyclean-libtool mostlyclean-vti + +pdf: pdf-am + +pdf-am: $(PDFS) + +ps: ps-am + +ps-am: $(PSS) + +uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-local uninstall-pdf-am uninstall-ps-am + +.MAKE: install-am install-strip + +.PHONY: all all-am all-local check check-am clean clean-aminfo \ + clean-generic clean-libtool dist-hook dist-info distclean \ + distclean-generic distclean-libtool distdir dvi dvi-am html \ + html-am html-local info info-am install install-am \ + install-data install-data-am install-data-local install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-local maintainer-clean maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-vti mostlyclean \ + mostlyclean-aminfo mostlyclean-generic mostlyclean-libtool \ + mostlyclean-vti pdf pdf-am ps ps-am uninstall uninstall-am \ + uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-local uninstall-pdf-am uninstall-ps-am + + +# The dependencies of stamp-vti generated by automake are incomplete. +# So we have to duplicate the entire rule which would otherwise be generated +# by automake. 
+# Regenerate version.texi (UPDATED, UPDATED-MONTH, EDITION, VERSION) whenever
+# the manual sources or version.sh change.  version.texi is rewritten only if
+# its content actually differs; afterwards it is copied to the stamp file.
+$(srcdir)/stamp-vti: $(info_TEXINFOS) $(libunistring_TEXINFOS) $(top_srcdir)/version.sh
+	(dir=.; test -f ./libunistring.texi || dir=$(srcdir); \
+	set `$(SHELL) $(top_srcdir)/build-aux/mdate-sh $$dir/libunistring.texi`; \
+	echo "@set UPDATED $$1 $$2 $$3"; \
+	echo "@set UPDATED-MONTH $$2 $$3"; \
+	echo "@set EDITION $(VERSION)"; \
+	echo "@set VERSION $(VERSION)") > vti.tmp
+	cmp -s vti.tmp $(srcdir)/version.texi \
+	|| (echo "Updating $(srcdir)/version.texi"; \
+	cp vti.tmp $(srcdir)/version.texi)
+	rm -f vti.tmp
+	cp $(srcdir)/version.texi $@
+
+# We distribute only the split HTML documentation.
+# The user can generate the others, via
+# make libunistring.ps
+# make libunistring.pdf
+# make libunistring.html
+
+# Hook the split HTML documentation into the -local / dist-hook targets.
+all-local: html-local
+install-data-local: install-html
+installdirs-local: installdirs-html
+uninstall-local: uninstall-html
+dist-hook: dist-html
+
+html-local: html-split
+# Override of automake's definition. The HTML files we want to distribute are
+# not the ones that automake knows about, and we cannot define HTMLS to a value
+# containing wildcards.
+install-html: install-html-split
+	@:
+uninstall-html: uninstall-html-split
+dist-html: dist-html-split
+
+# The install-dvi target is already defined by automake.
+
+# Destination paths below are quoted so that a DESTDIR or installation
+# directory containing whitespace is passed as a single argument.
+installdirs-dvi:
+	$(mkdir_p) "$(DESTDIR)$(dvidir)"
+
+uninstall-dvi:
+	$(RM) "$(DESTDIR)$(dvidir)/libunistring.dvi"
+
+# Use the DVI file from the build directory if present, else from $(srcdir).
+libunistring.ps: libunistring.dvi
+	$(DVIPS) -o $@ `if test -f libunistring.dvi; then echo libunistring.dvi; else echo $(srcdir)/libunistring.dvi; fi`
+
+# The install-ps target is already defined by automake.
+
+installdirs-ps:
+	$(mkdir_p) "$(DESTDIR)$(psdir)"
+
+uninstall-ps:
+	$(RM) "$(DESTDIR)$(psdir)/libunistring.ps"
+
+# The install-pdf target is already defined by automake.
+
+installdirs-pdf:
+	$(mkdir_p) "$(DESTDIR)$(pdfdir)"
+
+uninstall-pdf:
+	$(RM) "$(DESTDIR)$(pdfdir)/libunistring.pdf"
+
+html-monolithic: libunistring.html
+html-split: libunistring_toc.html
+
+# Override of automake's definition.
+# We want to use texi2html, not makeinfo --html.
+libunistring.html: libunistring.texi version.texi $(libunistring_TEXINFOS)
+	$(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -monolithic `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi`
+
+# Split (one file per chapter) HTML.  texi2html writes into the libunistring/
+# subdirectory; the results are then moved into the current directory, with
+# the top-level page renamed to libunistring_toc.html.
+# If the configured @PERL@ value ends in "/missing perl", a texi2html failure
+# ends the recipe successfully (exit 0) and the existing HTML files are kept.
+# Otherwise stale libunistring_*.html files are removed first.
+# The post-processing steps are chained with && so that a failed mv or rmdir
+# fails the recipe instead of being hidden by a later command's exit status.
+libunistring_toc.html: libunistring.texi version.texi $(libunistring_TEXINFOS)
+	case "@PERL@" in \
+	  *"/missing perl") \
+	     $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` || exit 0 ;; \
+	  *) $(RM) libunistring_*.html ; \
+	     $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` ;; \
+	esac \
+	&& mv libunistring/libunistring.html libunistring_toc.html \
+	&& mv libunistring/*.html . \
+	&& rmdir libunistring
+
+# Install the monolithic HTML file, taken from the build directory if it
+# exists there, else from $(srcdir).
+install-html-monolithic: libunistring.html
+	$(mkdir_p) "$(DESTDIR)$(htmldir)"
+	$(INSTALL_DATA) `if test -f libunistring.html; then echo .; else echo $(srcdir); fi`/libunistring.html "$(DESTDIR)$(htmldir)/libunistring.html"
+
+# Install every libunistring_*.html file.  The loop stops at the first failed
+# installation and propagates its exit status.
+install-html-split: libunistring_toc.html
+	$(mkdir_p) "$(DESTDIR)$(htmldir)"
+	for file in `if test -f libunistring_toc.html; then echo .; else echo $(srcdir); fi`/libunistring_*.html; do \
+	  $(INSTALL_DATA) $$file "$(DESTDIR)$(htmldir)"/`basename $$file` || exit $$?; \
+	done
+
+installdirs-html:
+	$(mkdir_p) "$(DESTDIR)$(htmldir)"
+
+uninstall-html-monolithic:
+	$(RM) "$(DESTDIR)$(htmldir)/libunistring.html"
+
+# The wildcard stays outside the quotes so that the shell still expands it.
+uninstall-html-split:
+	$(RM) "$(DESTDIR)$(htmldir)"/libunistring_*.html
+
+# Copy the monolithic HTML file into the distribution directory, taking it
+# from the build directory if it exists there, else from $(srcdir).
+dist-html-monolithic:
+	$(mkdir_p) "$(distdir)/"
+	file=libunistring.html; \
+	if test -f $$file; then d=.; else d=$(srcdir); fi; \
+	cp -p $$d/$$file "$(distdir)/$$file" || exit 1
+
+# We would like to put libunistring_*.html into EXTRA_DIST, but it doesn't work.
+dist-html-split: + $(mkdir_p) $(distdir)/ + file=libunistring_toc.html; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + for file in `cd $$d && echo libunistring_*.html`; do \ + cp -p $$d/$$file $(distdir)/$$file || exit 1; \ + done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/fdl.texi b/doc/fdl.texi new file mode 100644 index 00000000..8805f1a4 --- /dev/null +++ b/doc/fdl.texi @@ -0,0 +1,506 @@ +@c The GNU Free Documentation License. +@center Version 1.3, 3 November 2008 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. +@uref{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +@end display + +@enumerate 0 +@item +PREAMBLE + +The purpose of this License is to make a manual, textbook, or other +functional and useful document @dfn{free} in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. + +This License is a kind of ``copyleft'', which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. + +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. 
But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +@item +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. + +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. + +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. 
If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. + +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. + +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +@sc{ascii} without markup, Texinfo input format, La@TeX{} input +format, @acronym{SGML} or @acronym{XML} using a publicly available +@acronym{DTD}, and standard-conforming simple @acronym{HTML}, +PostScript or @acronym{PDF} designed for human modification. Examples +of transparent image formats include @acronym{PNG}, @acronym{XCF} and +@acronym{JPG}. 
Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, @acronym{SGML} or +@acronym{XML} for which the @acronym{DTD} and/or processing tools are +not generally available, and the machine-generated @acronym{HTML}, +PostScript or @acronym{PDF} produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +The ``publisher'' means any person or entity that distributes copies +of the Document to the public. + +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. 
+ +@item +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + +@item +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. 
+ +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. + +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + +@item +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +@enumerate A +@item +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. 
+ +@item +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +@item +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +@item +Preserve all the copyright notices of the Document. + +@item +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +@item +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +@item +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +@item +Include an unaltered copy of this License. + +@item +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +@item +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. 
+ +@item +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +@item +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +@item +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. + +@item +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. + +@item +Preserve any Warranty Disclaimers. +@end enumerate + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. 
If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. + +@item +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. 
You must delete all +sections Entitled ``Endorsements.'' + +@item +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. + +@item +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. + +@item +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. 
+Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. + +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. + +@item +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. + +@item +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +@uref{http://www.gnu.org/copyleft/}. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. + +@item +RELICENSING + +``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the +site means any set of copyrightable works thus published on the MMC +site. 
+ +``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. + +``Incorporate'' means to publish or republish a Document, in whole or +in part, as part of another Document. + +An MMC is ``eligible for relicensing'' if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. + +@end enumerate + +@page +@heading ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +@smallexample +@group + Copyright (C) @var{year} @var{your name}. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. 
+@end group +@end smallexample + +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with@dots{}Texts.'' line with this: + +@smallexample +@group + with the Invariant Sections being @var{list their titles}, with + the Front-Cover Texts being @var{list}, and with the Back-Cover Texts + being @var{list}. +@end group +@end smallexample + +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. + +@c Local Variables: +@c ispell-local-pdict: "ispell-dict" +@c End: + diff --git a/doc/gpl.texi b/doc/gpl.texi new file mode 100644 index 00000000..97a17e19 --- /dev/null +++ b/doc/gpl.texi @@ -0,0 +1,717 @@ +@c The GNU General Public License. +@center Version 3, 29 June 2007 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2007 Free Software Foundation, Inc. @url{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +@end display + +@heading Preamble + +The GNU General Public License is a free, copyleft license for +software and other kinds of works. + +The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom +to share and change all versions of a program---to make sure it remains +free software for all its users. We, the Free Software Foundation, +use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. 
You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the +software, or if you modify it: responsibilities to respect the freedom +of others. + +For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, +receive or can get the source code. And you must show them these +terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + +Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. 
+Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those +domains in future versions of the GPL, as needed to protect the +freedom of users. + +Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish +to avoid the special danger that patents applied to a free program +could make it effectively proprietary. To prevent this, the GPL +assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and +modification follow. + +@heading TERMS AND CONDITIONS + +@enumerate 0 +@item Definitions. + +``This License'' refers to version 3 of the GNU General Public License. + +``Copyright'' also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. + +``The Program'' refers to any copyrightable work licensed under this +License. Each licensee is addressed as ``you''. ``Licensees'' and +``recipients'' may be individuals or organizations. + +To ``modify'' a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. The resulting work is called a ``modified version'' of +the earlier work or a work ``based on'' the earlier work. + +A ``covered work'' means either the unmodified Program or a work based +on the Program. + +To ``propagate'' a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. 
Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + +To ``convey'' a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. + +An interactive user interface displays ``Appropriate Legal Notices'' to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +@item Source Code. + +The ``source code'' for a work means the preferred form of the work for +making modifications to it. ``Object code'' means any non-source form +of a work. + +A ``Standard Interface'' means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + +The ``System Libraries'' of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +``Major Component'', in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + +The ``Corresponding Source'' for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same +work. + +@item Basic Permissions. + +All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. 
+You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + +@item Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. + +@item Conveying Verbatim Copies. 
+ +You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +@item Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: + +@enumerate a +@item +The work must carry prominent notices stating that you modified it, +and giving a relevant date. + +@item +The work must carry prominent notices stating that it is released +under this License and any conditions added under section 7. This +requirement modifies the requirement in section 4 to ``keep intact all +notices''. + +@item +You must license the entire work, as a whole, under this License to +anyone who comes into possession of a copy. This License will +therefore apply, along with any applicable section 7 additional terms, +to the whole of the work, and all its parts, regardless of how they +are packaged. This License gives no permission to license the work in +any other way, but it does not invalidate such permission if you have +separately received it. + +@item +If the work has interactive user interfaces, each must display +Appropriate Legal Notices; however, if the Program has interactive +interfaces that do not display Appropriate Legal Notices, your work +need not make them do so. 
+@end enumerate + +A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +``aggregate'' if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +@item Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: + +@enumerate a +@item +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by the +Corresponding Source fixed on a durable physical medium customarily +used for software interchange. + +@item +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by a written +offer, valid for at least three years and valid for as long as you +offer spare parts or customer support for that product model, to give +anyone who possesses the object code either (1) a copy of the +Corresponding Source for all the software in the product that is +covered by this License, on a durable physical medium customarily used +for software interchange, for a price no more than your reasonable +cost of physically performing this conveying of source, or (2) access +to copy the Corresponding Source from a network server at no charge. + +@item +Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. 
This alternative is +allowed only occasionally and noncommercially, and only if you +received the object code with such an offer, in accord with subsection +6b. + +@item +Convey the object code by offering access from a designated place +(gratis or for a charge), and offer equivalent access to the +Corresponding Source in the same way through the same place at no +further charge. You need not require recipients to copy the +Corresponding Source along with the object code. If the place to copy +the object code is a network server, the Corresponding Source may be +on a different server (operated by you or a third party) that supports +equivalent copying facilities, provided you maintain clear directions +next to the object code saying where to find the Corresponding Source. +Regardless of what server hosts the Corresponding Source, you remain +obligated to ensure that it is available for as long as needed to +satisfy these requirements. + +@item +Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of +the work are being offered to the general public at no charge under +subsection 6d. + +@end enumerate + +A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + +A ``User Product'' is either (1) a ``consumer product'', which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. 
For a particular product received by a particular user, +``normally used'' refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. + +``Installation Information'' for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. 
Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. + +Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +@item Additional Terms. + +``Additional permissions'' are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: + +@enumerate a +@item +Disclaiming warranty or limiting liability differently from the terms +of sections 15 and 16 of this License; or + +@item +Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices +displayed by works containing it; or + +@item +Prohibiting misrepresentation of the origin of that material, or +requiring that modified versions of such material be marked in +reasonable ways as different from the original version; or + +@item +Limiting the use for publicity purposes of names of licensors or +authors of the material; or + +@item +Declining to grant rights under trademark law for use of some trade +names, trademarks, or service marks; or + +@item +Requiring indemnification of licensors and authors of that material by +anyone who conveys the material (or modified versions of it) with +contractual assumptions of liability to the recipient, for any +liability that these contractual assumptions directly impose on those +licensors and authors. +@end enumerate + +All other non-permissive additional terms are considered ``further +restrictions'' within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. + +@item Termination. + +You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +@item Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run +a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +@item Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + +An ``entity transaction'' is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +@item Patents. 
+ +A ``contributor'' is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's ``contributor version''. + +A contributor's ``essential patent claims'' are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, ``control'' includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + +In the following three paragraphs, a ``patent license'' is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To ``grant'' such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. ``Knowingly relying'' means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + +A patent license is ``discriminatory'' if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. 
You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +@item No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey +a covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree +to terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. + +@item Use with the GNU Affero General Public License. + +Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + +@item Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU General Public +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that numbered version or +of any later version published by the Free Software Foundation. If +the Program does not specify a version number of the GNU General +Public License, you may choose any version ever published by the Free +Software Foundation. + +If the Program specifies that a proxy can decide which future versions +of the GNU General Public License can be used, that proxy's public +statement of acceptance of a version permanently authorizes you to +choose that version for the Program. + +Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +@item Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS'' WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + +@item Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +@item Interpretation of Sections 15 and 16. + +If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +@end enumerate + +@heading END OF TERMS AND CONDITIONS + +@heading How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the ``copyright'' line and a pointer to where the full notice is found. 
+ +@smallexample +@var{one line to give the program's name and a brief idea of what it does.} +Copyright (C) @var{year} @var{name of author} + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see @url{http://www.gnu.org/licenses/}. +@end smallexample + +Also add information on how to contact you by electronic and paper mail. + +If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + +@smallexample +@var{program} Copyright (C) @var{year} @var{name of author} +This program comes with ABSOLUTELY NO WARRANTY; for details type @samp{show w}. +This is free software, and you are welcome to redistribute it +under certain conditions; type @samp{show c} for details. +@end smallexample + +The hypothetical commands @samp{show w} and @samp{show c} should show +the appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an ``about box''. + +You should also get your employer (if you work as a programmer) or school, +if any, to sign a ``copyright disclaimer'' for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +@url{http://www.gnu.org/licenses/}. + +The GNU General Public License does not permit incorporating your +program into proprietary programs. 
If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use +the GNU Lesser General Public License instead of this License. But +first, please read @url{http://www.gnu.org/philosophy/why-not-lgpl.html}. diff --git a/doc/lgpl.texi b/doc/lgpl.texi new file mode 100644 index 00000000..c29a6fb4 --- /dev/null +++ b/doc/lgpl.texi @@ -0,0 +1,190 @@ +@c The GNU Lesser General Public License. +@center Version 3, 29 June 2007 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2007 Free Software Foundation, Inc. @url{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +@end display + +This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + +@enumerate 0 +@item Additional Definitions. + +As used herein, ``this License'' refers to version 3 of the GNU Lesser +General Public License, and the ``GNU GPL'' refers to version 3 of the GNU +General Public License. + +``The Library'' refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + +An ``Application'' is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + +A ``Combined Work'' is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the ``Linked +Version''. 
+ +The ``Minimal Corresponding Source'' for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + +The ``Corresponding Application Code'' for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + +@item Exception to Section 3 of the GNU GPL. + +You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + +@item Conveying Modified Versions. + +If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + +@enumerate a +@item +under this License, provided that you make a good faith effort to +ensure that, in the event an Application does not supply the +function or data, the facility still operates, and performs +whatever part of its purpose remains meaningful, or + +@item +under the GNU GPL, with none of the additional permissions of +this License applicable to that copy. +@end enumerate + +@item Object Code Incorporating Material from Library Header Files. + +The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + +@enumerate a +@item +Give prominent notice with each copy of the object code that the +Library is used in it and that the Library and its use are +covered by this License. +@item +Accompany the object code with a copy of the GNU GPL and this license +document. +@end enumerate + +@item Combined Works. + +You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + +@enumerate a +@item +Give prominent notice with each copy of the Combined Work that +the Library is used in it and that the Library and its use are +covered by this License. +@item +Accompany the Combined Work with a copy of the GNU GPL and this license +document. +@item +For a Combined Work that displays copyright notices during +execution, include the copyright notice for the Library among +these notices, as well as a reference directing the user to the +copies of the GNU GPL and this license document. +@item +Do one of the following: + +@enumerate 0 +@item +Convey the Minimal Corresponding Source under the terms of this +License, and the Corresponding Application Code in a form +suitable for, and under terms that permit, the user to +recombine or relink the Application with a modified version of +the Linked Version to produce a modified Combined Work, in the +manner specified by section 6 of the GNU GPL for conveying +Corresponding Source. +@item +Use a suitable shared library mechanism for linking with the +Library. 
A suitable mechanism is one that (a) uses at run time +a copy of the Library already present on the user's computer +system, and (b) will operate properly with a modified version +of the Library that is interface-compatible with the Linked +Version. +@end enumerate + +@item +Provide Installation Information, but only if you would otherwise +be required to provide such information under section 6 of the +GNU GPL, and only to the extent that such information is +necessary to install and execute a modified version of the +Combined Work produced by recombining or relinking the +Application with a modified version of the Linked Version. (If +you use option 4d0, the Installation Information must accompany +the Minimal Corresponding Source and Corresponding Application +Code. If you use option 4d1, you must provide the Installation +Information in the manner specified by section 6 of the GNU GPL +for conveying Corresponding Source.) +@end enumerate + +@item Combined Libraries. + +You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + +@enumerate a +@item +Accompany the combined library with a copy of the same work based +on the Library, uncombined with any other library facilities, +conveyed under the terms of this License. +@item +Give prominent notice with the combined library that part of it +is a work based on the Library, and explaining where to find the +accompanying uncombined form of the same work. +@end enumerate + +@item Revised Versions of the GNU Lesser General Public License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. 
Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License ``or any later version'' +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + +If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + +@end enumerate diff --git a/doc/libunistring.info b/doc/libunistring.info new file mode 100644 index 00000000..2fad8fec --- /dev/null +++ b/doc/libunistring.info @@ -0,0 +1,6200 @@ +This is libunistring.info, produced by makeinfo version 4.13 from +libunistring.texi. + +INFO-DIR-SECTION Software development +START-INFO-DIR-ENTRY +* GNU libunistring: (libunistring). Unicode string library. +END-INFO-DIR-ENTRY + + This manual is for GNU libunistring. + + +File: libunistring.info, Node: Top, Next: Introduction, Up: (dir) + +GNU libunistring +**************** + +* Menu: + +* Introduction:: Who may need Unicode strings? 
+* Conventions:: Conventions used in this manual +* unitypes.h:: Elementary types +* unistr.h:: Elementary Unicode string functions +* uniconv.h:: Conversions between Unicode and encodings +* unistdio.h:: Output with Unicode strings +* uniname.h:: Names of Unicode characters +* unictype.h:: Unicode character classification and properties +* uniwidth.h:: Display width +* uniwbrk.h:: Word breaks in strings +* unilbrk.h:: Line breaking +* uninorm.h:: Normalization forms +* unicase.h:: Case mappings +* uniregex.h:: Regular expressions +* Using the library:: How to link with the library and use it? +* More functionality:: More advanced functionality +* Licenses:: Licenses + +* Index:: General Index + + --- The Detailed Node Listing --- + +Introduction + +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with `char *' strings +* The wchar_t mess:: Why `wchar_t *' strings are useless +* Unicode strings:: How are Unicode strings represented? 
+ +unistr.h + +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: + +unictype.h + +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: + +General category + +* Object oriented API:: +* Bit mask API:: + +Properties + +* Properties as objects:: +* Properties as functions:: + +uniwbrk.h + +* Word breaks in a string:: +* Word break property:: + +uninorm.h + +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: + +unicase.h + +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: + +Using the library + +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: + +Licenses + +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License + + +File: libunistring.info, Node: Introduction, Next: Conventions, Prev: Top, Up: Top + +1 Introduction +************** + + This library provides functions for manipulating Unicode strings and +for manipulating C strings according to the Unicode standard.
+ + It consists of the following parts: + +`<unistr.h>' + elementary string functions + +`<uniconv.h>' + conversion from/to legacy encodings + +`<unistdio.h>' + formatted output to strings + +`<uniname.h>' + character names + +`<unictype.h>' + character classification and properties + +`<uniwidth.h>' + string width when using nonproportional fonts + +`<uniwbrk.h>' + word breaks + +`<unilbrk.h>' + line breaking algorithm + +`<uninorm.h>' + normalization (composition and decomposition) + +`<unicase.h>' + case folding + +`<uniregex.h>' + regular expressions (not yet implemented) + + libunistring is for you if your application involves non-trivial text +processing, such as upper/lower case conversions, line breaking, +operations on words, or more advanced analysis of text. Text provided +by the user can, in general, contain characters of all kinds of +scripts. The text processing functions provided by this library handle +all scripts and all languages. + + libunistring is for you if your application already uses the ISO C / +POSIX `<ctype.h>', `<wctype.h>' functions and the text it operates on is +provided by the user and can be in any language. + + libunistring is also for you if your application uses Unicode +strings as internal in-memory representation. + +* Menu: + +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with `char *' strings +* The wchar_t mess:: Why `wchar_t *' strings are useless +* Unicode strings:: How are Unicode strings represented? + + +File: libunistring.info, Node: Unicode, Next: Unicode and i18n, Up: Introduction + +1.1 Unicode +=========== + + Unicode is a standardized repertoire of characters that contains +characters from all scripts of the world, from Latin letters to Chinese +ideographs and Babylonian cuneiform glyphs. 
It also specifies how +these characters are to be rendered on a screen or on paper, and how +common text processing (word selection, line breaking, uppercasing of +page titles etc.) is supposed to behave on Unicode text. + + Unicode also specifies three ways of storing sequences of Unicode +characters in a computer whose basic unit of data is an 8-bit byte: +UTF-8 + Every character is represented as 1 to 4 bytes. + +UTF-16 + Every character is represented as 1 to 2 units of 16 bits. + +UTF-32, a.k.a. UCS-4 + Every character is represented as 1 unit of 32 bits. + + For encoding Unicode text in a file, UTF-8 is usually used. For +encoding Unicode strings in memory for a program, any of the three +encoding forms can reasonably be used. + + Unicode is widely used on the web. Prior to the use of Unicode, web +pages were in many different encodings (ISO-8859-1 for English, French, +Spanish, ISO-8859-2 for Polish, ISO-8859-7 for Greek, KOI8-R for +Russian, GB2312 or BIG5 for Chinese, ISO-2022-JP-2 or EUC-JP or +Shift_JIS for Japanese, and many many others). It was next to +impossible to create a document that contained Chinese and Polish text +in the same document. Due to the many encodings for Japanese, even the +processing of pure Japanese text was error prone. + + References: + * The Unicode standard: `http://www.unicode.org/' + + * Definition of UTF-8: `http://www.rfc-editor.org/rfc/rfc3629.txt' + + * Definition of UTF-16: `http://www.rfc-editor.org/rfc/rfc2781.txt' + + * Markus Kuhn's UTF-8 and Unicode FAQ: + `http://www.cl.cam.ac.uk/~mgk25/unicode.html' + + +File: libunistring.info, Node: Unicode and i18n, Next: Locale encodings, Prev: Unicode, Up: Introduction + +1.2 Unicode and Internationalization +==================================== + + Internationalization is the process of changing the source code of a +program so that it can meet the expectations of users in any culture, +if culture specific data (translations, images etc.) are provided.
+ + Use of Unicode is not strictly required for internationalization, +but it makes internationalization much easier, because operations that +need to look at specific characters (like hyphenation, spell checking, +or the automatic conversion of double-quotes to opening and closing +double-quote characters) don't need to consider multiple possible +encodings of the text. + + Use of Unicode also enables multilingualization: the ability to +have text in multiple languages present in the same document or even +in the same line of text. + + But use of Unicode is not everything. Internationalization usually +consists of three features: + * Use of Unicode where needed for text processing. This is what + this library is for. + + * Use of message catalogs for messages shown to the user. This is + what GNU gettext is about. + + * Use of locale specific conventions for date and time formats, for + numeric formatting, or for sorting of text. This can be done + adequately with the POSIX APIs and the implementation of locales + in the GNU C library. + + +File: libunistring.info, Node: Locale encodings, Next: In-memory representation, Prev: Unicode and i18n, Up: Introduction + +1.3 Locale encodings +==================== + + A locale is a set of cultural conventions. According to POSIX, for +a program, at any moment, there is one locale being designated as the +"current locale". (Actually, POSIX also supports one locale per +thread, but this feature is not yet universally implemented and not +widely used.) The locale is partitioned into several aspects, called +the "categories" of the locale. The main aspects are: + * The character encoding and the character properties. This is the + `LC_CTYPE' category. + + * The sorting rules for text. This is the `LC_COLLATE' category. + + * The language specific translations of messages. This is the + `LC_MESSAGES' category. + + * The formatting rules for numbers, such as the decimal separator. + This is the `LC_NUMERIC' category.
+ + * The formatting rules for amounts of money. This is the + `LC_MONETARY' category. + + * The formatting of date and time. This is the `LC_TIME' category. + + In particular, the `LC_CTYPE' category of the current locale +determines the character encoding. This is the encoding of `char *' +strings. We also call it the "locale encoding". GNU libunistring has +a function, `locale_charset', that returns a standardized (platform +independent) name for this encoding. + + All locale encodings used on glibc systems are essentially ASCII +compatible: Most graphic ASCII characters have the same representation, +as a single byte, in that encoding as in ASCII. + + Among the possible locale encodings are UTF-8 and GB18030. Both +can represent any Unicode character as a sequence of bytes. UTF-8 +is used in most of the world, whereas GB18030 is used in the People's +Republic of China, because it is backward compatible with the GB2312 +encoding that was used in that country earlier. + + The legacy locale encodings, ISO-8859-15 (which supplanted +ISO-8859-1 in most of Europe), ISO-8859-2, KOI8-R, EUC-JP, etc., are +still in use in many places, though. + + UTF-16 and UTF-32 are not used as locale encodings, because they are +not ASCII compatible. + + +File: libunistring.info, Node: In-memory representation, Next: char * strings, Prev: Locale encodings, Up: Introduction + +1.4 Choice of in-memory representation of strings +================================================= + + There are three ways of representing strings in memory of a running +program. + * As `char *' strings. Such strings are represented in locale + encoding. This approach is employed when not much text processing + is done by the program. When some Unicode aware processing is to + be done, a string is converted to Unicode on the fly and back to + locale encoding afterwards. + + * As UTF-8 or UTF-16 or UTF-32 strings.
This implies that + conversion from locale encoding to Unicode is performed on input, + and in the opposite direction on output. This approach is + employed when the program does a significant amount of text + processing, or when the program has multiple threads operating on + the same data but in different locales. + + * As `wchar_t *', a.k.a. "wide strings". This approach is misguided, + see *note The wchar_t mess::. + + +File: libunistring.info, Node: char * strings, Next: The wchar_t mess, Prev: In-memory representation, Up: Introduction + +1.5 `char *' strings +==================== + + The classical C strings, with their C library support standardized by +ISO C and POSIX, can be used in internationalized programs with some +precautions. The problem with this API is that many of the C library +functions for strings don't work correctly on strings in locale +encodings, leading to bugs that only people in some cultures of the +world will experience. + + The first problem with the C library API is the support of multibyte +locales. According to the locale encoding, in general, every character +is represented by one or more bytes (up to 4 bytes in practice -- but +use `MB_LEN_MAX' instead of the number 4 in the code). When every +character is represented by only 1 byte, we speak of an "unibyte +locale", otherwise of a "multibyte locale". It is important to realize +that the majority of Unix installations nowadays use UTF-8 or GB18030 +as locale encoding; therefore, the majority of users are using +multibyte locales. + + The important fact to remember is: _A `char' is a byte, not a +character._ + + As a consequence: + * The `<ctype.h>' API is useless in this context; it does not work in + multibyte locales. + + * The `strlen' function does not return the number of characters in + a string. Nor does it return the number of screen columns occupied + by a string after it is output. It merely returns the number of + _bytes_ occupied by a string.
+ + * Truncating a string, for example, with `strncpy', can have the + effect of truncating it in the middle of a multibyte character. + Such a string will, when output, have a garbled character at its + end, often represented by a hollow box. + + * `strchr' and `strrchr' do not work with multibyte strings if the + locale encoding is GB18030 and the character to be searched is a + digit. + + * `strstr' does not work with multibyte strings if the locale + encoding is different from UTF-8. + + * `strcspn', `strpbrk', `strspn' cannot work correctly in multibyte + locales: they assume the second argument is a list of single-byte + characters. Even in this simple case, they do not work with + multibyte strings if the locale encoding is GB18030 and one of the + characters to be searched is a digit. + + * `strsep' and `strtok_r' do not work with multibyte strings unless + all of the delimiter characters are ASCII characters < 0x30. + + * The `strcasecmp', `strncasecmp', and `strcasestr' functions do not + work with multibyte strings. + + The workarounds can be found in GNU gnulib +`http://www.gnu.org/software/gnulib/'. + * gnulib has modules `mbchar', `mbiter', `mbuiter' that represent + multibyte characters and allow to iterate across a multibyte + string with the same ease as through a unibyte string. + + * gnulib has functions `mbslen' and `mbswidth' that can be used + instead of `strlen' when the number of characters or the number of + screen columns of a string is requested. + + * gnulib has functions `mbschr' and `mbsrrchr' that are like + `strchr' and `strrchr', but work in multibyte locales. + + * gnulib has a function `mbsstr', like `strstr', but works in + multibyte locales. + + * gnulib has functions `mbscspn', `mbspbrk', `mbsspn' that are like + `strcspn', `strpbrk', `strspn', but work in multibyte locales. + + * gnulib has functions `mbssep' and `mbstok_r' that are like + `strsep' and `strtok_r' but work in multibyte locales. 
+ + * gnulib has functions `mbscasecmp', `mbsncasecmp', `mbspcasecmp', + and `mbscasestr' that are like `strcasecmp', `strncasecmp', and + `strcasestr', but work in multibyte locales. Still, the function + `ulc_casecmp' is preferable to these functions; see below. + + The second problem with the C library API is that it has some +assumptions built-in that are not valid in some languages: + * It assumes that there are only two forms of every character: + uppercase and lowercase. This is not true for Croatian, where the + character LETTER DZ WITH CARON comes in three forms: LATIN CAPITAL + LETTER DZ WITH CARON (DZ), LATIN CAPITAL LETTER D WITH SMALL + LETTER Z WITH CARON (Dz), LATIN SMALL LETTER DZ WITH CARON (dz). + + * It assumes that uppercasing of 1 character leads to 1 character. + This is not true for German, where the LATIN SMALL LETTER SHARP S, + when uppercased, becomes `SS'. + + * It assumes that there is a 1:1 mapping between uppercase and + lowercase forms. This is not true for the Greek sigma: GREEK + CAPITAL LETTER SIGMA is the uppercase of both GREEK SMALL LETTER + SIGMA and GREEK SMALL LETTER FINAL SIGMA. + + * It assumes that the upper/lowercase mappings are position + independent. This is not true for the Greek sigma and the + Lithuanian i. + + The correct way to deal with this problem is + 1. to provide functions for titlecasing, as well as for upper- and + lowercasing, + + 2. to view case transformations as functions that operate on strings, + rather than on characters. + + This is implemented in this library, through the functions declared +in `<unicase.h>', see *note unicase.h::. + + +File: libunistring.info, Node: The wchar_t mess, Next: Unicode strings, Prev: char * strings, Up: Introduction + +1.6 The `wchar_t' mess +====================== + + The ISO C and POSIX standard creators made an attempt to fix the +first problem mentioned in the previous section.
They introduced + * a type `wchar_t', designed to encapsulate an entire character, + + * a "wide string" type `wchar_t *', and + + * functions declared in `<wctype.h>' that were meant to supplant the + ones in `<ctype.h>'. + + Unfortunately, this API and its implementation have numerous problems: + + * On AIX and Windows platforms, `wchar_t' is a 16-bit type. This + means that it can never accommodate an entire Unicode character. + Either the `wchar_t *' strings are limited to characters in UCS-2 + (the "Basic Multilingual Plane" of Unicode), or -- if `wchar_t *' + strings are encoded in UTF-16 -- a `wchar_t' represents only half + of a character in the worst case, making the `<wctype.h>' functions + pointless. + + * On Solaris and FreeBSD, the `wchar_t' encoding is locale dependent + and undocumented. This means, if you want to know any property of + a `wchar_t' character, other than the properties defined by + `<wctype.h>' -- such as whether it's a dash, currency symbol, + paragraph separator, or similar --, you have to convert it to + `char *' encoding first, by use of the function `wctomb'. + + * When you read a stream of wide characters, through the functions + `fgetwc' and `fgetws', and when the input stream/file is not in + the expected encoding, you have no way to determine the invalid + byte sequence and do some corrective action. If you use these + functions, your program becomes "garbage in - more garbage out" or + "garbage in - abort". + + As a consequence, it is better to use multibyte strings, as +explained in the previous section. Such multibyte strings can bypass +limitations of the `wchar_t' type, if you use functions defined in +gnulib and libunistring for text processing. They can also faithfully +transport malformed characters that were present in the input, without +requiring the program to produce garbage or abort.
+ + +File: libunistring.info, Node: Unicode strings, Prev: The wchar_t mess, Up: Introduction + +1.7 Unicode strings +=================== + + libunistring supports Unicode strings in three representations: + * UTF-8 strings, through the type `uint8_t *'. The units are bytes + (`uint8_t'). + + * UTF-16 strings, through the type `uint16_t *'. The units are + 16-bit memory words (`uint16_t'). + + * UTF-32 strings, through the type `uint32_t *'. The units are + 32-bit memory words (`uint32_t'). + + As with C strings, there are two variants: + * Unicode strings with a terminating NUL character are represented as + a pointer to the first unit of the string. There is a unit + containing a 0 value at the end. It is considered part of the + string for all memory allocation purposes, but is not considered + part of the string for all other logical purposes. + + * Unicode strings where embedded NUL characters are allowed. These + are represented by a pointer to the first unit and the number of + units (not bytes!) of the string. In this setting, there is no + trailing zero-valued unit used as "end marker". + + +File: libunistring.info, Node: Conventions, Next: unitypes.h, Prev: Introduction, Up: Top + +2 Conventions +************* + + This chapter explains conventions valid throughout the libunistring +library. + + Variables of type `char *' denote C strings in locale encoding. See +*note Locale encodings::. + + Variables of type `uint8_t *' denote UTF-8 strings. Their units are +bytes. + + Variables of type `uint16_t *' denote UTF-16 strings, without byte +order mark. Their units are 2-byte words. + + Variables of type `uint32_t *' denote UTF-32 strings, without byte +order mark. Their units are 4-byte words. + + Argument pairs `(S, N)' denote a string `S[0..N-1]' with exactly N +units. + + All functions with prefix `ulc_' operate on C strings in locale +encoding. + + All functions with prefix `u8_' operate on UTF-8 strings.
+ + All functions with prefix `u16_' operate on UTF-16 strings. + + All functions with prefix `u32_' operate on UTF-32 strings. + + For every function with prefix `u8_', operating on UTF-8 strings, +there is also a corresponding function with prefix `u16_', operating on +UTF-16 strings, and a corresponding function with prefix `u32_', +operating on UTF-32 strings. Their description is analogous; in this +documentation we describe only the function that operates on UTF-8 +strings, for brevity. + + A declaration with a variable N denotes the three concrete +declarations with N = 8, N = 16, N = 32. + + All parameters starting with `str' and the parameters of functions +starting with `u8_str'/`u16_str'/`u32_str' denote a NUL terminated +string. + + Error values are always returned through the `errno' variable, +usually with a return value that indicates the presence of an error +(NULL for functions that return a pointer, or -1 for functions that +return an `int'). + + Functions returning a string result take a `(RESULTBUF, LENGTHP)' +argument pair. If RESULTBUF is not NULL and the result fits into +`*LENGTHP' units, it is put in RESULTBUF, and RESULTBUF is returned. +Otherwise, a freshly allocated string is returned. In both cases, +`*LENGTHP' is set to the length (number of units) of the returned +string. In case of error, NULL is returned and `errno' is set. + + +File: libunistring.info, Node: unitypes.h, Next: unistr.h, Prev: Conventions, Up: Top + +3 Elementary types `<unitypes.h>' +********************************* + + The include file `<unitypes.h>' provides the following basic types. + + -- Type: uint8_t + -- Type: uint16_t + -- Type: uint32_t + These are the storage units of UTF-8/16/32 strings, respectively. + The definitions are taken from `<stdint.h>', on platforms where + this include file is present. + + -- Type: ucs4_t + This type represents a single Unicode character, outside of an + UTF-32 string.
+ + +File: libunistring.info, Node: unistr.h, Next: uniconv.h, Prev: unitypes.h, Up: Top + +4 Elementary Unicode string functions `<unistr.h>' +************************************************** + + This include file declares elementary functions for Unicode strings. +It is essentially the equivalent of what `<string.h>' is for C strings. + +* Menu: + +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: + + +File: libunistring.info, Node: Elementary string checks, Next: Elementary string conversions, Up: unistr.h + +4.1 Elementary string checks +============================ + + The following function is available to verify the integrity of a +Unicode string. + + -- Function: const uint8_t * u8_check (const uint8_t *S, size_t N) + -- Function: const uint16_t * u16_check (const uint16_t *S, size_t N) + -- Function: const uint32_t * u32_check (const uint32_t *S, size_t N) + This function checks whether a Unicode string is well-formed. It + returns NULL if valid, or a pointer to the first invalid unit + otherwise. + + +File: libunistring.info, Node: Elementary string conversions, Next: Elementary string functions, Prev: Elementary string checks, Up: unistr.h + +4.2 Elementary string conversions +================================= + + The following functions perform conversions between the different +forms of Unicode strings. + + -- Function: uint16_t * u8_to_u16 (const uint8_t *S, size_t N, + uint16_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-8 string to an UTF-16 string. + + -- Function: uint32_t * u8_to_u32 (const uint8_t *S, size_t N, + uint32_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-8 string to an UTF-32 string. + + -- Function: uint8_t * u16_to_u8 (const uint16_t *S, size_t N, uint8_t + *RESULTBUF, size_t *LENGTHP) + Converts an UTF-16 string to an UTF-8 string. 
+ + -- Function: uint32_t * u16_to_u32 (const uint16_t *S, size_t N, + uint32_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-16 string to an UTF-32 string. + + -- Function: uint8_t * u32_to_u8 (const uint32_t *S, size_t N, uint8_t + *RESULTBUF, size_t *LENGTHP) + Converts an UTF-32 string to an UTF-8 string. + + -- Function: uint16_t * u32_to_u16 (const uint32_t *S, size_t N, + uint16_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-32 string to an UTF-16 string. + + +File: libunistring.info, Node: Elementary string functions, Next: Elementary string functions with memory allocation, Prev: Elementary string conversions, Up: unistr.h + +4.3 Elementary string functions +=============================== + + The following functions inspect and return details about the first +character in a Unicode string. + + -- Function: int u8_mblen (const uint8_t *S, size_t N) + -- Function: int u16_mblen (const uint16_t *S, size_t N) + -- Function: int u32_mblen (const uint32_t *S, size_t N) + Returns the length (number of units) of the first character in S, + which is no longer than N. Returns 0 if it is the NUL character. + Returns -1 upon failure. + + This function is similar to `mblen', except that it operates on a + Unicode string and that S must not be NULL. + + -- Function: int u8_mbtouc_unsafe (ucs4_t *PUC, const uint8_t *S, + size_t N) + -- Function: int u16_mbtouc_unsafe (ucs4_t *PUC, const uint16_t *S, + size_t N) + -- Function: int u32_mbtouc_unsafe (ucs4_t *PUC, const uint32_t *S, + size_t N) + Returns the length (number of units) of the first character in S, + putting its `ucs4_t' representation in `*PUC'. Upon failure, + `*PUC' is set to `0xfffd', and an appropriate number of units is + returned. + + The number of available units, N, must be > 0. + + This function is similar to `mbtowc', except that it operates on a + Unicode string, PUC and S must not be NULL, N must be > 0, and the + NUL character is not treated specially. 
+ + -- Function: int u8_mbtouc (ucs4_t *PUC, const uint8_t *S, size_t N) + -- Function: int u16_mbtouc (ucs4_t *PUC, const uint16_t *S, size_t N) + -- Function: int u32_mbtouc (ucs4_t *PUC, const uint32_t *S, size_t N) + This function is like `u8_mbtouc_unsafe', except that it will + detect an invalid UTF-8 character, even if the library is compiled + without `--enable-safety'. + + -- Function: int u8_mbtoucr (ucs4_t *PUC, const uint8_t *S, size_t N) + -- Function: int u16_mbtoucr (ucs4_t *PUC, const uint16_t *S, size_t N) + -- Function: int u32_mbtoucr (ucs4_t *PUC, const uint32_t *S, size_t N) + Returns the length (number of units) of the first character in S, + putting its `ucs4_t' representation in `*PUC'. Upon failure, + `*PUC' is set to `0xfffd', and -1 is returned for an invalid + sequence of units, -2 is returned for an incomplete sequence of + units. + + The number of available units, N, must be > 0. + + This function is similar to `u8_mbtouc', except that the return + value gives more details about the failure, similar to `mbrtowc'. + + The following function stores a Unicode character as a Unicode +string in memory. + + -- Function: int u8_uctomb (uint8_t *S, ucs4_t UC, int N) + -- Function: int u16_uctomb (uint16_t *S, ucs4_t UC, int N) + -- Function: int u32_uctomb (uint32_t *S, ucs4_t UC, int N) + Puts the multibyte character represented by UC in S, returning its + length. Returns -1 upon failure, -2 if the number of available + units, N, is too small. The latter case cannot occur if N >= + 6/2/1, respectively. + + This function is similar to `wctomb', except that it operates on + Unicode strings, S must not be NULL, and the argument N must be + specified. + + The following functions copy Unicode strings in memory.
+ + -- Function: uint8_t * u8_cpy (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_cpy (uint16_t *DEST, const uint16_t *SRC, + size_t N) + -- Function: uint32_t * u32_cpy (uint32_t *DEST, const uint32_t *SRC, + size_t N) + Copies N units from SRC to DEST. + + This function is similar to `memcpy', except that it operates on + Unicode strings. + + -- Function: uint8_t * u8_move (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_move (uint16_t *DEST, const uint16_t *SRC, + size_t N) + -- Function: uint32_t * u32_move (uint32_t *DEST, const uint32_t *SRC, + size_t N) + Copies N units from SRC to DEST, guaranteeing correct behavior for + overlapping memory areas. + + This function is similar to `memmove', except that it operates on + Unicode strings. + + The following function fills a Unicode string. + + -- Function: uint8_t * u8_set (uint8_t *S, ucs4_t UC, size_t N) + -- Function: uint16_t * u16_set (uint16_t *S, ucs4_t UC, size_t N) + -- Function: uint32_t * u32_set (uint32_t *S, ucs4_t UC, size_t N) + Sets the first N characters of S to UC. UC should be a character + that occupies only 1 unit. + + This function is similar to `memset', except that it operates on + Unicode strings. + + The following function compares two Unicode strings of the same +length. + + -- Function: int u8_cmp (const uint8_t *S1, const uint8_t *S2, size_t + N) + -- Function: int u16_cmp (const uint16_t *S1, const uint16_t *S2, + size_t N) + -- Function: int u32_cmp (const uint32_t *S1, const uint32_t *S2, + size_t N) + Compares S1 and S2, each of length N, lexicographically. Returns + a negative value if S1 compares smaller than S2, a positive value + if S1 compares larger than S2, or 0 if they compare equal. + + This function is similar to `memcmp', except that it operates on + Unicode strings. + + The following function compares two Unicode strings of possibly +different lengths. 
+ + -- Function: int u8_cmp2 (const uint8_t *S1, size_t N1, const uint8_t + *S2, size_t N2) + -- Function: int u16_cmp2 (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2) + -- Function: int u32_cmp2 (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2) + Compares S1 and S2, lexicographically. Returns a negative value + if S1 compares smaller than S2, a positive value if S1 compares + larger than S2, or 0 if they compare equal. + + This function is similar to the gnulib function `memcmp2', except + that it operates on Unicode strings. + + The following function searches for a given Unicode character. + + -- Function: uint8_t * u8_chr (const uint8_t *S, size_t N, ucs4_t UC) + -- Function: uint16_t * u16_chr (const uint16_t *S, size_t N, ucs4_t + UC) + -- Function: uint32_t * u32_chr (const uint32_t *S, size_t N, ucs4_t + UC) + Searches the string at S for UC. Returns a pointer to the first + occurrence of UC in S, or NULL if UC does not occur in S. + + This function is similar to `memchr', except that it operates on + Unicode strings. + + The following function counts the number of Unicode characters. + + -- Function: size_t u8_mbsnlen (const uint8_t *S, size_t N) + -- Function: size_t u16_mbsnlen (const uint16_t *S, size_t N) + -- Function: size_t u32_mbsnlen (const uint32_t *S, size_t N) + Counts and returns the number of Unicode characters in the N units + from S. + + This function is similar to the gnulib function `mbsnlen', except + that it operates on Unicode strings. + + +File: libunistring.info, Node: Elementary string functions with memory allocation, Next: Elementary string functions on NUL terminated strings, Prev: Elementary string functions, Up: unistr.h + +4.4 Elementary string functions with memory allocation +====================================================== + + The following function copies a Unicode string. 
+ + -- Function: uint8_t * u8_cpy_alloc (const uint8_t *S, size_t N) + -- Function: uint16_t * u16_cpy_alloc (const uint16_t *S, size_t N) + -- Function: uint32_t * u32_cpy_alloc (const uint32_t *S, size_t N) + Makes a freshly allocated copy of S, of length N. + + +File: libunistring.info, Node: Elementary string functions on NUL terminated strings, Prev: Elementary string functions with memory allocation, Up: unistr.h + +4.5 Elementary string functions on NUL terminated strings +========================================================= + + The following functions inspect and return details about the first +character in a Unicode string. + + -- Function: int u8_strmblen (const uint8_t *S) + -- Function: int u16_strmblen (const uint16_t *S) + -- Function: int u32_strmblen (const uint32_t *S) + Returns the length (number of units) of the first character in S. + Returns 0 if it is the NUL character. Returns -1 upon failure. + + -- Function: int u8_strmbtouc (ucs4_t *PUC, const uint8_t *S) + -- Function: int u16_strmbtouc (ucs4_t *PUC, const uint16_t *S) + -- Function: int u32_strmbtouc (ucs4_t *PUC, const uint32_t *S) + Returns the length (number of units) of the first character in S, + putting its `ucs4_t' representation in `*PUC'. Returns 0 if it is + the NUL character. Returns -1 upon failure. + + -- Function: const uint8_t * u8_next (ucs4_t *PUC, const uint8_t *S) + -- Function: const uint16_t * u16_next (ucs4_t *PUC, const uint16_t *S) + -- Function: const uint32_t * u32_next (ucs4_t *PUC, const uint32_t *S) + Forward iteration step. Advances the pointer past the next + character, or returns NULL if the end of the string has been + reached. Puts the character's `ucs4_t' representation in `*PUC'. + + The following function inspects and returns details about the +previous character in a Unicode string. 
+ + -- Function: const uint8_t * u8_prev (ucs4_t *PUC, const uint8_t *S, + const uint8_t *START) + -- Function: const uint16_t * u16_prev (ucs4_t *PUC, const uint16_t + *S, const uint16_t *START) + -- Function: const uint32_t * u32_prev (ucs4_t *PUC, const uint32_t + *S, const uint32_t *START) + Backward iteration step. Advances the pointer to point to the + previous character, or returns NULL if the beginning of the string + had been reached. Puts the character's `ucs4_t' representation in + `*PUC'. + + The following functions determine the length of a Unicode string. + + -- Function: size_t u8_strlen (const uint8_t *S) + -- Function: size_t u16_strlen (const uint16_t *S) + -- Function: size_t u32_strlen (const uint32_t *S) + Returns the number of units in S. + + This function is similar to `strlen' and `wcslen', except that it + operates on Unicode strings. + + -- Function: size_t u8_strnlen (const uint8_t *S, size_t MAXLEN) + -- Function: size_t u16_strnlen (const uint16_t *S, size_t MAXLEN) + -- Function: size_t u32_strnlen (const uint32_t *S, size_t MAXLEN) + Returns the number of units in S, but at most MAXLEN. + + This function is similar to `strnlen' and `wcsnlen', except that + it operates on Unicode strings. + + The following functions copy portions of Unicode strings in memory. + + -- Function: uint8_t * u8_strcpy (uint8_t *DEST, const uint8_t *SRC) + -- Function: uint16_t * u16_strcpy (uint16_t *DEST, const uint16_t + *SRC) + -- Function: uint32_t * u32_strcpy (uint32_t *DEST, const uint32_t + *SRC) + Copies SRC to DEST. + + This function is similar to `strcpy' and `wcscpy', except that it + operates on Unicode strings. + + -- Function: uint8_t * u8_stpcpy (uint8_t *DEST, const uint8_t *SRC) + -- Function: uint16_t * u16_stpcpy (uint16_t *DEST, const uint16_t + *SRC) + -- Function: uint32_t * u32_stpcpy (uint32_t *DEST, const uint32_t + *SRC) + Copies SRC to DEST, returning the address of the terminating NUL + in DEST. 
+ + This function is similar to `stpcpy', except that it operates on + Unicode strings. + + -- Function: uint8_t * u8_strncpy (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_strncpy (uint16_t *DEST, const uint16_t + *SRC, size_t N) + -- Function: uint32_t * u32_strncpy (uint32_t *DEST, const uint32_t + *SRC, size_t N) + Copies no more than N units of SRC to DEST. + + This function is similar to `strncpy' and `wcsncpy', except that + it operates on Unicode strings. + + -- Function: uint8_t * u8_stpncpy (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_stpncpy (uint16_t *DEST, const uint16_t + *SRC, size_t N) + -- Function: uint32_t * u32_stpncpy (uint32_t *DEST, const uint32_t + *SRC, size_t N) + Copies no more than N units of SRC to DEST, returning the address + of the last unit written into DEST. + + This function is similar to `stpncpy', except that it operates on + Unicode strings. + + -- Function: uint8_t * u8_strcat (uint8_t *DEST, const uint8_t *SRC) + -- Function: uint16_t * u16_strcat (uint16_t *DEST, const uint16_t + *SRC) + -- Function: uint32_t * u32_strcat (uint32_t *DEST, const uint32_t + *SRC) + Appends SRC onto DEST. + + This function is similar to `strcat' and `wcscat', except that it + operates on Unicode strings. + + -- Function: uint8_t * u8_strncat (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_strncat (uint16_t *DEST, const uint16_t + *SRC, size_t N) + -- Function: uint32_t * u32_strncat (uint32_t *DEST, const uint32_t + *SRC, size_t N) + Appends no more than N units of SRC onto DEST. + + This function is similar to `strncat' and `wcsncat', except that + it operates on Unicode strings. + + The following functions compare two Unicode strings. 
+ + -- Function: int u8_strcmp (const uint8_t *S1, const uint8_t *S2) + -- Function: int u16_strcmp (const uint16_t *S1, const uint16_t *S2) + -- Function: int u32_strcmp (const uint32_t *S1, const uint32_t *S2) + Compares S1 and S2, lexicographically. Returns a negative value + if S1 compares smaller than S2, a positive value if S1 compares + larger than S2, or 0 if they compare equal. + + This function is similar to `strcmp' and `wcscmp', except that it + operates on Unicode strings. + + -- Function: int u8_strcoll (const uint8_t *S1, const uint8_t *S2) + -- Function: int u16_strcoll (const uint16_t *S1, const uint16_t *S2) + -- Function: int u32_strcoll (const uint32_t *S1, const uint32_t *S2) + Compares S1 and S2 using the collation rules of the current locale. + Returns -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. Upon failure, + sets `errno' and returns any value. + + This function is similar to `strcoll' and `wcscoll', except that + it operates on Unicode strings. + + Note that this function may consider different canonical + normalizations of the same string as having a large distance. It + is therefore better to use the function `u8_normcoll' instead of + this one; see *note uninorm.h::. + + -- Function: int u8_strncmp (const uint8_t *S1, const uint8_t *S2, + size_t N) + -- Function: int u16_strncmp (const uint16_t *S1, const uint16_t *S2, + size_t N) + -- Function: int u32_strncmp (const uint32_t *S1, const uint32_t *S2, + size_t N) + Compares no more than N units of S1 and S2. + + This function is similar to `strncmp' and `wcsncmp', except that + it operates on Unicode strings. + + The following function allocates a duplicate of a Unicode string. + + -- Function: uint8_t * u8_strdup (const uint8_t *S) + -- Function: uint16_t * u16_strdup (const uint16_t *S) + -- Function: uint32_t * u32_strdup (const uint32_t *S) + Duplicates S, returning an identical malloc'd string. 
+ + This function is similar to `strdup' and `wcsdup', except that it + operates on Unicode strings. + + The following functions search for a given Unicode character. + + -- Function: uint8_t * u8_strchr (const uint8_t *STR, ucs4_t UC) + -- Function: uint16_t * u16_strchr (const uint16_t *STR, ucs4_t UC) + -- Function: uint32_t * u32_strchr (const uint32_t *STR, ucs4_t UC) + Finds the first occurrence of UC in STR. + + This function is similar to `strchr' and `wcschr', except that it + operates on Unicode strings. + + -- Function: uint8_t * u8_strrchr (const uint8_t *STR, ucs4_t UC) + -- Function: uint16_t * u16_strrchr (const uint16_t *STR, ucs4_t UC) + -- Function: uint32_t * u32_strrchr (const uint32_t *STR, ucs4_t UC) + Finds the last occurrence of UC in STR. + + This function is similar to `strrchr' and `wcsrchr', except that + it operates on Unicode strings. + + The following functions search for the first occurrence of some +Unicode character in or outside a given set of Unicode characters. + + -- Function: size_t u8_strcspn (const uint8_t *STR, const uint8_t + *REJECT) + -- Function: size_t u16_strcspn (const uint16_t *STR, const uint16_t + *REJECT) + -- Function: size_t u32_strcspn (const uint32_t *STR, const uint32_t + *REJECT) + Returns the length of the initial segment of STR which consists + entirely of Unicode characters not in REJECT. + + This function is similar to `strcspn' and `wcscspn', except that + it operates on Unicode strings. + + -- Function: size_t u8_strspn (const uint8_t *STR, const uint8_t + *ACCEPT) + -- Function: size_t u16_strspn (const uint16_t *STR, const uint16_t + *ACCEPT) + -- Function: size_t u32_strspn (const uint32_t *STR, const uint32_t + *ACCEPT) + Returns the length of the initial segment of STR which consists + entirely of Unicode characters in ACCEPT. + + This function is similar to `strspn' and `wcsspn', except that it + operates on Unicode strings. 
+ + -- Function: uint8_t * u8_strpbrk (const uint8_t *STR, const uint8_t + *ACCEPT) + -- Function: uint16_t * u16_strpbrk (const uint16_t *STR, const + uint16_t *ACCEPT) + -- Function: uint32_t * u32_strpbrk (const uint32_t *STR, const + uint32_t *ACCEPT) + Finds the first occurrence in STR of any character in ACCEPT. + + This function is similar to `strpbrk' and `wcspbrk', except that + it operates on Unicode strings. + + The following functions search whether a given Unicode string is a +substring of another Unicode string. + + -- Function: uint8_t * u8_strstr (const uint8_t *HAYSTACK, const + uint8_t *NEEDLE) + -- Function: uint16_t * u16_strstr (const uint16_t *HAYSTACK, const + uint16_t *NEEDLE) + -- Function: uint32_t * u32_strstr (const uint32_t *HAYSTACK, const + uint32_t *NEEDLE) + Finds the first occurrence of NEEDLE in HAYSTACK. + + This function is similar to `strstr' and `wcsstr', except that it + operates on Unicode strings. + + -- Function: bool u8_startswith (const uint8_t *STR, const uint8_t + *PREFIX) + -- Function: bool u16_startswith (const uint16_t *STR, const uint16_t + *PREFIX) + -- Function: bool u32_startswith (const uint32_t *STR, const uint32_t + *PREFIX) + Tests whether STR starts with PREFIX. + + -- Function: bool u8_endswith (const uint8_t *STR, const uint8_t + *SUFFIX) + -- Function: bool u16_endswith (const uint16_t *STR, const uint16_t + *SUFFIX) + -- Function: bool u32_endswith (const uint32_t *STR, const uint32_t + *SUFFIX) + Tests whether STR ends with SUFFIX. + + The following function does one step in tokenizing a Unicode string. + + -- Function: uint8_t * u8_strtok (uint8_t *STR, const uint8_t *DELIM, + uint8_t **PTR) + -- Function: uint16_t * u16_strtok (uint16_t *STR, const uint16_t + *DELIM, uint16_t **PTR) + -- Function: uint32_t * u32_strtok (uint32_t *STR, const uint32_t + *DELIM, uint32_t **PTR) + Divides STR into tokens separated by characters in DELIM. 
+ + This function is similar to `strtok_r' and `wcstok', except that + it operates on Unicode strings. Its interface is actually more + similar to `wcstok' than to `strtok'. + + +File: libunistring.info, Node: uniconv.h, Next: unistdio.h, Prev: unistr.h, Up: Top + +5 Conversions between Unicode and encodings `<uniconv.h>' +********************************************************* + + This include file declares functions for converting between Unicode +strings and `char *' strings in locale encoding or in other specified +encodings. + + The following function returns the locale encoding. + + -- Function: const char * locale_charset () + Determines the current locale's character encoding, and + canonicalizes it into one of the canonical names listed in + `config.charset'. If the canonical name cannot be determined, the + result is a non-canonical name. + + The result must not be freed; it is statically allocated. + + The result of this function can be used as an argument to the + `iconv_open' function in GNU libc, in GNU libiconv, or in the + gnulib provided wrapper around the native `iconv_open' function. + It may not work as an argument to the native `iconv_open' function + directly. + + The handling of unconvertible characters during the conversions can +be parametrized through the following enumeration type: + + -- Type: enum iconv_ilseq_handler + This type specifies how unconvertible characters in the input are + handled. + + -- Constant: enum iconv_ilseq_handler iconveh_error + This handler causes the function to return with `errno' set to + `EILSEQ'. + + -- Constant: enum iconv_ilseq_handler iconveh_question_mark + This handler produces one question mark `?' per unconvertible + character. + + -- Constant: enum iconv_ilseq_handler iconveh_escape_sequence + This handler produces an escape sequence `\uXXXX' or `\UXXXXXXXX' + for each unconvertible character. + + The following functions convert between strings in a specified +encoding and Unicode strings. 
+ + -- Function: uint8_t * u8_conv_from_encoding (const char *FROMCODE, + enum iconv_ilseq_handler HANDLER, const char *SRC, size_t + SRCLEN, size_t *OFFSETS, uint8_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint16_t * u16_conv_from_encoding (const char *FROMCODE, + enum iconv_ilseq_handler HANDLER, const char *SRC, size_t + SRCLEN, size_t *OFFSETS, uint16_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_conv_from_encoding (const char *FROMCODE, + enum iconv_ilseq_handler HANDLER, const char *SRC, size_t + SRCLEN, size_t *OFFSETS, uint32_t *RESULTBUF, size_t *LENGTHP) + Converts an entire string, possibly including NUL bytes, from one + encoding to UTF-8, UTF-16, or UTF-32 encoding, respectively. + + Converts a memory region given in encoding FROMCODE. FROMCODE is + as for the `iconv_open' function. + + The input is in the memory region between SRC (inclusive) and `SRC + + SRCLEN' (exclusive). + + If OFFSETS is not NULL, it should point to an array of SRCLEN + integers; this array is filled with offsets into the result, i.e. + the character starting at `SRC[i]' corresponds to the character + starting at `RESULT[OFFSETS[i]]', and other offsets are set to + `(size_t)(-1)'. + + `RESULTBUF' and `*LENGTHP' should be a scratch buffer and its + size, or `RESULTBUF' can be NULL. + + May erase the contents of the memory at `RESULTBUF'. + + If successful: The resulting Unicode string (non-NULL) is returned + and its length stored in `*LENGTHP'. The resulting string is + `RESULTBUF' if no dynamic memory allocation was necessary, or a + freshly allocated memory block otherwise. + + In case of error: NULL is returned and `errno' is set. Particular + `errno' values: `EINVAL', `EILSEQ', `ENOMEM'.
+ + -- Function: char * u8_conv_to_encoding (const char *TOCODE, enum + iconv_ilseq_handler HANDLER, const uint8_t *SRC, size_t + SRCLEN, size_t *OFFSETS, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u16_conv_to_encoding (const char *TOCODE, enum + iconv_ilseq_handler HANDLER, const uint16_t *SRC, size_t + SRCLEN, size_t *OFFSETS, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u32_conv_to_encoding (const char *TOCODE, enum + iconv_ilseq_handler HANDLER, const uint32_t *SRC, size_t + SRCLEN, size_t *OFFSETS, char *RESULTBUF, size_t *LENGTHP) + Converts an entire Unicode string, possibly including NUL units, + from UTF-8, UTF-16, or UTF-32 encoding, respectively, to a given encoding. + + Converts a memory region to encoding TOCODE. TOCODE is as for the + `iconv_open' function. + + The input is in the memory region between SRC (inclusive) and `SRC + + SRCLEN' (exclusive). + + If OFFSETS is not NULL, it should point to an array of SRCLEN + integers; this array is filled with offsets into the result, i.e. + the character starting at `SRC[i]' corresponds to the character + starting at `RESULT[OFFSETS[i]]', and other offsets are set to + `(size_t)(-1)'. + + `RESULTBUF' and `*LENGTHP' should be a scratch buffer and its + size, or `RESULTBUF' can be NULL. + + May erase the contents of the memory at `RESULTBUF'. + + If successful: The resulting string (non-NULL) is returned + and its length stored in `*LENGTHP'. The resulting string is + `RESULTBUF' if no dynamic memory allocation was necessary, or a + freshly allocated memory block otherwise. + + In case of error: NULL is returned and `errno' is set. Particular + `errno' values: `EINVAL', `EILSEQ', `ENOMEM'. + + The following functions convert between NUL terminated strings in a +specified encoding and NUL terminated Unicode strings.
+ + -- Function: uint8_t * u8_strconv_from_encoding (const char *STRING, + const char *FROMCODE, enum iconv_ilseq_handler HANDLER) + -- Function: uint16_t * u16_strconv_from_encoding (const char *STRING, + const char *FROMCODE, enum iconv_ilseq_handler HANDLER) + -- Function: uint32_t * u32_strconv_from_encoding (const char *STRING, + const char *FROMCODE, enum iconv_ilseq_handler HANDLER) + Converts a NUL terminated string from a given encoding. + + The result is `malloc' allocated, or NULL (with `errno' set) in case + of error. + + Particular `errno' values: `EILSEQ', `ENOMEM'. + + -- Function: char * u8_strconv_to_encoding (const uint8_t *STRING, + const char *TOCODE, enum iconv_ilseq_handler HANDLER) + -- Function: char * u16_strconv_to_encoding (const uint16_t *STRING, + const char *TOCODE, enum iconv_ilseq_handler HANDLER) + -- Function: char * u32_strconv_to_encoding (const uint32_t *STRING, + const char *TOCODE, enum iconv_ilseq_handler HANDLER) + Converts a NUL terminated string to a given encoding. + + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `EILSEQ', `ENOMEM'. + + The following functions are shorthands that convert between NUL +terminated strings in locale encoding and NUL terminated Unicode +strings. + + -- Function: uint8_t * u8_strconv_from_locale (const char *STRING) + -- Function: uint16_t * u16_strconv_from_locale (const char *STRING) + -- Function: uint32_t * u32_strconv_from_locale (const char *STRING) + Converts a NUL terminated string from the locale encoding. + + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `ENOMEM'. + + -- Function: char * u8_strconv_to_locale (const uint8_t *STRING) + -- Function: char * u16_strconv_to_locale (const uint16_t *STRING) + -- Function: char * u32_strconv_to_locale (const uint32_t *STRING) + Converts a NUL terminated string to the locale encoding.
+ + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `ENOMEM'. + + +File: libunistring.info, Node: unistdio.h, Next: uniname.h, Prev: uniconv.h, Up: Top + +6 Output with Unicode strings `<unistdio.h>' +******************************************** + + This include file declares functions for doing formatted output with +Unicode strings. It defines a set of functions similar to `fprintf' and +`sprintf', which are declared in `<stdio.h>'. + + These functions work like the `printf' function family. In the +format string: + * The format directive `U' takes an UTF-8 string (`const uint8_t *'). + + * The format directive `lU' takes an UTF-16 string (`const uint16_t + *'). + + * The format directive `llU' takes an UTF-32 string (`const uint32_t + *'). + + A function name with an infix `v' indicates that a `va_list' is +passed instead of multiple arguments. + + The functions `*sprintf' have a BUF argument that is assumed to be +large enough. (_DANGEROUS! Overflowing the buffer will crash the +program._) + + The functions `*snprintf' have a BUF argument that is assumed to be +SIZE units large. (_DANGEROUS! The resulting string might be +truncated in the middle of a multibyte character._) + + The functions `*asprintf' have a RESULTP argument. The result will +be freshly allocated and stored in `*resultp'. + + The functions `*asnprintf' have a (RESULTBUF, LENGTHP) argument +pair. If RESULTBUF is not NULL and the result fits into `*LENGTHP' +units, it is put in RESULTBUF, and RESULTBUF is returned. Otherwise, a +freshly allocated string is returned. In both cases, `*LENGTHP' is set +to the length (number of units) of the returned string. In case of +error, NULL is returned and `errno' is set. + + The following functions take an ASCII format string and return a +result that is a `char *' string in locale encoding. + + -- Function: int ulc_sprintf (char *BUF, const char *FORMAT, ...) 
+ + -- Function: int ulc_snprintf (char *BUF, size_t size, const char + *FORMAT, ...) + + -- Function: int ulc_asprintf (char **RESULTP, const char *FORMAT, ...) + + -- Function: char * ulc_asnprintf (char *RESULTBUF, size_t *LENGTHP, + const char *FORMAT, ...) + + -- Function: int ulc_vsprintf (char *BUF, const char *FORMAT, va_list + AP) + + -- Function: int ulc_vsnprintf (char *BUF, size_t size, const char + *FORMAT, va_list AP) + + -- Function: int ulc_vasprintf (char **RESULTP, const char *FORMAT, + va_list AP) + + -- Function: char * ulc_vasnprintf (char *RESULTBUF, size_t *LENGTHP, + const char *FORMAT, va_list AP) + + The following functions take an ASCII format string and return a +result in UTF-8 format. + + -- Function: int u8_sprintf (uint8_t *BUF, const char *FORMAT, ...) + + -- Function: int u8_snprintf (uint8_t *BUF, size_t SIZE, const char + *FORMAT, ...) + + -- Function: int u8_asprintf (uint8_t **RESULTP, const char *FORMAT, + ...) + + -- Function: uint8_t * u8_asnprintf (uint8_t *RESULTBUF, size_t + *LENGTHP, const char *FORMAT, ...) + + -- Function: int u8_vsprintf (uint8_t *BUF, const char *FORMAT, + va_list ap) + + -- Function: int u8_vsnprintf (uint8_t *BUF, size_t SIZE, const char + *FORMAT, va_list AP) + + -- Function: int u8_vasprintf (uint8_t **RESULTP, const char *FORMAT, + va_list AP) + + -- Function: uint8_t * u8_vasnprintf (uint8_t *resultbuf, size_t + *LENGTHP, const char *FORMAT, va_list AP) + + The following functions take an UTF-8 format string and return a +result in UTF-8 format. + + -- Function: int u8_u8_sprintf (uint8_t *BUF, const uint8_t *FORMAT, + ...) + + -- Function: int u8_u8_snprintf (uint8_t *BUF, size_t SIZE, const + uint8_t *FORMAT, ...) + + -- Function: int u8_u8_asprintf (uint8_t **RESULTP, const uint8_t + *FORMAT, ...) + + -- Function: uint8_t * u8_u8_asnprintf (uint8_t *resultbuf, size_t + *LENGTHP, const uint8_t *FORMAT, ...) 
+ + -- Function: int u8_u8_vsprintf (uint8_t *BUF, const uint8_t *FORMAT, + va_list AP) + + -- Function: int u8_u8_vsnprintf (uint8_t *BUF, size_t SIZE, const + uint8_t *FORMAT, va_list AP) + + -- Function: int u8_u8_vasprintf (uint8_t **RESULTP, const uint8_t + *FORMAT, va_list AP) + + -- Function: uint8_t * u8_u8_vasnprintf (uint8_t *resultbuf, size_t + *LENGTHP, const uint8_t *FORMAT, va_list AP) + + The following functions take an ASCII format string and return a +result in UTF-16 format. + + -- Function: int u16_sprintf (uint16_t *BUF, const char *FORMAT, ...) + + -- Function: int u16_snprintf (uint16_t *BUF, size_t SIZE, const char + *FORMAT, ...) + + -- Function: int u16_asprintf (uint16_t **RESULTP, const char *FORMAT, + ...) + + -- Function: uint16_t * u16_asnprintf (uint16_t *RESULTBUF, size_t + *LENGTHP, const char *FORMAT, ...) + + -- Function: int u16_vsprintf (uint16_t *BUF, const char *FORMAT, + va_list ap) + + -- Function: int u16_vsnprintf (uint16_t *BUF, size_t SIZE, const char + *FORMAT, va_list AP) + + -- Function: int u16_vasprintf (uint16_t **RESULTP, const char + *FORMAT, va_list AP) + + -- Function: uint16_t * u16_vasnprintf (uint16_t *resultbuf, size_t + *LENGTHP, const char *FORMAT, va_list AP) + + The following functions take an UTF-16 format string and return a +result in UTF-16 format. + + -- Function: int u16_u16_sprintf (uint16_t *BUF, const uint16_t + *FORMAT, ...) + + -- Function: int u16_u16_snprintf (uint16_t *BUF, size_t SIZE, const + uint16_t *FORMAT, ...) + + -- Function: int u16_u16_asprintf (uint16_t **RESULTP, const uint16_t + *FORMAT, ...) + + -- Function: uint16_t * u16_u16_asnprintf (uint16_t *resultbuf, size_t + *LENGTHP, const uint16_t *FORMAT, ...) 
+ + -- Function: int u16_u16_vsprintf (uint16_t *BUF, const uint16_t + *FORMAT, va_list AP) + + -- Function: int u16_u16_vsnprintf (uint16_t *BUF, size_t SIZE, const + uint16_t *FORMAT, va_list AP) + + -- Function: int u16_u16_vasprintf (uint16_t **RESULTP, const uint16_t + *FORMAT, va_list AP) + + -- Function: uint16_t * u16_u16_vasnprintf (uint16_t *resultbuf, + size_t *LENGTHP, const uint16_t *FORMAT, va_list AP) + + The following functions take an ASCII format string and return a +result in UTF-32 format. + + -- Function: int u32_sprintf (uint32_t *BUF, const char *FORMAT, ...) + + -- Function: int u32_snprintf (uint32_t *BUF, size_t SIZE, const char + *FORMAT, ...) + + -- Function: int u32_asprintf (uint32_t **RESULTP, const char *FORMAT, + ...) + + -- Function: uint32_t * u32_asnprintf (uint32_t *RESULTBUF, size_t + *LENGTHP, const char *FORMAT, ...) + + -- Function: int u32_vsprintf (uint32_t *BUF, const char *FORMAT, + va_list ap) + + -- Function: int u32_vsnprintf (uint32_t *BUF, size_t SIZE, const char + *FORMAT, va_list AP) + + -- Function: int u32_vasprintf (uint32_t **RESULTP, const char + *FORMAT, va_list AP) + + -- Function: uint32_t * u32_vasnprintf (uint32_t *resultbuf, size_t + *LENGTHP, const char *FORMAT, va_list AP) + + The following functions take an UTF-32 format string and return a +result in UTF-32 format. + + -- Function: int u32_u32_sprintf (uint32_t *BUF, const uint32_t + *FORMAT, ...) + + -- Function: int u32_u32_snprintf (uint32_t *BUF, size_t SIZE, const + uint32_t *FORMAT, ...) + + -- Function: int u32_u32_asprintf (uint32_t **RESULTP, const uint32_t + *FORMAT, ...) + + -- Function: uint32_t * u32_u32_asnprintf (uint32_t *resultbuf, size_t + *LENGTHP, const uint32_t *FORMAT, ...) 
+ + -- Function: int u32_u32_vsprintf (uint32_t *BUF, const uint32_t + *FORMAT, va_list AP) + + -- Function: int u32_u32_vsnprintf (uint32_t *BUF, size_t SIZE, const + uint32_t *FORMAT, va_list AP) + + -- Function: int u32_u32_vasprintf (uint32_t **RESULTP, const uint32_t + *FORMAT, va_list AP) + + -- Function: uint32_t * u32_u32_vasnprintf (uint32_t *resultbuf, + size_t *LENGTHP, const uint32_t *FORMAT, va_list AP) + + The following functions take an ASCII format string and produce +output in locale encoding to a `FILE' stream. + + -- Function: int ulc_fprintf (FILE *STREAM, const char *FORMAT, ...) + + -- Function: int ulc_vfprintf (FILE *STREAM, const char *FORMAT, + va_list AP) + + +File: libunistring.info, Node: uniname.h, Next: unictype.h, Prev: unistdio.h, Up: Top + +7 Names of Unicode characters `<uniname.h>' +******************************************* + + This include file implements the association between a Unicode +character and its name. + + The name of a Unicode character allows to distinguish it from other, +similar looking characters. For example, the character `x' has the name +`"LATIN SMALL LETTER X"' and is therefore different from the character +named `"MULTIPLICATION SIGN"'. + + -- Macro: unsigned int UNINAME_MAX + This macro expands to a constant that is the required size of + buffer for a Unicode character name. + + -- Function: char * unicode_character_name (ucs4_t UC, char *BUF) + Looks up the name of a Unicode character, in uppercase ASCII. BUF + must point to a buffer, at least `UNINAME_MAX' bytes in size. + Returns the filled BUF, or NULL if the character does not have a + name. + + -- Function: ucs4_t unicode_name_character (const char *NAME) + Looks up the Unicode character with a given name, in upper- or + lowercase ASCII. Returns the character if found, or + `UNINAME_INVALID' if not found. 
+ + -- Macro: ucs4_t UNINAME_INVALID + This macro expands to a constant that is a special return value of + the `unicode_name_character' function. + + +File: libunistring.info, Node: unictype.h, Next: uniwidth.h, Prev: uniname.h, Up: Top + +8 Unicode character classification and properties `<unictype.h>' +**************************************************************** + + This include file declares functions that classify Unicode characters +and that test whether Unicode characters have specific properties. + + The classification assigns a "general category" to every Unicode +character. This is similar to the classification provided by ISO C in +`<wctype.h>'. + + Properties are the data that guides various text processing +algorithms in the presence of specific Unicode characters. + +* Menu: + +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: + + +File: libunistring.info, Node: General category, Next: Canonical combining class, Up: unictype.h + +8.1 General category +==================== + + Every Unicode character or code point has a _general category_ +assigned to it. This classification is important for most algorithms +that work on Unicode text. + + The GNU libunistring library provides two kinds of API for working +with general categories. The object oriented API uses a variable to +denote every predefined general category value or combinations thereof. +The low-level API uses a bit mask instead. The advantage of the object +oriented API is that if only a few predefined general category values +are used, the data tables are relatively small. 
When you combine +general category values (using `uc_general_category_or', +`uc_general_category_and', or `uc_general_category_and_not'), or when +you use the low level bit masks, a big table is used that holds the +complete general category information for all Unicode characters. + +* Menu: + +* Object oriented API:: +* Bit mask API:: + + +File: libunistring.info, Node: Object oriented API, Next: Bit mask API, Up: General category + +8.1.1 The object oriented API for general category +-------------------------------------------------- + + -- Type: uc_general_category_t + This data type denotes a general category value. It is an + immediate type that can be copied by simple assignment, without + involving memory allocation. It is not an array type. + + The following are the predefined general category values. Additional +general categories may be added in the future. + + -- Constant: uc_general_category_t UC_CATEGORY_L + -- Constant: uc_general_category_t UC_CATEGORY_Lu + -- Constant: uc_general_category_t UC_CATEGORY_Ll + -- Constant: uc_general_category_t UC_CATEGORY_Lt + -- Constant: uc_general_category_t UC_CATEGORY_Lm + -- Constant: uc_general_category_t UC_CATEGORY_Lo + -- Constant: uc_general_category_t UC_CATEGORY_M + -- Constant: uc_general_category_t UC_CATEGORY_Mn + -- Constant: uc_general_category_t UC_CATEGORY_Mc + -- Constant: uc_general_category_t UC_CATEGORY_Me + -- Constant: uc_general_category_t UC_CATEGORY_N + -- Constant: uc_general_category_t UC_CATEGORY_Nd + -- Constant: uc_general_category_t UC_CATEGORY_Nl + -- Constant: uc_general_category_t UC_CATEGORY_No + -- Constant: uc_general_category_t UC_CATEGORY_P + -- Constant: uc_general_category_t UC_CATEGORY_Pc + -- Constant: uc_general_category_t UC_CATEGORY_Pd + -- Constant: uc_general_category_t UC_CATEGORY_Ps + -- Constant: uc_general_category_t UC_CATEGORY_Pe + -- Constant: uc_general_category_t UC_CATEGORY_Pi + -- Constant: uc_general_category_t UC_CATEGORY_Pf + -- Constant:
uc_general_category_t UC_CATEGORY_Po + -- Constant: uc_general_category_t UC_CATEGORY_S + -- Constant: uc_general_category_t UC_CATEGORY_Sm + -- Constant: uc_general_category_t UC_CATEGORY_Sc + -- Constant: uc_general_category_t UC_CATEGORY_Sk + -- Constant: uc_general_category_t UC_CATEGORY_So + -- Constant: uc_general_category_t UC_CATEGORY_Z + -- Constant: uc_general_category_t UC_CATEGORY_Zs + -- Constant: uc_general_category_t UC_CATEGORY_Zl + -- Constant: uc_general_category_t UC_CATEGORY_Zp + -- Constant: uc_general_category_t UC_CATEGORY_C + -- Constant: uc_general_category_t UC_CATEGORY_Cc + -- Constant: uc_general_category_t UC_CATEGORY_Cf + -- Constant: uc_general_category_t UC_CATEGORY_Cs + -- Constant: uc_general_category_t UC_CATEGORY_Co + -- Constant: uc_general_category_t UC_CATEGORY_Cn + + The following are alias names for predefined General category values. + + -- Macro: uc_general_category_t UC_LETTER + This is another name for `UC_CATEGORY_L'. + + -- Macro: uc_general_category_t UC_UPPERCASE_LETTER + This is another name for `UC_CATEGORY_Lu'. + + -- Macro: uc_general_category_t UC_LOWERCASE_LETTER + This is another name for `UC_CATEGORY_Ll'. + + -- Macro: uc_general_category_t UC_TITLECASE_LETTER + This is another name for `UC_CATEGORY_Lt'. + + -- Macro: uc_general_category_t UC_MODIFIER_LETTER + This is another name for `UC_CATEGORY_Lm'. + + -- Macro: uc_general_category_t UC_OTHER_LETTER + This is another name for `UC_CATEGORY_Lo'. + + -- Macro: uc_general_category_t UC_MARK + This is another name for `UC_CATEGORY_M'. + + -- Macro: uc_general_category_t UC_NON_SPACING_MARK + This is another name for `UC_CATEGORY_Mn'. + + -- Macro: uc_general_category_t UC_COMBINING_SPACING_MARK + This is another name for `UC_CATEGORY_Mc'. + + -- Macro: uc_general_category_t UC_ENCLOSING_MARK + This is another name for `UC_CATEGORY_Me'. + + -- Macro: uc_general_category_t UC_NUMBER + This is another name for `UC_CATEGORY_N'. 
+ + -- Macro: uc_general_category_t UC_DECIMAL_DIGIT_NUMBER + This is another name for `UC_CATEGORY_Nd'. + + -- Macro: uc_general_category_t UC_LETTER_NUMBER + This is another name for `UC_CATEGORY_Nl'. + + -- Macro: uc_general_category_t UC_OTHER_NUMBER + This is another name for `UC_CATEGORY_No'. + + -- Macro: uc_general_category_t UC_PUNCTUATION + This is another name for `UC_CATEGORY_P'. + + -- Macro: uc_general_category_t UC_CONNECTOR_PUNCTUATION + This is another name for `UC_CATEGORY_Pc'. + + -- Macro: uc_general_category_t UC_DASH_PUNCTUATION + This is another name for `UC_CATEGORY_Pd'. + + -- Macro: uc_general_category_t UC_OPEN_PUNCTUATION + This is another name for `UC_CATEGORY_Ps' ("start punctuation"). + + -- Macro: uc_general_category_t UC_CLOSE_PUNCTUATION + This is another name for `UC_CATEGORY_Pe' ("end punctuation"). + + -- Macro: uc_general_category_t UC_INITIAL_QUOTE_PUNCTUATION + This is another name for `UC_CATEGORY_Pi'. + + -- Macro: uc_general_category_t UC_FINAL_QUOTE_PUNCTUATION + This is another name for `UC_CATEGORY_Pf'. + + -- Macro: uc_general_category_t UC_OTHER_PUNCTUATION + This is another name for `UC_CATEGORY_Po'. + + -- Macro: uc_general_category_t UC_SYMBOL + This is another name for `UC_CATEGORY_S'. + + -- Macro: uc_general_category_t UC_MATH_SYMBOL + This is another name for `UC_CATEGORY_Sm'. + + -- Macro: uc_general_category_t UC_CURRENCY_SYMBOL + This is another name for `UC_CATEGORY_Sc'. + + -- Macro: uc_general_category_t UC_MODIFIER_SYMBOL + This is another name for `UC_CATEGORY_Sk'. + + -- Macro: uc_general_category_t UC_OTHER_SYMBOL + This is another name for `UC_CATEGORY_So'. + + -- Macro: uc_general_category_t UC_SEPARATOR + This is another name for `UC_CATEGORY_Z'. + + -- Macro: uc_general_category_t UC_SPACE_SEPARATOR + This is another name for `UC_CATEGORY_Zs'. + + -- Macro: uc_general_category_t UC_LINE_SEPARATOR + This is another name for `UC_CATEGORY_Zl'. 
+ + -- Macro: uc_general_category_t UC_PARAGRAPH_SEPARATOR + This is another name for `UC_CATEGORY_Zp'. + + -- Macro: uc_general_category_t UC_OTHER + This is another name for `UC_CATEGORY_C'. + + -- Macro: uc_general_category_t UC_CONTROL + This is another name for `UC_CATEGORY_Cc'. + + -- Macro: uc_general_category_t UC_FORMAT + This is another name for `UC_CATEGORY_Cf'. + + -- Macro: uc_general_category_t UC_SURROGATE + This is another name for `UC_CATEGORY_Cs'. All code points in this + category are invalid characters. + + -- Macro: uc_general_category_t UC_PRIVATE_USE + This is another name for `UC_CATEGORY_Co'. + + -- Macro: uc_general_category_t UC_UNASSIGNED + This is another name for `UC_CATEGORY_Cn'. Some code points in + this category are invalid characters. + + The following functions combine general categories, like in a +boolean algebra, except that there is no `not' operation. + + -- Function: uc_general_category_t uc_general_category_or + (uc_general_category_t CATEGORY1, uc_general_category_t + CATEGORY2) + Returns the union of two general categories. This corresponds to + the union of the two sets of characters. + + -- Function: uc_general_category_t uc_general_category_and + (uc_general_category_t CATEGORY1, uc_general_category_t + CATEGORY2) + Returns the intersection of two general categories as bit masks. + This _does not_ correspond to the intersection of the two sets of + characters. + + -- Function: uc_general_category_t uc_general_category_and_not + (uc_general_category_t CATEGORY1, uc_general_category_t + CATEGORY2) + Returns the intersection of a general category with the complement + of a second general category, as bit masks. This _does not_ + correspond to the intersection with complement, when viewing the + categories as sets of characters. + + The following functions associate general categories with their names.
+ + -- Function: const char * uc_general_category_name + (uc_general_category_t CATEGORY) + Returns the name of a general category. Returns NULL if the + general category corresponds to a bit mask that does not have a + name. + + -- Function: uc_general_category_t uc_general_category_byname (const + char *CATEGORY_NAME) + Returns the general category given by name, e.g. `"Lu"'. + + The following functions view general categories as sets of Unicode +characters. + + -- Function: uc_general_category_t uc_general_category (ucs4_t UC) + Returns the general category of a Unicode character. + + This function uses a big table. + + -- Function: bool uc_is_general_category (ucs4_t UC, + uc_general_category_t CATEGORY) + Tests whether a Unicode character belongs to a given category. + The CATEGORY argument can be a predefined general category or the + combination of several predefined general categories. + + +File: libunistring.info, Node: Bit mask API, Prev: Object oriented API, Up: General category + +8.1.2 The bit mask API for general category +------------------------------------------- + + The following are the predefined general category values as bit masks. +Additional general categories may be added in the future.
+ + -- Macro: uint32_t UC_CATEGORY_MASK_L + -- Macro: uint32_t UC_CATEGORY_MASK_Lu + -- Macro: uint32_t UC_CATEGORY_MASK_Ll + -- Macro: uint32_t UC_CATEGORY_MASK_Lt + -- Macro: uint32_t UC_CATEGORY_MASK_Lm + -- Macro: uint32_t UC_CATEGORY_MASK_Lo + -- Macro: uint32_t UC_CATEGORY_MASK_M + -- Macro: uint32_t UC_CATEGORY_MASK_Mn + -- Macro: uint32_t UC_CATEGORY_MASK_Mc + -- Macro: uint32_t UC_CATEGORY_MASK_Me + -- Macro: uint32_t UC_CATEGORY_MASK_N + -- Macro: uint32_t UC_CATEGORY_MASK_Nd + -- Macro: uint32_t UC_CATEGORY_MASK_Nl + -- Macro: uint32_t UC_CATEGORY_MASK_No + -- Macro: uint32_t UC_CATEGORY_MASK_P + -- Macro: uint32_t UC_CATEGORY_MASK_Pc + -- Macro: uint32_t UC_CATEGORY_MASK_Pd + -- Macro: uint32_t UC_CATEGORY_MASK_Ps + -- Macro: uint32_t UC_CATEGORY_MASK_Pe + -- Macro: uint32_t UC_CATEGORY_MASK_Pi + -- Macro: uint32_t UC_CATEGORY_MASK_Pf + -- Macro: uint32_t UC_CATEGORY_MASK_Po + -- Macro: uint32_t UC_CATEGORY_MASK_S + -- Macro: uint32_t UC_CATEGORY_MASK_Sm + -- Macro: uint32_t UC_CATEGORY_MASK_Sc + -- Macro: uint32_t UC_CATEGORY_MASK_Sk + -- Macro: uint32_t UC_CATEGORY_MASK_So + -- Macro: uint32_t UC_CATEGORY_MASK_Z + -- Macro: uint32_t UC_CATEGORY_MASK_Zs + -- Macro: uint32_t UC_CATEGORY_MASK_Zl + -- Macro: uint32_t UC_CATEGORY_MASK_Zp + -- Macro: uint32_t UC_CATEGORY_MASK_C + -- Macro: uint32_t UC_CATEGORY_MASK_Cc + -- Macro: uint32_t UC_CATEGORY_MASK_Cf + -- Macro: uint32_t UC_CATEGORY_MASK_Cs + -- Macro: uint32_t UC_CATEGORY_MASK_Co + -- Macro: uint32_t UC_CATEGORY_MASK_Cn + + The following function views general categories as sets of Unicode +characters. + + -- Function: bool uc_is_general_category_withtable (ucs4_t UC, + uint32_t BITMASK) + Tests whether a Unicode character belongs to a given category. + The BITMASK argument can be a predefined general category bitmask + or the combination of several predefined general category bitmasks. + + This function uses a big table comprising all general categories. 
+ + +File: libunistring.info, Node: Canonical combining class, Next: Bidirectional category, Prev: General category, Up: unictype.h + +8.2 Canonical combining class +============================= + + Every Unicode character or code point has a _canonical combining +class_ assigned to it. + + What is the meaning of the canonical combining class? Essentially, +it indicates the priority with which a combining character is attached +to its base character. The characters for which the canonical +combining class is 0 are the base characters, and the characters for +which it is greater than 0 are the combining characters. Combining +characters are rendered near/attached/around their base character, and +combining characters with small combining classes are attached "first" +or "closer" to the base character. + + The canonical combining class of a character is a number in the range +0..255. The possible values are described in the Unicode Character +Database `http://www.unicode.org/Public/UNIDATA/UCD.html'. The list +here is not definitive; more values can be added in future versions. + + -- Constant: int UC_CCC_NR + The canonical combining class value for "Not Reordered" characters. + The value is 0. + + -- Constant: int UC_CCC_OV + The canonical combining class value for "Overlay" characters. + + -- Constant: int UC_CCC_NK + The canonical combining class value for "Nukta" characters. + + -- Constant: int UC_CCC_KV + The canonical combining class value for "Kana Voicing" characters. + + -- Constant: int UC_CCC_VR + The canonical combining class value for "Virama" characters. + + -- Constant: int UC_CCC_ATBL + The canonical combining class value for "Attached Below Left" + characters. + + -- Constant: int UC_CCC_ATB + The canonical combining class value for "Attached Below" + characters. + + -- Constant: int UC_CCC_ATAR + The canonical combining class value for "Attached Above Right" + characters. 
+ + -- Constant: int UC_CCC_BL + The canonical combining class value for "Below Left" characters. + + -- Constant: int UC_CCC_B + The canonical combining class value for "Below" characters. + + -- Constant: int UC_CCC_BR + The canonical combining class value for "Below Right" characters. + + -- Constant: int UC_CCC_L + The canonical combining class value for "Left" characters. + + -- Constant: int UC_CCC_R + The canonical combining class value for "Right" characters. + + -- Constant: int UC_CCC_AL + The canonical combining class value for "Above Left" characters. + + -- Constant: int UC_CCC_A + The canonical combining class value for "Above" characters. + + -- Constant: int UC_CCC_AR + The canonical combining class value for "Above Right" characters. + + -- Constant: int UC_CCC_DB + The canonical combining class value for "Double Below" characters. + + -- Constant: int UC_CCC_DA + The canonical combining class value for "Double Above" characters. + + -- Constant: int UC_CCC_IS + The canonical combining class value for "Iota Subscript" + characters. + + The following function looks up the canonical combining class of a +character. + + -- Function: int uc_combining_class (ucs4_t UC) + Returns the canonical combining class of a Unicode character. + + +File: libunistring.info, Node: Bidirectional category, Next: Decimal digit value, Prev: Canonical combining class, Up: unictype.h + +8.3 Bidirectional category +========================== + + Every Unicode character or code point has a _bidirectional category_ +assigned to it. + + The bidirectional category guides the bidirectional algorithm +(`http://www.unicode.org/reports/tr9/'). The possible values are the +following. + + -- Constant: int UC_BIDI_L + The bidirectional category for "Left-to-Right" characters. + + -- Constant: int UC_BIDI_LRE + The bidirectional category for "Left-to-Right Embedding" + characters. + + -- Constant: int UC_BIDI_LRO + The bidirectional category for "Left-to-Right Override" characters.
+ + -- Constant: int UC_BIDI_R + The bidirectional category for "Right-to-Left" characters. + + -- Constant: int UC_BIDI_AL + The bidirectional category for "Right-to-Left Arabic" characters. + + -- Constant: int UC_BIDI_RLE + The bidirectional category for "Right-to-Left Embedding" + characters. + + -- Constant: int UC_BIDI_RLO + The bidirectional category for "Right-to-Left Override" characters. + + -- Constant: int UC_BIDI_PDF + The bidirectional category for "Pop Directional Format" characters. + + -- Constant: int UC_BIDI_EN + The bidirectional category for "European Number" characters. + + -- Constant: int UC_BIDI_ES + The bidirectional category for "European Number Separator" + characters. + + -- Constant: int UC_BIDI_ET + The bidirectional category for "European Number Terminator" + characters. + + -- Constant: int UC_BIDI_AN + The bidirectional category for "Arabic Number" characters. + + -- Constant: int UC_BIDI_CS + The bidirectional category for "Common Number Separator" + characters. + + -- Constant: int UC_BIDI_NSM + The bidirectional category for "Non-Spacing Mark" characters. + + -- Constant: int UC_BIDI_BN + The bidirectional category for "Boundary Neutral" characters. + + -- Constant: int UC_BIDI_B + The bidirectional category for "Paragraph Separator" characters. + + -- Constant: int UC_BIDI_S + The bidirectional category for "Segment Separator" characters. + + -- Constant: int UC_BIDI_WS + The bidirectional category for "Whitespace" characters. + + -- Constant: int UC_BIDI_ON + The bidirectional category for "Other Neutral" characters. + + The following functions implement the association between a +bidirectional category and its name. + + -- Function: const char * uc_bidi_category_name (int CATEGORY) + Returns the name of a bidirectional category. + + -- Function: int uc_bidi_category_byname (const char *CATEGORY_NAME) + Returns the bidirectional category given by name, e.g. `"LRE"'. 
+ + The following functions view bidirectional categories as sets of +Unicode characters. + + -- Function: int uc_bidi_category (ucs4_t UC) + Returns the bidirectional category of a Unicode character. + + -- Function: bool uc_is_bidi_category (ucs4_t UC, int CATEGORY) + Tests whether a Unicode character belongs to a given bidirectional + category. + + +File: libunistring.info, Node: Decimal digit value, Next: Digit value, Prev: Bidirectional category, Up: unictype.h + +8.4 Decimal digit value +======================= + + Decimal digits (like the digits from `0' to `9') exist in many +scripts. The following function converts a decimal digit character to +its numerical value. + + -- Function: int uc_decimal_value (ucs4_t UC) + Returns the decimal digit value of a Unicode character. The + return value is an integer in the range 0..9, or -1 for characters + that do not represent a decimal digit. + + +File: libunistring.info, Node: Digit value, Next: Numeric value, Prev: Decimal digit value, Up: unictype.h + +8.5 Digit value +=============== + + Digit characters are like decimal digit characters, possibly in +special forms, such as superscript, subscript, or circled. The +following function converts a digit character to its numerical value. + + -- Function: int uc_digit_value (ucs4_t UC) + Returns the digit value of a Unicode character. The return value + is an integer in the range 0..9, or -1 for characters that do not + represent a digit. + + +File: libunistring.info, Node: Numeric value, Next: Mirrored character, Prev: Digit value, Up: unictype.h + +8.6 Numeric value +================= + + There are also characters that represent numbers without a digit +system, like the Roman numerals, and fractional numbers, like 1/4 or +3/4. + + The following type represents the numeric value of a Unicode +character. 
+ + -- Type: uc_fraction_t + This is a structure type with the following fields: + int numerator; + int denominator; + An integer N is represented by `numerator = N', `denominator = 1'. + + The following function converts a number character to its numerical +value. + + -- Function: uc_fraction_t uc_numeric_value (ucs4_t UC) + Returns the numeric value of a Unicode character. The return + value is a fraction, or the pseudo-fraction `{ 0, 0 }' for + characters that do not represent a number. + + +File: libunistring.info, Node: Mirrored character, Next: Properties, Prev: Numeric value, Up: unictype.h + +8.7 Mirrored character +====================== + + Character mirroring is used to associate the closing parenthesis +character to the opening parenthesis character, the closing brace +character with the opening brace character, and so on. + + The following function looks up the mirrored character of a Unicode +character. + + -- Function: bool uc_mirror_char (ucs4_t UC, ucs4_t *PUC) + Stores the mirrored character of a Unicode character UC in `*PUC' + and returns `true', if it exists. Otherwise it stores UC + unmodified in `*PUC' and returns `false'. + + +File: libunistring.info, Node: Properties, Next: Scripts, Prev: Mirrored character, Up: unictype.h + +8.8 Properties +============== + + This section defines boolean properties of Unicode characters. This +means, a character either has the given property or does not have it. +In other words, the property can be viewed as a subset of the set of +Unicode characters. + + The GNU libunistring library provides two kinds of API for working +with properties. The object oriented API uses a type `uc_property_t' +to designate a property. In the function-based API, which is a bit more +low level, a property is merely a function. 
+ +* Menu: + +* Properties as objects:: +* Properties as functions:: + + +File: libunistring.info, Node: Properties as objects, Next: Properties as functions, Up: Properties + +8.8.1 Properties as objects - the object oriented API +----------------------------------------------------- + + The following type designates a property on Unicode characters. + + -- Type: uc_property_t + This data type denotes a boolean property on Unicode characters. + It is an immediate type that can be copied by simple assignment, + without involving memory allocation. It is not an array type. + + Many Unicode properties are predefined. + + The following are general properties. + + -- Constant: uc_property_t UC_PROPERTY_WHITE_SPACE + -- Constant: uc_property_t UC_PROPERTY_ALPHABETIC + -- Constant: uc_property_t UC_PROPERTY_OTHER_ALPHABETIC + -- Constant: uc_property_t UC_PROPERTY_NOT_A_CHARACTER + -- Constant: uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT + -- Constant: uc_property_t +UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT + -- Constant: uc_property_t UC_PROPERTY_DEPRECATED + -- Constant: uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION + -- Constant: uc_property_t UC_PROPERTY_VARIATION_SELECTOR + -- Constant: uc_property_t UC_PROPERTY_PRIVATE_USE + -- Constant: uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE + + The following properties are related to case folding. + + -- Constant: uc_property_t UC_PROPERTY_UPPERCASE + -- Constant: uc_property_t UC_PROPERTY_OTHER_UPPERCASE + -- Constant: uc_property_t UC_PROPERTY_LOWERCASE + -- Constant: uc_property_t UC_PROPERTY_OTHER_LOWERCASE + -- Constant: uc_property_t UC_PROPERTY_TITLECASE + -- Constant: uc_property_t UC_PROPERTY_SOFT_DOTTED + + The following properties are related to identifiers. 
+ + -- Constant: uc_property_t UC_PROPERTY_ID_START + -- Constant: uc_property_t UC_PROPERTY_OTHER_ID_START + -- Constant: uc_property_t UC_PROPERTY_ID_CONTINUE + -- Constant: uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE + -- Constant: uc_property_t UC_PROPERTY_XID_START + -- Constant: uc_property_t UC_PROPERTY_XID_CONTINUE + -- Constant: uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE + -- Constant: uc_property_t UC_PROPERTY_PATTERN_SYNTAX + + The following properties have an influence on shaping and rendering. + + -- Constant: uc_property_t UC_PROPERTY_JOIN_CONTROL + -- Constant: uc_property_t UC_PROPERTY_GRAPHEME_BASE + -- Constant: uc_property_t UC_PROPERTY_GRAPHEME_EXTEND + -- Constant: uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND + -- Constant: uc_property_t UC_PROPERTY_GRAPHEME_LINK + + The following properties relate to bidirectional reordering. + + -- Constant: uc_property_t UC_PROPERTY_BIDI_CONTROL + -- Constant: uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT + -- Constant: uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT + -- Constant: uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT + -- Constant: uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT + -- Constant: uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT + -- Constant: uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_WHITESPACE + -- Constant: uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK + -- Constant: uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL + -- Constant: uc_property_t UC_PROPERTY_BIDI_PDF + -- Constant: uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE + -- Constant: uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL + + The following properties deal with number representations. 
+ + -- Constant: uc_property_t UC_PROPERTY_HEX_DIGIT + -- Constant: uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT + + The following properties deal with CJK. + + -- Constant: uc_property_t UC_PROPERTY_IDEOGRAPHIC + -- Constant: uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH + -- Constant: uc_property_t UC_PROPERTY_RADICAL + -- Constant: uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR + -- Constant: uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR + + Other miscellaneous properties are: + + -- Constant: uc_property_t UC_PROPERTY_ZERO_WIDTH + -- Constant: uc_property_t UC_PROPERTY_SPACE + -- Constant: uc_property_t UC_PROPERTY_NON_BREAK + -- Constant: uc_property_t UC_PROPERTY_ISO_CONTROL + -- Constant: uc_property_t UC_PROPERTY_FORMAT_CONTROL + -- Constant: uc_property_t UC_PROPERTY_DASH + -- Constant: uc_property_t UC_PROPERTY_HYPHEN + -- Constant: uc_property_t UC_PROPERTY_PUNCTUATION + -- Constant: uc_property_t UC_PROPERTY_LINE_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_QUOTATION_MARK + -- Constant: uc_property_t UC_PROPERTY_SENTENCE_TERMINAL + -- Constant: uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION + -- Constant: uc_property_t UC_PROPERTY_CURRENCY_SYMBOL + -- Constant: uc_property_t UC_PROPERTY_MATH + -- Constant: uc_property_t UC_PROPERTY_OTHER_MATH + -- Constant: uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION + -- Constant: uc_property_t UC_PROPERTY_LEFT_OF_PAIR + -- Constant: uc_property_t UC_PROPERTY_COMBINING + -- Constant: uc_property_t UC_PROPERTY_COMPOSITE + -- Constant: uc_property_t UC_PROPERTY_DECIMAL_DIGIT + -- Constant: uc_property_t UC_PROPERTY_NUMERIC + -- Constant: uc_property_t UC_PROPERTY_DIACRITIC + -- Constant: uc_property_t UC_PROPERTY_EXTENDER + -- Constant: uc_property_t UC_PROPERTY_IGNORABLE_CONTROL + + The following function looks up a property by its name. + + -- Function: uc_property_t uc_property_byname (const char + *PROPERTY_NAME) + Returns the property given by name, e.g. 
`"White space"'. If a + property with the given name exists, the result will satisfy the + `uc_property_is_valid' predicate. Otherwise the result will not + satisfy this predicate and must not be passed to functions that + expect an `uc_property_t' argument. + + This function references a big table of all predefined properties. + Its use can significantly increase the size of your application. + + -- Function: bool uc_property_is_valid (uc_property_t property) + Returns `true' when the given property is valid, or `false' + otherwise. + + The following function views a property as a set of Unicode +characters. + + -- Function: bool uc_is_property (ucs4_t UC, uc_property_t PROPERTY) + Tests whether the Unicode character UC has the given property. + + +File: libunistring.info, Node: Properties as functions, Prev: Properties as objects, Up: Properties + +8.8.2 Properties as functions - the functional API +-------------------------------------------------- + + The following are general properties. + + -- Function: bool uc_is_property_white_space (ucs4_t UC) + -- Function: bool uc_is_property_alphabetic (ucs4_t UC) + -- Function: bool uc_is_property_other_alphabetic (ucs4_t UC) + -- Function: bool uc_is_property_not_a_character (ucs4_t UC) + -- Function: bool uc_is_property_default_ignorable_code_point (ucs4_t + UC) + -- Function: bool uc_is_property_other_default_ignorable_code_point + (ucs4_t UC) + -- Function: bool uc_is_property_deprecated (ucs4_t UC) + -- Function: bool uc_is_property_logical_order_exception (ucs4_t UC) + -- Function: bool uc_is_property_variation_selector (ucs4_t UC) + -- Function: bool uc_is_property_private_use (ucs4_t UC) + -- Function: bool uc_is_property_unassigned_code_value (ucs4_t UC) + + The following properties are related to case folding. 
+ + -- Function: bool uc_is_property_uppercase (ucs4_t UC) + -- Function: bool uc_is_property_other_uppercase (ucs4_t UC) + -- Function: bool uc_is_property_lowercase (ucs4_t UC) + -- Function: bool uc_is_property_other_lowercase (ucs4_t UC) + -- Function: bool uc_is_property_titlecase (ucs4_t UC) + -- Function: bool uc_is_property_soft_dotted (ucs4_t UC) + + The following properties are related to identifiers. + + -- Function: bool uc_is_property_id_start (ucs4_t UC) + -- Function: bool uc_is_property_other_id_start (ucs4_t UC) + -- Function: bool uc_is_property_id_continue (ucs4_t UC) + -- Function: bool uc_is_property_other_id_continue (ucs4_t UC) + -- Function: bool uc_is_property_xid_start (ucs4_t UC) + -- Function: bool uc_is_property_xid_continue (ucs4_t UC) + -- Function: bool uc_is_property_pattern_white_space (ucs4_t UC) + -- Function: bool uc_is_property_pattern_syntax (ucs4_t UC) + + The following properties have an influence on shaping and rendering. + + -- Function: bool uc_is_property_join_control (ucs4_t UC) + -- Function: bool uc_is_property_grapheme_base (ucs4_t UC) + -- Function: bool uc_is_property_grapheme_extend (ucs4_t UC) + -- Function: bool uc_is_property_other_grapheme_extend (ucs4_t UC) + -- Function: bool uc_is_property_grapheme_link (ucs4_t UC) + + The following properties relate to bidirectional reordering. 
+ + -- Function: bool uc_is_property_bidi_control (ucs4_t UC) + -- Function: bool uc_is_property_bidi_left_to_right (ucs4_t UC) + -- Function: bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t UC) + -- Function: bool uc_is_property_bidi_arabic_right_to_left (ucs4_t UC) + -- Function: bool uc_is_property_bidi_european_digit (ucs4_t UC) + -- Function: bool uc_is_property_bidi_eur_num_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_eur_num_terminator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_arabic_digit (ucs4_t UC) + -- Function: bool uc_is_property_bidi_common_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_block_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_segment_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_whitespace (ucs4_t UC) + -- Function: bool uc_is_property_bidi_non_spacing_mark (ucs4_t UC) + -- Function: bool uc_is_property_bidi_boundary_neutral (ucs4_t UC) + -- Function: bool uc_is_property_bidi_pdf (ucs4_t UC) + -- Function: bool uc_is_property_bidi_embedding_or_override (ucs4_t UC) + -- Function: bool uc_is_property_bidi_other_neutral (ucs4_t UC) + + The following properties deal with number representations. + + -- Function: bool uc_is_property_hex_digit (ucs4_t UC) + -- Function: bool uc_is_property_ascii_hex_digit (ucs4_t UC) + + The following properties deal with CJK. 
+ + -- Function: bool uc_is_property_ideographic (ucs4_t UC) + -- Function: bool uc_is_property_unified_ideograph (ucs4_t UC) + -- Function: bool uc_is_property_radical (ucs4_t UC) + -- Function: bool uc_is_property_ids_binary_operator (ucs4_t UC) + -- Function: bool uc_is_property_ids_trinary_operator (ucs4_t UC) + + Other miscellaneous properties are: + + -- Function: bool uc_is_property_zero_width (ucs4_t UC) + -- Function: bool uc_is_property_space (ucs4_t UC) + -- Function: bool uc_is_property_non_break (ucs4_t UC) + -- Function: bool uc_is_property_iso_control (ucs4_t UC) + -- Function: bool uc_is_property_format_control (ucs4_t UC) + -- Function: bool uc_is_property_dash (ucs4_t UC) + -- Function: bool uc_is_property_hyphen (ucs4_t UC) + -- Function: bool uc_is_property_punctuation (ucs4_t UC) + -- Function: bool uc_is_property_line_separator (ucs4_t UC) + -- Function: bool uc_is_property_paragraph_separator (ucs4_t UC) + -- Function: bool uc_is_property_quotation_mark (ucs4_t UC) + -- Function: bool uc_is_property_sentence_terminal (ucs4_t UC) + -- Function: bool uc_is_property_terminal_punctuation (ucs4_t UC) + -- Function: bool uc_is_property_currency_symbol (ucs4_t UC) + -- Function: bool uc_is_property_math (ucs4_t UC) + -- Function: bool uc_is_property_other_math (ucs4_t UC) + -- Function: bool uc_is_property_paired_punctuation (ucs4_t UC) + -- Function: bool uc_is_property_left_of_pair (ucs4_t UC) + -- Function: bool uc_is_property_combining (ucs4_t UC) + -- Function: bool uc_is_property_composite (ucs4_t UC) + -- Function: bool uc_is_property_decimal_digit (ucs4_t UC) + -- Function: bool uc_is_property_numeric (ucs4_t UC) + -- Function: bool uc_is_property_diacritic (ucs4_t UC) + -- Function: bool uc_is_property_extender (ucs4_t UC) + -- Function: bool uc_is_property_ignorable_control (ucs4_t UC) + + +File: libunistring.info, Node: Scripts, Next: Blocks, Prev: Properties, Up: unictype.h + +8.9 Scripts +=========== + + The Unicode characters are 
subdivided into scripts. + + The following type is used to represent a script: + + -- Type: uc_script_t + This data type is a structure type that refers to statically + allocated read-only data. It contains the following fields: + const char *name; + + The `name' field contains the name of the script. + + The following functions look up a script. + + -- Function: const uc_script_t * uc_script (ucs4_t UC) + Returns the script of a Unicode character. Returns NULL if UC + does not belong to any script. + + -- Function: const uc_script_t * uc_script_byname (const char + *SCRIPT_NAME) + Returns the script given by its name, e.g. `"HAN"'. Returns NULL + if a script with the given name does not exist. + + The following function views a script as a set of Unicode characters. + + -- Function: bool uc_is_script (ucs4_t UC, const uc_script_t *SCRIPT) + Tests whether a Unicode character belongs to a given script. + + The following gives a global picture of all scripts. + + -- Function: void uc_all_scripts (const uc_script_t **SCRIPTS, size_t + *COUNT) + Get the list of all scripts. Stores a pointer to an array of all + scripts in `*SCRIPTS' and the length of this array in `*COUNT'. + + +File: libunistring.info, Node: Blocks, Next: ISO C and Java syntax, Prev: Scripts, Up: unictype.h + +8.10 Blocks +=========== + + The Unicode characters are subdivided into blocks. A block is an +interval of Unicode code points. + + The following type is used to represent a block. + + -- Type: uc_block_t + This data type is a structure type that refers to statically + allocated data. It contains the following fields: + ucs4_t start; + ucs4_t end; + const char *name; + + The `start' field is the first Unicode code point in the block. + + The `end' field is the last Unicode code point in the block. + + The `name' field is the name of the block. + + The following function looks up a block. + + -- Function: const uc_block_t * uc_block (ucs4_t UC) + Returns the block a character belongs to. 
+ + The following function views a block as a set of Unicode characters. + + -- Function: bool uc_is_block (ucs4_t UC, const uc_block_t *BLOCK) + Tests whether a Unicode character belongs to a given block. + + The following gives a global picture of all blocks. + + -- Function: void uc_all_blocks (const uc_block_t **BLOCKS, size_t + *COUNT) + Get the list of all blocks. Stores a pointer to an array of all + blocks in `*BLOCKS' and the length of this array in `*COUNT'. + + +File: libunistring.info, Node: ISO C and Java syntax, Next: Classifications like in ISO C, Prev: Blocks, Up: unictype.h + +8.11 ISO C and Java syntax +========================== + + The following properties are taken from language standards. The +supported language standards are ISO C 99 and Java. + + -- Function: bool uc_is_c_whitespace (ucs4_t UC) + Tests whether a Unicode character is considered whitespace in ISO + C 99. + + -- Function: bool uc_is_java_whitespace (ucs4_t UC) + Tests whether a Unicode character is considered whitespace in Java. + + The following enumerated values are the possible return values of +the functions `uc_c_ident_category' and `uc_java_ident_category'. + + -- Constant: int UC_IDENTIFIER_START + This return value means that the given character is valid as first + or subsequent character in an identifier. + + -- Constant: int UC_IDENTIFIER_VALID + This return value means that the given character is valid as + subsequent character only. + + -- Constant: int UC_IDENTIFIER_INVALID + This return value means that the given character is not valid in + an identifier. + + -- Constant: int UC_IDENTIFIER_IGNORABLE + This return value (only for Java) means that the given character + is ignorable. + + The following functions determine whether a given character can be a +constituent of an identifier in the given programming language. 
+ + -- Function: int uc_c_ident_category (ucs4_t UC) + Returns the categorization of a Unicode character with respect to + the ISO C 99 identifier syntax. + + -- Function: int uc_java_ident_category (ucs4_t UC) + Returns the categorization of a Unicode character with respect to + the Java identifier syntax. + + +File: libunistring.info, Node: Classifications like in ISO C, Prev: ISO C and Java syntax, Up: unictype.h + +8.12 Classifications like in ISO C +================================== + + The following character classifications mimic those declared in the +ISO C header files `<ctype.h>' and `<wctype.h>'. These functions are +deprecated, because this set of functions was designed with ASCII in +mind and cannot reflect the more diverse reality of the Unicode +character set. But they can be a quick-and-dirty porting aid when +migrating from `wchar_t' APIs to Unicode strings. + + -- Function: bool uc_is_alnum (ucs4_t UC) + Tests for any character for which `uc_is_alpha' or `uc_is_digit' is + true. + + -- Function: bool uc_is_alpha (ucs4_t UC) + Tests for any character for which `uc_is_upper' or `uc_is_lower' is + true, or any character that is one of a locale-specific set of + characters for which none of `uc_is_cntrl', `uc_is_digit', + `uc_is_punct', or `uc_is_space' is true. + + -- Function: bool uc_is_cntrl (ucs4_t UC) + Tests for any control character. + + -- Function: bool uc_is_digit (ucs4_t UC) + Tests for any character that corresponds to a decimal-digit + character. + + -- Function: bool uc_is_graph (ucs4_t UC) + Tests for any character for which `uc_is_print' is true and + `uc_is_space' is false. + + -- Function: bool uc_is_lower (ucs4_t UC) + Tests for any character that corresponds to a lowercase letter or + is one of a locale-specific set of characters for which none of + `uc_is_cntrl', `uc_is_digit', `uc_is_punct', or `uc_is_space' is + true. + + -- Function: bool uc_is_print (ucs4_t UC) + Tests for any printing character. 
+ + -- Function: bool uc_is_punct (ucs4_t UC) + Tests for any printing character that is one of a locale-specific + set of characters for which neither `uc_is_space' nor + `uc_is_alnum' is true. + + -- Function: bool uc_is_space (ucs4_t UC) + Tests for any character that corresponds to a locale-specific set + of characters for which none of `uc_is_alnum', `uc_is_graph', or + `uc_is_punct' is true. + + -- Function: bool uc_is_upper (ucs4_t UC) + Tests for any character that corresponds to an uppercase letter or + is one of a locale-specific set of characters for which none of + `uc_is_cntrl', `uc_is_digit', `uc_is_punct', or `uc_is_space' is + true. + + -- Function: bool uc_is_xdigit (ucs4_t UC) + Tests for any character that corresponds to a hexadecimal-digit + character. + + -- Function: bool uc_is_blank (ucs4_t UC) + Tests for any character that corresponds to a standard blank + character or a locale-specific set of characters for which + `uc_is_alnum' is false. + + +File: libunistring.info, Node: uniwidth.h, Next: uniwbrk.h, Prev: unictype.h, Up: Top + +9 Display width `<uniwidth.h>' +****************************** + + This include file declares functions that return the display width, +measured in columns, of characters or strings, when output to a device +that uses non-proportional fonts. + + Note that for some rarely used characters the actual fonts or +terminal emulators can use a different width. There is no mechanism +for communicating the display width of characters across a Unix +pseudo-terminal (tty). Also, there are scripts with complex rendering, +like the Indic scripts. For these scripts, there is no such concept as +non-proportional fonts. Therefore the results of these functions +usually work fine on most scripts and on most characters but can fail +to represent the actual display width. + + These functions are locale dependent. The ENCODING argument +identifies the encoding (e.g. `"ISO-8859-2"' for Polish). 
+ + -- Function: int uc_width (ucs4_t UC, const char *ENCODING) + Determines and returns the number of column positions required for + UC. Returns -1 if UC is a control character that has an influence + on the column position when output. + + -- Function: int u8_width (const uint8_t *S, size_t N, const char + *ENCODING) + -- Function: int u16_width (const uint16_t *S, size_t N, const char + *ENCODING) + -- Function: int u32_width (const uint32_t *S, size_t N, const char + *ENCODING) + Determines and returns the number of column positions required for + the first N units (or fewer if S ends before this) in S. This + function ignores control characters in the string. + + -- Function: int u8_strwidth (const uint8_t *S, const char *ENCODING) + -- Function: int u16_strwidth (const uint16_t *S, const char *ENCODING) + -- Function: int u32_strwidth (const uint32_t *S, const char *ENCODING) + Determines and returns the number of column positions required for + S. This function ignores control characters in the string. + + +File: libunistring.info, Node: uniwbrk.h, Next: unilbrk.h, Prev: uniwidth.h, Up: Top + +10 Word breaks in strings `<uniwbrk.h>' +*************************************** + + This include file declares functions for determining where in a +string "words" start and end. Here "words" are not necessarily the +same as entities that can be looked up in dictionaries, but rather +groups of consecutive characters that should not be split by text +processing operations. + +* Menu: + +* Word breaks in a string:: +* Word break property:: + + +File: libunistring.info, Node: Word breaks in a string, Next: Word break property, Up: uniwbrk.h + +10.1 Word breaks in a string +============================ + + The following functions determine the word breaks in a string. 
+ + -- Function: void u8_wordbreaks (const uint8_t *S, size_t N, char *P) + -- Function: void u16_wordbreaks (const uint16_t *S, size_t N, char *P) + -- Function: void u32_wordbreaks (const uint32_t *S, size_t N, char *P) + -- Function: void ulc_wordbreaks (const char *S, size_t N, char *P) + Determines the word break points in S, an array of N units, and + stores the result at `P[0..N-1]'. + `P[i] = 1' + means that there is a word boundary between `S[i-1]' and + `S[i]'. + + `P[i] = 0' + means that `S[i-1]' and `S[i]' must not be separated. + `P[0]' is always set to 0. If an application wants to consider a + word break to be present at the beginning of the string (before + `S[0]') or at the end of the string (after `S[0..N-1]'), it has to + treat these cases explicitly. + + +File: libunistring.info, Node: Word break property, Prev: Word breaks in a string, Up: uniwbrk.h + +10.2 Word break property +======================== + + This is a more low-level API. The word break property is a property +defined in Unicode Standard Annex #29, section "Word Boundaries", see +`http://www.unicode.org/reports/tr29/#Word_Boundaries'. It is used for +determining the word breaks in a string. + + The following are the possible values of the word break property. +More values may be added in the future. + + -- Constant: int WBP_OTHER + -- Constant: int WBP_CR + -- Constant: int WBP_LF + -- Constant: int WBP_NEWLINE + -- Constant: int WBP_EXTEND + -- Constant: int WBP_FORMAT + -- Constant: int WBP_KATAKANA + -- Constant: int WBP_ALETTER + -- Constant: int WBP_MIDNUMLET + -- Constant: int WBP_MIDLETTER + -- Constant: int WBP_MIDNUM + -- Constant: int WBP_NUMERIC + -- Constant: int WBP_EXTENDNUMLET + + The following function looks up the word break property of a +character. + + -- Function: int uc_wordbreak_property (ucs4_t UC) + Returns the Word_Break property of a Unicode character. 
+ + +File: libunistring.info, Node: unilbrk.h, Next: uninorm.h, Prev: uniwbrk.h, Up: Top + +11 Line breaking `<unilbrk.h>' +****************************** + + This include file declares functions for determining where in a +string line breaks could or should be introduced, in order to make the +displayed string fit into a column of given width. + + These functions are locale dependent. The ENCODING argument +identifies the encoding (e.g. `"ISO-8859-2"' for Polish). + + The following enumerated values indicate whether, at a given +position, a line break is possible or not. Given a string S as an +array `S[0..N-1]' and a position I, the values have the following +meanings: + + -- Constant: int UC_BREAK_MANDATORY + This value indicates that `S[I]' is a line break character. + + -- Constant: int UC_BREAK_POSSIBLE + This value indicates that a line break may be inserted between + `S[I-1]' and `S[I]'. + + -- Constant: int UC_BREAK_HYPHENATION + This value indicates that a hyphen and a line break may be + inserted between `S[I-1]' and `S[I]'. But beware of language + dependent hyphenation rules. + + -- Constant: int UC_BREAK_PROHIBITED + This value indicates that `S[I-1]' and `S[I]' must not be + separated. + + -- Constant: int UC_BREAK_UNDEFINED + This value is not used as a return value; rather, in the + overriding argument of the `u*_width_linebreaks' functions, it + indicates the absence of an override. + + The following functions determine the positions at which line breaks +are possible. 
+ + -- Function: void u8_possible_linebreaks (const uint8_t *S, size_t N, + const char *ENCODING, char *P) + -- Function: void u16_possible_linebreaks (const uint16_t *S, size_t + N, const char *ENCODING, char *P) + -- Function: void u32_possible_linebreaks (const uint32_t *S, size_t + N, const char *ENCODING, char *P) + -- Function: void ulc_possible_linebreaks (const char *S, size_t N, + const char *ENCODING, char *P) + Determines the line break points in S, and stores the result at + `P[0..N-1]'. Every `P[I]' is assigned one of the values + `UC_BREAK_MANDATORY', `UC_BREAK_POSSIBLE', `UC_BREAK_HYPHENATION', + `UC_BREAK_PROHIBITED'. + + The following functions determine where line breaks should be +inserted so that each line fits in a given width, when output to a +device that uses non-proportional fonts. + + -- Function: int u8_width_linebreaks (const uint8_t *S, size_t N, int + WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + -- Function: int u16_width_linebreaks (const uint16_t *S, size_t N, + int WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + -- Function: int u32_width_linebreaks (const uint32_t *S, size_t N, + int WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + -- Function: int ulc_width_linebreaks (const char *S, size_t N, int + WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + Chooses the best line breaks, assuming that every character + occupies a width given by the `uc_width' function (see *note + uniwidth.h::). + + The string is `S[0..N-1]'. + + The maximum number of columns per line is given as WIDTH. The + starting column of the string is given as START_COLUMN. If the + algorithm shall keep room after the last piece, this amount of + room can be given as AT_END_COLUMNS. 
+ + OVERRIDE is an optional override; if `OVERRIDE[I] != + UC_BREAK_UNDEFINED', `OVERRIDE[I]' takes precedence over `P[I]' as + returned by the `u*_possible_linebreaks' function. + + The given ENCODING is used for disambiguating widths in `uc_width'. + + Returns the column after the end of the string, and stores the + result at `P[0..N-1]'. Every `P[I]' is assigned one of the values + `UC_BREAK_MANDATORY', `UC_BREAK_POSSIBLE', `UC_BREAK_HYPHENATION', + `UC_BREAK_PROHIBITED'. Here the value `UC_BREAK_POSSIBLE' + indicates that a line break _should_ be inserted. + + +File: libunistring.info, Node: uninorm.h, Next: unicase.h, Prev: unilbrk.h, Up: Top + +12 Normalization forms (composition and decomposition) `<uninorm.h>' +******************************************************************** + + This include file defines functions for transforming Unicode strings +to one of the four normal forms, known as NFC, NFD, NFKC, NFKD. These +transformations involve decomposition and -- for NFC and NFKC -- +composition of Unicode characters. + +* Menu: + +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: + + +File: libunistring.info, Node: Decomposition of characters, Next: Composition of characters, Up: uninorm.h + +12.1 Decomposition of Unicode characters +======================================== + + The following enumerated values are the possible types of +decomposition of a Unicode character. + + -- Constant: int UC_DECOMP_CANONICAL + Denotes canonical decomposition. + + -- Constant: int UC_DECOMP_FONT + UCD marker: `<font>'. Denotes a font variant (e.g. a blackletter + form). + + -- Constant: int UC_DECOMP_NOBREAK + UCD marker: `<noBreak>'. Denotes a no-break version of a space or + hyphen. + + -- Constant: int UC_DECOMP_INITIAL + UCD marker: `<initial>'. Denotes an initial presentation form + (Arabic). + + -- Constant: int UC_DECOMP_MEDIAL + UCD marker: `<medial>'. 
Denotes a medial presentation form + (Arabic). + + -- Constant: int UC_DECOMP_FINAL + UCD marker: `<final>'. Denotes a final presentation form (Arabic). + + -- Constant: int UC_DECOMP_ISOLATED + UCD marker: `<isolated>'. Denotes an isolated presentation form + (Arabic). + + -- Constant: int UC_DECOMP_CIRCLE + UCD marker: `<circle>'. Denotes an encircled form. + + -- Constant: int UC_DECOMP_SUPER + UCD marker: `<super>'. Denotes a superscript form. + + -- Constant: int UC_DECOMP_SUB + UCD marker: `<sub>'. Denotes a subscript form. + + -- Constant: int UC_DECOMP_VERTICAL + UCD marker: `<vertical>'. Denotes a vertical layout presentation + form. + + -- Constant: int UC_DECOMP_WIDE + UCD marker: `<wide>'. Denotes a wide (or zenkaku) compatibility + character. + + -- Constant: int UC_DECOMP_NARROW + UCD marker: `<narrow>'. Denotes a narrow (or hankaku) + compatibility character. + + -- Constant: int UC_DECOMP_SMALL + UCD marker: `<small>'. Denotes a small variant form (CNS + compatibility). + + -- Constant: int UC_DECOMP_SQUARE + UCD marker: `<square>'. Denotes a CJK squared font variant. + + -- Constant: int UC_DECOMP_FRACTION + UCD marker: `<fraction>'. Denotes a vulgar fraction form. + + -- Constant: int UC_DECOMP_COMPAT + UCD marker: `<compat>'. Denotes an otherwise unspecified + compatibility character. + + The following constant denotes the maximum size of decomposition of +a single Unicode character. + + -- Macro: unsigned int UC_DECOMPOSITION_MAX_LENGTH + This macro expands to a constant that is the required size of + buffer passed to the `uc_decomposition' and + `uc_canonical_decomposition' functions. + + The following functions decompose a Unicode character. + + -- Function: int uc_decomposition (ucs4_t UC, int *DECOMP_TAG, ucs4_t + *DECOMPOSITION) + Returns the character decomposition mapping of the Unicode + character UC. DECOMPOSITION must point to an array of at least + `UC_DECOMPOSITION_MAX_LENGTH' `ucs4_t' elements.
+ + When a decomposition exists, `DECOMPOSITION[0..N-1]' and + `*DECOMP_TAG' are filled and N is returned. Otherwise -1 is + returned. + + -- Function: int uc_canonical_decomposition (ucs4_t UC, ucs4_t + *DECOMPOSITION) + Returns the canonical character decomposition mapping of the + Unicode character UC. DECOMPOSITION must point to an array of at + least `UC_DECOMPOSITION_MAX_LENGTH' `ucs4_t' elements. + + When a decomposition exists, `DECOMPOSITION[0..N-1]' is filled and + N is returned. Otherwise -1 is returned. + + +File: libunistring.info, Node: Composition of characters, Next: Normalization of strings, Prev: Decomposition of characters, Up: uninorm.h + +12.2 Composition of Unicode characters +====================================== + + The following function composes a Unicode character from two Unicode +characters. + + -- Function: ucs4_t uc_composition (ucs4_t UC1, ucs4_t UC2) + Attempts to combine the Unicode characters UC1, UC2. UC1 is known + to have canonical combining class 0. + + Returns the combination of UC1 and UC2, if it exists. Returns 0 + otherwise. + + Not all decompositions can be recombined using this function. See + the Unicode file `CompositionExclusions.txt' for details. + + +File: libunistring.info, Node: Normalization of strings, Next: Normalizing comparisons, Prev: Composition of characters, Up: uninorm.h + +12.3 Normalization of strings +============================= + + The Unicode standard defines four normalization forms for Unicode +strings. The following type is used to denote a normalization form. + + -- Type: uninorm_t + An object of type `uninorm_t' denotes a Unicode normalization form. + This is a scalar type; its values can be compared with `=='. + + The following constants denote the four normalization forms. + + -- Macro: uninorm_t UNINORM_NFD + Denotes Normalization form D: canonical decomposition. + + -- Macro: uninorm_t UNINORM_NFC + Normalization form C: canonical decomposition, then canonical + composition.
+ + -- Macro: uninorm_t UNINORM_NFKD + Normalization form KD: compatibility decomposition. + + -- Macro: uninorm_t UNINORM_NFKC + Normalization form KC: compatibility decomposition, then canonical + composition. + + The following functions operate on `uninorm_t' objects. + + -- Function: bool uninorm_is_compat_decomposing (uninorm_t NF) + Tests whether the normalization form NF does compatibility + decomposition. + + -- Function: bool uninorm_is_composing (uninorm_t NF) + Tests whether the normalization form NF includes canonical + composition. + + -- Function: uninorm_t uninorm_decomposing_form (uninorm_t NF) + Returns the decomposing variant of the normalization form NF. + This maps NFC,NFD -> NFD and NFKC,NFKD -> NFKD. + + The following functions apply a Unicode normalization form to a +Unicode string. + + -- Function: uint8_t * u8_normalize (uninorm_t NF, const uint8_t *S, + size_t N, uint8_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint16_t * u16_normalize (uninorm_t NF, const uint16_t + *S, size_t N, uint16_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_normalize (uninorm_t NF, const uint32_t + *S, size_t N, uint32_t *RESULTBUF, size_t *LENGTHP) + Returns the specified normalization form of a string. + + +File: libunistring.info, Node: Normalizing comparisons, Next: Normalization of streams, Prev: Normalization of strings, Up: uninorm.h + +12.4 Normalizing comparisons +============================ + + The following functions compare Unicode strings, ignoring differences +in normalization. + + -- Function: int u8_normcmp (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u16_normcmp (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u32_normcmp (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + Compares S1 and S2, ignoring differences in normalization.
+ + NF must be either `UNINORM_NFD' or `UNINORM_NFKD'. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + -- Function: char * u8_normxfrm (const uint8_t *S, size_t N, uninorm_t + NF, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u16_normxfrm (const uint16_t *S, size_t N, + uninorm_t NF, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u32_normxfrm (const uint32_t *S, size_t N, + uninorm_t NF, char *RESULTBUF, size_t *LENGTHP) + Converts the string S of length N to a NUL-terminated byte + sequence, in such a way that comparing `u8_normxfrm (S1)' and + `u8_normxfrm (S2)' with the `u8_cmp2' function is equivalent to + comparing S1 and S2 with the `u8_normcoll' function. + + NF must be either `UNINORM_NFC' or `UNINORM_NFKC'. + + -- Function: int u8_normcoll (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u16_normcoll (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u32_normcoll (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + Compares S1 and S2, ignoring differences in normalization, using + the collation rules of the current locale. + + NF must be either `UNINORM_NFC' or `UNINORM_NFKC'. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + +File: libunistring.info, Node: Normalization of streams, Prev: Normalizing comparisons, Up: uninorm.h + +12.5 Normalization of streams of Unicode characters +=================================================== + + A "stream of Unicode characters" is essentially a function that +accepts an `ucs4_t' argument repeatedly, optionally combined with a +function that "flushes" the stream. 
+ + -- Type: struct uninorm_filter + This is the data type of a stream of Unicode characters that + normalizes its input according to a given normalization form and + passes the normalized character sequence to the encapsulated + stream of Unicode characters. + + -- Function: struct uninorm_filter * uninorm_filter_create (uninorm_t + NF, int (*STREAM_FUNC) (void *STREAM_DATA, ucs4_t UC), void + *STREAM_DATA) + Creates and returns a normalization filter for Unicode characters. + + The pair (STREAM_FUNC, STREAM_DATA) is the encapsulated stream. + `STREAM_FUNC (STREAM_DATA, UC)' receives the Unicode character UC + and returns 0 if successful, or -1 with `errno' set upon failure. + + Returns the new filter, or NULL with `errno' set upon failure. + + -- Function: int uninorm_filter_write (struct uninorm_filter *FILTER, + ucs4_t UC) + Stuffs a Unicode character into a normalizing filter. Returns 0 + if successful, or -1 with `errno' set upon failure. + + -- Function: int uninorm_filter_flush (struct uninorm_filter *FILTER) + Brings data buffered in the filter to its destination, the + encapsulated stream. + + Returns 0 if successful, or -1 with `errno' set upon failure. + + Note! If after calling this function, additional characters are + written into the filter, the resulting character sequence in the + encapsulated stream will not necessarily be normalized. + + -- Function: int uninorm_filter_free (struct uninorm_filter *FILTER) + Brings data buffered in the filter to its destination, the + encapsulated stream, then closes and frees the filter. + + Returns 0 if successful, or -1 with `errno' set upon failure. + + +File: libunistring.info, Node: unicase.h, Next: uniregex.h, Prev: uninorm.h, Up: Top + +13 Case mappings `<unicase.h>' +****************************** + + This include file defines functions for case mapping for Unicode +strings and case insensitive comparison of Unicode strings and C +strings. 
+ + These string functions fix the problems that were mentioned in *note +char * strings::, namely, they handle the Croatian LETTER DZ WITH +CARON, the German LATIN SMALL LETTER SHARP S, the Greek sigma and the +Lithuanian i correctly. + +* Menu: + +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: + + +File: libunistring.info, Node: Case mappings of characters, Next: Case mappings of strings, Up: unicase.h + +13.1 Case mappings of characters +================================ + + The following functions implement case mappings on Unicode +characters -- for those cases only where the result of the mapping is +again a single Unicode character. + + These mappings are locale and context independent. + + *WARNING!* These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that +treat an entire string at once and are language aware. + + -- Function: ucs4_t uc_toupper (ucs4_t UC) + Returns the uppercase mapping of the Unicode character UC. + + -- Function: ucs4_t uc_tolower (ucs4_t UC) + Returns the lowercase mapping of the Unicode character UC. + + -- Function: ucs4_t uc_totitle (ucs4_t UC) + Returns the titlecase mapping of the Unicode character UC. + + The titlecase mapping of a character is to be used when the + character should look like upper case and the following characters + are lower cased. + + For most characters, this is the same as the uppercase mapping. + There are only few characters where the title case variant and the + upper case variant are different. These characters occur in the + Latin writing of the Croatian, Bosnian, and Serbian languages.
+ + Lower case Title case Upper case + ------------------------------------------------------------------ + LATIN SMALL LETTER LJ LATIN CAPITAL LETTER LATIN CAPITAL LETTER + L WITH SMALL LETTER J LJ + LATIN SMALL LETTER NJ LATIN CAPITAL LETTER LATIN CAPITAL LETTER + N WITH SMALL LETTER J NJ + LATIN SMALL LETTER DZ LATIN CAPITAL LETTER LATIN CAPITAL LETTER + D WITH SMALL LETTER Z DZ + LATIN SMALL LETTER LATIN CAPITAL LETTER LATIN CAPITAL LETTER + DZ WITH CARON D WITH SMALL LETTER DZ WITH CARON + Z WITH CARON + + +File: libunistring.info, Node: Case mappings of strings, Next: Case mappings of substrings, Prev: Case mappings of characters, Up: unicase.h + +13.2 Case mappings of strings +============================= + + Case mapping should always be performed on entire strings, not on +individual characters. The functions in this section do so. + + These functions allow to apply a normalization after the case +mapping. The reason is that if you want to treat `ä' and `Ä' the +same, you most often also want to treat the composed and decomposed +forms of such a character, U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +and U+0041 LATIN CAPITAL LETTER A U+0308 COMBINING DIAERESIS the same. +The NF argument designates the normalization. + + These functions are locale dependent. The ISO639_LANGUAGE argument +identifies the language (e.g. `"tr"' for Turkish). NULL means to use +locale independent case mappings. + + -- Function: const char * uc_locale_language () + Returns the ISO 639 language code of the current locale. Returns + `""' if it is unknown, or in the "C" locale.
+ + -- Function: uint8_t * u8_toupper (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_toupper (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_toupper (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the uppercase mapping of a string. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + -- Function: uint8_t * u8_tolower (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_tolower (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_tolower (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the lowercase mapping of a string. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + -- Function: uint8_t * u8_totitle (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_totitle (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_totitle (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the titlecase mapping of a string. + + Mapping to title case means that, in each word, the first cased + character is being mapped to title case and the remaining + characters of the word are being mapped to lower case. 
+ + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + +File: libunistring.info, Node: Case mappings of substrings, Next: Case insensitive comparison, Prev: Case mappings of strings, Up: unicase.h + +13.3 Case mappings of substrings +================================ + + Case mapping of a substring cannot simply be performed by extracting +the substring and then applying the case mapping function to it. This +does not work because case mapping requires some information about the +surrounding characters. The following functions allow to apply case +mappings to substrings of a given string, while taking into account the +characters that precede it (the "prefix") and the characters that +follow it (the "suffix"). + + -- Type: casing_prefix_context_t + This data type denotes the case-mapping context that is given by a + prefix string. It is an immediate type that can be copied by + simple assignment, without involving memory allocation. It is not + an array type. + + -- Constant: casing_prefix_context_t unicase_empty_prefix_context + This constant is the case-mapping context that corresponds to an + empty prefix string. + + The following functions return `casing_prefix_context_t' objects: + + -- Function: casing_prefix_context_t u8_casing_prefix_context (const + uint8_t *S, size_t N) + -- Function: casing_prefix_context_t u16_casing_prefix_context (const + uint16_t *S, size_t N) + -- Function: casing_prefix_context_t u32_casing_prefix_context (const + uint32_t *S, size_t N) + Returns the case-mapping context of a given prefix string. 
+ + -- Function: casing_prefix_context_t u8_casing_prefixes_context (const + uint8_t *S, size_t N, casing_prefix_context_t A_CONTEXT) + -- Function: casing_prefix_context_t u16_casing_prefixes_context + (const uint16_t *S, size_t N, casing_prefix_context_t + A_CONTEXT) + -- Function: casing_prefix_context_t u32_casing_prefixes_context + (const uint32_t *S, size_t N, casing_prefix_context_t + A_CONTEXT) + Returns the case-mapping context of the prefix concat(A, S), given + the case-mapping context of the prefix A. + + -- Type: casing_suffix_context_t + This data type denotes the case-mapping context that is given by a + suffix string. It is an immediate type that can be copied by + simple assignment, without involving memory allocation. It is not + an array type. + + -- Constant: casing_suffix_context_t unicase_empty_suffix_context + This constant is the case-mapping context that corresponds to an + empty suffix string. + + The following functions return `casing_suffix_context_t' objects: + + -- Function: casing_suffix_context_t u8_casing_suffix_context (const + uint8_t *S, size_t N) + -- Function: casing_suffix_context_t u16_casing_suffix_context (const + uint16_t *S, size_t N) + -- Function: casing_suffix_context_t u32_casing_suffix_context (const + uint32_t *S, size_t N) + Returns the case-mapping context of a given suffix string. + + -- Function: casing_suffix_context_t u8_casing_suffixes_context (const + uint8_t *S, size_t N, casing_suffix_context_t A_CONTEXT) + -- Function: casing_suffix_context_t u16_casing_suffixes_context + (const uint16_t *S, size_t N, casing_suffix_context_t + A_CONTEXT) + -- Function: casing_suffix_context_t u32_casing_suffixes_context + (const uint32_t *S, size_t N, casing_suffix_context_t + A_CONTEXT) + Returns the case-mapping context of the suffix concat(S, A), given + the case-mapping context of the suffix A. + + The following functions perform a case mapping, considering the +prefix context and the suffix context. 
+ + -- Function: uint8_t * u8_ct_toupper (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_toupper (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_toupper (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the uppercase mapping of a string that is surrounded by a + prefix and a suffix. + + -- Function: uint8_t * u8_ct_tolower (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_tolower (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_tolower (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the lowercase mapping of a string that is surrounded by a + prefix and a suffix. 
+ + -- Function: uint8_t * u8_ct_totitle (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_totitle (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_totitle (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the titlecase mapping of a string that is surrounded by a + prefix and a suffix. + + For example, to uppercase the UTF-8 substring between `s + +start_index' and `s + end_index' of a string that extends from `s' to +`s + u8_strlen (s)', you can use the statements + + size_t result_length; + uint8_t *result = + u8_ct_toupper (s + start_index, end_index - start_index, + u8_casing_prefix_context (s, start_index), + u8_casing_suffix_context (s + end_index, + u8_strlen (s) - end_index), + iso639_language, NULL, NULL, &result_length); + + +File: libunistring.info, Node: Case insensitive comparison, Next: Case detection, Prev: Case mappings of substrings, Up: unicase.h + +13.4 Case insensitive comparison +================================ + + The following functions implement comparison that ignores +differences in case and normalization.
+ + -- Function: uint8_t * u8_casefold (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_casefold (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_casefold (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the case folded string. + + Comparing `u8_casefold (S1)' and `u8_casefold (S2)' with the + `u8_cmp2' function is equivalent to comparing S1 and S2 with + `u8_casecmp'. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + -- Function: uint8_t * u8_ct_casefold (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_casefold (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_casefold (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the case folded string. The case folding takes into + account the case mapping contexts of the prefix and suffix strings. 
+ + -- Function: int u8_casecmp (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u16_casecmp (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u32_casecmp (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int ulc_casecmp (const char *S1, size_t N1, const char + *S2, size_t N2, const char *ISO639_LANGUAGE, uninorm_t NF, + int *RESULTP) + Compares S1 and S2, ignoring differences in case and normalization. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + The following functions additionally take into account the sorting +rules of the current locale. + + -- Function: char * u8_casexfrm (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + -- Function: char * u16_casexfrm (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + -- Function: char * u32_casexfrm (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + -- Function: char * ulc_casexfrm (const char *S, size_t N, const char + *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + Converts the string S of length N to a NUL-terminated byte + sequence, in such a way that comparing `u8_casexfrm (S1)' and + `u8_casexfrm (S2)' with the gnulib function `memcmp2' is + equivalent to comparing S1 and S2 with `u8_casecoll'. + + NF must be either `UNINORM_NFC', `UNINORM_NFKC', or NULL for no + normalization. 
+ + -- Function: int u8_casecoll (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u16_casecoll (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u32_casecoll (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int ulc_casecoll (const char *S1, size_t N1, const char + *S2, size_t N2, const char *ISO639_LANGUAGE, uninorm_t NF, + int *RESULTP) + Compares S1 and S2, ignoring differences in case and normalization, + using the collation rules of the current locale. + + The NF argument identifies the normalization form to apply after + the case-mapping. It must be either `UNINORM_NFC' or + `UNINORM_NFKC'. It can also be NULL, for no normalization. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + +File: libunistring.info, Node: Case detection, Prev: Case insensitive comparison, Up: unicase.h + +13.5 Case detection +=================== + + The following functions determine whether a Unicode string is +entirely in upper case, or entirely in lower case, or entirely in title +case, or already case-folded. + + -- Function: int u8_is_uppercase (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_uppercase (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_uppercase (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if mapping NFD(S) to upper case is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set.
+ + -- Function: int u8_is_lowercase (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_lowercase (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_lowercase (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if mapping NFD(S) to lower case is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set. + + -- Function: int u8_is_titlecase (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_titlecase (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_titlecase (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if mapping NFD(S) to title case is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set. + + -- Function: int u8_is_casefolded (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_casefolded (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_casefolded (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if applying case folding to NFD(S) is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set. + + The following functions determine whether case mappings have any +effect on a Unicode string. 
+ + -- Function: int u8_is_cased (const uint8_t *S, size_t N, const char + *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_cased (const uint16_t *S, size_t N, const char + *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_cased (const uint32_t *S, size_t N, const char + *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if case matters for S, that is, if mapping + NFD(S) to either upper case or lower case or title case is not a + no-op. Set `*RESULTP' to false if NFD(S) maps to itself under the + upper case mapping, under the lower case mapping, and under the + title case mapping; in other words, when NFD(S) consists entirely + of caseless characters. Upon failure, returns -1 with `errno' set. + + +File: libunistring.info, Node: uniregex.h, Next: Using the library, Prev: unicase.h, Up: Top + +14 Regular expressions `<uniregex.h>' +************************************* + + This include file is not yet implemented. + + +File: libunistring.info, Node: Using the library, Next: More functionality, Prev: uniregex.h, Up: Top + +15 Using the library +******************** + + This chapter explains some practical considerations, regarding the +installation and compiler options that are needed in order to use this +library. + +* Menu: + +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: + + +File: libunistring.info, Node: Installation, Next: Compiler options, Up: Using the library + +15.1 Installation +================= + + Before you can use the library, it must be installed. First, you +have to make sure all dependencies are installed. They are listed in +the file `DEPENDENCIES'. + + Then you can proceed to build and install the library, as described +in the file `INSTALL'. For installation on Windows systems, please +refer to the file `README.woe32'. 
+ + +File: libunistring.info, Node: Compiler options, Next: Include files, Prev: Installation, Up: Using the library + +15.2 Compiler options +===================== + + Let's denote as `LIBUNISTRING_PREFIX' the value of the `--prefix' +option that you passed to `configure' while installing this package. +If you didn't pass any `--prefix' option, then the package is installed +in `/usr/local'. + + Let's denote as `LIBUNISTRING_INCLUDEDIR' the directory where the +include files were installed. This is usually the same as +`${LIBUNISTRING_PREFIX}/include'. Except that if you passed an +`--includedir' option to `configure', it is the value of that option. + + Let's further denote as `LIBUNISTRING_LIBDIR' the directory where +the library itself was installed. This is the value that you passed +with the `--libdir' option to `configure', or otherwise the same as +`${LIBUNISTRING_PREFIX}/lib'. Recall that when building in 64-bit mode +on a 64-bit GNU/Linux system that supports executables in either 64-bit +mode or 32-bit mode, you should have used the option +`--libdir=${LIBUNISTRING_PREFIX}/lib64'. + + So that the compiler finds the include files, you have to pass it the +option `-I${LIBUNISTRING_INCLUDEDIR}'. + + So that the compiler finds the library during its linking pass, you +have to pass it the options `-L${LIBUNISTRING_LIBDIR} -lunistring'. On +some systems, in some configurations, you also have to pass options +needed for linking with `libiconv'. The autoconf macro +`gl_LIBUNISTRING' (see *note Autoconf macro::) deals with this +particularity. + + +File: libunistring.info, Node: Include files, Next: Autoconf macro, Prev: Compiler options, Up: Using the library + +15.3 Include files +================== + + Most of the include files have been presented in the introduction, +see *note Introduction::, and subsequent detailed chapters. + + Another include file is `<unistring/version.h>'. It contains the +version number of the libunistring library. 
+ + -- Macro: int _LIBUNISTRING_VERSION + This constant contains the version of libunistring that is being + used at compile time. It encodes the major and minor parts of the + version number only. These parts are encoded in the form + `(major<<8) + minor'. + + -- Constant: int _libunistring_version + This constant contains the version of libunistring that is being + used at run time. It encodes the major and minor parts of the + version number only. These parts are encoded in the form + `(major<<8) + minor'. + + It is possible that `_libunistring_version' is greater than +`_LIBUNISTRING_VERSION'. This can happen when you use `libunistring' +as a shared library, and a newer, binary backward-compatible version +has been installed after your program that uses `libunistring' was +installed. + + +File: libunistring.info, Node: Autoconf macro, Next: Reporting problems, Prev: Include files, Up: Using the library + +15.4 Autoconf macro +=================== + + GNU Gnulib provides an autoconf macro that tests for the availability +of `libunistring'. It is contained in the Gnulib module +`libunistring', see +`http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring'. + + The macro is called `gl_LIBUNISTRING'. It searches for an installed +libunistring. If found, it sets and AC_SUBSTs `HAVE_LIBUNISTRING=yes' +and the `LIBUNISTRING' and `LTLIBUNISTRING' variables and augments the +`CPPFLAGS' variable, and defines the C macro `HAVE_LIBUNISTRING' to 1. +Otherwise, it sets and AC_SUBSTs `HAVE_LIBUNISTRING=no' and +`LIBUNISTRING' and `LTLIBUNISTRING' to empty. + + The complexities that `gl_LIBUNISTRING' deals with are the following: + + * On some operating systems, in some configurations, libunistring + depends on `libiconv', and the options for linking with libiconv + must be mentioned explicitly on the link command line. 
+ + * GNU `libunistring', if installed, is not necessarily already in the + search path (`CPPFLAGS' for the include file search path, + `LDFLAGS' for the library search path). + + * GNU `libunistring', if installed, is not necessarily already in the + run time library search path. To avoid the need for setting an + environment variable like `LD_LIBRARY_PATH', the macro adds the + appropriate run time search path options to the `LIBUNISTRING' + variable. This works on most systems. + + +File: libunistring.info, Node: Reporting problems, Prev: Autoconf macro, Up: Using the library + +15.5 Reporting problems +======================= + + If you encounter any problem, please don't hesitate to send a +detailed bug report to the `bug-libunistring@gnu.org' mailing list. +Alternatively, you can use the bug tracker at the project page +`https://savannah.gnu.org/projects/libunistring'. + + Please always include the version number of this library, and a short +description of your operating system and compilation environment with +corresponding version numbers. + + For problems that appear while building and installing +`libunistring', for which you don't find the remedy in the `INSTALL' +file, please include a description of the options that you passed to +the `configure' script. + + +File: libunistring.info, Node: More functionality, Next: Licenses, Prev: Using the library, Up: Top + +16 More advanced functionality +****************************** + + For bidirectional reordering of strings, we recommend the GNU +FriBidi library: `http://www.fribidi.org/'. + + For the rendering of Unicode strings outside of the context of a +given toolkit (KDE/Qt or GNOME/Gtk), we recommend the Pango library: +`http://www.pango.org/'. + + +File: libunistring.info, Node: Licenses, Next: Index, Prev: More functionality, Up: Top + +Appendix A Licenses +******************* + + The files of this package are covered by the licenses indicated in +each particular file or directory.
Here is a summary: + + * The `libunistring' library is covered by the GNU Lesser General + Public License (LGPL). A copy of the license is included in *note + GNU LGPL::. + + * This manual is free documentation. It is dually licensed under the + GNU FDL and the GNU GPL. This means that you can redistribute this + manual under either of these two licenses, at your choice. + This manual is covered by the GNU FDL. Permission is granted to + copy, distribute and/or modify this document under the terms of the + GNU Free Documentation License (FDL), either version 1.2 of the + License, or (at your option) any later version published by the + Free Software Foundation (FSF); with no Invariant Sections, with no + Front-Cover Text, and with no Back-Cover Texts. A copy of the + license is included in *note GNU FDL::. + This manual is covered by the GNU GPL. You can redistribute it + and/or modify it under the terms of the GNU General Public License + (GPL), either version 3 of the License, or (at your option) any + later version published by the Free Software Foundation (FSF). A + copy of the license is included in *note GNU GPL::. + +* Menu: + +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License + + +File: libunistring.info, Node: GNU GPL, Next: GNU LGPL, Up: Licenses + +A.1 GNU GENERAL PUBLIC LICENSE +============================== + + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. `http://fsf.org/' + + Everyone is permitted to copy and distribute verbatim copies of this + license document, but changing it is not allowed. + +Preamble +======== + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains +free software for all its users. We, the Free Software Foundation, use +the GNU General Public License for most of our software; it applies +also to any other work released this way by its authors. You can apply +it to your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the software, +or if you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. +Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those domains +in future versions of the GPL, as needed to protect the freedom of +users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + +TERMS AND CONDITIONS +==================== + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public + License. + + "Copyright" also means copyright-like laws that apply to other + kinds of works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this + License. Each licensee is addressed as "you". "Licensees" and + "recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the + work in a fashion requiring copyright permission, other than the + making of an exact copy. The resulting work is called a "modified + version" of the earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work + based on the Program. 
+ + To "propagate" a work means to do anything with it that, without + permission, would make you directly or secondarily liable for + infringement under applicable copyright law, except executing it + on a computer or modifying a private copy. Propagation includes + copying, distribution (with or without modification), making + available to the public, and in some countries other activities as + well. + + To "convey" a work means any kind of propagation that enables other + parties to make or receive copies. Mere interaction with a user + through a computer network, with no transfer of a copy, is not + conveying. + + An interactive user interface displays "Appropriate Legal Notices" + to the extent that it includes a convenient and prominently visible + feature that (1) displays an appropriate copyright notice, and (2) + tells the user that there is no warranty for the work (except to + the extent that warranties are provided), that licensees may + convey the work under this License, and how to view a copy of this + License. If the interface presents a list of user commands or + options, such as a menu, a prominent item in the list meets this + criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work + for making modifications to it. "Object code" means any + non-source form of a work. + + A "Standard Interface" means an interface that either is an + official standard defined by a recognized standards body, or, in + the case of interfaces specified for a particular programming + language, one that is widely used among developers working in that + language. 
+ + The "System Libraries" of an executable work include anything, + other than the work as a whole, that (a) is included in the normal + form of packaging a Major Component, but which is not part of that + Major Component, and (b) serves only to enable use of the work + with that Major Component, or to implement a Standard Interface + for which an implementation is available to the public in source + code form. A "Major Component", in this context, means a major + essential component (kernel, window system, and so on) of the + specific operating system (if any) on which the executable work + runs, or a compiler used to produce the work, or an object code + interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all + the source code needed to generate, install, and (for an executable + work) run the object code and to modify the work, including + scripts to control those activities. However, it does not include + the work's System Libraries, or general-purpose tools or generally + available free programs which are used unmodified in performing + those activities but which are not part of the work. For example, + Corresponding Source includes interface definition files + associated with source files for the work, and the source code for + shared libraries and dynamically linked subprograms that the work + is specifically designed to require, such as by intimate data + communication or control flow between those subprograms and other + parts of the work. + + The Corresponding Source need not include anything that users can + regenerate automatically from other parts of the Corresponding + Source. + + The Corresponding Source for a work in source code form is that + same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of + copyright on the Program, and are irrevocable provided the stated + conditions are met. 
This License explicitly affirms your unlimited + permission to run the unmodified Program. The output from running + a covered work is covered by this License only if the output, + given its content, constitutes a covered work. This License + acknowledges your rights of fair use or other equivalent, as + provided by copyright law. + + You may make, run and propagate covered works that you do not + convey, without conditions so long as your license otherwise + remains in force. You may convey covered works to others for the + sole purpose of having them make modifications exclusively for + you, or provide you with facilities for running those works, + provided that you comply with the terms of this License in + conveying all material for which you do not control copyright. + Those thus making or running the covered works for you must do so + exclusively on your behalf, under your direction and control, on + terms that prohibit them from making any copies of your + copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under + the conditions stated below. Sublicensing is not allowed; section + 10 makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological + measure under any applicable law fulfilling obligations under + article 11 of the WIPO copyright treaty adopted on 20 December + 1996, or similar laws prohibiting or restricting circumvention of + such measures. 
+ + When you convey a covered work, you waive any legal power to forbid + circumvention of technological measures to the extent such + circumvention is effected by exercising rights under this License + with respect to the covered work, and you disclaim any intention + to limit operation or modification of the work as a means of + enforcing, against the work's users, your or third parties' legal + rights to forbid circumvention of technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you + receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice; + keep intact all notices stating that this License and any + non-permissive terms added in accord with section 7 apply to the + code; keep intact all notices of the absence of any warranty; and + give all recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, + and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to + produce it from the Program, in the form of source code under the + terms of section 4, provided that you also meet all of these + conditions: + + a. The work must carry prominent notices stating that you + modified it, and giving a relevant date. + + b. The work must carry prominent notices stating that it is + released under this License and any conditions added under + section 7. This requirement modifies the requirement in + section 4 to "keep intact all notices". + + c. You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable + section 7 additional terms, to the whole of the work, and all + its parts, regardless of how they are packaged. 
This License + gives no permission to license the work in any other way, but + it does not invalidate such permission if you have separately + received it. + + d. If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has + interactive interfaces that do not display Appropriate Legal + Notices, your work need not make them do so. + + A compilation of a covered work with other separate and independent + works, which are not by their nature extensions of the covered + work, and which are not combined with it such as to form a larger + program, in or on a volume of a storage or distribution medium, is + called an "aggregate" if the compilation and its resulting + copyright are not used to limit the access or legal rights of the + compilation's users beyond what the individual works permit. + Inclusion of a covered work in an aggregate does not cause this + License to apply to the other parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms + of sections 4 and 5, provided that you also convey the + machine-readable Corresponding Source under the terms of this + License, in one of these ways: + + a. Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b. 
Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for + as long as you offer spare parts or customer support for that + product model, to give anyone who possesses the object code + either (1) a copy of the Corresponding Source for all the + software in the product that is covered by this License, on a + durable physical medium customarily used for software + interchange, for a price no more than your reasonable cost of + physically performing this conveying of source, or (2) access + to copy the Corresponding Source from a network server at no + charge. + + c. Convey individual copies of the object code with a copy of + the written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, + and only if you received the object code with such an offer, + in accord with subsection 6b. + + d. Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access + to the Corresponding Source in the same way through the same + place at no further charge. You need not require recipients + to copy the Corresponding Source along with the object code. + If the place to copy the object code is a network server, the + Corresponding Source may be on a different server (operated + by you or a third party) that supports equivalent copying + facilities, provided you maintain clear directions next to + the object code saying where to find the Corresponding Source. + Regardless of what server hosts the Corresponding Source, you + remain obligated to ensure that it is available for as long + as needed to satisfy these requirements. + + e. 
Convey the object code using peer-to-peer transmission, + provided you inform other peers where the object code and + Corresponding Source of the work are being offered to the + general public at no charge under subsection 6d. + + + A separable portion of the object code, whose source code is + excluded from the Corresponding Source as a System Library, need + not be included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means + any tangible personal property which is normally used for personal, + family, or household purposes, or (2) anything designed or sold for + incorporation into a dwelling. In determining whether a product + is a consumer product, doubtful cases shall be resolved in favor of + coverage. For a particular product received by a particular user, + "normally used" refers to a typical or common use of that class of + product, regardless of the status of the particular user or of the + way in which the particular user actually uses, or expects or is + expected to use, the product. A product is a consumer product + regardless of whether the product has substantial commercial, + industrial or non-consumer uses, unless such uses represent the + only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, + procedures, authorization keys, or other information required to + install and execute modified versions of a covered work in that + User Product from a modified version of its Corresponding Source. + The information must suffice to ensure that the continued + functioning of the modified object code is in no case prevented or + interfered with solely because modification has been made. 
+ + If you convey an object code work under this section in, or with, + or specifically for use in, a User Product, and the conveying + occurs as part of a transaction in which the right of possession + and use of the User Product is transferred to the recipient in + perpetuity or for a fixed term (regardless of how the transaction + is characterized), the Corresponding Source conveyed under this + section must be accompanied by the Installation Information. But + this requirement does not apply if neither you nor any third party + retains the ability to install modified object code on the User + Product (for example, the work has been installed in ROM). + + The requirement to provide Installation Information does not + include a requirement to continue to provide support service, + warranty, or updates for a work that has been modified or + installed by the recipient, or for the User Product in which it + has been modified or installed. Access to a network may be denied + when the modification itself materially and adversely affects the + operation of the network or violates the rules and protocols for + communication across the network. + + Corresponding Source conveyed, and Installation Information + provided, in accord with this section must be in a format that is + publicly documented (and with an implementation available to the + public in source code form), and must require no special password + or key for unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of + this License by making exceptions from one or more of its + conditions. Additional permissions that are applicable to the + entire Program shall be treated as though they were included in + this License, to the extent that they are valid under applicable + law. 
If additional permissions apply only to part of the Program, + that part may be used separately under those permissions, but the + entire Program remains governed by this License without regard to + the additional permissions. + + When you convey a copy of a covered work, you may at your option + remove any additional permissions from that copy, or from any part + of it. (Additional permissions may be written to require their own + removal in certain cases when you modify the work.) You may place + additional permissions on material, added by you to a covered work, + for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material + you add to a covered work, you may (if authorized by the copyright + holders of that material) supplement the terms of this License + with terms: + + a. Disclaiming warranty or limiting liability differently from + the terms of sections 15 and 16 of this License; or + + b. Requiring preservation of specified reasonable legal notices + or author attributions in that material or in the Appropriate + Legal Notices displayed by works containing it; or + + c. Prohibiting misrepresentation of the origin of that material, + or requiring that modified versions of such material be + marked in reasonable ways as different from the original + version; or + + d. Limiting the use for publicity purposes of names of licensors + or authors of the material; or + + e. Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f. Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified + versions of it) with contractual assumptions of liability to + the recipient, for any liability that these contractual + assumptions directly impose on those licensors and authors. 
+ + All other non-permissive additional terms are considered "further + restrictions" within the meaning of section 10. If the Program as + you received it, or any part of it, contains a notice stating that + it is governed by this License along with a term that is a further + restriction, you may remove that term. If a license document + contains a further restriction but permits relicensing or + conveying under this License, you may add to a covered work + material governed by the terms of that license document, provided + that the further restriction does not survive such relicensing or + conveying. + + If you add terms to a covered work in accord with this section, you + must place, in the relevant source files, a statement of the + additional terms that apply to those files, or a notice indicating + where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in + the form of a separately written license, or stated as exceptions; + the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly + provided under this License. Any attempt otherwise to propagate or + modify it is void, and will automatically terminate your rights + under this License (including any patent licenses granted under + the third paragraph of section 11). + + However, if you cease all violation of this License, then your + license from a particular copyright holder is reinstated (a) + provisionally, unless and until the copyright holder explicitly + and finally terminates your license, and (b) permanently, if the + copyright holder fails to notify you of the violation by some + reasonable means prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from + that copyright holder, and you cure the violation prior to 30 days + after your receipt of the notice. + + Termination of your rights under this section does not terminate + the licenses of parties who have received copies or rights from + you under this License. If your rights have been terminated and + not permanently reinstated, you do not qualify to receive new + licenses for the same material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or + run a copy of the Program. Ancillary propagation of a covered work + occurring solely as a consequence of using peer-to-peer + transmission to receive a copy likewise does not require + acceptance. However, nothing other than this License grants you + permission to propagate or modify any covered work. These actions + infringe copyright if you do not accept this License. Therefore, + by modifying or propagating a covered work, you indicate your + acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically + receives a license from the original licensors, to run, modify and + propagate that work, subject to this License. You are not + responsible for enforcing compliance by third parties with this + License. + + An "entity transaction" is a transaction transferring control of an + organization, or substantially all assets of one, or subdividing an + organization, or merging organizations. 
If propagation of a + covered work results from an entity transaction, each party to that + transaction who receives a copy of the work also receives whatever + licenses to the work the party's predecessor in interest had or + could give under the previous paragraph, plus a right to + possession of the Corresponding Source of the work from the + predecessor in interest, if the predecessor has it or can get it + with reasonable efforts. + + You may not impose any further restrictions on the exercise of the + rights granted or affirmed under this License. For example, you + may not impose a license fee, royalty, or other charge for + exercise of rights granted under this License, and you may not + initiate litigation (including a cross-claim or counterclaim in a + lawsuit) alleging that any patent claim is infringed by making, + using, selling, offering for sale, or importing the Program or any + portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this + License of the Program or a work on which the Program is based. + The work thus licensed is called the contributor's "contributor + version". + + A contributor's "essential patent claims" are all patent claims + owned or controlled by the contributor, whether already acquired or + hereafter acquired, that would be infringed by some manner, + permitted by this License, of making, using, or selling its + contributor version, but do not include claims that would be + infringed only as a consequence of further modification of the + contributor version. For purposes of this definition, "control" + includes the right to grant patent sublicenses in a manner + consistent with the requirements of this License. + + Each contributor grants you a non-exclusive, worldwide, + royalty-free patent license under the contributor's essential + patent claims, to make, use, sell, offer for sale, import and + otherwise run, modify and propagate the contents of its + contributor version. 
+ + In the following three paragraphs, a "patent license" is any + express agreement or commitment, however denominated, not to + enforce a patent (such as an express permission to practice a + patent or covenant not to sue for patent infringement). To + "grant" such a patent license to a party means to make such an + agreement or commitment not to enforce a patent against the party. + + If you convey a covered work, knowingly relying on a patent + license, and the Corresponding Source of the work is not available + for anyone to copy, free of charge and under the terms of this + License, through a publicly available network server or other + readily accessible means, then you must either (1) cause the + Corresponding Source to be so available, or (2) arrange to deprive + yourself of the benefit of the patent license for this particular + work, or (3) arrange, in a manner consistent with the requirements + of this License, to extend the patent license to downstream + recipients. "Knowingly relying" means you have actual knowledge + that, but for the patent license, your conveying the covered work + in a country, or your recipient's use of the covered work in a + country, would infringe one or more identifiable patents in that + country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or + arrangement, you convey, or propagate by procuring conveyance of, a + covered work, and grant a patent license to some of the parties + receiving the covered work authorizing them to use, propagate, + modify or convey a specific copy of the covered work, then the + patent license you grant is automatically extended to all + recipients of the covered work and works based on it. + + A patent license is "discriminatory" if it does not include within + the scope of its coverage, prohibits the exercise of, or is + conditioned on the non-exercise of one or more of the rights that + are specifically granted under this License. 
You may not convey a + covered work if you are a party to an arrangement with a third + party that is in the business of distributing software, under + which you make payment to the third party based on the extent of + your activity of conveying the work, and under which the third + party grants, to any of the parties who would receive the covered + work from you, a discriminatory patent license (a) in connection + with copies of the covered work conveyed by you (or copies made + from those copies), or (b) primarily for and in connection with + specific products or compilations that contain the covered work, + unless you entered into that arrangement, or that patent license + was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting + any implied license or other defenses to infringement that may + otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, + agreement or otherwise) that contradict the conditions of this + License, they do not excuse you from the conditions of this + License. If you cannot convey a covered work so as to satisfy + simultaneously your obligations under this License and any other + pertinent obligations, then as a consequence you may not convey it + at all. For example, if you agree to terms that obligate you to + collect a royalty for further conveying from those to whom you + convey the Program, the only way you could satisfy both those + terms and this License would be to refrain entirely from conveying + the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have + permission to link or combine any covered work with a work licensed + under version 3 of the GNU Affero General Public License into a + single combined work, and to convey the resulting work. 
The terms + of this License will continue to apply to the part which is the + covered work, but the special requirements of the GNU Affero + General Public License, section 13, concerning interaction through + a network will apply to the combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new + versions of the GNU General Public License from time to time. + Such new versions will be similar in spirit to the present + version, but may differ in detail to address new problems or + concerns. + + Each version is given a distinguishing version number. If the + Program specifies that a certain numbered version of the GNU + General Public License "or any later version" applies to it, you + have the option of following the terms and conditions either of + that numbered version or of any later version published by the + Free Software Foundation. If the Program does not specify a + version number of the GNU General Public License, you may choose + any version ever published by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future + versions of the GNU General Public License can be used, that + proxy's public statement of acceptance of a version permanently + authorizes you to choose that version for the Program. + + Later license versions may give you additional or different + permissions. However, no additional obligations are imposed on any + author or copyright holder as a result of your choosing to follow a + later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY + APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE + COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE + RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. + SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN + WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES + AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU + FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR + CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE + THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA + BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD + PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER + PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF + THE POSSIBILITY OF SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided + above cannot be given local legal effect according to their terms, + reviewing courts shall apply local law that most closely + approximates an absolute waiver of all civil liability in + connection with the Program, unless a warranty or assumption of + liability accompanies a copy of the Program in return for a fee. + + +END OF TERMS AND CONDITIONS +=========================== + +How to Apply These Terms to Your New Programs +============================================= + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. 
+ + ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. + Copyright (C) YEAR NAME OF AUTHOR + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see `http://www.gnu.org/licenses/'. + + Also add information on how to contact you by electronic and paper +mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + PROGRAM Copyright (C) YEAR NAME OF AUTHOR + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + + The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an "about box". + + You should also get your employer (if you work as a programmer) or +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. For more information on this, and how to apply and follow +the GNU GPL, see `http://www.gnu.org/licenses/'. + + The GNU General Public License does not permit incorporating your +program into proprietary programs. If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. 
If this is what you want to do, use the +GNU Lesser General Public License instead of this License. But first, +please read `http://www.gnu.org/philosophy/why-not-lgpl.html'. + + +File: libunistring.info, Node: GNU LGPL, Next: GNU FDL, Prev: GNU GPL, Up: Licenses + +A.2 GNU LESSER GENERAL PUBLIC LICENSE +===================================== + + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. `http://fsf.org/' + + Everyone is permitted to copy and distribute verbatim copies of this + license document, but changing it is not allowed. + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU + Lesser General Public License, and the "GNU GPL" refers to version + 3 of the GNU General Public License. + + "The Library" refers to a covered work governed by this License, + other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface + provided by the Library, but which is not otherwise based on the + Library. Defining a subclass of a class defined by the Library is + deemed a mode of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an + Application with the Library. The particular version of the + Library with which the Combined Work was made is also called the + "Linked Version". + + The "Minimal Corresponding Source" for a Combined Work means the + Corresponding Source for the Combined Work, excluding any source + code for portions of the Combined Work that, considered in + isolation, are based on the Application, and not on the Linked + Version. 
+ + The "Corresponding Application Code" for a Combined Work means the + object code and/or source code for the Application, including any + data and utility programs needed for reproducing the Combined Work + from the Application, but excluding the System Libraries of the + Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this + License without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a + facility refers to a function or data to be supplied by an + Application that uses the facility (other than as an argument + passed when the facility is invoked), then you may convey a copy + of the modified version: + + a. under this License, provided that you make a good faith + effort to ensure that, in the event an Application does not + supply the function or data, the facility still operates, and + performs whatever part of its purpose remains meaningful, or + + b. under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material + from a header file that is part of the Library. You may convey + such object code under terms of your choice, provided that, if the + incorporated material is not limited to numerical parameters, data + structure layouts and accessors, or small macros, inline functions + and templates (ten or fewer lines in length), you do both of the + following: + + a. Give prominent notice with each copy of the object code that + the Library is used in it and that the Library and its use are + covered by this License. + + b. Accompany the object code with a copy of the GNU GPL and this + license document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, + taken together, effectively do not restrict modification of the + portions of the Library contained in the Combined Work and reverse + engineering for debugging such modifications, if you also do each + of the following: + + a. Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b. Accompany the Combined Work with a copy of the GNU GPL and + this license document. + + c. For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to + the copies of the GNU GPL and this license document. + + d. Do one of the following: + + 0. Convey the Minimal Corresponding Source under the terms + of this License, and the Corresponding Application Code + in a form suitable for, and under terms that permit, the + user to recombine or relink the Application with a + modified version of the Linked Version to produce a + modified Combined Work, in the manner specified by + section 6 of the GNU GPL for conveying Corresponding + Source. + + 1. Use a suitable shared library mechanism for linking with + the Library. A suitable mechanism is one that (a) uses + at run time a copy of the Library already present on the + user's computer system, and (b) will operate properly + with a modified version of the Library that is + interface-compatible with the Linked Version. + + e. Provide Installation Information, but only if you would + otherwise be required to provide such information under + section 6 of the GNU GPL, and only to the extent that such + information is necessary to install and execute a modified + version of the Combined Work produced by recombining or + relinking the Application with a modified version of the + Linked Version. 
(If you use option 4d0, the Installation + Information must accompany the Minimal Corresponding Source + and Corresponding Application Code. If you use option 4d1, + you must provide the Installation Information in the manner + specified by section 6 of the GNU GPL for conveying + Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the + Library side by side in a single library together with other + library facilities that are not Applications and are not covered + by this License, and convey such a combined library under terms of + your choice, if you do both of the following: + + a. Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities, conveyed under the terms of this License. + + b. Give prominent notice with the combined library that part of + it is a work based on the Library, and explaining where to + find the accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new + versions of the GNU Lesser General Public License from time to + time. Such new versions will be similar in spirit to the present + version, but may differ in detail to address new problems or + concerns. + + Each version is given a distinguishing version number. If the + Library as you received it specifies that a certain numbered + version of the GNU Lesser General Public License "or any later + version" applies to it, you have the option of following the terms + and conditions either of that published version or of any later + version published by the Free Software Foundation. If the Library + as you received it does not specify a version number of the GNU + Lesser General Public License, you may choose any version of the + GNU Lesser General Public License ever published by the Free + Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide + whether future versions of the GNU Lesser General Public License + shall apply, that proxy's public statement of acceptance of any + version is permanent authorization for you to choose that version + for the Library. + + + +File: libunistring.info, Node: GNU FDL, Prev: GNU LGPL, Up: Licenses + +A.3 GNU Free Documentation License +================================== + + Version 1.3, 3 November 2008 + + Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. + `http://fsf.org/' + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + 0. PREAMBLE + + The purpose of this License is to make a manual, textbook, or other + functional and useful document "free" in the sense of freedom: to + assure everyone the effective freedom to copy and redistribute it, + with or without modifying it, either commercially or + noncommercially. Secondarily, this License preserves for the + author and publisher a way to get credit for their work, while not + being considered responsible for modifications made by others. + + This License is a kind of "copyleft", which means that derivative + works of the document must themselves be free in the same sense. + It complements the GNU General Public License, which is a copyleft + license designed for free software. + + We have designed this License in order to use it for manuals for + free software, because free software needs free documentation: a + free program should come with manuals providing the same freedoms + that the software does. But this License is not limited to + software manuals; it can be used for any textual work, regardless + of subject matter or whether it is published as a printed book. + We recommend this License principally for works whose purpose is + instruction or reference. + + 1. 
APPLICABILITY AND DEFINITIONS + + This License applies to any manual or other work, in any medium, + that contains a notice placed by the copyright holder saying it + can be distributed under the terms of this License. Such a notice + grants a world-wide, royalty-free license, unlimited in duration, + to use that work under the conditions stated herein. The + "Document", below, refers to any such manual or work. Any member + of the public is a licensee, and is addressed as "you". You + accept the license if you copy, modify or distribute the work in a + way requiring permission under copyright law. + + A "Modified Version" of the Document means any work containing the + Document or a portion of it, either copied verbatim, or with + modifications and/or translated into another language. + + A "Secondary Section" is a named appendix or a front-matter section + of the Document that deals exclusively with the relationship of the + publishers or authors of the Document to the Document's overall + subject (or to related matters) and contains nothing that could + fall directly within that overall subject. (Thus, if the Document + is in part a textbook of mathematics, a Secondary Section may not + explain any mathematics.) The relationship could be a matter of + historical connection with the subject or with related matters, or + of legal, commercial, philosophical, ethical or political position + regarding them. + + The "Invariant Sections" are certain Secondary Sections whose + titles are designated, as being those of Invariant Sections, in + the notice that says that the Document is released under this + License. If a section does not fit the above definition of + Secondary then it is not allowed to be designated as Invariant. + The Document may contain zero Invariant Sections. If the Document + does not identify any Invariant Sections then there are none. 
+ + The "Cover Texts" are certain short passages of text that are + listed, as Front-Cover Texts or Back-Cover Texts, in the notice + that says that the Document is released under this License. A + Front-Cover Text may be at most 5 words, and a Back-Cover Text may + be at most 25 words. + + A "Transparent" copy of the Document means a machine-readable copy, + represented in a format whose specification is available to the + general public, that is suitable for revising the document + straightforwardly with generic text editors or (for images + composed of pixels) generic paint programs or (for drawings) some + widely available drawing editor, and that is suitable for input to + text formatters or for automatic translation to a variety of + formats suitable for input to text formatters. A copy made in an + otherwise Transparent file format whose markup, or absence of + markup, has been arranged to thwart or discourage subsequent + modification by readers is not Transparent. An image format is + not Transparent if used for any substantial amount of text. A + copy that is not "Transparent" is called "Opaque". + + Examples of suitable formats for Transparent copies include plain + ASCII without markup, Texinfo input format, LaTeX input format, + SGML or XML using a publicly available DTD, and + standard-conforming simple HTML, PostScript or PDF designed for + human modification. Examples of transparent image formats include + PNG, XCF and JPG. Opaque formats include proprietary formats that + can be read and edited only by proprietary word processors, SGML or + XML for which the DTD and/or processing tools are not generally + available, and the machine-generated HTML, PostScript or PDF + produced by some word processors for output purposes only. + + The "Title Page" means, for a printed book, the title page itself, + plus such following pages as are needed to hold, legibly, the + material this License requires to appear in the title page. 
For + works in formats which do not have any title page as such, "Title + Page" means the text near the most prominent appearance of the + work's title, preceding the beginning of the body of the text. + + The "publisher" means any person or entity that distributes copies + of the Document to the public. + + A section "Entitled XYZ" means a named subunit of the Document + whose title either is precisely XYZ or contains XYZ in parentheses + following text that translates XYZ in another language. (Here XYZ + stands for a specific section name mentioned below, such as + "Acknowledgements", "Dedications", "Endorsements", or "History".) + To "Preserve the Title" of such a section when you modify the + Document means that it remains a section "Entitled XYZ" according + to this definition. + + The Document may include Warranty Disclaimers next to the notice + which states that this License applies to the Document. These + Warranty Disclaimers are considered to be included by reference in + this License, but only as regards disclaiming warranties: any other + implication that these Warranty Disclaimers may have is void and + has no effect on the meaning of this License. + + 2. VERBATIM COPYING + + You may copy and distribute the Document in any medium, either + commercially or noncommercially, provided that this License, the + copyright notices, and the license notice saying this License + applies to the Document are reproduced in all copies, and that you + add no other conditions whatsoever to those of this License. You + may not use technical measures to obstruct or control the reading + or further copying of the copies you make or distribute. However, + you may accept compensation in exchange for copies. If you + distribute a large enough number of copies you must also follow + the conditions in section 3. + + You may also lend copies, under the same conditions stated above, + and you may publicly display copies. + + 3. 
COPYING IN QUANTITY + + If you publish printed copies (or copies in media that commonly + have printed covers) of the Document, numbering more than 100, and + the Document's license notice requires Cover Texts, you must + enclose the copies in covers that carry, clearly and legibly, all + these Cover Texts: Front-Cover Texts on the front cover, and + Back-Cover Texts on the back cover. Both covers must also clearly + and legibly identify you as the publisher of these copies. The + front cover must present the full title with all words of the + title equally prominent and visible. You may add other material + on the covers in addition. Copying with changes limited to the + covers, as long as they preserve the title of the Document and + satisfy these conditions, can be treated as verbatim copying in + other respects. + + If the required texts for either cover are too voluminous to fit + legibly, you should put the first ones listed (as many as fit + reasonably) on the actual cover, and continue the rest onto + adjacent pages. + + If you publish or distribute Opaque copies of the Document + numbering more than 100, you must either include a + machine-readable Transparent copy along with each Opaque copy, or + state in or with each Opaque copy a computer-network location from + which the general network-using public has access to download + using public-standard network protocols a complete Transparent + copy of the Document, free of added material. If you use the + latter option, you must take reasonably prudent steps, when you + begin distribution of Opaque copies in quantity, to ensure that + this Transparent copy will remain thus accessible at the stated + location until at least one year after the last time you + distribute an Opaque copy (directly or through your agents or + retailers) of that edition to the public. 
+ + It is requested, but not required, that you contact the authors of + the Document well before redistributing any large number of + copies, to give them a chance to provide you with an updated + version of the Document. + + 4. MODIFICATIONS + + You may copy and distribute a Modified Version of the Document + under the conditions of sections 2 and 3 above, provided that you + release the Modified Version under precisely this License, with + the Modified Version filling the role of the Document, thus + licensing distribution and modification of the Modified Version to + whoever possesses a copy of it. In addition, you must do these + things in the Modified Version: + + A. Use in the Title Page (and on the covers, if any) a title + distinct from that of the Document, and from those of + previous versions (which should, if there were any, be listed + in the History section of the Document). You may use the + same title as a previous version if the original publisher of + that version gives permission. + + B. List on the Title Page, as authors, one or more persons or + entities responsible for authorship of the modifications in + the Modified Version, together with at least five of the + principal authors of the Document (all of its principal + authors, if it has fewer than five), unless they release you + from this requirement. + + C. State on the Title page the name of the publisher of the + Modified Version, as the publisher. + + D. Preserve all the copyright notices of the Document. + + E. Add an appropriate copyright notice for your modifications + adjacent to the other copyright notices. + + F. Include, immediately after the copyright notices, a license + notice giving the public permission to use the Modified + Version under the terms of this License, in the form shown in + the Addendum below. + + G. Preserve in that license notice the full lists of Invariant + Sections and required Cover Texts given in the Document's + license notice. + + H. 
Include an unaltered copy of this License. + + I. Preserve the section Entitled "History", Preserve its Title, + and add to it an item stating at least the title, year, new + authors, and publisher of the Modified Version as given on + the Title Page. If there is no section Entitled "History" in + the Document, create one stating the title, year, authors, + and publisher of the Document as given on its Title Page, + then add an item describing the Modified Version as stated in + the previous sentence. + + J. Preserve the network location, if any, given in the Document + for public access to a Transparent copy of the Document, and + likewise the network locations given in the Document for + previous versions it was based on. These may be placed in + the "History" section. You may omit a network location for a + work that was published at least four years before the + Document itself, or if the original publisher of the version + it refers to gives permission. + + K. For any section Entitled "Acknowledgements" or "Dedications", + Preserve the Title of the section, and preserve in the + section all the substance and tone of each of the contributor + acknowledgements and/or dedications given therein. + + L. Preserve all the Invariant Sections of the Document, + unaltered in their text and in their titles. Section numbers + or the equivalent are not considered part of the section + titles. + + M. Delete any section Entitled "Endorsements". Such a section + may not be included in the Modified Version. + + N. Do not retitle any existing section to be Entitled + "Endorsements" or to conflict in title with any Invariant + Section. + + O. Preserve any Warranty Disclaimers. + + If the Modified Version includes new front-matter sections or + appendices that qualify as Secondary Sections and contain no + material copied from the Document, you may at your option + designate some or all of these sections as invariant. 
To do this, + add their titles to the list of Invariant Sections in the Modified + Version's license notice. These titles must be distinct from any + other section titles. + + You may add a section Entitled "Endorsements", provided it contains + nothing but endorsements of your Modified Version by various + parties--for example, statements of peer review or that the text + has been approved by an organization as the authoritative + definition of a standard. + + You may add a passage of up to five words as a Front-Cover Text, + and a passage of up to 25 words as a Back-Cover Text, to the end + of the list of Cover Texts in the Modified Version. Only one + passage of Front-Cover Text and one of Back-Cover Text may be + added by (or through arrangements made by) any one entity. If the + Document already includes a cover text for the same cover, + previously added by you or by arrangement made by the same entity + you are acting on behalf of, you may not add another; but you may + replace the old one, on explicit permission from the previous + publisher that added the old one. + + The author(s) and publisher(s) of the Document do not by this + License give permission to use their names for publicity for or to + assert or imply endorsement of any Modified Version. + + 5. COMBINING DOCUMENTS + + You may combine the Document with other documents released under + this License, under the terms defined in section 4 above for + modified versions, provided that you include in the combination + all of the Invariant Sections of all of the original documents, + unmodified, and list them all as Invariant Sections of your + combined work in its license notice, and that you preserve all + their Warranty Disclaimers. + + The combined work need only contain one copy of this License, and + multiple identical Invariant Sections may be replaced with a single + copy. 
If there are multiple Invariant Sections with the same name + but different contents, make the title of each such section unique + by adding at the end of it, in parentheses, the name of the + original author or publisher of that section if known, or else a + unique number. Make the same adjustment to the section titles in + the list of Invariant Sections in the license notice of the + combined work. + + In the combination, you must combine any sections Entitled + "History" in the various original documents, forming one section + Entitled "History"; likewise combine any sections Entitled + "Acknowledgements", and any sections Entitled "Dedications". You + must delete all sections Entitled "Endorsements." + + 6. COLLECTIONS OF DOCUMENTS + + You may make a collection consisting of the Document and other + documents released under this License, and replace the individual + copies of this License in the various documents with a single copy + that is included in the collection, provided that you follow the + rules of this License for verbatim copying of each of the + documents in all other respects. + + You may extract a single document from such a collection, and + distribute it individually under this License, provided you insert + a copy of this License into the extracted document, and follow + this License in all other respects regarding verbatim copying of + that document. + + 7. AGGREGATION WITH INDEPENDENT WORKS + + A compilation of the Document or its derivatives with other + separate and independent documents or works, in or on a volume of + a storage or distribution medium, is called an "aggregate" if the + copyright resulting from the compilation is not used to limit the + legal rights of the compilation's users beyond what the individual + works permit. When the Document is included in an aggregate, this + License does not apply to the other works in the aggregate which + are not themselves derivative works of the Document. 
+ + If the Cover Text requirement of section 3 is applicable to these + copies of the Document, then if the Document is less than one half + of the entire aggregate, the Document's Cover Texts may be placed + on covers that bracket the Document within the aggregate, or the + electronic equivalent of covers if the Document is in electronic + form. Otherwise they must appear on printed covers that bracket + the whole aggregate. + + 8. TRANSLATION + + Translation is considered a kind of modification, so you may + distribute translations of the Document under the terms of section + 4. Replacing Invariant Sections with translations requires special + permission from their copyright holders, but you may include + translations of some or all Invariant Sections in addition to the + original versions of these Invariant Sections. You may include a + translation of this License, and all the license notices in the + Document, and any Warranty Disclaimers, provided that you also + include the original English version of this License and the + original versions of those notices and disclaimers. In case of a + disagreement between the translation and the original version of + this License or a notice or disclaimer, the original version will + prevail. + + If a section in the Document is Entitled "Acknowledgements", + "Dedications", or "History", the requirement (section 4) to + Preserve its Title (section 1) will typically require changing the + actual title. + + 9. TERMINATION + + You may not copy, modify, sublicense, or distribute the Document + except as expressly provided under this License. Any attempt + otherwise to copy, modify, sublicense, or distribute it is void, + and will automatically terminate your rights under this License. 
+ + However, if you cease all violation of this License, then your + license from a particular copyright holder is reinstated (a) + provisionally, unless and until the copyright holder explicitly + and finally terminates your license, and (b) permanently, if the + copyright holder fails to notify you of the violation by some + reasonable means prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from + that copyright holder, and you cure the violation prior to 30 days + after your receipt of the notice. + + Termination of your rights under this section does not terminate + the licenses of parties who have received copies or rights from + you under this License. If your rights have been terminated and + not permanently reinstated, receipt of a copy of some or all of + the same material does not give you any rights to use it. + + 10. FUTURE REVISIONS OF THIS LICENSE + + The Free Software Foundation may publish new, revised versions of + the GNU Free Documentation License from time to time. Such new + versions will be similar in spirit to the present version, but may + differ in detail to address new problems or concerns. See + `http://www.gnu.org/copyleft/'. + + Each version of the License is given a distinguishing version + number. If the Document specifies that a particular numbered + version of this License "or any later version" applies to it, you + have the option of following the terms and conditions either of + that specified version or of any later version that has been + published (not as a draft) by the Free Software Foundation. If + the Document does not specify a version number of this License, + you may choose any version ever published (not as a draft) by the + Free Software Foundation. 
If the Document specifies that a proxy + can decide which future versions of this License can be used, that + proxy's public statement of acceptance of a version permanently + authorizes you to choose that version for the Document. + + 11. RELICENSING + + "Massive Multiauthor Collaboration Site" (or "MMC Site") means any + World Wide Web server that publishes copyrightable works and also + provides prominent facilities for anybody to edit those works. A + public wiki that anybody can edit is an example of such a server. + A "Massive Multiauthor Collaboration" (or "MMC") contained in the + site means any set of copyrightable works thus published on the MMC + site. + + "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 + license published by Creative Commons Corporation, a not-for-profit + corporation with a principal place of business in San Francisco, + California, as well as future copyleft versions of that license + published by that same organization. + + "Incorporate" means to publish or republish a Document, in whole or + in part, as part of another Document. + + An MMC is "eligible for relicensing" if it is licensed under this + License, and if all works that were first published under this + License somewhere other than this MMC, and subsequently + incorporated in whole or in part into the MMC, (1) had no cover + texts or invariant sections, and (2) were thus incorporated prior + to November 1, 2008. + + The operator of an MMC Site may republish an MMC contained in the + site under CC-BY-SA on the same site at any time before August 1, + 2009, provided the MMC is eligible for relicensing. + + +ADDENDUM: How to use this License for your documents +==================================================== + + To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and license +notices just after the title page: + + Copyright (C) YEAR YOUR NAME. 
+ Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. + + If you have Invariant Sections, Front-Cover Texts and Back-Cover +Texts, replace the "with...Texts." line with this: + + with the Invariant Sections being LIST THEIR TITLES, with + the Front-Cover Texts being LIST, and with the Back-Cover Texts + being LIST. + + If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + + If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, to +permit their use in free software. + + +File: libunistring.info, Node: Index, Prev: Licenses, Up: Top + +Index +***** + + +* Menu: + +* ambiguous width: uniwidth.h. (line 10) +* argument conventions: Conventions. (line 9) +* autoconf macro: Autoconf macro. (line 6) +* bidirectional category: Bidirectional category. + (line 6) +* bidirectional reordering: More functionality. (line 6) +* block: Blocks. (line 6) +* breaks, line: unilbrk.h. (line 6) +* breaks, word: uniwbrk.h. (line 6) +* bug reports: Reporting problems. (line 6) +* bug tracker: Reporting problems. (line 6) +* C string functions: char * strings. (line 6) +* C, programming language: ISO C and Java syntax. + (line 6) +* C-like API: Classifications like in ISO C. + (line 6) +* canonical combining class: Canonical combining class. + (line 6) +* case detection: Case detection. (line 6) +* case mappings: Case mappings of strings. + (line 6) +* casing_prefix_context_t: Case mappings of substrings. 
+ (line 15) +* casing_suffix_context_t: Case mappings of substrings. + (line 46) +* char, type: char * strings. (line 23) +* combining, Unicode characters: Composition of characters. + (line 6) +* comparing <1>: Elementary string functions on NUL terminated strings. + (line 128) +* comparing: Elementary string functions. + (line 108) +* comparing, ignoring case: Case insensitive comparison. + (line 6) +* comparing, ignoring case, with collation rules: Case insensitive comparison. + (line 66) +* comparing, ignoring normalization: Normalizing comparisons. + (line 6) +* comparing, ignoring normalization and case: Case insensitive comparison. + (line 6) +* comparing, ignoring normalization and case, with collation rules: Case insensitive comparison. + (line 66) +* comparing, ignoring normalization, with collation rules: Normalizing comparisons. + (line 23) +* comparing, with collation rules: Elementary string functions on NUL terminated strings. + (line 140) +* comparing, with collation rules, ignoring case: Case insensitive comparison. + (line 66) +* comparing, with collation rules, ignoring normalization: Normalizing comparisons. + (line 23) +* comparing, with collation rules, ignoring normalization and case: Case insensitive comparison. + (line 66) +* compiler options: Compiler options. (line 24) +* composing, Unicode characters: Composition of characters. + (line 6) +* converting <1>: uniconv.h. (line 45) +* converting: Elementary string conversions. + (line 6) +* copying <1>: Elementary string functions on NUL terminated strings. + (line 61) +* copying: Elementary string functions. + (line 72) +* counting: Elementary string functions. + (line 153) +* decomposing: Decomposition of characters. + (line 6) +* dependencies: Installation. (line 6) +* detecting case: Case detection. (line 6) +* duplicating <1>: Elementary string functions on NUL terminated strings. + (line 166) +* duplicating: Elementary string functions with memory allocation. 
+ (line 6) +* enum iconv_ilseq_handler: uniconv.h. (line 30) +* FDL, GNU Free Documentation License: GNU FDL. (line 6) +* formatted output: unistdio.h. (line 6) +* fullwidth: uniwidth.h. (line 22) +* general category: General category. (line 6) +* gl_LIBUNISTRING: Autoconf macro. (line 11) +* GPL, GNU General Public License: GNU GPL. (line 6) +* halfwidth: uniwidth.h. (line 22) +* identifiers: ISO C and Java syntax. + (line 6) +* installation: Installation. (line 10) +* internationalization: Unicode and i18n. (line 6) +* iterating <1>: Elementary string functions on NUL terminated strings. + (line 15) +* iterating: Elementary string functions. + (line 6) +* Java, programming language: ISO C and Java syntax. + (line 6) +* LGPL, GNU Lesser General Public License: GNU LGPL. (line 6) +* License, GNU FDL: GNU FDL. (line 6) +* License, GNU GPL: GNU GPL. (line 6) +* License, GNU LGPL: GNU LGPL. (line 6) +* Licenses: Licenses. (line 6) +* line breaks: unilbrk.h. (line 6) +* locale: Locale encodings. (line 6) +* locale categories: Locale encodings. (line 10) +* locale encoding <1>: uniconv.h. (line 10) +* locale encoding: Locale encodings. (line 28) +* locale language: Case mappings of strings. + (line 16) +* locale, multibyte: char * strings. (line 13) +* locale_charset: uniconv.h. (line 13) +* lowercasing: Case mappings of strings. + (line 6) +* mailing list: Reporting problems. (line 6) +* mirroring, of Unicode character: Mirrored character. (line 6) +* normal forms: uninorm.h. (line 6) +* normalizing: uninorm.h. (line 6) +* output, formatted: unistdio.h. (line 6) +* properties, of Unicode character: Properties. (line 6) +* regular expression: uniregex.h. (line 6) +* rendering: More functionality. (line 9) +* return value conventions: Conventions. (line 47) +* scripts: Scripts. (line 6) +* searching, for a character <1>: Elementary string functions on NUL terminated strings. + (line 176) +* searching, for a character: Elementary string functions. 
+ (line 140) +* searching, for a substring: Elementary string functions on NUL terminated strings. + (line 232) +* stream, normalizing a: Normalization of streams. + (line 6) +* struct uninorm_filter: Normalization of streams. + (line 11) +* titlecasing: Case mappings of strings. + (line 6) +* u16_asnprintf: unistdio.h. (line 132) +* u16_asprintf: unistdio.h. (line 129) +* u16_casecmp: Case insensitive comparison. + (line 51) +* u16_casecoll: Case insensitive comparison. + (line 95) +* u16_casefold: Case insensitive comparison. + (line 15) +* u16_casexfrm: Case insensitive comparison. + (line 75) +* u16_casing_prefix_context: Case mappings of substrings. + (line 30) +* u16_casing_prefixes_context: Case mappings of substrings. + (line 39) +* u16_casing_suffix_context: Case mappings of substrings. + (line 61) +* u16_casing_suffixes_context: Case mappings of substrings. + (line 70) +* u16_check: Elementary string checks. + (line 11) +* u16_chr: Elementary string functions. + (line 145) +* u16_cmp: Elementary string functions. + (line 115) +* u16_cmp2: Elementary string functions. + (line 131) +* u16_conv_from_encoding: uniconv.h. (line 54) +* u16_conv_to_encoding: uniconv.h. (line 91) +* u16_cpy: Elementary string functions. + (line 78) +* u16_cpy_alloc: Elementary string functions with memory allocation. + (line 10) +* u16_ct_casefold: Case insensitive comparison. + (line 37) +* u16_ct_tolower: Case mappings of substrings. + (line 107) +* u16_ct_totitle: Case mappings of substrings. + (line 125) +* u16_ct_toupper: Case mappings of substrings. + (line 89) +* u16_endswith: Elementary string functions on NUL terminated strings. + (line 258) +* u16_is_cased: Case detection. (line 57) +* u16_is_casefolded: Case detection. (line 44) +* u16_is_lowercase: Case detection. (line 24) +* u16_is_titlecase: Case detection. (line 34) +* u16_is_uppercase: Case detection. (line 14) +* u16_mblen: Elementary string functions. + (line 11) +* u16_mbsnlen: Elementary string functions. 
+ (line 157) +* u16_mbtouc: Elementary string functions. + (line 38) +* u16_mbtouc_unsafe: Elementary string functions. + (line 23) +* u16_mbtoucr: Elementary string functions. + (line 45) +* u16_move: Elementary string functions. + (line 89) +* u16_next: Elementary string functions on NUL terminated strings. + (line 24) +* u16_normalize: Normalization of strings. + (line 50) +* u16_normcmp: Normalizing comparisons. + (line 13) +* u16_normcoll: Normalizing comparisons. + (line 40) +* u16_normxfrm: Normalizing comparisons. + (line 27) +* u16_possible_linebreaks: unilbrk.h. (line 46) +* u16_prev: Elementary string functions on NUL terminated strings. + (line 36) +* u16_set: Elementary string functions. + (line 101) +* u16_snprintf: unistdio.h. (line 126) +* u16_sprintf: unistdio.h. (line 123) +* u16_startswith: Elementary string functions on NUL terminated strings. + (line 250) +* u16_stpcpy: Elementary string functions on NUL terminated strings. + (line 76) +* u16_stpncpy: Elementary string functions on NUL terminated strings. + (line 99) +* u16_strcat: Elementary string functions on NUL terminated strings. + (line 110) +* u16_strchr: Elementary string functions on NUL terminated strings. + (line 180) +* u16_strcmp: Elementary string functions on NUL terminated strings. + (line 132) +* u16_strcoll: Elementary string functions on NUL terminated strings. + (line 142) +* u16_strconv_from_encoding: uniconv.h. (line 129) +* u16_strconv_from_locale: uniconv.h. (line 157) +* u16_strconv_to_encoding: uniconv.h. (line 142) +* u16_strconv_to_locale: uniconv.h. (line 167) +* u16_strcpy: Elementary string functions on NUL terminated strings. + (line 66) +* u16_strcspn: Elementary string functions on NUL terminated strings. + (line 201) +* u16_strdup: Elementary string functions on NUL terminated strings. + (line 170) +* u16_strlen: Elementary string functions on NUL terminated strings. + (line 47) +* u16_strmblen: Elementary string functions on NUL terminated strings. 
+ (line 11) +* u16_strmbtouc: Elementary string functions on NUL terminated strings. + (line 17) +* u16_strncat: Elementary string functions on NUL terminated strings. + (line 121) +* u16_strncmp: Elementary string functions on NUL terminated strings. + (line 159) +* u16_strncpy: Elementary string functions on NUL terminated strings. + (line 88) +* u16_strnlen: Elementary string functions on NUL terminated strings. + (line 55) +* u16_strpbrk: Elementary string functions on NUL terminated strings. + (line 225) +* u16_strrchr: Elementary string functions on NUL terminated strings. + (line 188) +* u16_strspn: Elementary string functions on NUL terminated strings. + (line 213) +* u16_strstr: Elementary string functions on NUL terminated strings. + (line 239) +* u16_strtok: Elementary string functions on NUL terminated strings. + (line 268) +* u16_strwidth: uniwidth.h. (line 39) +* u16_to_u32: Elementary string conversions. + (line 23) +* u16_to_u8: Elementary string conversions. + (line 19) +* u16_tolower: Case mappings of strings. + (line 44) +* u16_totitle: Case mappings of strings. + (line 58) +* u16_toupper: Case mappings of strings. + (line 30) +* u16_u16_asnprintf: unistdio.h. (line 159) +* u16_u16_asprintf: unistdio.h. (line 156) +* u16_u16_snprintf: unistdio.h. (line 153) +* u16_u16_sprintf: unistdio.h. (line 150) +* u16_u16_vasnprintf: unistdio.h. (line 171) +* u16_u16_vasprintf: unistdio.h. (line 168) +* u16_u16_vsnprintf: unistdio.h. (line 165) +* u16_u16_vsprintf: unistdio.h. (line 162) +* u16_uctomb: Elementary string functions. + (line 62) +* u16_vasnprintf: unistdio.h. (line 144) +* u16_vasprintf: unistdio.h. (line 141) +* u16_vsnprintf: unistdio.h. (line 138) +* u16_vsprintf: unistdio.h. (line 135) +* u16_width: uniwidth.h. (line 31) +* u16_width_linebreaks: unilbrk.h. (line 65) +* u16_wordbreaks: Word breaks in a string. + (line 10) +* u32_asnprintf: unistdio.h. (line 185) +* u32_asprintf: unistdio.h. 
(line 182) +* u32_casecmp: Case insensitive comparison. + (line 54) +* u32_casecoll: Case insensitive comparison. + (line 98) +* u32_casefold: Case insensitive comparison. + (line 18) +* u32_casexfrm: Case insensitive comparison. + (line 78) +* u32_casing_prefix_context: Case mappings of substrings. + (line 32) +* u32_casing_prefixes_context: Case mappings of substrings. + (line 42) +* u32_casing_suffix_context: Case mappings of substrings. + (line 63) +* u32_casing_suffixes_context: Case mappings of substrings. + (line 73) +* u32_check: Elementary string checks. + (line 12) +* u32_chr: Elementary string functions. + (line 147) +* u32_cmp: Elementary string functions. + (line 117) +* u32_cmp2: Elementary string functions. + (line 133) +* u32_conv_from_encoding: uniconv.h. (line 57) +* u32_conv_to_encoding: uniconv.h. (line 94) +* u32_cpy: Elementary string functions. + (line 80) +* u32_cpy_alloc: Elementary string functions with memory allocation. + (line 11) +* u32_ct_casefold: Case insensitive comparison. + (line 42) +* u32_ct_tolower: Case mappings of substrings. + (line 112) +* u32_ct_totitle: Case mappings of substrings. + (line 130) +* u32_ct_toupper: Case mappings of substrings. + (line 94) +* u32_endswith: Elementary string functions on NUL terminated strings. + (line 260) +* u32_is_cased: Case detection. (line 59) +* u32_is_casefolded: Case detection. (line 46) +* u32_is_lowercase: Case detection. (line 26) +* u32_is_titlecase: Case detection. (line 36) +* u32_is_uppercase: Case detection. (line 16) +* u32_mblen: Elementary string functions. + (line 12) +* u32_mbsnlen: Elementary string functions. + (line 158) +* u32_mbtouc: Elementary string functions. + (line 39) +* u32_mbtouc_unsafe: Elementary string functions. + (line 25) +* u32_mbtoucr: Elementary string functions. + (line 46) +* u32_move: Elementary string functions. + (line 91) +* u32_next: Elementary string functions on NUL terminated strings. 
+ (line 25) +* u32_normalize: Normalization of strings. + (line 52) +* u32_normcmp: Normalizing comparisons. + (line 15) +* u32_normcoll: Normalizing comparisons. + (line 42) +* u32_normxfrm: Normalizing comparisons. + (line 29) +* u32_possible_linebreaks: unilbrk.h. (line 48) +* u32_prev: Elementary string functions on NUL terminated strings. + (line 38) +* u32_set: Elementary string functions. + (line 102) +* u32_snprintf: unistdio.h. (line 179) +* u32_sprintf: unistdio.h. (line 176) +* u32_startswith: Elementary string functions on NUL terminated strings. + (line 252) +* u32_stpcpy: Elementary string functions on NUL terminated strings. + (line 78) +* u32_stpncpy: Elementary string functions on NUL terminated strings. + (line 101) +* u32_strcat: Elementary string functions on NUL terminated strings. + (line 112) +* u32_strchr: Elementary string functions on NUL terminated strings. + (line 181) +* u32_strcmp: Elementary string functions on NUL terminated strings. + (line 133) +* u32_strcoll: Elementary string functions on NUL terminated strings. + (line 143) +* u32_strconv_from_encoding: uniconv.h. (line 131) +* u32_strconv_from_locale: uniconv.h. (line 158) +* u32_strconv_to_encoding: uniconv.h. (line 144) +* u32_strconv_to_locale: uniconv.h. (line 168) +* u32_strcpy: Elementary string functions on NUL terminated strings. + (line 68) +* u32_strcspn: Elementary string functions on NUL terminated strings. + (line 203) +* u32_strdup: Elementary string functions on NUL terminated strings. + (line 171) +* u32_strlen: Elementary string functions on NUL terminated strings. + (line 48) +* u32_strmblen: Elementary string functions on NUL terminated strings. + (line 12) +* u32_strmbtouc: Elementary string functions on NUL terminated strings. + (line 18) +* u32_strncat: Elementary string functions on NUL terminated strings. + (line 123) +* u32_strncmp: Elementary string functions on NUL terminated strings. 
+ (line 161) +* u32_strncpy: Elementary string functions on NUL terminated strings. + (line 90) +* u32_strnlen: Elementary string functions on NUL terminated strings. + (line 56) +* u32_strpbrk: Elementary string functions on NUL terminated strings. + (line 227) +* u32_strrchr: Elementary string functions on NUL terminated strings. + (line 189) +* u32_strspn: Elementary string functions on NUL terminated strings. + (line 215) +* u32_strstr: Elementary string functions on NUL terminated strings. + (line 241) +* u32_strtok: Elementary string functions on NUL terminated strings. + (line 270) +* u32_strwidth: uniwidth.h. (line 40) +* u32_to_u16: Elementary string conversions. + (line 31) +* u32_to_u8: Elementary string conversions. + (line 27) +* u32_tolower: Case mappings of strings. + (line 47) +* u32_totitle: Case mappings of strings. + (line 61) +* u32_toupper: Case mappings of strings. + (line 33) +* u32_u32_asnprintf: unistdio.h. (line 212) +* u32_u32_asprintf: unistdio.h. (line 209) +* u32_u32_snprintf: unistdio.h. (line 206) +* u32_u32_sprintf: unistdio.h. (line 203) +* u32_u32_vasnprintf: unistdio.h. (line 224) +* u32_u32_vasprintf: unistdio.h. (line 221) +* u32_u32_vsnprintf: unistdio.h. (line 218) +* u32_u32_vsprintf: unistdio.h. (line 215) +* u32_uctomb: Elementary string functions. + (line 63) +* u32_vasnprintf: unistdio.h. (line 197) +* u32_vasprintf: unistdio.h. (line 194) +* u32_vsnprintf: unistdio.h. (line 191) +* u32_vsprintf: unistdio.h. (line 188) +* u32_width: uniwidth.h. (line 33) +* u32_width_linebreaks: unilbrk.h. (line 68) +* u32_wordbreaks: Word breaks in a string. + (line 11) +* u8_asnprintf: unistdio.h. (line 79) +* u8_asprintf: unistdio.h. (line 76) +* u8_casecmp: Case insensitive comparison. + (line 48) +* u8_casecoll: Case insensitive comparison. + (line 92) +* u8_casefold: Case insensitive comparison. + (line 12) +* u8_casexfrm: Case insensitive comparison. + (line 72) +* u8_casing_prefix_context: Case mappings of substrings. 
+ (line 28) +* u8_casing_prefixes_context: Case mappings of substrings. + (line 36) +* u8_casing_suffix_context: Case mappings of substrings. + (line 59) +* u8_casing_suffixes_context: Case mappings of substrings. + (line 67) +* u8_check: Elementary string checks. + (line 10) +* u8_chr: Elementary string functions. + (line 143) +* u8_cmp: Elementary string functions. + (line 113) +* u8_cmp2: Elementary string functions. + (line 129) +* u8_conv_from_encoding: uniconv.h. (line 51) +* u8_conv_to_encoding: uniconv.h. (line 88) +* u8_cpy: Elementary string functions. + (line 76) +* u8_cpy_alloc: Elementary string functions with memory allocation. + (line 9) +* u8_ct_casefold: Case insensitive comparison. + (line 32) +* u8_ct_tolower: Case mappings of substrings. + (line 102) +* u8_ct_totitle: Case mappings of substrings. + (line 120) +* u8_ct_toupper: Case mappings of substrings. + (line 84) +* u8_endswith: Elementary string functions on NUL terminated strings. + (line 256) +* u8_is_cased: Case detection. (line 55) +* u8_is_casefolded: Case detection. (line 42) +* u8_is_lowercase: Case detection. (line 22) +* u8_is_titlecase: Case detection. (line 32) +* u8_is_uppercase: Case detection. (line 12) +* u8_mblen: Elementary string functions. + (line 10) +* u8_mbsnlen: Elementary string functions. + (line 156) +* u8_mbtouc: Elementary string functions. + (line 37) +* u8_mbtouc_unsafe: Elementary string functions. + (line 21) +* u8_mbtoucr: Elementary string functions. + (line 44) +* u8_move: Elementary string functions. + (line 87) +* u8_next: Elementary string functions on NUL terminated strings. + (line 23) +* u8_normalize: Normalization of strings. + (line 48) +* u8_normcmp: Normalizing comparisons. + (line 11) +* u8_normcoll: Normalizing comparisons. + (line 38) +* u8_normxfrm: Normalizing comparisons. + (line 25) +* u8_possible_linebreaks: unilbrk.h. (line 44) +* u8_prev: Elementary string functions on NUL terminated strings. 
+ (line 34) +* u8_set: Elementary string functions. + (line 100) +* u8_snprintf: unistdio.h. (line 73) +* u8_sprintf: unistdio.h. (line 70) +* u8_startswith: Elementary string functions on NUL terminated strings. + (line 248) +* u8_stpcpy: Elementary string functions on NUL terminated strings. + (line 74) +* u8_stpncpy: Elementary string functions on NUL terminated strings. + (line 97) +* u8_strcat: Elementary string functions on NUL terminated strings. + (line 108) +* u8_strchr: Elementary string functions on NUL terminated strings. + (line 179) +* u8_strcmp: Elementary string functions on NUL terminated strings. + (line 131) +* u8_strcoll: Elementary string functions on NUL terminated strings. + (line 141) +* u8_strconv_from_encoding: uniconv.h. (line 127) +* u8_strconv_from_locale: uniconv.h. (line 156) +* u8_strconv_to_encoding: uniconv.h. (line 140) +* u8_strconv_to_locale: uniconv.h. (line 166) +* u8_strcpy: Elementary string functions on NUL terminated strings. + (line 64) +* u8_strcspn: Elementary string functions on NUL terminated strings. + (line 199) +* u8_strdup: Elementary string functions on NUL terminated strings. + (line 169) +* u8_strlen: Elementary string functions on NUL terminated strings. + (line 46) +* u8_strmblen: Elementary string functions on NUL terminated strings. + (line 10) +* u8_strmbtouc: Elementary string functions on NUL terminated strings. + (line 16) +* u8_strncat: Elementary string functions on NUL terminated strings. + (line 119) +* u8_strncmp: Elementary string functions on NUL terminated strings. + (line 157) +* u8_strncpy: Elementary string functions on NUL terminated strings. + (line 86) +* u8_strnlen: Elementary string functions on NUL terminated strings. + (line 54) +* u8_strpbrk: Elementary string functions on NUL terminated strings. + (line 223) +* u8_strrchr: Elementary string functions on NUL terminated strings. + (line 187) +* u8_strspn: Elementary string functions on NUL terminated strings. 
+ (line 211) +* u8_strstr: Elementary string functions on NUL terminated strings. + (line 237) +* u8_strtok: Elementary string functions on NUL terminated strings. + (line 266) +* u8_strwidth: uniwidth.h. (line 38) +* u8_to_u16: Elementary string conversions. + (line 11) +* u8_to_u32: Elementary string conversions. + (line 15) +* u8_tolower: Case mappings of strings. + (line 41) +* u8_totitle: Case mappings of strings. + (line 55) +* u8_toupper: Case mappings of strings. + (line 27) +* u8_u8_asnprintf: unistdio.h. (line 106) +* u8_u8_asprintf: unistdio.h. (line 103) +* u8_u8_snprintf: unistdio.h. (line 100) +* u8_u8_sprintf: unistdio.h. (line 97) +* u8_u8_vasnprintf: unistdio.h. (line 118) +* u8_u8_vasprintf: unistdio.h. (line 115) +* u8_u8_vsnprintf: unistdio.h. (line 112) +* u8_u8_vsprintf: unistdio.h. (line 109) +* u8_uctomb: Elementary string functions. + (line 61) +* u8_vasnprintf: unistdio.h. (line 91) +* u8_vasprintf: unistdio.h. (line 88) +* u8_vsnprintf: unistdio.h. (line 85) +* u8_vsprintf: unistdio.h. (line 82) +* u8_width: uniwidth.h. (line 29) +* u8_width_linebreaks: unilbrk.h. (line 62) +* u8_wordbreaks: Word breaks in a string. + (line 9) +* uc_all_blocks: Blocks. (line 38) +* uc_all_scripts: Scripts. (line 37) +* uc_bidi_category: Bidirectional category. + (line 88) +* uc_bidi_category_byname: Bidirectional category. + (line 82) +* uc_bidi_category_name: Bidirectional category. + (line 79) +* uc_block: Blocks. (line 27) +* uc_block_t: Blocks. (line 12) +* uc_c_ident_category: ISO C and Java syntax. + (line 39) +* uc_canonical_decomposition: Decomposition of characters. + (line 92) +* uc_combining_class: Canonical combining class. + (line 89) +* uc_composition: Composition of characters. + (line 10) +* uc_decimal_value: Decimal digit value. (line 11) +* uc_decomposition: Decomposition of characters. + (line 82) +* uc_digit_value: Digit value. (line 11) +* uc_fraction_t: Numeric value. (line 14) +* uc_general_category: Object oriented API. 
(line 207) +* uc_general_category_and: Object oriented API. (line 179) +* uc_general_category_and_not: Object oriented API. (line 186) +* uc_general_category_byname: Object oriented API. (line 201) +* uc_general_category_name: Object oriented API. (line 195) +* uc_general_category_or: Object oriented API. (line 173) +* uc_general_category_t: Object oriented API. (line 7) +* uc_is_alnum: Classifications like in ISO C. + (line 14) +* uc_is_alpha: Classifications like in ISO C. + (line 18) +* uc_is_bidi_category: Bidirectional category. + (line 91) +* uc_is_blank: Classifications like in ISO C. + (line 64) +* uc_is_block: Blocks. (line 32) +* uc_is_c_whitespace: ISO C and Java syntax. + (line 10) +* uc_is_cntrl: Classifications like in ISO C. + (line 24) +* uc_is_digit: Classifications like in ISO C. + (line 27) +* uc_is_general_category: Object oriented API. (line 213) +* uc_is_general_category_withtable: Bit mask API. (line 52) +* uc_is_graph: Classifications like in ISO C. + (line 31) +* uc_is_java_whitespace: ISO C and Java syntax. + (line 14) +* uc_is_lower: Classifications like in ISO C. + (line 35) +* uc_is_print: Classifications like in ISO C. + (line 41) +* uc_is_property: Properties as objects. + (line 140) +* uc_is_property_alphabetic: Properties as functions. + (line 10) +* uc_is_property_ascii_hex_digit: Properties as functions. + (line 74) +* uc_is_property_bidi_arabic_digit: Properties as functions. + (line 60) +* uc_is_property_bidi_arabic_right_to_left: Properties as functions. + (line 56) +* uc_is_property_bidi_block_separator: Properties as functions. + (line 62) +* uc_is_property_bidi_boundary_neutral: Properties as functions. + (line 66) +* uc_is_property_bidi_common_separator: Properties as functions. + (line 61) +* uc_is_property_bidi_control: Properties as functions. + (line 53) +* uc_is_property_bidi_embedding_or_override: Properties as functions. + (line 68) +* uc_is_property_bidi_eur_num_separator: Properties as functions. 
+ (line 58) +* uc_is_property_bidi_eur_num_terminator: Properties as functions. + (line 59) +* uc_is_property_bidi_european_digit: Properties as functions. + (line 57) +* uc_is_property_bidi_hebrew_right_to_left: Properties as functions. + (line 55) +* uc_is_property_bidi_left_to_right: Properties as functions. + (line 54) +* uc_is_property_bidi_non_spacing_mark: Properties as functions. + (line 65) +* uc_is_property_bidi_other_neutral: Properties as functions. + (line 69) +* uc_is_property_bidi_pdf: Properties as functions. + (line 67) +* uc_is_property_bidi_segment_separator: Properties as functions. + (line 63) +* uc_is_property_bidi_whitespace: Properties as functions. + (line 64) +* uc_is_property_combining: Properties as functions. + (line 104) +* uc_is_property_composite: Properties as functions. + (line 105) +* uc_is_property_currency_symbol: Properties as functions. + (line 99) +* uc_is_property_dash: Properties as functions. + (line 91) +* uc_is_property_decimal_digit: Properties as functions. + (line 106) +* uc_is_property_default_ignorable_code_point: Properties as functions. + (line 14) +* uc_is_property_deprecated: Properties as functions. + (line 17) +* uc_is_property_diacritic: Properties as functions. + (line 108) +* uc_is_property_extender: Properties as functions. + (line 109) +* uc_is_property_format_control: Properties as functions. + (line 90) +* uc_is_property_grapheme_base: Properties as functions. + (line 46) +* uc_is_property_grapheme_extend: Properties as functions. + (line 47) +* uc_is_property_grapheme_link: Properties as functions. + (line 49) +* uc_is_property_hex_digit: Properties as functions. + (line 73) +* uc_is_property_hyphen: Properties as functions. + (line 92) +* uc_is_property_id_continue: Properties as functions. + (line 36) +* uc_is_property_id_start: Properties as functions. + (line 34) +* uc_is_property_ideographic: Properties as functions. + (line 78) +* uc_is_property_ids_binary_operator: Properties as functions. 
+ (line 81) +* uc_is_property_ids_trinary_operator: Properties as functions. + (line 82) +* uc_is_property_ignorable_control: Properties as functions. + (line 110) +* uc_is_property_iso_control: Properties as functions. + (line 89) +* uc_is_property_join_control: Properties as functions. + (line 45) +* uc_is_property_left_of_pair: Properties as functions. + (line 103) +* uc_is_property_line_separator: Properties as functions. + (line 94) +* uc_is_property_logical_order_exception: Properties as functions. + (line 18) +* uc_is_property_lowercase: Properties as functions. + (line 27) +* uc_is_property_math: Properties as functions. + (line 100) +* uc_is_property_non_break: Properties as functions. + (line 88) +* uc_is_property_not_a_character: Properties as functions. + (line 12) +* uc_is_property_numeric: Properties as functions. + (line 107) +* uc_is_property_other_alphabetic: Properties as functions. + (line 11) +* uc_is_property_other_default_ignorable_code_point: Properties as functions. + (line 16) +* uc_is_property_other_grapheme_extend: Properties as functions. + (line 48) +* uc_is_property_other_id_continue: Properties as functions. + (line 37) +* uc_is_property_other_id_start: Properties as functions. + (line 35) +* uc_is_property_other_lowercase: Properties as functions. + (line 28) +* uc_is_property_other_math: Properties as functions. + (line 101) +* uc_is_property_other_uppercase: Properties as functions. + (line 26) +* uc_is_property_paired_punctuation: Properties as functions. + (line 102) +* uc_is_property_paragraph_separator: Properties as functions. + (line 95) +* uc_is_property_pattern_syntax: Properties as functions. + (line 41) +* uc_is_property_pattern_white_space: Properties as functions. + (line 40) +* uc_is_property_private_use: Properties as functions. + (line 20) +* uc_is_property_punctuation: Properties as functions. + (line 93) +* uc_is_property_quotation_mark: Properties as functions. 
+ (line 96) +* uc_is_property_radical: Properties as functions. + (line 80) +* uc_is_property_sentence_terminal: Properties as functions. + (line 97) +* uc_is_property_soft_dotted: Properties as functions. + (line 30) +* uc_is_property_space: Properties as functions. + (line 87) +* uc_is_property_terminal_punctuation: Properties as functions. + (line 98) +* uc_is_property_titlecase: Properties as functions. + (line 29) +* uc_is_property_unassigned_code_value: Properties as functions. + (line 21) +* uc_is_property_unified_ideograph: Properties as functions. + (line 79) +* uc_is_property_uppercase: Properties as functions. + (line 25) +* uc_is_property_variation_selector: Properties as functions. + (line 19) +* uc_is_property_white_space: Properties as functions. + (line 9) +* uc_is_property_xid_continue: Properties as functions. + (line 39) +* uc_is_property_xid_start: Properties as functions. + (line 38) +* uc_is_property_zero_width: Properties as functions. + (line 86) +* uc_is_punct: Classifications like in ISO C. + (line 44) +* uc_is_script: Scripts. (line 31) +* uc_is_space: Classifications like in ISO C. + (line 49) +* uc_is_upper: Classifications like in ISO C. + (line 54) +* uc_is_xdigit: Classifications like in ISO C. + (line 60) +* uc_java_ident_category: ISO C and Java syntax. + (line 43) +* uc_locale_language: Case mappings of strings. + (line 21) +* uc_mirror_char: Mirrored character. (line 14) +* uc_numeric_value: Numeric value. (line 23) +* uc_property_byname: Properties as objects. + (line 123) +* uc_property_is_valid: Properties as objects. + (line 133) +* uc_property_t: Properties as objects. + (line 9) +* uc_script: Scripts. (line 20) +* uc_script_byname: Scripts. (line 25) +* uc_script_t: Scripts. (line 11) +* uc_tolower: Case mappings of characters. + (line 20) +* uc_totitle: Case mappings of characters. + (line 23) +* uc_toupper: Case mappings of characters. + (line 17) +* uc_width: uniwidth.h. 
(line 23) +* uc_wordbreak_property: Word break property. (line 32) +* UCS-4: Unicode. (line 14) +* ucs4_t: unitypes.h. (line 16) +* uint16_t: unitypes.h. (line 10) +* uint32_t: unitypes.h. (line 11) +* uint8_t: unitypes.h. (line 9) +* ulc_asnprintf: unistdio.h. (line 53) +* ulc_asprintf: unistdio.h. (line 50) +* ulc_casecmp: Case insensitive comparison. + (line 57) +* ulc_casecoll: Case insensitive comparison. + (line 101) +* ulc_casexfrm: Case insensitive comparison. + (line 81) +* ulc_fprintf: unistdio.h. (line 229) +* ulc_possible_linebreaks: unilbrk.h. (line 50) +* ulc_snprintf: unistdio.h. (line 48) +* ulc_sprintf: unistdio.h. (line 45) +* ulc_vasnprintf: unistdio.h. (line 65) +* ulc_vasprintf: unistdio.h. (line 62) +* ulc_vfprintf: unistdio.h. (line 232) +* ulc_vsnprintf: unistdio.h. (line 59) +* ulc_vsprintf: unistdio.h. (line 56) +* ulc_width_linebreaks: unilbrk.h. (line 71) +* ulc_wordbreaks: Word breaks in a string. + (line 12) +* Unicode: Unicode. (line 6) +* Unicode character, bidirectional category: Bidirectional category. + (line 6) +* Unicode character, block: Blocks. (line 24) +* Unicode character, canonical combining class: Canonical combining class. + (line 6) +* Unicode character, case mappings: Case mappings of characters. + (line 6) +* Unicode character, classification: General category. (line 6) +* Unicode character, classification like in C: Classifications like in ISO C. + (line 6) +* Unicode character, general category: General category. (line 6) +* Unicode character, mirroring: Mirrored character. (line 6) +* Unicode character, name: uniname.h. (line 6) +* Unicode character, properties: Properties. (line 6) +* Unicode character, script: Scripts. (line 17) +* Unicode character, validity in C identifiers: ISO C and Java syntax. + (line 38) +* Unicode character, validity in Java identifiers: ISO C and Java syntax. + (line 42) +* Unicode character, value <1>: Numeric value. (line 6) +* Unicode character, value <2>: Digit value. 
(line 6) +* Unicode character, value: Decimal digit value. (line 6) +* Unicode character, width: uniwidth.h. (line 22) +* unicode_character_name: uniname.h. (line 19) +* unicode_name_character: uniname.h. (line 25) +* uninorm_decomposing_form: Normalization of strings. + (line 40) +* uninorm_filter_create: Normalization of streams. + (line 19) +* uninorm_filter_flush: Normalization of streams. + (line 33) +* uninorm_filter_free: Normalization of streams. + (line 43) +* uninorm_filter_write: Normalization of streams. + (line 29) +* uninorm_is_compat_decomposing: Normalization of strings. + (line 32) +* uninorm_is_composing: Normalization of strings. + (line 36) +* uninorm_t: Normalization of strings. + (line 10) +* uppercasing: Case mappings of strings. + (line 6) +* use cases: Introduction. (line 44) +* UTF-16: Unicode. (line 14) +* UTF-16, strings: Unicode strings. (line 6) +* UTF-32: Unicode. (line 14) +* UTF-32, strings: Unicode strings. (line 6) +* UTF-8: Unicode. (line 14) +* UTF-8, strings: Unicode strings. (line 6) +* validity: Elementary string checks. + (line 6) +* value, of libunistring: Introduction. (line 44) +* value, of Unicode character <1>: Numeric value. (line 6) +* value, of Unicode character <2>: Digit value. (line 6) +* value, of Unicode character: Decimal digit value. (line 6) +* verification: Elementary string checks. + (line 6) +* wchar_t, type: The wchar_t mess. (line 6) +* width: uniwidth.h. (line 6) +* word breaks: uniwbrk.h. (line 6) +* wrapping: unilbrk.h. 
(line 6) + + + +Tag Table: +Node: Top270 +Node: Introduction3239 +Node: Unicode5236 +Node: Unicode and i18n7116 +Node: Locale encodings8579 +Node: In-memory representation10787 +Node: char * strings11896 +Node: The wchar_t mess17153 +Node: Unicode strings19357 +Node: Conventions20508 +Node: unitypes.h22708 +Node: unistr.h23280 +Node: Elementary string checks23837 +Node: Elementary string conversions24459 +Node: Elementary string functions25761 +Node: Elementary string functions with memory allocation32732 +Node: Elementary string functions on NUL terminated strings33354 +Node: uniconv.h45090 +Node: unistdio.h52801 +Node: uniname.h61004 +Node: unictype.h62337 +Node: General category63246 +Node: Object oriented API64289 +Node: Bit mask API72751 +Node: Canonical combining class75005 +Node: Bidirectional category78219 +Node: Decimal digit value81276 +Node: Digit value81837 +Node: Numeric value82398 +Node: Mirrored character83289 +Node: Properties83962 +Node: Properties as objects84653 +Node: Properties as functions91031 +Node: Scripts96582 +Node: Blocks97968 +Node: ISO C and Java syntax99291 +Node: Classifications like in ISO C101001 +Node: uniwidth.h103705 +Node: uniwbrk.h105742 +Node: Word breaks in a string106269 +Node: Word break property107320 +Node: unilbrk.h108416 +Node: uninorm.h112587 +Node: Decomposition of characters113219 +Node: Composition of characters116595 +Node: Normalization of strings117304 +Node: Normalizing comparisons119366 +Node: Normalization of streams121722 +Node: unicase.h123810 +Node: Case mappings of characters124495 +Node: Case mappings of strings126542 +Node: Case mappings of substrings129875 +Node: Case insensitive comparison136805 +Node: Case detection142156 +Node: uniregex.h145424 +Node: Using the library145647 +Node: Installation146058 +Node: Compiler options146531 +Node: Include files148090 +Node: Autoconf macro149314 +Node: Reporting problems150872 +Node: More functionality151669 +Node: Licenses152112 +Node: GNU GPL153747 +Node: GNU 
LGPL191292 +Node: GNU FDL199738 +Node: Index224863 + +End Tag Table + + +Local Variables: +coding: utf-8 +End: diff --git a/doc/libunistring.texi b/doc/libunistring.texi new file mode 100644 index 00000000..8eb8061f --- /dev/null +++ b/doc/libunistring.texi @@ -0,0 +1,989 @@ +\input texinfo @c -*-texinfo-*- +@comment %**start of header +@setfilename libunistring.info +@documentencoding UTF-8 +@settitle GNU libunistring +@finalout +@c Indices: +@c am = autoconf macro @amindex +@c cp = concept @cindex +@c fn = function @findex +@c tp = type @tindex +@c Unused predefined indices: +@c ky = keystroke @kindex +@c pg = program @pindex +@c vr = variable @vindex +@defcodeindex am +@syncodeindex am cp +@syncodeindex fn cp +@syncodeindex tp cp +@ifclear texi2html +@firstparagraphindent insert +@end ifclear +@c texi2html-1.76 does not support @arrow{}. +@ifset texi2html +@macro arrow{} +→ +@end macro +@end ifset +@comment %**end of header + +@include version.texi + +@c Location of the POSIX specification on the web. +@set POSIXURL http://www.opengroup.org/onlinepubs/9699919799 + +@c Macro for referencing a POSIX function. +@c We don't write it as func(), see section "GNU Manuals" of the +@c GNU coding standards. +@ifinfo +@macro posixfunc{func} +@code{\func\} +@end macro +@end ifinfo +@ifnotinfo +@macro posixfunc{func} +@uref{@value{POSIXURL}/functions/\func\.html,,@code{\func\}} +@end macro +@end ifnotinfo + +@c Macro for referencing a normal function. +@c We don't write it as func(), see section "GNU Manuals" of the +@c GNU coding standards. +@macro func{func} +@code{\func\} +@end macro + +@c Macro for an advisory ragged line break in TeX mode. +@c Needed because there are long unbreakable pieces of text (such as URLs or +@c formulas), TeX is too shy to move them to a new line. TeX considers only +@c two choices: a line break in aligned mode (which it rejects due to aesthetic +@c reasons) and writing into the margin. 
What we want in many cases is a line +@c break without filling the first line. Like what @* delivers. But we want it +@c only when needed, so that it disappears when unrelated changes in the same +@c paragraph cause a line break in a nearby position. And we need it only in +@c TeX mode. info and HTML modes are fine. +@c This trick is from Karl Berry. +@iftex +@macro texnl +@hfil@penalty9000@hfilneg +@end macro +@end iftex +@ifnottex +@macro texnl +@end macro +@end ifnottex + +@ifinfo +@dircategory Software development +@direntry +* GNU libunistring: (libunistring). Unicode string library. +@end direntry +@end ifinfo + +@ifinfo +This manual is for GNU libunistring. + +@ignore +@c This was: @copying but it triggers a makeinfo 4.13 bug +Copyright (C) 2001-2009 Free Software Foundation, Inc. + +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. + +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in @ref{GNU FDL}. + +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in @ref{GNU GPL}. 
+@end ignore +@end ifinfo + +@titlepage +@title GNU libunistring, version @value{VERSION} +@subtitle updated @value{UPDATED} +@subtitle Edition @value{EDITION}, @value{UPDATED} +@author Bruno Haible + +@ifnothtml +@page +@vskip 0pt plus 1filll +@c @insertcopying +Copyright (C) 2001-2009 Free Software Foundation, Inc. + +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. + +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in @ref{GNU FDL}. + +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in @ref{GNU GPL}. +@end ifnothtml +@end titlepage + +@c Table of Contents +@contents + +@ifnottex +@node Top +@top GNU libunistring +@end ifnottex + +@menu +* Introduction:: Who may need Unicode strings? 
+* Conventions:: Conventions used in this manual +* unitypes.h:: Elementary types +* unistr.h:: Elementary Unicode string functions +* uniconv.h:: Conversions between Unicode and encodings +* unistdio.h:: Output with Unicode strings +* uniname.h:: Names of Unicode characters +* unictype.h:: Unicode character classification and properties +* uniwidth.h:: Display width +* uniwbrk.h:: Word breaks in strings +* unilbrk.h:: Line breaking +* uninorm.h:: Normalization forms +* unicase.h:: Case mappings +* uniregex.h:: Regular expressions +* Using the library:: How to link with the library and use it? +* More functionality:: More advanced functionality +* Licenses:: Licenses + +* Index:: General Index + +@detailmenu + --- The Detailed Node Listing --- + +Introduction + +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with @code{char *} strings +* The wchar_t mess:: Why @code{wchar_t *} strings are useless +* Unicode strings:: How are Unicode strings represented? 
+ +unistr.h + +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: + +unictype.h + +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: + +General category + +* Object oriented API:: +* Bit mask API:: + +Properties + +* Properties as objects:: +* Properties as functions:: + +uniwbrk.h + +* Word breaks in a string:: +* Word break property:: + +uninorm.h + +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: + +unicase.h + +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: + +Using the library + +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: + +Licenses + +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License + +@end detailmenu +@end menu + +@node Introduction +@chapter Introduction + +This library provides functions for manipulating Unicode strings and +for manipulating C strings according to the Unicode standard.
+ +It consists of the following parts: + +@table @code +@item <unistr.h> +elementary string functions +@item <uniconv.h> +conversion from/to legacy encodings +@item <unistdio.h> +formatted output to strings +@item <uniname.h> +character names +@item <unictype.h> +character classification and properties +@item <uniwidth.h> +string width when using nonproportional fonts +@item <uniwbrk.h> +word breaks +@item <unilbrk.h> +line breaking algorithm +@item <uninorm.h> +normalization (composition and decomposition) +@item <unicase.h> +case folding +@item <uniregex.h> +regular expressions (not yet implemented) +@end table + +@cindex use cases +@cindex value, of libunistring +libunistring is for you if your application involves non-trivial text +processing, such as upper/lower case conversions, line breaking, operations +on words, or more advanced analysis of text. Text provided by the user can, +in general, contain characters of all kinds of scripts. The text processing +functions provided by this library handle all scripts and all languages. + +libunistring is for you if your application already uses the ISO C / POSIX +@code{<ctype.h>}, @code{<wctype.h>} functions and the text it operates on is +provided by the user and can be in any language. + +libunistring is also for you if your application uses Unicode strings as +internal in-memory representation. + +@menu +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with @code{char *} strings +* The wchar_t mess:: Why @code{wchar_t *} strings are useless +* Unicode strings:: How are Unicode strings represented? 
+@end menu + +@node Unicode +@section Unicode + +@cindex Unicode +Unicode is a standardized repertoire of characters that contains characters +from all scripts of the world, from Latin letters to Chinese ideographs +and Babylonian cuneiform glyphs. It also specifies how these characters +are to be rendered on a screen or on paper, and how common text processing +(word selection, line breaking, uppercasing of page titles etc.) is supposed +to behave on Unicode text. + +Unicode also specifies three ways of storing sequences of Unicode +characters in a computer whose basic unit of data is an 8-bit byte: +@cindex UTF-8 +@cindex UTF-16 +@cindex UTF-32 +@cindex UCS-4 +@table @asis +@item UTF-8 +Every character is represented as 1 to 4 bytes. +@item UTF-16 +Every character is represented as 1 to 2 units of 16 bits. +@item UTF-32, a.k.a@. UCS-4 +Every character is represented as 1 unit of 32 bits. +@end table + +For encoding Unicode text in a file, UTF-8 is usually used. For encoding +Unicode strings in memory for a program, any of the three encoding forms +can be reasonably used. + +Unicode is widely used on the web. Prior to the use of Unicode, web pages +were in many different encodings (ISO-8859-1 for English, French, Spanish, +ISO-8859-2 for Polish, ISO-8859-7 for Greek, KOI8-R for Russian, GB2312 or +BIG5 for Chinese, ISO-2022-JP-2 or EUC-JP or Shift_JIS for Japanese, and many +many others). It was next to impossible to create a document that contained +Chinese and Polish text in the same document. Due to the many encodings for +Japanese, even the processing of pure Japanese text was error prone.
+ +References: +@itemize @bullet +@item +The Unicode standard:@texnl{} @url{http://www.unicode.org/} +@item +Definition of UTF-8:@texnl{} @url{http://www.rfc-editor.org/rfc/rfc3629.txt} +@item +Definition of UTF-16:@texnl{} @url{http://www.rfc-editor.org/rfc/rfc2781.txt} +@item +Markus Kuhn's UTF-8 and Unicode FAQ:@texnl{} +@url{http://www.cl.cam.ac.uk/~mgk25/unicode.html} +@end itemize + +@node Unicode and i18n +@section Unicode and Internationalization + +@cindex internationalization +Internationalization is the process of changing the source code of a program +so that it can meet the expectations of users in any culture, if culture +specific data (translations, images etc.) are provided. + +Use of Unicode is not strictly required for internationalization, but it +makes internationalization much easier, because operations that need to +look at specific characters (like hyphenation, spell checking, or the +automatic conversion of double-quotes to opening and closing double-quote +characters) don't need to consider multiple possible encodings of the text. + +Use of Unicode also enables multilingualization: the ability of having text +in multiple languages present in the same document or even in the same line +of text. + +But use of Unicode is not everything. Internationalization usually consists +of three features: +@itemize @bullet +@item +Use of Unicode where needed for text processing. This is what this library +is for. +@item +Use of message catalogs for messages shown to the user. This is what +GNU gettext is about. +@item +Use of locale specific conventions for date and time formats, for numeric +formatting, or for sorting of text. This can be done adequately with the +POSIX APIs and the implementation of locales in the GNU C library. +@end itemize + +@node Locale encodings +@section Locale encodings + +@cindex locale +A locale is a set of cultural conventions.
According to POSIX, for a program, +at any moment, there is one locale being designated as the ``current locale''. +(Actually, POSIX supports also one locale per thread, but this feature is not +yet universally implemented and not widely used.) +@cindex locale categories +The locale is partitioned into several aspects, called the ``categories'' +of the locale. The main aspects are: +@itemize +@item +The character encoding and the character properties. This is the +@code{LC_CTYPE} category. +@item +The sorting rules for text. This is the @code{LC_COLLATE} category. +@item +The language specific translations of messages. This is the +@code{LC_MESSAGES} category. +@item +The formatting rules for numbers, such as the decimal separator. This is +the @code{LC_NUMERIC} category. +@item +The formatting rules for amounts of money. This is the @code{LC_MONETARY} +category. +@item +The formatting of date and time. This is the @code{LC_TIME} category. +@end itemize + +@cindex locale encoding +In particular, the @code{LC_CTYPE} category of the current locale determines +the character encoding. This is the encoding of @samp{char *} strings. +We also call it the ``locale encoding''. GNU libunistring has a function, +@func{locale_charset}, that returns a standardized (platform independent) +name for this encoding. + +All locale encodings used on glibc systems are essentially ASCII compatible: +Most graphic ASCII characters have the same representation, as a single byte, +in that encoding as in ASCII. + +Among the possible locale encodings are UTF-8 and GB18030. Both can +represent any Unicode character as a sequence of bytes. UTF-8 is used in +most of the world, whereas GB18030 is used in the People's Republic of China, +because it is backward compatible with the GB2312 encoding that was used in +this country earlier.
+ +The legacy locale encodings, ISO-8859-15 (which supplanted ISO-8859-1 in +most of Europe), ISO-8859-2, KOI8-R, EUC-JP, etc., are still in use in +many places, though. + +UTF-16 and UTF-32 are not used as locale encodings, because they are not +ASCII compatible. + +@node In-memory representation +@section Choice of in-memory representation of strings + +There are three ways of representing strings in memory of a running +program. +@itemize +@item +As @samp{char *} strings. Such strings are represented in locale encoding. +This approach is employed when not much text processing is done by the +program. When some Unicode aware processing is to be done, a string is +converted to Unicode on the fly and back to locale encoding afterwards. +@item +As UTF-8 or UTF-16 or UTF-32 strings. This implies that conversion from +locale encoding to Unicode is performed on input, and in the opposite +direction on output. This approach is employed when the program does +a significant amount of text processing, or when the program has multiple +threads operating on the same data but in different locales. +@item +As @samp{wchar_t *}, a.k.a@. ``wide strings''. This approach is misguided, +see @ref{The wchar_t mess}. +@end itemize + +@node char * strings +@section @samp{char *} strings + +@cindex C string functions +The classical C strings, with its C library support standardized by +ISO C and POSIX, can be used in internationalized programs with some +precautions. The problem with this API is that many of the C library +functions for strings don't work correctly on strings in locale +encodings, leading to bugs that only people in some cultures of the +world will experience. + +@cindex locale, multibyte +The first problem with the C library API is the support of multibyte +locales. According to the locale encoding, in general, every character +is represented by one or more bytes (up to 4 bytes in practice --- but +use @code{MB_LEN_MAX} instead of the number 4 in the code). 
+When every character is represented by only 1 byte, we speak of an +``unibyte locale'', otherwise of a ``multibyte locale''. It is important +to realize that the majority of Unix installations nowadays use UTF-8 +or GB18030 as locale encoding; therefore, the majority of users are +using multibyte locales. + +@cindex char, type +The important fact to remember is: +@cartouche +@emph{A @samp{char} is a byte, not a character.} +@end cartouche + +As a consequence: +@itemize +@item +The @code{<ctype.h>} API is useless in this context; it does not work in +multibyte locales. +@item +The @posixfunc{strlen} function does not return the number of characters +in a string. Nor does it return the number of screen columns occupied +by a string after it is output. It merely returns the number of +@emph{bytes} occupied by a string. +@item +Truncating a string, for example, with @posixfunc{strncpy}, can have the +effect of truncating it in the middle of a multibyte character. Such +a string will, when output, have a garbled character at its end, often +represented by a hollow box. +@item +@posixfunc{strchr} and @posixfunc{strrchr} do not work with multibyte strings +if the locale encoding is GB18030 and the character to be searched is +a digit. +@item +@posixfunc{strstr} does not work with multibyte strings if the locale encoding +is different from UTF-8. +@item +@posixfunc{strcspn}, @posixfunc{strpbrk}, @posixfunc{strspn} cannot work +correctly in multibyte locales: they assume the second argument is a list of +single-byte characters. Even in this simple case, they do not work with +multibyte strings if the locale encoding is GB18030 and one of the +characters to be searched is a digit. +@item +@posixfunc{strsep} and @posixfunc{strtok_r} do not work with multibyte strings +unless all of the delimiter characters are ASCII characters < 0x30. +@item +The @posixfunc{strcasecmp}, @posixfunc{strncasecmp}, and @posixfunc{strcasestr} +functions do not work with multibyte strings. 
+@end itemize + +The workarounds can be found in GNU gnulib +@url{http://www.gnu.org/software/gnulib/}. +@itemize +@item +gnulib has modules @samp{mbchar}, @samp{mbiter}, @samp{mbuiter} that +represent multibyte characters and allow to iterate across a multibyte +string with the same ease as through a unibyte string. +@item +gnulib has functions @func{mbslen} and @func{mbswidth} that can be +used instead of @posixfunc{strlen} when the number of characters or the +number of screen columns of a string is requested. +@item +gnulib has functions @func{mbschr} and @func{mbsrrchr} that are +like @posixfunc{strchr} and @posixfunc{strrchr}, but work in multibyte locales. +@item +gnulib has a function @func{mbsstr}, like @posixfunc{strstr}, but works +in multibyte locales. +@item +gnulib has functions @func{mbscspn}, @func{mbspbrk}, @func{mbsspn} +that are like @posixfunc{strcspn}, @posixfunc{strpbrk}, @posixfunc{strspn}, but +work in multibyte locales. +@item +gnulib has functions @func{mbssep} and @func{mbstok_r} that are +like @posixfunc{strsep} and @posixfunc{strtok_r} but work in multibyte locales. +@item +gnulib has functions @func{mbscasecmp}, @func{mbsncasecmp}, +@func{mbspcasecmp}, and @func{mbscasestr} that are like @posixfunc{strcasecmp}, +@posixfunc{strncasecmp}, and @posixfunc{strcasestr}, but +work in multibyte locales. Still, the function @code{ulc_casecmp} is +preferable to these functions; see below. +@end itemize + +The second problem with the C library API is that it has some assumptions built-in that are not valid in some languages: +@itemize +@item +It assumes that there are only two forms of every character: uppercase +and lowercase. This is not true for Croatian, where the character +@sc{LETTER DZ WITH CARON} comes in three forms: +@sc{LATIN CAPITAL LETTER DZ WITH CARON} (DZ), +@sc{LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} (Dz), +@sc{LATIN SMALL LETTER DZ WITH CARON} (dz). 
+@item +It assumes that uppercasing of 1 character leads to 1 character. This +is not true for German, where the @sc{LATIN SMALL LETTER SHARP S}, when +uppercased, becomes @samp{SS}. +@item +It assumes that there is a 1:1 mapping between uppercase and lowercase forms. +This is not true for the Greek sigma: @sc{GREEK CAPITAL LETTER SIGMA} is +the uppercase of both @sc{GREEK SMALL LETTER SIGMA} and +@sc{GREEK SMALL LETTER FINAL SIGMA}. +@item +It assumes that the upper/lowercase mappings are position independent. +This is not true for the Greek sigma and the Lithuanian i. +@end itemize + +The correct way to deal with this problem is +@enumerate +@item +to provide functions for titlecasing, as well as for upper- and +lowercasing, +@item +to view case transformations as functions that operate on strings, +rather than on characters. +@end enumerate + +This is implemented in this library, through the functions declared in @code{<unicase.h>}, see @ref{unicase.h}. + +@node The wchar_t mess +@section The @code{wchar_t} mess + +@cindex wchar_t, type +The ISO C and POSIX standard creators made an attempt to fix the first +problem mentioned in the previous section. They introduced +@itemize +@item +a type @samp{wchar_t}, designed to encapsulate an entire character, +@item +a ``wide string'' type @samp{wchar_t *}, and +@item +functions declared in @code{<wctype.h>} that were meant to supplant the +ones in @code{<ctype.h>}. +@end itemize + +Unfortunately, this API and its implementation have numerous problems: + +@itemize +@item +On AIX and Windows platforms, @code{wchar_t} is a 16-bit type. This +means that it can never accommodate an entire Unicode character. Either +the @code{wchar_t *} strings are limited to characters in UCS-2 (the +``Basic Multilingual Plane'' of Unicode), or --- if @code{wchar_t *} +strings are encoded in UTF-16 --- a @code{wchar_t} represents only half +of a character in the worst case, making the @code{<wctype.h>} functions +pointless.
+ +@item +On Solaris and FreeBSD, the @code{wchar_t} encoding is locale dependent +and undocumented. This means, if you want to know any property of a +@code{wchar_t} character, other than the properties defined by +@code{<wctype.h>} --- such as whether it's a dash, currency symbol, +paragraph separator, or similar ---, you have to convert it to +@code{char *} encoding first, by use of the function @posixfunc{wctomb}. + +@item +When you read a stream of wide characters, through the functions +@posixfunc{fgetwc} and @posixfunc{fgetws}, and when the input stream/file is +not in the expected encoding, you have no way to determine the invalid +byte sequence and do some corrective action. If you use these +functions, your program becomes ``garbage in - more garbage out'' or +``garbage in - abort''. +@end itemize + +As a consequence, it is better to use multibyte strings, as explained in +the previous section. Such multibyte strings can bypass limitations +of the @code{wchar_t} type, if you use functions defined in gnulib and +libunistring for text processing. They can also faithfully transport +malformed characters that were present in the input, without requiring +the program to produce garbage or abort. + +@node Unicode strings +@section Unicode strings + +libunistring supports Unicode strings in three representations: +@cindex UTF-8, strings +@cindex UTF-16, strings +@cindex UTF-32, strings +@itemize +@item +UTF-8 strings, through the type @samp{uint8_t *}. The units are bytes +(@code{uint8_t}). +@item +UTF-16 strings, through the type @samp{uint16_t *}. The units are 16-bit +memory words (@code{uint16_t}). +@item +UTF-32 strings, through the type @samp{uint32_t *}. The units are 32-bit +memory words (@code{uint32_t}). +@end itemize + +As with C strings, there are two variants: +@itemize +@item +Unicode strings with a terminating NUL character are represented as +a pointer to the first unit of the string. There is a unit containing +a 0 value at the end. 
It is considered part of the string for all +memory allocation purposes, but is not considered part of the string +for all other logical purposes. +@item +Unicode strings where embedded NUL characters are allowed. These +are represented by a pointer to the first unit and the number of units +(not bytes!) of the string. In this setting, there is no trailing +zero-valued unit used as ``end marker''. +@end itemize + +@node Conventions +@chapter Conventions + +This chapter explains conventions valid throughout the libunistring library. + +@cindex argument conventions +Variables of type @code{char *} denote C strings in locale encoding. +See @ref{Locale encodings}. + +Variables of type @code{uint8_t *} denote UTF-8 strings. Their units +are bytes. + +Variables of type @code{uint16_t *} denote UTF-16 strings, without byte +order mark. Their units are 2-byte words. + +Variables of type @code{uint32_t *} denote UTF-32 strings, without byte +order mark. Their units are 4-byte words. + +Argument pairs @code{(@var{s}, @var{n})} denote a string +@code{@var{s}[0..@var{n}-1]} with exactly @var{n} units. + +All functions with prefix @samp{ulc_} operate on C strings in locale +encoding. + +All functions with prefix @samp{u8_} operate on UTF-8 strings. + +All functions with prefix @samp{u16_} operate on UTF-16 strings. + +All functions with prefix @samp{u32_} operate on UTF-32 strings. + +For every function with prefix @samp{u8_}, operating on UTF-8 strings, +there is also a corresponding function with prefix @samp{u16_}, +operating on UTF-16 strings, and a corresponding function with prefix +@samp{u32_}, operating on UTF-32 strings. Their description is +analogous; in this documentation we describe only the function that +operates on UTF-8 strings, for brevity. + +A declaration with a variable @var{n} denotes the three concrete +declarations with @var{n} = 8, @var{n} = 16, @var{n} = 32. 
+ +All parameters starting with @samp{str} and the parameters of +functions starting with @code{u8_str}/@code{u16_str}/@code{u32_str} +denote a NUL terminated string. + +@cindex return value conventions +Error values are always returned through the @code{errno} variable, +usually with a return value that indicates the presence of an error +(NULL for functions that return a pointer, or -1 for functions that +return an @code{int}). + +Functions returning a string result take a +@code{(@var{resultbuf}, @var{lengthp})} +argument pair. If @var{resultbuf} is not NULL and the result fits +into @code{*@var{lengthp}} units, it is put in @var{resultbuf}, and +@var{resultbuf} is returned. Otherwise, a freshly allocated string +is returned. In both cases, @code{*@var{lengthp}} is set to the +length (number of units) of the returned string. In case of error, +NULL is returned and @code{errno} is set. + +@include unitypes.texi +@include unistr.texi +@include uniconv.texi +@include unistdio.texi +@include uniname.texi +@include unictype.texi +@include uniwidth.texi +@include uniwbrk.texi +@include unilbrk.texi +@include uninorm.texi +@include unicase.texi +@include uniregex.texi + +@node Using the library +@chapter Using the library + +This chapter explains some practical considerations, regarding the +installation and compiler options that are needed in order to use this +library. + +@menu +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: +@end menu + +@node Installation +@section Installation + +@cindex dependencies +Before you can use the library, it must be installed. First, you have to +make sure all dependencies are installed. They are listed in the file +@file{DEPENDENCIES}. + +@cindex installation +Then you can proceed to build and install the library, as described in the +file @file{INSTALL}. For installation on Windows systems, please refer to +the file @file{README.woe32}. 
+ +@node Compiler options +@section Compiler options + +Let's denote as @code{LIBUNISTRING_PREFIX} the value of the @samp{--prefix} +option that you passed to @code{configure} while installing this package. +If you didn't pass any @samp{--prefix} option, then the package is installed +in @file{/usr/local}. + +Let's denote as @code{LIBUNISTRING_INCLUDEDIR} the directory where the +include files were installed. This is usually the same as +@code{$@{LIBUNISTRING_PREFIX@}/include}, except that if you passed an +@samp{--includedir} option to @code{configure}, it is the value of that +option. + +Let's further denote as @code{LIBUNISTRING_LIBDIR} the directory where +the library itself was installed. This is the value that you passed +with the @samp{--libdir} option to @code{configure}, or otherwise the +same as @code{$@{LIBUNISTRING_PREFIX@}/lib}. Recall that when building +in 64-bit mode on a 64-bit GNU/Linux system that supports executables +in either 64-bit mode or 32-bit mode, you should have used the option +@code{--libdir=$@{LIBUNISTRING_PREFIX@}/lib64}. + +@cindex compiler options +So that the compiler finds the include files, you have to pass it the +option @code{-I$@{LIBUNISTRING_INCLUDEDIR@}}. + +So that the compiler finds the library during its linking pass, you have +to pass it the options @code{-L$@{LIBUNISTRING_LIBDIR@} -lunistring}. +On some systems, in some configurations, you also have to pass options +needed for linking with @code{libiconv}. The autoconf macro +@code{gl_LIBUNISTRING} (see @ref{Autoconf macro}) deals with this +particularity. + +@node Include files +@section Include files + +Most of the include files have been presented in the introduction, see +@ref{Introduction}, and subsequent detailed chapters. + +Another include file is @code{<unistring/version.h>}. It contains the +version number of the libunistring library. 
+ +@deftypevr Macro int _LIBUNISTRING_VERSION +This constant contains the version of libunistring that is being used +at compile time. It encodes the major and minor parts of the version +number only. These parts are encoded in the form @code{(major<<8) + minor}. +@end deftypevr + +@deftypevr Constant int _libunistring_version +This constant contains the version of libunistring that is being used +at run time. It encodes the major and minor parts of the version +number only. These parts are encoded in the form @code{(major<<8) + minor}. +@end deftypevr + +It is possible that @code{_libunistring_version} is greater than +@code{_LIBUNISTRING_VERSION}. This can happen when you use +@code{libunistring} as a shared library, and a newer, binary +backward-compatible version has been installed after your program +that uses @code{libunistring} was installed. + +@node Autoconf macro +@section Autoconf macro + +@cindex autoconf macro +GNU Gnulib provides an autoconf macro that tests for the availability +of @code{libunistring}. It is contained in the Gnulib module +@samp{libunistring}, see@texnl{} +@url{http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring}. + +@amindex gl_LIBUNISTRING +The macro is called @code{gl_LIBUNISTRING}. It searches for an installed +libunistring. If found, it sets and AC_SUBSTs @code{HAVE_LIBUNISTRING=yes} +and the @code{LIBUNISTRING} and @code{LTLIBUNISTRING} variables and augments +the @code{CPPFLAGS} variable, and defines the C macro +@code{HAVE_LIBUNISTRING} to 1. Otherwise, it sets and AC_SUBSTs +@code{HAVE_LIBUNISTRING=no} and @code{LIBUNISTRING} and @code{LTLIBUNISTRING} +to empty. + +The complexities that @code{gl_LIBUNISTRING} deals with are the following: + +@itemize @bullet +@item +On some operating systems, in some configurations, libunistring depends +on @code{libiconv}, and the options for linking with libiconv must be +mentioned explicitly on the link command line. 
+ +@item +GNU @code{libunistring}, if installed, is not necessarily already in the +search path (@code{CPPFLAGS} for the include file search path, +@code{LDFLAGS} for the library search path). + +@item +GNU @code{libunistring}, if installed, is not necessarily already in the +run time library search path. To avoid the need for setting an environment +variable like @code{LD_LIBRARY_PATH}, the macro adds the appropriate +run time search path options to the @code{LIBUNISTRING} variable. This works +on most systems. +@end itemize + +@node Reporting problems +@section Reporting problems + +@cindex bug reports +@cindex bug tracker +@cindex mailing list +If you encounter any problem, please don't hesitate to send a detailed +bug report to the @code{bug-libunistring@@gnu.org} mailing list. You can +alternatively also use the bug tracker at the project page +@url{https://savannah.gnu.org/projects/libunistring}. + +Please always include the version number of this library, and a short +description of your operating system and compilation environment with +corresponding version numbers. + +For problems that appear while building and installing @code{libunistring}, +for which you don't find the remedy in the @file{INSTALL} file, please include +a description of the options that you passed to the @samp{configure} script. + +@node More functionality +@chapter More advanced functionality + +@cindex bidirectional reordering +For bidirectional reordering of strings, we recommend the GNU FriBidi library: +@url{http://www.fribidi.org/}. + +@cindex rendering +For the rendering of Unicode strings outside of the context of a given toolkit +(KDE/Qt or GNOME/Gtk), we recommend the Pango library: +@url{http://www.pango.org/}. + +@node Licenses +@appendix Licenses +@cindex Licenses + +The files of this package are covered by the licenses indicated in each +particular file or directory. 
Here is a summary: + +@itemize @bullet +@item +The @code{libunistring} library is covered by the +GNU Lesser General Public License (LGPL). +A copy of the license is included in @ref{GNU LGPL}. + +@item +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. +@* +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in @ref{GNU FDL}. +@* +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in @ref{GNU GPL}. 
+@end itemize + +@menu +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License +@end menu + +@page +@node GNU GPL +@appendixsec GNU GENERAL PUBLIC LICENSE +@cindex GPL, GNU General Public License +@cindex License, GNU GPL +@include gpl.texi +@page +@node GNU LGPL +@appendixsec GNU LESSER GENERAL PUBLIC LICENSE +@cindex LGPL, GNU Lesser General Public License +@cindex License, GNU LGPL +@include lgpl.texi +@page +@node GNU FDL +@appendixsec GNU Free Documentation License +@cindex FDL, GNU Free Documentation License +@cindex License, GNU FDL +@include fdl.texi + +@node Index +@unnumbered Index + +@printindex cp + +@bye + +@c Local Variables: +@c indent-tabs-mode: nil +@c whitespace-check-buffer-indent: nil +@c End: diff --git a/doc/libunistring_1.html b/doc/libunistring_1.html new file mode 100644 index 00000000..646fdc65 --- /dev/null +++ b/doc/libunistring_1.html @@ -0,0 +1,531 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 1. Introduction</title> + +<meta name="description" content="GNU libunistring: 1. Introduction"> +<meta name="keywords" content="GNU libunistring: 1. 
Introduction"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[ << ]</td> +<td valign="middle" align="left">[<a href="libunistring_2.html#SEC9" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="Introduction"></a> +<a name="SEC1"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC1">1. 
Introduction</a> </h1> + +<p>This library provides functions for manipulating Unicode strings and +for manipulating C strings according to the Unicode standard. +</p> +<p>It consists of the following parts: +</p> +<dl compact="compact"> +<dt> <code><unistr.h></code></dt> +<dd><p>elementary string functions +</p></dd> +<dt> <code><uniconv.h></code></dt> +<dd><p>conversion from/to legacy encodings +</p></dd> +<dt> <code><unistdio.h></code></dt> +<dd><p>formatted output to strings +</p></dd> +<dt> <code><uniname.h></code></dt> +<dd><p>character names +</p></dd> +<dt> <code><unictype.h></code></dt> +<dd><p>character classification and properties +</p></dd> +<dt> <code><uniwidth.h></code></dt> +<dd><p>string width when using nonproportional fonts +</p></dd> +<dt> <code><uniwbrk.h></code></dt> +<dd><p>word breaks +</p></dd> +<dt> <code><unilbrk.h></code></dt> +<dd><p>line breaking algorithm +</p></dd> +<dt> <code><uninorm.h></code></dt> +<dd><p>normalization (composition and decomposition) +</p></dd> +<dt> <code><unicase.h></code></dt> +<dd><p>case folding +</p></dd> +<dt> <code><uniregex.h></code></dt> +<dd><p>regular expressions (not yet implemented) +</p></dd> +</dl> + +<a name="IDX1"></a> +<a name="IDX2"></a> +<p>libunistring is for you if your application involves non-trivial text +processing, such as upper/lower case conversions, line breaking, operations +on words, or more advanced analysis of text. Text provided by the user can, +in general, contain characters of all kinds of scripts. The text processing +functions provided by this library handle all scripts and all languages. +</p> +<p>libunistring is for you if your application already uses the ISO C / POSIX +<code><ctype.h></code>, <code><wctype.h></code> functions and the text it operates on is +provided by the user and can be in any language. +</p> +<p>libunistring is also for you if your application uses Unicode strings as +internal in-memory representation. 
+</p> + +<hr size="6"> +<a name="Unicode"></a> +<a name="SEC2"></a> +<h2 class="section"> <a href="libunistring.html#TOC2">1.1 Unicode</a> </h2> + +<p>Unicode is a standardized repertoire of characters that contains characters +from all scripts of the world, from Latin letters to Chinese ideographs +and Babylonian cuneiform glyphs. It also specifies how these characters +are to be rendered on a screen or on paper, and how common text processing +(word selection, line breaking, uppercasing of page titles etc.) is supposed +to behave on Unicode text. +</p> +<p>Unicode also specifies three ways of storing sequences of Unicode +characters in a computer whose basic unit of data is an 8-bit byte: +<a name="IDX3"></a> +<a name="IDX4"></a> +<a name="IDX5"></a> +<a name="IDX6"></a> +</p><dl compact="compact"> +<dt> UTF-8</dt> +<dd><p>Every character is represented as 1 to 4 bytes. +</p></dd> +<dt> UTF-16</dt> +<dd><p>Every character is represented as 1 to 2 units of 16 bits. +</p></dd> +<dt> UTF-32, a.k.a. UCS-4</dt> +<dd><p>Every character is represented as 1 unit of 32 bits. +</p></dd> +</dl> + +<p>For encoding Unicode text in a file, UTF-8 is usually used. For encoding +Unicode strings in memory for a program, either of the three encoding forms +can be reasonably used. +</p> +<p>Unicode is widely used on the web. Prior to the use of Unicode, web pages +were in many different encodings (ISO-8859-1 for English, French, Spanish, +ISO-8859-2 for Polish, ISO-8859-7 for Greek, KOI8-R for Russian, GB2312 or +BIG5 for Chinese, ISO-2022-JP-2 or EUC-JP or Shift_JIS for Japanese, and many +many others). It was next to impossible to create a document that contained +Chinese and Polish text in the same document. Due to the many encodings for +Japanese, even the processing of pure Japanese text was error prone. 
+</p> +<p>References: +</p><ul> +<li> +The Unicode standard: <a href="http://www.unicode.org/">http://www.unicode.org/</a> +</li><li> +Definition of UTF-8: <a href="http://www.rfc-editor.org/rfc/rfc3629.txt">http://www.rfc-editor.org/rfc/rfc3629.txt</a> +</li><li> +Definition of UTF-16: <a href="http://www.rfc-editor.org/rfc/rfc2781.txt">http://www.rfc-editor.org/rfc/rfc2781.txt</a> +</li><li> +Markus Kuhn's UTF-8 and Unicode FAQ: +<a href="http://www.cl.cam.ac.uk/~mgk25/unicode.html">http://www.cl.cam.ac.uk/~mgk25/unicode.html</a> +</li></ul> + +<hr size="6"> +<a name="Unicode-and-i18n"></a> +<a name="SEC3"></a> +<h2 class="section"> <a href="libunistring.html#TOC3">1.2 Unicode and Internationalization</a> </h2> + +<p>Internationalization is the process of changing the source code of a program +so that it can meet the expectations of users in any culture, if culture +specific data (translations, images etc.) are provided. +</p> +<p>Use of Unicode is not strictly required for internationalization, but it +makes internationalization much easier, because operations that need to +look at specific characters (like hyphenation, spell checking, or the +automatic conversion of double-quotes to opening and closing double-quote +characters) don't need to consider multiple possible encodings of the text. +</p> +<p>Use of Unicode also enables multilingualization: the ability of having text +in multiple languages present in the same document or even in the same line +of text. +</p> +<p>But use of Unicode is not everything. Internationalization usually consists +of three features: +</p><ul> +<li> +Use of Unicode where needed for text processing. This is what this library +is for. +</li><li> +Use of message catalogs for messages shown to the user, This is what +GNU gettext is about. +</li><li> +Use of locale specific conventions for date and time formats, for numeric +formatting, or for sorting of text. 
This can be done adequately with the +POSIX APIs and the implementation of locales in the GNU C library. +</li></ul> + +<hr size="6"> +<a name="Locale-encodings"></a> +<a name="SEC4"></a> +<h2 class="section"> <a href="libunistring.html#TOC4">1.3 Locale encodings</a> </h2> + +<p>A locale is a set of cultural conventions. According to POSIX, for a program, +at any moment, there is one locale being designated as the “current locale”. +(Actually, POSIX supports also one locale per thread, but this feature is not +yet universally implemented and not widely used.) +<a name="IDX7"></a> +The locale is partitioned into several aspects, called the “categories” +of the locale. The main various aspects are: +</p><ul class="toc"> +<li> +The character encoding and the character properties. This is the +<code>LC_CTYPE</code> category. +</li><li> +The sorting rules for text. This is the <code>LC_COLLATE</code> category. +</li><li> +The language specific translations of messages. This is the +<code>LC_MESSAGES</code> category. +</li><li> +The formatting rules for numbers, such as the decimal separator. This is +the <code>LC_NUMERIC</code> category. +</li><li> +The formatting rules for amounts of money. This is the <code>LC_MONETARY</code> +category. +</li><li> +The formatting of date and time. This is the <code>LC_TIME</code> category. +</li></ul> + +<a name="IDX8"></a> +<p>In particular, the <code>LC_CTYPE</code> category of the current locale determines +the character encoding. This is the encoding of ‘<samp>char *</samp>’ strings. +We also call it the “locale encoding”. GNU libunistring has a function, +<code>locale_charset</code>, that returns a standardized (platform independent) +name for this encoding. +</p> +<p>All locale encodings used on glibc systems are essentially ASCII compatible: +Most graphic ASCII characters have the same representation, as a single byte, +in that encoding as in ASCII. +</p> +<p>Among the possible locale encodings are UTF-8 and GB18030. 
Both allow +to represent any Unicode character as a sequence of bytes. UTF-8 is used in +most of the world, whereas GB18030 is used in the People's Republic of China, +because it is backward compatible with the GB2312 encoding that was used in +this country earlier. +</p> +<p>The legacy locale encodings, ISO-8859-15 (which supplanted ISO-8859-1 in +most of Europe), ISO-8859-2, KOI8-R, EUC-JP, etc., are still in use in +many places, though. +</p> +<p>UTF-16 and UTF-32 are not used as locale encodings, because they are not +ASCII compatible. +</p> +<hr size="6"> +<a name="In_002dmemory-representation"></a> +<a name="SEC5"></a> +<h2 class="section"> <a href="libunistring.html#TOC5">1.4 Choice of in-memory representation of strings</a> </h2> + +<p>There are three ways of representing strings in memory of a running +program. +</p><ul class="toc"> +<li> +As ‘<samp>char *</samp>’ strings. Such strings are represented in locale encoding. +This approach is employed when not much text processing is done by the +program. When some Unicode aware processing is to be done, a string is +converted to Unicode on the fly and back to locale encoding afterwards. +</li><li> +As UTF-8 or UTF-16 or UTF-32 strings. This implies that conversion from +locale encoding to Unicode is performed on input, and in the opposite +direction on output. This approach is employed when the program does +a significant amount of text processing, or when the program has multiple +threads operating on the same data but in different locales. +</li><li> +As ‘<samp>wchar_t *</samp>’, a.k.a. “wide strings”. This approach is misguided, +see <a href="#SEC7">The <code>wchar_t</code> mess</a>. 
+</li></ul> + +<hr size="6"> +<a name="char-_002a-strings"></a> +<a name="SEC6"></a> +<h2 class="section"> <a href="libunistring.html#TOC6">1.5 ‘<samp>char *</samp>’ strings</a> </h2> + +<p>The classical C strings, with its C library support standardized by +ISO C and POSIX, can be used in internationalized programs with some +precautions. The problem with this API is that many of the C library +functions for strings don't work correctly on strings in locale +encodings, leading to bugs that only people in some cultures of the +world will experience. +</p> +<a name="IDX9"></a> +<p>The first problem with the C library API is the support of multibyte +locales. According to the locale encoding, in general, every character +is represented by one or more bytes (up to 4 bytes in practice — but +use <code>MB_LEN_MAX</code> instead of the number 4 in the code). +When every character is represented by only 1 byte, we speak of an +“unibyte locale”, otherwise of a “multibyte locale”. It is important +to realize that the majority of Unix installations nowadays use UTF-8 +or GB18030 as locale encoding; therefore, the majority of users are +using multibyte locales. +</p> +<a name="IDX10"></a> +<p>The important fact to remember is: +</p><table class="cartouche" border="1"><tr><td> +<p><em>A ‘<samp>char</samp>’ is a byte, not a character.</em> +</p></td></tr></table> + +<p>As a consequence: +</p><ul class="toc"> +<li> +The <code><ctype.h></code> API is useless in this context; it does not work in +multibyte locales. +</li><li> +The <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strlen.html"><code>strlen</code></a> function does not return the number of characters +in a string. Nor does it return the number of screen columns occupied +by a string after it is output. It merely returns the number of +<em>bytes</em> occupied by a string. 
+</li><li> +Truncating a string, for example, with <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strncpy.html"><code>strncpy</code></a>, can have the +effect of truncating it in the middle of a multibyte character. Such +a string will, when output, have a garbled character at its end, often +represented by a hollow box. +</li><li> +<a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strchr.html"><code>strchr</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strrchr.html"><code>strrchr</code></a> do not work with multibyte strings +if the locale encoding is GB18030 and the character to be searched is +a digit. +</li><li> +<a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strstr.html"><code>strstr</code></a> does not work with multibyte strings if the locale encoding +is different from UTF-8. +</li><li> +<a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcspn.html"><code>strcspn</code></a>, <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strpbrk.html"><code>strpbrk</code></a>, <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strspn.html"><code>strspn</code></a> cannot work +correctly in multibyte locales: they assume the second argument is a list of +single-byte characters. Even in this simple case, they do not work with +multibyte strings if the locale encoding is GB18030 and one of the +characters to be searched is a digit. +</li><li> +<a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strsep.html"><code>strsep</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strtok_r.html"><code>strtok_r</code></a> do not work with multibyte strings +unless all of the delimiter characters are ASCII characters < 0x30. 
+</li><li> +The <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcasecmp.html"><code>strcasecmp</code></a>, <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strncasecmp.html"><code>strncasecmp</code></a>, and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcasestr.html"><code>strcasestr</code></a> +functions do not work with multibyte strings. +</li></ul> + +<p>The workarounds can be found in GNU gnulib +<a href="http://www.gnu.org/software/gnulib/">http://www.gnu.org/software/gnulib/</a>. +</p><ul class="toc"> +<li> +gnulib has modules ‘<samp>mbchar</samp>’, ‘<samp>mbiter</samp>’, ‘<samp>mbuiter</samp>’ that +represent multibyte characters and allow to iterate across a multibyte +string with the same ease as through a unibyte string. +</li><li> +gnulib has functions <code>mbslen</code> and <code>mbswidth</code> that can be +used instead of <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strlen.html"><code>strlen</code></a> when the number of characters or the +number of screen columns of a string is requested. +</li><li> +gnulib has functions <code>mbschr</code> and <code>mbsrrchr</code> that are +like <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strchr.html"><code>strchr</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strrchr.html"><code>strrchr</code></a>, but work in multibyte locales. +</li><li> +gnulib has a function <code>mbsstr</code>, like <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strstr.html"><code>strstr</code></a>, but works +in multibyte locales. 
+</li><li> +gnulib has functions <code>mbscspn</code>, <code>mbspbrk</code>, <code>mbsspn</code> +that are like <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcspn.html"><code>strcspn</code></a>, <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strpbrk.html"><code>strpbrk</code></a>, <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strspn.html"><code>strspn</code></a>, but +work in multibyte locales. +</li><li> +gnulib has functions <code>mbssep</code> and <code>mbstok_r</code> that are +like <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strsep.html"><code>strsep</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strtok_r.html"><code>strtok_r</code></a> but work in multibyte locales. +</li><li> +gnulib has functions <code>mbscasecmp</code>, <code>mbsncasecmp</code>, +<code>mbspcasecmp</code>, and <code>mbscasestr</code> that are like <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcasecmp.html"><code>strcasecmp</code></a>, +<a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strncasecmp.html"><code>strncasecmp</code></a>, and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcasestr.html"><code>strcasestr</code></a>, but +work in multibyte locales. Still, the function <code>ulc_casecmp</code> is +preferable to these functions; see below. +</li></ul> + +<p>The second problem with the C library API is that it has some assumptions built-in that are not valid in some languages: +</p><ul class="toc"> +<li> +It assumes that there are only two forms of every character: uppercase +and lowercase. This is not true for Croatian, where the character +<small>LETTER DZ WITH CARON</small> comes in three forms: +<small>LATIN CAPITAL LETTER DZ WITH CARON</small> (DZ), +<small>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</small> (Dz), +<small>LATIN SMALL LETTER DZ WITH CARON</small> (dz). 
+</li><li> +It assumes that uppercasing of 1 character leads to 1 character. This +is not true for German, where the <small>LATIN SMALL LETTER SHARP S</small>, when +uppercased, becomes ‘<samp>SS</samp>’. +</li><li> +It assumes that there is a 1:1 mapping between uppercase and lowercase forms. +This is not true for the Greek sigma: <small>GREEK CAPITAL LETTER SIGMA</small> is +the uppercase of both <small>GREEK SMALL LETTER SIGMA</small> and +<small>GREEK SMALL LETTER FINAL SIGMA</small>. +</li><li> +It assumes that the upper/lowercase mappings are position independent. +This is not true for the Greek sigma and the Lithuanian i. +</li></ul> + +<p>The correct way to deal with this problem is +</p><ol> +<li> +to provide functions for titlecasing, as well as for upper- and +lowercasing, +</li><li> +to view case transformations as functions that operate on strings, +rather than on characters. +</li></ol> + +<p>This is implemented in this library, through the functions declared in <code><unicase.h></code>, see <a href="libunistring_13.html#SEC48">Case mappings <code><unicase.h></code></a>. +</p> +<hr size="6"> +<a name="The-wchar_005ft-mess"></a> +<a name="SEC7"></a> +<h2 class="section"> <a href="libunistring.html#TOC7">1.6 The <code>wchar_t</code> mess</a> </h2> + +<p>The ISO C and POSIX standard creators made an attempt to fix the first +problem mentioned in the previous section. They introduced +</p><ul class="toc"> +<li> +a type ‘<samp>wchar_t</samp>’, designed to encapsulate an entire character, +</li><li> +a “wide string” type ‘<samp>wchar_t *</samp>’, and +</li><li> +functions declared in <code><wctype.h></code> that were meant to supplant the +ones in <code><ctype.h></code>. +</li></ul> + +<p>Unfortunately, this API and its implementation have numerous problems: +</p> +<ul class="toc"> +<li> +On AIX and Windows platforms, <code>wchar_t</code> is a 16-bit type. This +means that it can never accommodate an entire Unicode character. 
Either +the <code>wchar_t *</code> strings are limited to characters in UCS-2 (the +“Basic Multilingual Plane” of Unicode), or — if <code>wchar_t *</code> +strings are encoded in UTF-16 — a <code>wchar_t</code> represents only half +of a character in the worst case, making the <code><wctype.h></code> functions +pointless. + +</li><li> +On Solaris and FreeBSD, the <code>wchar_t</code> encoding is locale dependent +and undocumented. This means, if you want to know any property of a +<code>wchar_t</code> character, other than the properties defined by +<code><wctype.h></code> — such as whether it's a dash, currency symbol, +paragraph separator, or similar —, you have to convert it to +<code>char *</code> encoding first, by use of the function <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wctomb.html"><code>wctomb</code></a>. + +</li><li> +When you read a stream of wide characters, through the functions +<a href="http://www.opengroup.org/onlinepubs/9699919799/functions/fgetwc.html"><code>fgetwc</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/fgetws.html"><code>fgetws</code></a>, and when the input stream/file is +not in the expected encoding, you have no way to determine the invalid +byte sequence and do some corrective action. If you use these +functions, your program becomes “garbage in - more garbage out” or +“garbage in - abort”. +</li></ul> + +<p>As a consequence, it is better to use multibyte strings, as explained in +the previous section. Such multibyte strings can bypass limitations +of the <code>wchar_t</code> type, if you use functions defined in gnulib and +libunistring for text processing. They can also faithfully transport +malformed characters that were present in the input, without requiring +the program to produce garbage or abort. 
+</p> +<hr size="6"> +<a name="Unicode-strings"></a> +<a name="SEC8"></a> +<h2 class="section"> <a href="libunistring.html#TOC8">1.7 Unicode strings</a> </h2> + +<p>libunistring supports Unicode strings in three representations: +<a name="IDX11"></a> +<a name="IDX12"></a> +<a name="IDX13"></a> +</p><ul class="toc"> +<li> +UTF-8 strings, through the type ‘<samp>uint8_t *</samp>’. The units are bytes +(<code>uint8_t</code>). +</li><li> +UTF-16 strings, through the type ‘<samp>uint16_t *</samp>’. The units are 16-bit +memory words (<code>uint16_t</code>). +</li><li> +UTF-32 strings, through the type ‘<samp>uint32_t *</samp>’. The units are 32-bit +memory words (<code>uint32_t</code>). +</li></ul> + +<p>As with C strings, there are two variants: +</p><ul class="toc"> +<li> +Unicode strings with a terminating NUL character are represented as +a pointer to the first unit of the string. There is a unit containing +a 0 value at the end. It is considered part of the string for all +memory allocation purposes, but is not considered part of the string +for all other logical purposes. +</li><li> +Unicode strings where embedded NUL characters are allowed. These +are represented by a pointer to the first unit and the number of units +(not bytes!) of the string. In this setting, there is no trailing +zero-valued unit used as “end marker”. 
+</li></ul> + +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC1" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_2.html#SEC9" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_10.html b/doc/libunistring_10.html new file mode 100644 index 00000000..bf22ca1b --- /dev/null +++ b/doc/libunistring_10.html @@ -0,0 +1,192 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 10. Word breaks in strings <uniwbrk.h></title> + +<meta name="description" content="GNU libunistring: 10. 
Word breaks in strings <uniwbrk.h>"> +<meta name="keywords" content="GNU libunistring: 10. Word breaks in strings <uniwbrk.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_9.html#SEC37" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_11.html#SEC41" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="uniwbrk_002eh"></a> +<a name="SEC38"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC38">10. Word breaks in strings <code><uniwbrk.h></code></a> </h1> + +<p>This include file declares functions for determining where in a string +“words” start and end. Here “words” are not necessarily the same as +entities that can be looked up in dictionaries, but rather groups of +consecutive characters that should not be split by text processing +operations. +</p> + +<hr size="6"> +<a name="Word-breaks-in-a-string"></a> +<a name="SEC39"></a> +<h2 class="section"> <a href="libunistring.html#TOC39">10.1 Word breaks in a string</a> </h2> + +<p>The following functions determine the word breaks in a string. +</p> +<dl> +<dt><u>Function:</u> void <b>u8_wordbreaks</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, char *<var>p</var>)</i> +<a name="IDX615"></a> +</dt> +<dt><u>Function:</u> void <b>u16_wordbreaks</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, char *<var>p</var>)</i> +<a name="IDX616"></a> +</dt> +<dt><u>Function:</u> void <b>u32_wordbreaks</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, char *<var>p</var>)</i> +<a name="IDX617"></a> +</dt> +<dt><u>Function:</u> void <b>ulc_wordbreaks</b><i> (const char *<var>s</var>, size_t <var>n</var>, char *<var>p</var>)</i> +<a name="IDX618"></a> +</dt> +<dd><p>Determines the word break points in <var>s</var>, an array of <var>n</var> units, and +stores the result at <code><var>p</var>[0..<var>n</var>-1]</code>. +</p><dl compact="compact"> +<dt> <code><var>p</var>[i] = 1</code></dt> +<dd><p>means that there is a word boundary between <code><var>s</var>[i-1]</code> and +<code><var>s</var>[i]</code>. +</p></dd> +<dt> <code><var>p</var>[i] = 0</code></dt> +<dd><p>means that <code><var>s</var>[i-1]</code> and <code><var>s</var>[i]</code> must not be separated. +</p></dd> +</dl> +<p><code><var>p</var>[0]</code> is always set to 0. 
If an application wants to consider a +word break to be present at the beginning of the string (before +<code><var>s</var>[0]</code>) or at the end of the string (after +<code><var>s</var>[0..<var>n</var>-1]</code>), it has to treat these cases explicitly. +</p></dd></dl> + +<hr size="6"> +<a name="Word-break-property"></a> +<a name="SEC40"></a> +<h2 class="section"> <a href="libunistring.html#TOC40">10.2 Word break property</a> </h2> + +<p>This is a more low-level API. The word break property is a property defined +in Unicode Standard Annex #29, section “Word Boundaries”, see +<a href="http://www.unicode.org/reports/tr29/#Word_Boundaries">http://www.unicode.org/reports/tr29/#Word_Boundaries</a>. It is +used for determining the word breaks in a string. +</p> +<p>The following are the possible values of the word break property. More values +may be added in the future. +</p> +<dl> +<dt><u>Constant:</u> int <b>WBP_OTHER</b> +<a name="IDX619"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_CR</b> +<a name="IDX620"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_LF</b> +<a name="IDX621"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_NEWLINE</b> +<a name="IDX622"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_EXTEND</b> +<a name="IDX623"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_FORMAT</b> +<a name="IDX624"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_KATAKANA</b> +<a name="IDX625"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_ALETTER</b> +<a name="IDX626"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_MIDNUMLET</b> +<a name="IDX627"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_MIDLETTER</b> +<a name="IDX628"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_MIDNUM</b> +<a name="IDX629"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_NUMERIC</b> +<a name="IDX630"></a> +</dt> +<dt><u>Constant:</u> int <b>WBP_EXTENDNUMLET</b> +<a name="IDX631"></a> +</dt> +</dl> + +<p>The following function looks up the word break property of a character. 
+</p> +<dl> +<dt><u>Function:</u> int <b>uc_wordbreak_property</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX632"></a> +</dt> +<dd><p>Returns the Word_Break property of a Unicode character. +</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC38" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_11.html#SEC41" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_11.html b/doc/libunistring_11.html new file mode 100644 index 00000000..b0016788 --- /dev/null +++ b/doc/libunistring_11.html @@ -0,0 +1,200 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. 
+Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 11. Line breaking <unilbrk.h></title> + +<meta name="description" content="GNU libunistring: 11. Line breaking <unilbrk.h>"> +<meta name="keywords" content="GNU libunistring: 11. Line breaking <unilbrk.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_10.html#SEC38" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_12.html#SEC42" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a 
href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="unilbrk_002eh"></a> +<a name="SEC41"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC41">11. Line breaking <code><unilbrk.h></code></a> </h1> + +<p>This include file declares functions for determining where in a string +line breaks could or should be introduced, in order to make the displayed +string fit into a column of given width. +</p> +<p>These functions are locale dependent. The <var>encoding</var> argument identifies +the encoding (e.g. <code>"ISO-8859-2"</code> for Polish). +</p> +<p>The following enumerated values indicate whether, at a given position, a line +break is possible or not. Given a string <var>s</var> as an array +<code><var>s</var>[0..<var>n</var>-1]</code> and a position <var>i</var>, the values have the +following meanings: +</p> +<dl> +<dt><u>Constant:</u> int <b>UC_BREAK_MANDATORY</b> +<a name="IDX633"></a> +</dt> +<dd><p>This value indicates that <code><var>s</var>[<var>i</var>]</code> is a line break character. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BREAK_POSSIBLE</b> +<a name="IDX634"></a> +</dt> +<dd><p>This value indicates that a line break may be inserted between +<code><var>s</var>[<var>i</var>-1]</code> and <code><var>s</var>[<var>i</var>]</code>. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BREAK_HYPHENATION</b> +<a name="IDX635"></a> +</dt> +<dd><p>This value indicates that a hyphen and a line break may be inserted between +<code><var>s</var>[<var>i</var>-1]</code> and <code><var>s</var>[<var>i</var>]</code>. But beware of language +dependent hyphenation rules. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BREAK_PROHIBITED</b> +<a name="IDX636"></a> +</dt> +<dd><p>This value indicates that <code><var>s</var>[<var>i</var>-1]</code> and <code><var>s</var>[<var>i</var>]</code> +must not be separated. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BREAK_UNDEFINED</b> +<a name="IDX637"></a> +</dt> +<dd><p>This value is not used as a return value; rather, in the overriding argument of +the <code>u*_width_linebreaks</code> functions, it indicates the absence of an +override. +</p></dd></dl> + +<p>The following functions determine the positions at which line breaks are +possible. +</p> +<dl> +<dt><u>Function:</u> void <b>u8_possible_linebreaks</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX638"></a> +</dt> +<dt><u>Function:</u> void <b>u16_possible_linebreaks</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX639"></a> +</dt> +<dt><u>Function:</u> void <b>u32_possible_linebreaks</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX640"></a> +</dt> +<dt><u>Function:</u> void <b>ulc_possible_linebreaks</b><i> (const char *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX641"></a> +</dt> +<dd><p>Determines the line break points in <var>s</var>, and stores the result at +<code><var>p</var>[0..<var>n</var>-1]</code>. Every <code><var>p</var>[<var>i</var>]</code> is assigned one of +the values <code>UC_BREAK_MANDATORY</code>, <code>UC_BREAK_POSSIBLE</code>, +<code>UC_BREAK_HYPHENATION</code>, <code>UC_BREAK_PROHIBITED</code>. +</p></dd></dl> + +<p>The following functions determine where line breaks should be inserted so that +each line fits in a given width, when output to a device that uses +non-proportional fonts. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_width_linebreaks</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, int <var>width</var>, int <var>start_column</var>, int <var>at_end_columns</var>, const char *<var>override</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX642"></a> +</dt> +<dt><u>Function:</u> int <b>u16_width_linebreaks</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, int <var>width</var>, int <var>start_column</var>, int <var>at_end_columns</var>, const char *<var>override</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX643"></a> +</dt> +<dt><u>Function:</u> int <b>u32_width_linebreaks</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, int <var>width</var>, int <var>start_column</var>, int <var>at_end_columns</var>, const char *<var>override</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX644"></a> +</dt> +<dt><u>Function:</u> int <b>ulc_width_linebreaks</b><i> (const char *<var>s</var>, size_t <var>n</var>, int <var>width</var>, int <var>start_column</var>, int <var>at_end_columns</var>, const char *<var>override</var>, const char *<var>encoding</var>, char *<var>p</var>)</i> +<a name="IDX645"></a> +</dt> +<dd><p>Chooses the best line breaks, assuming that every character occupies a width +given by the <code>uc_width</code> function (see <a href="libunistring_9.html#SEC37">Display width <code><uniwidth.h></code></a>). +</p> +<p>The string is <code><var>s</var>[0..<var>n</var>-1]</code>. +</p> +<p>The maximum number of columns per line is given as <var>width</var>. +The starting column of the string is given as <var>start_column</var>. +If the algorithm shall keep room after the last piece, this amount of room can +be given as <var>at_end_columns</var>. 
+</p> +<p><var>override</var> is an optional override; if +<code><var>override</var>[<var>i</var>] != UC_BREAK_UNDEFINED</code>, +<code><var>override</var>[<var>i</var>]</code> takes precedence over <code><var>p</var>[<var>i</var>]</code> +as returned by the <code>u*_possible_linebreaks</code> function. +</p> +<p>The given <var>encoding</var> is used for disambiguating widths in <code>uc_width</code>. +</p> +<p>Returns the column after the end of the string, and stores the result at +<code><var>p</var>[0..<var>n</var>-1]</code>. Every <code><var>p</var>[<var>i</var>]</code> is assigned one of +the values <code>UC_BREAK_MANDATORY</code>, <code>UC_BREAK_POSSIBLE</code>, +<code>UC_BREAK_HYPHENATION</code>, <code>UC_BREAK_PROHIBITED</code>. Here the value +<code>UC_BREAK_POSSIBLE</code> indicates that a line break <em>should</em> be inserted. +</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_10.html#SEC38" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_12.html#SEC42" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_12.html b/doc/libunistring_12.html new file mode 100644 index 00000000..0bf1e933 --- /dev/null +++ b/doc/libunistring_12.html @@ -0,0 +1,507 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 12. Normalization forms (composition and decomposition) <uninorm.h></title> + +<meta name="description" content="GNU libunistring: 12. Normalization forms (composition and decomposition) <uninorm.h>"> +<meta name="keywords" content="GNU libunistring: 12. 
Normalization forms (composition and decomposition) <uninorm.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_11.html#SEC41" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_13.html#SEC48" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="uninorm_002eh"></a> +<a name="SEC42"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC42">12. Normalization forms (composition and decomposition) <code><uninorm.h></code></a> </h1> + +<p>This include file defines functions for transforming Unicode strings to one +of the four normal forms, known as NFC, NFD, NFKC, NFKD. These +transformations involve decomposition and — for NFC and NFKC — composition +of Unicode characters. +</p> + +<hr size="6"> +<a name="Decomposition-of-characters"></a> +<a name="SEC43"></a> +<h2 class="section"> <a href="libunistring.html#TOC43">12.1 Decomposition of Unicode characters</a> </h2> + +<p>The following enumerated values are the possible types of decomposition of a +Unicode character. +</p> +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_CANONICAL</b> +<a name="IDX646"></a> +</dt> +<dd><p>Denotes canonical decomposition. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_FONT</b> +<a name="IDX647"></a> +</dt> +<dd><p>UCD marker: <code><font></code>. Denotes a font variant (e.g. a blackletter form). +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_NOBREAK</b> +<a name="IDX648"></a> +</dt> +<dd><p>UCD marker: <code><noBreak></code>. +Denotes a no-break version of a space or hyphen. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_INITIAL</b> +<a name="IDX649"></a> +</dt> +<dd><p>UCD marker: <code><initial></code>. +Denotes an initial presentation form (Arabic). +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_MEDIAL</b> +<a name="IDX650"></a> +</dt> +<dd><p>UCD marker: <code><medial></code>. +Denotes a medial presentation form (Arabic). +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_FINAL</b> +<a name="IDX651"></a> +</dt> +<dd><p>UCD marker: <code><final></code>. +Denotes a final presentation form (Arabic). 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_ISOLATED</b> +<a name="IDX652"></a> +</dt> +<dd><p>UCD marker: <code><isolated></code>. +Denotes an isolated presentation form (Arabic). +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_CIRCLE</b> +<a name="IDX653"></a> +</dt> +<dd><p>UCD marker: <code><circle></code>. +Denotes an encircled form. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_SUPER</b> +<a name="IDX654"></a> +</dt> +<dd><p>UCD marker: <code><super></code>. +Denotes a superscript form. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_SUB</b> +<a name="IDX655"></a> +</dt> +<dd><p>UCD marker: <code><sub></code>. +Denotes a subscript form. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_VERTICAL</b> +<a name="IDX656"></a> +</dt> +<dd><p>UCD marker: <code><vertical></code>. +Denotes a vertical layout presentation form. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_WIDE</b> +<a name="IDX657"></a> +</dt> +<dd><p>UCD marker: <code><wide></code>. +Denotes a wide (or zenkaku) compatibility character. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_NARROW</b> +<a name="IDX658"></a> +</dt> +<dd><p>UCD marker: <code><narrow></code>. +Denotes a narrow (or hankaku) compatibility character. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_SMALL</b> +<a name="IDX659"></a> +</dt> +<dd><p>UCD marker: <code><small></code>. +Denotes a small variant form (CNS compatibility). +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_SQUARE</b> +<a name="IDX660"></a> +</dt> +<dd><p>UCD marker: <code><square></code>. +Denotes a CJK squared font variant. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_FRACTION</b> +<a name="IDX661"></a> +</dt> +<dd><p>UCD marker: <code><fraction></code>. +Denotes a vulgar fraction form. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_DECOMP_COMPAT</b> +<a name="IDX662"></a> +</dt> +<dd><p>UCD marker: <code><compat></code>. +Denotes an otherwise unspecified compatibility character. +</p></dd></dl> + +<p>The following constant denotes the maximum size of decomposition of a single +Unicode character. +</p> +<dl> +<dt><u>Macro:</u> unsigned int <b>UC_DECOMPOSITION_MAX_LENGTH</b> +<a name="IDX663"></a> +</dt> +<dd><p>This macro expands to a constant that is the required size of the buffer passed to +the <code>uc_decomposition</code> and <code>uc_canonical_decomposition</code> functions. +</p></dd></dl> + +<p>The following functions decompose a Unicode character. +</p> +<dl> +<dt><u>Function:</u> int <b>uc_decomposition</b><i> (ucs4_t <var>uc</var>, int *<var>decomp_tag</var>, ucs4_t *<var>decomposition</var>)</i> +<a name="IDX664"></a> +</dt> +<dd><p>Returns the character decomposition mapping of the Unicode character <var>uc</var>. +<var>decomposition</var> must point to an array of at least +<code>UC_DECOMPOSITION_MAX_LENGTH</code> <code>ucs4_t</code> elements. +</p> +<p>When a decomposition exists, <code><var>decomposition</var>[0..<var>n</var>-1]</code> and +<code>*<var>decomp_tag</var></code> are filled and <var>n</var> is returned. Otherwise -1 is +returned. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>uc_canonical_decomposition</b><i> (ucs4_t <var>uc</var>, ucs4_t *<var>decomposition</var>)</i> +<a name="IDX665"></a> +</dt> +<dd><p>Returns the canonical character decomposition mapping of the Unicode character +<var>uc</var>. <var>decomposition</var> must point to an array of at least +<code>UC_DECOMPOSITION_MAX_LENGTH</code> <code>ucs4_t</code> elements. +</p> +<p>When a decomposition exists, <code><var>decomposition</var>[0..<var>n</var>-1]</code> is filled +and <var>n</var> is returned. Otherwise -1 is returned. 
+</p></dd></dl> + +<hr size="6"> +<a name="Composition-of-characters"></a> +<a name="SEC44"></a> +<h2 class="section"> <a href="libunistring.html#TOC44">12.2 Composition of Unicode characters</a> </h2> + +<p>The following function composes a Unicode character from two Unicode +characters. +</p> +<dl> +<dt><u>Function:</u> ucs4_t <b>uc_composition</b><i> (ucs4_t <var>uc1</var>, ucs4_t <var>uc2</var>)</i> +<a name="IDX666"></a> +</dt> +<dd><p>Attempts to combine the Unicode characters <var>uc1</var>, <var>uc2</var>. +<var>uc1</var> is known to have canonical combining class 0. +</p> +<p>Returns the combination of <var>uc1</var> and <var>uc2</var>, if it exists. +Returns 0 otherwise. +</p> +<p>Not all decompositions can be recombined using this function. See the Unicode +file ‘<tt>CompositionExclusions.txt</tt>’ for details. +</p></dd></dl> + +<hr size="6"> +<a name="Normalization-of-strings"></a> +<a name="SEC45"></a> +<h2 class="section"> <a href="libunistring.html#TOC45">12.3 Normalization of strings</a> </h2> + +<p>The Unicode standard defines four normalization forms for Unicode strings. +The following type is used to denote a normalization form. +</p> +<dl> +<dt><u>Type:</u> <b>uninorm_t</b> +<a name="IDX667"></a> +</dt> +<dd><p>An object of type <code>uninorm_t</code> denotes a Unicode normalization form. +This is a scalar type; its values can be compared with <code>==</code>. +</p></dd></dl> + +<p>The following constants denote the four normalization forms. +</p> +<dl> +<dt><u>Macro:</u> uninorm_t <b>UNINORM_NFD</b> +<a name="IDX668"></a> +</dt> +<dd><p>Denotes Normalization form D: canonical decomposition. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uninorm_t <b>UNINORM_NFC</b> +<a name="IDX669"></a> +</dt> +<dd><p>Normalization form C: canonical decomposition, then canonical composition. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uninorm_t <b>UNINORM_NFKD</b> +<a name="IDX670"></a> +</dt> +<dd><p>Normalization form KD: compatibility decomposition. 
+</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uninorm_t <b>UNINORM_NFKC</b> +<a name="IDX671"></a> +</dt> +<dd><p>Normalization form KC: compatibility decomposition, then canonical composition. +</p></dd></dl> + +<p>The following functions operate on <code>uninorm_t</code> objects. +</p> +<dl> +<dt><u>Function:</u> bool <b>uninorm_is_compat_decomposing</b><i> (uninorm_t <var>nf</var>)</i> +<a name="IDX672"></a> +</dt> +<dd><p>Tests whether the normalization form <var>nf</var> does compatibility decomposition. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uninorm_is_composing</b><i> (uninorm_t <var>nf</var>)</i> +<a name="IDX673"></a> +</dt> +<dd><p>Tests whether the normalization form <var>nf</var> includes canonical composition. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uninorm_t <b>uninorm_decomposing_form</b><i> (uninorm_t <var>nf</var>)</i> +<a name="IDX674"></a> +</dt> +<dd><p>Returns the decomposing variant of the normalization form <var>nf</var>. +This maps NFC,NFD → NFD and NFKC,NFKD → NFKD. +</p></dd></dl> + +<p>The following functions apply a Unicode normalization form to a Unicode string. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_normalize</b><i> (uninorm_t <var>nf</var>, const uint8_t *<var>s</var>, size_t <var>n</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX675"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_normalize</b><i> (uninorm_t <var>nf</var>, const uint16_t *<var>s</var>, size_t <var>n</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX676"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_normalize</b><i> (uninorm_t <var>nf</var>, const uint32_t *<var>s</var>, size_t <var>n</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX677"></a> +</dt> +<dd><p>Returns the specified normalization form of a string. 
+</p></dd></dl> + +<hr size="6"> +<a name="Normalizing-comparisons"></a> +<a name="SEC46"></a> +<h2 class="section"> <a href="libunistring.html#TOC46">12.4 Normalizing comparisons</a> </h2> + +<p>The following functions compare Unicode strings, ignoring differences in +normalization. +</p> +<dl> +<dt><u>Function:</u> int <b>u8_normcmp</b><i> (const uint8_t *<var>s1</var>, size_t <var>n1</var>, const uint8_t *<var>s2</var>, size_t <var>n2</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX678"></a> +</dt> +<dt><u>Function:</u> int <b>u16_normcmp</b><i> (const uint16_t *<var>s1</var>, size_t <var>n1</var>, const uint16_t *<var>s2</var>, size_t <var>n2</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX679"></a> +</dt> +<dt><u>Function:</u> int <b>u32_normcmp</b><i> (const uint32_t *<var>s1</var>, size_t <var>n1</var>, const uint32_t *<var>s2</var>, size_t <var>n2</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX680"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, ignoring differences in normalization. +</p> +<p><var>nf</var> must be either <code>UNINORM_NFD</code> or <code>UNINORM_NFKD</code>. +</p> +<p>If successful, sets <code>*<var>resultp</var></code> to -1 if <var>s1</var> < <var>s2</var>, +0 if <var>s1</var> = <var>s2</var>, 1 if <var>s1</var> > <var>s2</var>, and returns 0. +Upon failure, returns -1 with <code>errno</code> set. 
+</p></dd></dl> + +<a name="IDX681"></a> +<a name="IDX682"></a> +<dl> +<dt><u>Function:</u> char * <b>u8_normxfrm</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX683"></a> +</dt> +<dt><u>Function:</u> char * <b>u16_normxfrm</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX684"></a> +</dt> +<dt><u>Function:</u> char * <b>u32_normxfrm</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX685"></a> +</dt> +<dd><p>Converts the string <var>s</var> of length <var>n</var> to a NUL-terminated byte +sequence, in such a way that comparing <code>u8_normxfrm (<var>s1</var>)</code> and +<code>u8_normxfrm (<var>s2</var>)</code> with the <code>u8_cmp2</code> function is equivalent to +comparing <var>s1</var> and <var>s2</var> with the <code>u8_normcoll</code> function. +</p> +<p><var>nf</var> must be either <code>UNINORM_NFC</code> or <code>UNINORM_NFKC</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_normcoll</b><i> (const uint8_t *<var>s1</var>, size_t <var>n1</var>, const uint8_t *<var>s2</var>, size_t <var>n2</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX686"></a> +</dt> +<dt><u>Function:</u> int <b>u16_normcoll</b><i> (const uint16_t *<var>s1</var>, size_t <var>n1</var>, const uint16_t *<var>s2</var>, size_t <var>n2</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX687"></a> +</dt> +<dt><u>Function:</u> int <b>u32_normcoll</b><i> (const uint32_t *<var>s1</var>, size_t <var>n1</var>, const uint32_t *<var>s2</var>, size_t <var>n2</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX688"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, ignoring differences in normalization, using +the collation rules of the current locale. +</p> +<p><var>nf</var> must be either <code>UNINORM_NFC</code> or <code>UNINORM_NFKC</code>. +</p> +<p>If successful, sets <code>*<var>resultp</var></code> to -1 if <var>s1</var> < <var>s2</var>, +0 if <var>s1</var> = <var>s2</var>, 1 if <var>s1</var> > <var>s2</var>, and returns 0. +Upon failure, returns -1 with <code>errno</code> set. +</p></dd></dl> + +<hr size="6"> +<a name="Normalization-of-streams"></a> +<a name="SEC47"></a> +<h2 class="section"> <a href="libunistring.html#TOC47">12.5 Normalization of streams of Unicode characters</a> </h2> + +<p>A “stream of Unicode characters” is essentially a function that accepts an +<code>ucs4_t</code> argument repeatedly, optionally combined with a function that +“flushes” the stream. +</p> +<dl> +<dt><u>Type:</u> <b>struct uninorm_filter</b> +<a name="IDX689"></a> +</dt> +<dd><p>This is the data type of a stream of Unicode characters that normalizes its +input according to a given normalization form and passes the normalized +character sequence to the encapsulated stream of Unicode characters. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> struct uninorm_filter * <b>uninorm_filter_create</b><i> (uninorm_t <var>nf</var>, int (*<var>stream_func</var>) (void *<var>stream_data</var>, ucs4_t <var>uc</var>), void *<var>stream_data</var>)</i> +<a name="IDX690"></a> +</dt> +<dd><p>Creates and returns a normalization filter for Unicode characters. +</p> +<p>The pair (<var>stream_func</var>, <var>stream_data</var>) is the encapsulated stream. +<code><var>stream_func</var> (<var>stream_data</var>, <var>uc</var>)</code> receives the Unicode +character <var>uc</var> and returns 0 if successful, or -1 with <code>errno</code> set +upon failure. +</p> +<p>Returns the new filter, or NULL with <code>errno</code> set upon failure. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>uninorm_filter_write</b><i> (struct uninorm_filter *<var>filter</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX691"></a> +</dt> +<dd><p>Stuffs a Unicode character into a normalizing filter. +Returns 0 if successful, or -1 with <code>errno</code> set upon failure. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>uninorm_filter_flush</b><i> (struct uninorm_filter *<var>filter</var>)</i> +<a name="IDX692"></a> +</dt> +<dd><p>Brings data buffered in the filter to its destination, the encapsulated stream. +</p> +<p>Returns 0 if successful, or -1 with <code>errno</code> set upon failure. +</p> +<p>Note! If after calling this function, additional characters are written +into the filter, the resulting character sequence in the encapsulated stream +will not necessarily be normalized. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>uninorm_filter_free</b><i> (struct uninorm_filter *<var>filter</var>)</i> +<a name="IDX693"></a> +</dt> +<dd><p>Brings data buffered in the filter to its destination, the encapsulated stream, +then closes and frees the filter. +</p> +<p>Returns 0 if successful, or -1 with <code>errno</code> set upon failure. 
+</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC42" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_13.html#SEC48" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_13.html b/doc/libunistring_13.html new file mode 100644 index 00000000..1597ec11 --- /dev/null +++ b/doc/libunistring_13.html @@ -0,0 +1,611 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 13. Case mappings <unicase.h></title> + +<meta name="description" content="GNU libunistring: 13. 
Case mappings <unicase.h>"> +<meta name="keywords" content="GNU libunistring: 13. Case mappings <unicase.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_12.html#SEC42" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_14.html#SEC54" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="unicase_002eh"></a> +<a name="SEC48"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC48">13. Case mappings <code><unicase.h></code></a> </h1> + +<p>This include file defines functions for case mapping for Unicode strings and +case insensitive comparison of Unicode strings and C strings. +</p> +<p>These string functions fix the problems that were mentioned in +<a href="libunistring_1.html#SEC6">‘<samp>char *</samp>’ strings</a>, namely, they handle the Croatian +<small>LETTER DZ WITH CARON</small>, the German <small>LATIN SMALL LETTER SHARP S</small>, the +Greek sigma and the Lithuanian i correctly. +</p> + +<hr size="6"> +<a name="Case-mappings-of-characters"></a> +<a name="SEC49"></a> +<h2 class="section"> <a href="libunistring.html#TOC49">13.1 Case mappings of characters</a> </h2> + +<p>The following functions implement case mappings on Unicode characters — +for those cases only where the result of the mapping is again a single +Unicode character. +</p> +<p>These mappings are locale and context independent. +</p> +<table class="cartouche" border="1"><tr><td> +<p><strong>WARNING!</strong> These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that treat an +entire string at once and are language aware. +</p></td></tr></table> + +<dl> +<dt><u>Function:</u> ucs4_t <b>uc_toupper</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX694"></a> +</dt> +<dd><p>Returns the uppercase mapping of the Unicode character <var>uc</var>. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> ucs4_t <b>uc_tolower</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX695"></a> +</dt> +<dd><p>Returns the lowercase mapping of the Unicode character <var>uc</var>. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> ucs4_t <b>uc_totitle</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX696"></a> +</dt> +<dd><p>Returns the titlecase mapping of the Unicode character <var>uc</var>. 
+</p> +<p>The titlecase mapping of a character is to be used when the character should +look like upper case and the following characters are lower cased. +</p> +<p>For most characters, this is the same as the uppercase mapping. There are +only a few characters where the title case variant and the upper case variant +are different. These characters occur in the Latin writing of the Croatian, +Bosnian, and Serbian languages. +</p> +<table> +<thead><tr><th><p> Lower case </p></th><th><p> Title case </p></th><th><p> Upper case +</p></th></tr></thead> +<tr><td><p> LATIN SMALL LETTER LJ + </p></td><td><p> LATIN CAPITAL LETTER L WITH SMALL LETTER J + </p></td><td><p> LATIN CAPITAL LETTER LJ +</p></td></tr> +<tr><td><p> LATIN SMALL LETTER NJ + </p></td><td><p> LATIN CAPITAL LETTER N WITH SMALL LETTER J + </p></td><td><p> LATIN CAPITAL LETTER NJ +</p></td></tr> +<tr><td><p> LATIN SMALL LETTER DZ + </p></td><td><p> LATIN CAPITAL LETTER D WITH SMALL LETTER Z + </p></td><td><p> LATIN CAPITAL LETTER DZ +</p></td></tr> +<tr><td><p> LATIN SMALL LETTER DZ WITH CARON + </p></td><td><p> LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + </p></td><td><p> LATIN CAPITAL LETTER DZ WITH CARON +</p></td></tr> +</table> +</dd></dl> + +<hr size="6"> +<a name="Case-mappings-of-strings"></a> +<a name="SEC50"></a> +<h2 class="section"> <a href="libunistring.html#TOC50">13.2 Case mappings of strings</a> </h2> + +<p>Case mapping should always be performed on entire strings, not on individual +characters. The functions in this section do so. +</p> +<p>These functions allow you to apply a normalization after the case mapping. The +reason is that if you want to treat ‘<samp>ä</samp>’ and ‘<samp>Ä</samp>’ the same, +you most often also want to treat the composed and decomposed forms of such +a character, U+00C4 <small>LATIN CAPITAL LETTER A WITH DIAERESIS</small> and +U+0041 <small>LATIN CAPITAL LETTER A</small> U+0308 <small>COMBINING DIAERESIS</small> the same. 
+The <var>nf</var> argument designates the normalization. +</p> +<a name="IDX697"></a> +<p>These functions are locale dependent. The <var>iso639_language</var> argument +identifies the language (e.g. <code>"tr"</code> for Turkish). NULL means to use +locale independent case mappings. +</p> +<dl> +<dt><u>Function:</u> const char * <b>uc_locale_language</b><i> ()</i> +<a name="IDX698"></a> +</dt> +<dd><p>Returns the ISO 639 language code of the current locale. +Returns <code>""</code> if it is unknown, or in the "C" locale. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_toupper</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX699"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_toupper</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX700"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_toupper</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX701"></a> +</dt> +<dd><p>Returns the uppercase mapping of a string. +</p> +<p>The <var>nf</var> argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_tolower</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX702"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_tolower</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX703"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_tolower</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX704"></a> +</dt> +<dd><p>Returns the lowercase mapping of a string. +</p> +<p>The <var>nf</var> argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_totitle</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX705"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_totitle</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX706"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_totitle</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX707"></a> +</dt> +<dd><p>Returns the titlecase mapping of a string. 
+</p> +<p>Mapping to title case means that, in each word, the first cased character +is being mapped to title case and the remaining characters of the word +are being mapped to lower case. +</p> +<p>The <var>nf</var> argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +</p></dd></dl> + +<hr size="6"> +<a name="Case-mappings-of-substrings"></a> +<a name="SEC51"></a> +<h2 class="section"> <a href="libunistring.html#TOC51">13.3 Case mappings of substrings</a> </h2> + +<p>Case mapping of a substring cannot simply be performed by extracting the +substring and then applying the case mapping function to it. This does not +work because case mapping requires some information about the surrounding +characters. The following functions allow to apply case mappings to +substrings of a given string, while taking into account the characters that +precede it (the “prefix”) and the characters that follow it (the “suffix”). +</p> +<dl> +<dt><u>Type:</u> <b>casing_prefix_context_t</b> +<a name="IDX708"></a> +</dt> +<dd><p>This data type denotes the case-mapping context that is given by a prefix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> casing_prefix_context_t <b>unicase_empty_prefix_context</b> +<a name="IDX709"></a> +</dt> +<dd><p>This constant is the case-mapping context that corresponds to an empty prefix +string. 
+</p></dd></dl> + +<p>The following functions return <code>casing_prefix_context_t</code> objects: +</p> +<dl> +<dt><u>Function:</u> casing_prefix_context_t <b>u8_casing_prefix_context</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX710"></a> +</dt> +<dt><u>Function:</u> casing_prefix_context_t <b>u16_casing_prefix_context</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX711"></a> +</dt> +<dt><u>Function:</u> casing_prefix_context_t <b>u32_casing_prefix_context</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX712"></a> +</dt> +<dd><p>Returns the case-mapping context of a given prefix string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> casing_prefix_context_t <b>u8_casing_prefixes_context</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>a_context</var>)</i> +<a name="IDX713"></a> +</dt> +<dt><u>Function:</u> casing_prefix_context_t <b>u16_casing_prefixes_context</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>a_context</var>)</i> +<a name="IDX714"></a> +</dt> +<dt><u>Function:</u> casing_prefix_context_t <b>u32_casing_prefixes_context</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>a_context</var>)</i> +<a name="IDX715"></a> +</dt> +<dd><p>Returns the case-mapping context of the prefix concat(<var>a</var>, <var>s</var>), +given the case-mapping context of the prefix <var>a</var>. +</p></dd></dl> + +<dl> +<dt><u>Type:</u> <b>casing_suffix_context_t</b> +<a name="IDX716"></a> +</dt> +<dd><p>This data type denotes the case-mapping context that is given by a suffix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> casing_suffix_context_t <b>unicase_empty_suffix_context</b> +<a name="IDX717"></a> +</dt> +<dd><p>This constant is the case-mapping context that corresponds to an empty suffix +string. +</p></dd></dl> + +<p>The following functions return <code>casing_suffix_context_t</code> objects: +</p> +<dl> +<dt><u>Function:</u> casing_suffix_context_t <b>u8_casing_suffix_context</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX718"></a> +</dt> +<dt><u>Function:</u> casing_suffix_context_t <b>u16_casing_suffix_context</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX719"></a> +</dt> +<dt><u>Function:</u> casing_suffix_context_t <b>u32_casing_suffix_context</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX720"></a> +</dt> +<dd><p>Returns the case-mapping context of a given suffix string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> casing_suffix_context_t <b>u8_casing_suffixes_context</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, casing_suffix_context_t <var>a_context</var>)</i> +<a name="IDX721"></a> +</dt> +<dt><u>Function:</u> casing_suffix_context_t <b>u16_casing_suffixes_context</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, casing_suffix_context_t <var>a_context</var>)</i> +<a name="IDX722"></a> +</dt> +<dt><u>Function:</u> casing_suffix_context_t <b>u32_casing_suffixes_context</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, casing_suffix_context_t <var>a_context</var>)</i> +<a name="IDX723"></a> +</dt> +<dd><p>Returns the case-mapping context of the suffix concat(<var>s</var>, <var>a</var>), +given the case-mapping context of the suffix <var>a</var>. +</p></dd></dl> + +<p>The following functions perform a case mapping, considering the +prefix context and the suffix context. 
+</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_ct_toupper</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX724"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_ct_toupper</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX725"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_ct_toupper</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX726"></a> +</dt> +<dd><p>Returns the uppercase mapping of a string that is surrounded by a prefix +and a suffix. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_ct_tolower</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX727"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_ct_tolower</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX728"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_ct_tolower</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX729"></a> +</dt> +<dd><p>Returns the lowercase mapping of a string that is surrounded by a prefix +and a suffix. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_ct_totitle</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX730"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_ct_totitle</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX731"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_ct_totitle</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX732"></a> +</dt> +<dd><p>Returns the titlecase mapping of a string that is surrounded by a prefix +and a suffix. 
+</p></dd></dl> + +<p>For example, to uppercase the UTF-8 substring between <code>s + start_index</code> +and <code>s + end_index</code> of a string that extends from <code>s</code> to +<code>s + u8_strlen (s)</code>, you can use the statements +</p> +<table><tr><td> </td><td><pre class="smallexample">size_t result_length; +uint8_t *result = + u8_ct_toupper (s + start_index, end_index - start_index, + u8_casing_prefix_context (s, start_index), + u8_casing_suffix_context (s + end_index, + u8_strlen (s) - end_index), + iso639_language, NULL, NULL, &result_length); +</pre></td></tr></table> + +<hr size="6"> +<a name="Case-insensitive-comparison"></a> +<a name="SEC52"></a> +<h2 class="section"> <a href="libunistring.html#TOC52">13.4 Case insensitive comparison</a> </h2> + +<p>The following functions implement comparison that ignores differences in case +and normalization. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_casefold</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX733"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_casefold</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX734"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_casefold</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX735"></a> +</dt> +<dd><p>Returns the case folded string. +</p> +<p>Comparing <code>u8_casefold (<var>s1</var>)</code> and <code>u8_casefold (<var>s2</var>)</code> +with the <code>u8_cmp2</code> function is equivalent to comparing <var>s1</var> and +<var>s2</var> with <code>u8_casecmp</code>. 
+</p> +<p>The <var>nf</var> argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_ct_casefold</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX736"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_ct_casefold</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX737"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_ct_casefold</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, casing_prefix_context_t <var>prefix_context</var>, casing_suffix_context_t <var>suffix_context</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX738"></a> +</dt> +<dd><p>Returns the case folded string. The case folding takes into account the +case mapping contexts of the prefix and suffix strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_casecmp</b><i> (const uint8_t *<var>s1</var>, size_t <var>n1</var>, const uint8_t *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX739"></a> +</dt> +<dt><u>Function:</u> int <b>u16_casecmp</b><i> (const uint16_t *<var>s1</var>, size_t <var>n1</var>, const uint16_t *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX740"></a> +</dt> +<dt><u>Function:</u> int <b>u32_casecmp</b><i> (const uint32_t *<var>s1</var>, size_t <var>n1</var>, const uint32_t *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX741"></a> +</dt> +<dt><u>Function:</u> int <b>ulc_casecmp</b><i> (const char *<var>s1</var>, size_t <var>n1</var>, const char *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX742"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, ignoring differences in case and normalization. +</p> +<p>The <var>nf</var> argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +</p> +<p>If successful, sets <code>*<var>resultp</var></code> to -1 if <var>s1</var> < <var>s2</var>, +0 if <var>s1</var> = <var>s2</var>, 1 if <var>s1</var> > <var>s2</var>, and returns 0. +Upon failure, returns -1 with <code>errno</code> set. +</p></dd></dl> + +<a name="IDX743"></a> +<a name="IDX744"></a> +<a name="IDX745"></a> +<a name="IDX746"></a> +<p>The following functions additionally take into account the sorting rules of the +current locale. 
+</p> +<dl> +<dt><u>Function:</u> char * <b>u8_casexfrm</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX747"></a> +</dt> +<dt><u>Function:</u> char * <b>u16_casexfrm</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX748"></a> +</dt> +<dt><u>Function:</u> char * <b>u32_casexfrm</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX749"></a> +</dt> +<dt><u>Function:</u> char * <b>ulc_casexfrm</b><i> (const char *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX750"></a> +</dt> +<dd><p>Converts the string <var>s</var> of length <var>n</var> to a NUL-terminated byte +sequence, in such a way that comparing <code>u8_casexfrm (<var>s1</var>)</code> and +<code>u8_casexfrm (<var>s2</var>)</code> with the gnulib function <code>memcmp2</code> is +equivalent to comparing <var>s1</var> and <var>s2</var> with <code>u8_casecoll</code>. +</p> +<p><var>nf</var> must be either <code>UNINORM_NFC</code>, <code>UNINORM_NFKC</code>, or NULL for +no normalization. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_casecoll</b><i> (const uint8_t *<var>s1</var>, size_t <var>n1</var>, const uint8_t *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX751"></a> +</dt> +<dt><u>Function:</u> int <b>u16_casecoll</b><i> (const uint16_t *<var>s1</var>, size_t <var>n1</var>, const uint16_t *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX752"></a> +</dt> +<dt><u>Function:</u> int <b>u32_casecoll</b><i> (const uint32_t *<var>s1</var>, size_t <var>n1</var>, const uint32_t *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX753"></a> +</dt> +<dt><u>Function:</u> int <b>ulc_casecoll</b><i> (const char *<var>s1</var>, size_t <var>n1</var>, const char *<var>s2</var>, size_t <var>n2</var>, const char *<var>iso639_language</var>, uninorm_t <var>nf</var>, int *<var>resultp</var>)</i> +<a name="IDX754"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, ignoring differences in case and normalization, +using the collation rules of the current locale. +</p> +<p>The <var>nf</var> argument identifies the normalization form to apply after the +case-mapping. It must be either <code>UNINORM_NFC</code>, <code>UNINORM_NFKC</code>, +or NULL for no normalization. +</p> +<p>If successful, sets <code>*<var>resultp</var></code> to -1 if <var>s1</var> < <var>s2</var>, +0 if <var>s1</var> = <var>s2</var>, 1 if <var>s1</var> > <var>s2</var>, and returns 0. +Upon failure, returns -1 with <code>errno</code> set. 
+</p></dd></dl> + +<hr size="6"> +<a name="Case-detection"></a> +<a name="SEC53"></a> +<h2 class="section"> <a href="libunistring.html#TOC53">13.5 Case detection</a> </h2> + +<p>The following functions determine whether a Unicode string is entirely in +upper case, or entirely in lower case, or entirely in title case, or already +case-folded. +</p> +<dl> +<dt><u>Function:</u> int <b>u8_is_uppercase</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX755"></a> +</dt> +<dt><u>Function:</u> int <b>u16_is_uppercase</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX756"></a> +</dt> +<dt><u>Function:</u> int <b>u32_is_uppercase</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX757"></a> +</dt> +<dd><p>Sets <code>*<var>resultp</var></code> to true if mapping NFD(<var>s</var>) to upper case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +<code>errno</code> set. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_is_lowercase</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX758"></a> +</dt> +<dt><u>Function:</u> int <b>u16_is_lowercase</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX759"></a> +</dt> +<dt><u>Function:</u> int <b>u32_is_lowercase</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX760"></a> +</dt> +<dd><p>Sets <code>*<var>resultp</var></code> to true if mapping NFD(<var>s</var>) to lower case is +a no-op, or to false otherwise, and returns 0. 
Upon failure, returns -1 with +<code>errno</code> set. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_is_titlecase</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX761"></a> +</dt> +<dt><u>Function:</u> int <b>u16_is_titlecase</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX762"></a> +</dt> +<dt><u>Function:</u> int <b>u32_is_titlecase</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX763"></a> +</dt> +<dd><p>Sets <code>*<var>resultp</var></code> to true if mapping NFD(<var>s</var>) to title case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +<code>errno</code> set. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_is_casefolded</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX764"></a> +</dt> +<dt><u>Function:</u> int <b>u16_is_casefolded</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX765"></a> +</dt> +<dt><u>Function:</u> int <b>u32_is_casefolded</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX766"></a> +</dt> +<dd><p>Sets <code>*<var>resultp</var></code> to true if applying case folding to NFD(<var>s</var>) is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +<code>errno</code> set. +</p></dd></dl> + +<p>The following functions determine whether case mappings have any effect on a +Unicode string. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_is_cased</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX767"></a> +</dt> +<dt><u>Function:</u> int <b>u16_is_cased</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX768"></a> +</dt> +<dt><u>Function:</u> int <b>u32_is_cased</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>iso639_language</var>, bool *<var>resultp</var>)</i> +<a name="IDX769"></a> +</dt> +<dd><p>Sets <code>*<var>resultp</var></code> to true if case matters for <var>s</var>, that is, if +mapping NFD(<var>s</var>) to either upper case or lower case or title case is not +a no-op. Sets <code>*<var>resultp</var></code> to false if NFD(<var>s</var>) maps to itself +under the upper case mapping, under the lower case mapping, and under the title +case mapping; in other words, when NFD(<var>s</var>) consists entirely of caseless +characters. Upon failure, returns -1 with <code>errno</code> set. 
+</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC48" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_14.html#SEC54" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_14.html b/doc/libunistring_14.html new file mode 100644 index 00000000..f0b21257 --- /dev/null +++ b/doc/libunistring_14.html @@ -0,0 +1,87 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 14. Regular expressions <uniregex.h></title> + +<meta name="description" content="GNU libunistring: 14. 
Regular expressions <uniregex.h>"> +<meta name="keywords" content="GNU libunistring: 14. Regular expressions <uniregex.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_13.html#SEC48" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_15.html#SEC55" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="uniregex_002eh"></a> +<a name="SEC54"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC54">14. Regular expressions <code><uniregex.h></code></a> </h1> + +<p>This include file is not yet implemented. +</p> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_13.html#SEC48" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_15.html#SEC55" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_15.html b/doc/libunistring_15.html new file mode 100644 index 00000000..7c7ac329 --- /dev/null +++ b/doc/libunistring_15.html @@ -0,0 +1,232 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. 
+Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 15. Using the library</title> + +<meta name="description" content="GNU libunistring: 15. Using the library"> +<meta name="keywords" content="GNU libunistring: 15. Using the library"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_14.html#SEC54" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_16.html#SEC61" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" 
align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="Using-the-library"></a> +<a name="SEC55"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC55">15. Using the library</a> </h1> + +<p>This chapter explains some practical considerations, regarding the +installation and compiler options that are needed in order to use this +library. +</p> + +<hr size="6"> +<a name="Installation"></a> +<a name="SEC56"></a> +<h2 class="section"> <a href="libunistring.html#TOC56">15.1 Installation</a> </h2> + +<p>Before you can use the library, it must be installed. First, you have to +make sure all dependencies are installed. They are listed in the file +‘<tt>DEPENDENCIES</tt>’. +</p> +<a name="IDX770"></a> +<p>Then you can proceed to build and install the library, as described in the +file ‘<tt>INSTALL</tt>’. For installation on Windows systems, please refer to +the file ‘<tt>README.woe32</tt>’. +</p> +<hr size="6"> +<a name="Compiler-options"></a> +<a name="SEC57"></a> +<h2 class="section"> <a href="libunistring.html#TOC57">15.2 Compiler options</a> </h2> + +<p>Let's denote as <code>LIBUNISTRING_PREFIX</code> the value of the ‘<samp>--prefix</samp>’ +option that you passed to <code>configure</code> while installing this package. +If you didn't pass any ‘<samp>--prefix</samp>’ option, then the package is installed +in ‘<tt>/usr/local</tt>’. +</p> +<p>Let's denote as <code>LIBUNISTRING_INCLUDEDIR</code> the directory where the +include files were installed. This is usually the same as +<code>${LIBUNISTRING_PREFIX}/include</code>. Except that if you passed an +‘<samp>--includedir</samp>’ option to <code>configure</code>, it is the value of that +option. +</p> +<p>Let's further denote as <code>LIBUNISTRING_LIBDIR</code> the directory where +the library itself was installed. 
This is the value that you passed +with the ‘<samp>--libdir</samp>’ option to <code>configure</code>, or otherwise the +same as <code>${LIBUNISTRING_PREFIX}/lib</code>. Recall that when building +in 64-bit mode on a 64-bit GNU/Linux system that supports executables +in either 64-bit mode or 32-bit mode, you should have used the option +<code>--libdir=${LIBUNISTRING_PREFIX}/lib64</code>. +</p> +<a name="IDX771"></a> +<p>So that the compiler finds the include files, you have to pass it the +option <code>-I${LIBUNISTRING_INCLUDEDIR}</code>. +</p> +<p>So that the compiler finds the library during its linking pass, you have +to pass it the options <code>-L${LIBUNISTRING_LIBDIR} -lunistring</code>. +On some systems, in some configurations, you also have to pass options +needed for linking with <code>libiconv</code>. The autoconf macro +<code>gl_LIBUNISTRING</code> (see <a href="#SEC59">Autoconf macro</a>) deals with this +particularity. +</p> +<hr size="6"> +<a name="Include-files"></a> +<a name="SEC58"></a> +<h2 class="section"> <a href="libunistring.html#TOC58">15.3 Include files</a> </h2> + +<p>Most of the include files have been presented in the introduction, see +<a href="libunistring_1.html#SEC1">Introduction</a>, and subsequent detailed chapters. +</p> +<p>Another include file is <code><unistring/version.h></code>. It contains the +version number of the libunistring library. +</p> +<dl> +<dt><u>Macro:</u> int <b>_LIBUNISTRING_VERSION</b> +<a name="IDX772"></a> +</dt> +<dd><p>This constant contains the version of libunistring that is being used +at compile time. It encodes the major and minor parts of the version +number only. These parts are encoded in the form <code>(major<<8) + minor</code>. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>_libunistring_version</b> +<a name="IDX773"></a> +</dt> +<dd><p>This constant contains the version of libunistring that is being used +at run time. It encodes the major and minor parts of the version +number only. 
These parts are encoded in the form <code>(major<<8) + minor</code>. +</p></dd></dl> + +<p>It is possible that <code>_libunistring_version</code> is greater than +<code>_LIBUNISTRING_VERSION</code>. This can happen when you use +<code>libunistring</code> as a shared library, and a newer, binary +backward-compatible version has been installed after your program +that uses <code>libunistring</code> was installed. +</p> +<hr size="6"> +<a name="Autoconf-macro"></a> +<a name="SEC59"></a> +<h2 class="section"> <a href="libunistring.html#TOC59">15.4 Autoconf macro</a> </h2> + +<p>GNU Gnulib provides an autoconf macro that tests for the availability +of <code>libunistring</code>. It is contained in the Gnulib module +‘<samp>libunistring</samp>’, see +<a href="http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring">http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring</a>. +</p> +<a name="IDX774"></a> +<p>The macro is called <code>gl_LIBUNISTRING</code>. It searches for an installed +libunistring. If found, it sets and AC_SUBSTs <code>HAVE_LIBUNISTRING=yes</code> +and the <code>LIBUNISTRING</code> and <code>LTLIBUNISTRING</code> variables and augments +the <code>CPPFLAGS</code> variable, and defines the C macro +<code>HAVE_LIBUNISTRING</code> to 1. Otherwise, it sets and AC_SUBSTs +<code>HAVE_LIBUNISTRING=no</code> and <code>LIBUNISTRING</code> and <code>LTLIBUNISTRING</code> +to empty. +</p> +<p>The complexities that <code>gl_LIBUNISTRING</code> deals with are the following: +</p> +<ul> +<li> +On some operating systems, in some configurations, libunistring depends +on <code>libiconv</code>, and the options for linking with libiconv must be +mentioned explicitly on the link command line. + +</li><li> +GNU <code>libunistring</code>, if installed, is not necessarily already in the +search path (<code>CPPFLAGS</code> for the include file search path, +<code>LDFLAGS</code> for the library search path). 
+ +</li><li> +GNU <code>libunistring</code>, if installed, is not necessarily already in the +run time library search path. To avoid the need for setting an environment +variable like <code>LD_LIBRARY_PATH</code>, the macro adds the appropriate +run time search path options to the <code>LIBUNISTRING</code> variable. This works +on most systems. +</li></ul> + +<hr size="6"> +<a name="Reporting-problems"></a> +<a name="SEC60"></a> +<h2 class="section"> <a href="libunistring.html#TOC60">15.5 Reporting problems</a> </h2> + +<p>If you encounter any problem, please don't hesitate to send a detailed +bug report to the <code>bug-libunistring@gnu.org</code> mailing list. You can +alternatively also use the bug tracker at the project page +<a href="https://savannah.gnu.org/projects/libunistring">https://savannah.gnu.org/projects/libunistring</a>. +</p> +<p>Please always include the version number of this library, and a short +description of your operating system and compilation environment with +corresponding version numbers. +</p> +<p>For problems that appear while building and installing <code>libunistring</code>, +for which you don't find the remedy in the ‘<tt>INSTALL</tt>’ file, please include +a description of the options that you passed to the ‘<samp>configure</samp>’ script. 
+</p> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC55" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_16.html#SEC61" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_16.html b/doc/libunistring_16.html new file mode 100644 index 00000000..bfe61265 --- /dev/null +++ b/doc/libunistring_16.html @@ -0,0 +1,93 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 16. More advanced functionality</title> + +<meta name="description" content="GNU libunistring: 16. 
More advanced functionality"> +<meta name="keywords" content="GNU libunistring: 16. More advanced functionality"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_15.html#SEC55" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_17.html#SEC62" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="More-functionality"></a> +<a name="SEC61"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC61">16. More advanced functionality</a> </h1> + +<p>For bidirectional reordering of strings, we recommend the GNU FriBidi library: +<a href="http://www.fribidi.org/">http://www.fribidi.org/</a>. +</p> +<a name="IDX775"></a> +<p>For the rendering of Unicode strings outside of the context of a given toolkit +(KDE/Qt or GNOME/Gtk), we recommend the Pango library: +<a href="http://www.pango.org/">http://www.pango.org/</a>. +</p> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_15.html#SEC55" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_17.html#SEC62" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_17.html b/doc/libunistring_17.html new file mode 100644 index 00000000..73ade2f2 --- /dev/null +++ b/doc/libunistring_17.html @@ -0,0 +1,1526 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: A. Licenses</title> + +<meta name="description" content="GNU libunistring: A. Licenses"> +<meta name="keywords" content="GNU libunistring: A. Licenses"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_16.html#SEC61" title="Beginning of this chapter or previous chapter"> &lt;&lt; </a>]</td> +<td valign="middle" align="left">[<a 
href="libunistring_18.html#SEC71" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="Licenses"></a> +<a name="SEC62"></a> +<h1 class="appendix"> <a href="libunistring.html#TOC62">A. Licenses</a> </h1> + +<p>The files of this package are covered by the licenses indicated in each +particular file or directory. Here is a summary: +</p> +<ul> +<li> +The <code>libunistring</code> library is covered by the +GNU Lesser General Public License (LGPL). +A copy of the license is included in <a href="#SEC68">GNU LESSER GENERAL PUBLIC LICENSE</a>. + +</li><li> +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. +<br> +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in <a href="#SEC69">GNU Free Documentation License</a>. +<br> +This manual is covered by the GNU GPL. 
You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in <a href="#SEC63">GNU GENERAL PUBLIC LICENSE</a>. +</li></ul> + + + +<hr size="6"> +<a name="GNU-GPL"></a> +<a name="SEC63"></a> +<h2 class="appendixsec"> <a href="libunistring.html#TOC63">A.1 GNU GENERAL PUBLIC LICENSE</a> </h2> +<p align="center"> Version 3, 29 June 2007 +</p> + +<table><tr><td> </td><td><pre class="display">Copyright © 2007 Free Software Foundation, Inc. <a href="http://fsf.org/">http://fsf.org/</a> + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +</pre></td></tr></table> + +<a name="SEC64"></a> +<h2 class="heading"> Preamble </h2> + +<p>The GNU General Public License is a free, copyleft license for +software and other kinds of works. +</p> +<p>The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom +to share and change all versions of a program—to make sure it remains +free software for all its users. We, the Free Software Foundation, +use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. You +can apply it to your programs, too. +</p> +<p>When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. 
+</p> +<p>To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the +software, or if you modify it: responsibilities to respect the freedom +of others. +</p> +<p>For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, +receive or can get the source code. And you must show them these +terms so they know their rights. +</p> +<p>Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. +</p> +<p>For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. +</p> +<p>Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. +Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those +domains in future versions of the GPL, as needed to protect the +freedom of users. +</p> +<p>Finally, every program is threatened constantly by software patents. 
+States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish +to avoid the special danger that patents applied to a free program +could make it effectively proprietary. To prevent this, the GPL +assures that patents cannot be used to render the program non-free. +</p> +<p>The precise terms and conditions for copying, distribution and +modification follow. +</p> +<a name="SEC65"></a> +<h2 class="heading"> TERMS AND CONDITIONS </h2> + +<ol> +<li> Definitions. + +<p>“This License” refers to version 3 of the GNU General Public License. +</p> +<p>“Copyright” also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. +</p> +<p>“The Program” refers to any copyrightable work licensed under this +License. Each licensee is addressed as “you”. “Licensees” and +“recipients” may be individuals or organizations. +</p> +<p>To “modify” a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. The resulting work is called a “modified version” of +the earlier work or a work “based on” the earlier work. +</p> +<p>A “covered work” means either the unmodified Program or a work based +on the Program. +</p> +<p>To “propagate” a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. +</p> +<p>To “convey” a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. 
+</p> +<p>An interactive user interface displays “Appropriate Legal Notices” to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. +</p> +</li><li> Source Code. + +<p>The “source code” for a work means the preferred form of the work for +making modifications to it. “Object code” means any non-source form +of a work. +</p> +<p>A “Standard Interface” means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. +</p> +<p>The “System Libraries” of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +“Major Component”, in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. +</p> +<p>The “Corresponding Source” for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. +</p> +<p>The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. +</p> +<p>The Corresponding Source for a work in source code form is that same +work. +</p> +</li><li> Basic Permissions. + +<p>All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. +</p> +<p>You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. +You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. 
Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. +</p> +<p>Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. +</p> +</li><li> Protecting Users' Legal Rights From Anti-Circumvention Law. + +<p>No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. +</p> +<p>When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. +</p> +</li><li> Conveying Verbatim Copies. + +<p>You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. +</p> +<p>You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. +</p> +</li><li> Conveying Modified Source Versions. 
+ +<p>You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: +</p> +<ol> +<li> +The work must carry prominent notices stating that you modified it, +and giving a relevant date. + +</li><li> +The work must carry prominent notices stating that it is released +under this License and any conditions added under section 7. This +requirement modifies the requirement in section 4 to “keep intact all +notices”. + +</li><li> +You must license the entire work, as a whole, under this License to +anyone who comes into possession of a copy. This License will +therefore apply, along with any applicable section 7 additional terms, +to the whole of the work, and all its parts, regardless of how they +are packaged. This License gives no permission to license the work in +any other way, but it does not invalidate such permission if you have +separately received it. + +</li><li> +If the work has interactive user interfaces, each must display +Appropriate Legal Notices; however, if the Program has interactive +interfaces that do not display Appropriate Legal Notices, your work +need not make them do so. +</li></ol> + +<p>A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +“aggregate” if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. +</p> +</li><li> Conveying Non-Source Forms. 
+ +<p>You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: +</p> +<ol> +<li> +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by the +Corresponding Source fixed on a durable physical medium customarily +used for software interchange. + +</li><li> +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by a written +offer, valid for at least three years and valid for as long as you +offer spare parts or customer support for that product model, to give +anyone who possesses the object code either (1) a copy of the +Corresponding Source for all the software in the product that is +covered by this License, on a durable physical medium customarily used +for software interchange, for a price no more than your reasonable +cost of physically performing this conveying of source, or (2) access +to copy the Corresponding Source from a network server at no charge. + +</li><li> +Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. This alternative is +allowed only occasionally and noncommercially, and only if you +received the object code with such an offer, in accord with subsection +6b. + +</li><li> +Convey the object code by offering access from a designated place +(gratis or for a charge), and offer equivalent access to the +Corresponding Source in the same way through the same place at no +further charge. You need not require recipients to copy the +Corresponding Source along with the object code. 
If the place to copy +the object code is a network server, the Corresponding Source may be +on a different server (operated by you or a third party) that supports +equivalent copying facilities, provided you maintain clear directions +next to the object code saying where to find the Corresponding Source. +Regardless of what server hosts the Corresponding Source, you remain +obligated to ensure that it is available for as long as needed to +satisfy these requirements. + +</li><li> +Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of +the work are being offered to the general public at no charge under +subsection 6d. + +</li></ol> + +<p>A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. +</p> +<p>A “User Product” is either (1) a “consumer product”, which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. For a particular product received by a particular user, +“normally used” refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. 
+</p> +<p>“Installation Information” for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. +</p> +<p>If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). +</p> +<p>The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. +</p> +<p>Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. +</p> +</li><li> Additional Terms. 
+ +<p>“Additional permissions” are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. +</p> +<p>When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. +</p> +<p>Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: +</p> +<ol> +<li> +Disclaiming warranty or limiting liability differently from the terms +of sections 15 and 16 of this License; or + +</li><li> +Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices +displayed by works containing it; or + +</li><li> +Prohibiting misrepresentation of the origin of that material, or +requiring that modified versions of such material be marked in +reasonable ways as different from the original version; or + +</li><li> +Limiting the use for publicity purposes of names of licensors or +authors of the material; or + +</li><li> +Declining to grant rights under trademark law for use of some trade +names, trademarks, or service marks; or + +</li><li> +Requiring indemnification of licensors and 
authors of that material by +anyone who conveys the material (or modified versions of it) with +contractual assumptions of liability to the recipient, for any +liability that these contractual assumptions directly impose on those +licensors and authors. +</li></ol> + +<p>All other non-permissive additional terms are considered “further +restrictions” within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. +</p> +<p>If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. +</p> +<p>Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. +</p> +</li><li> Termination. + +<p>You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). 
+</p> +<p>However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. +</p> +<p>Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. +</p> +<p>Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. +</p> +</li><li> Acceptance Not Required for Having Copies. + +<p>You are not required to accept this License in order to receive or run +a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. +</p> +</li><li> Automatic Licensing of Downstream Recipients. + +<p>Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. 
You are not responsible +for enforcing compliance by third parties with this License. +</p> +<p>An “entity transaction” is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. +</p> +<p>You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. +</p> +</li><li> Patents. + +<p>A “contributor” is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's “contributor version”. +</p> +<p>A contributor's “essential patent claims” are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, “control” includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+</p> +<p>Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. +</p> +<p>In the following three paragraphs, a “patent license” is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To “grant” such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. +</p> +<p>If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. “Knowingly relying” means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. 
+</p> +<p>If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. +</p> +<p>A patent license is “discriminatory” if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. +</p> +<p>Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. +</p> +</li><li> No Surrender of Others' Freedom. + +<p>If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey +a covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree +to terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. +</p> +</li><li> Use with the GNU Affero General Public License. + +<p>Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. +</p> +</li><li> Revised Versions of this License. + +<p>The Free Software Foundation may publish revised and/or new versions +of the GNU General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. +</p> +<p>Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU General Public +License “or any later version” applies to it, you have the option of +following the terms and conditions either of that numbered version or +of any later version published by the Free Software Foundation. If +the Program does not specify a version number of the GNU General +Public License, you may choose any version ever published by the Free +Software Foundation. 
+</p> +<p>If the Program specifies that a proxy can decide which future versions +of the GNU General Public License can be used, that proxy's public +statement of acceptance of a version permanently authorizes you to +choose that version for the Program. +</p> +<p>Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. +</p> +</li><li> Disclaimer of Warranty. + +<p>THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. +</p> +</li><li> Limitation of Liability. + +<p>IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +</p> +</li><li> Interpretation of Sections 15 and 16. 
+ +<p>If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. +</p> +</li></ol> + +<a name="SEC66"></a> +<h2 class="heading"> END OF TERMS AND CONDITIONS </h2> + +<a name="SEC67"></a> +<h2 class="heading"> How to Apply These Terms to Your New Programs </h2> + +<p>If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. +</p> +<p>To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the “copyright” line and a pointer to where the full notice is found. +</p> +<table><tr><td> </td><td><pre class="smallexample"><var>one line to give the program's name and a brief idea of what it does.</var> +Copyright (C) <var>year</var> <var>name of author</var> + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <a href="http://www.gnu.org/licenses/">http://www.gnu.org/licenses/</a>. 
+</pre></td></tr></table> + +<p>Also add information on how to contact you by electronic and paper mail. +</p> +<p>If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: +</p> +<table><tr><td> </td><td><pre class="smallexample"><var>program</var> Copyright (C) <var>year</var> <var>name of author</var> +This program comes with ABSOLUTELY NO WARRANTY; for details type ‘<samp>show w</samp>’. +This is free software, and you are welcome to redistribute it +under certain conditions; type ‘<samp>show c</samp>’ for details. +</pre></td></tr></table> + +<p>The hypothetical commands ‘<samp>show w</samp>’ and ‘<samp>show c</samp>’ should show +the appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an “about box”. +</p> +<p>You should also get your employer (if you work as a programmer) or school, +if any, to sign a “copyright disclaimer” for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<a href="http://www.gnu.org/licenses/">http://www.gnu.org/licenses/</a>. +</p> +<p>The GNU General Public License does not permit incorporating your +program into proprietary programs. If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use +the GNU Lesser General Public License instead of this License. But +first, please read <a href="http://www.gnu.org/philosophy/why-not-lgpl.html">http://www.gnu.org/philosophy/why-not-lgpl.html</a>. + +</p><hr size="6"> +<a name="GNU-LGPL"></a> +<a name="SEC68"></a> +<h2 class="appendixsec"> <a href="libunistring.html#TOC64">A.2 GNU LESSER GENERAL PUBLIC LICENSE</a> </h2> +<p align="center"> Version 3, 29 June 2007 +</p> + +<table><tr><td> </td><td><pre class="display">Copyright © 2007 Free Software Foundation, Inc. 
<a href="http://fsf.org/">http://fsf.org/</a> + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +</pre></td></tr></table> + +<p>This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. +</p> +<ol> +<li> Additional Definitions. + +<p>As used herein, “this License” refers to version 3 of the GNU Lesser +General Public License, and the “GNU GPL” refers to version 3 of the GNU +General Public License. +</p> +<p>“The Library” refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. +</p> +<p>An “Application” is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. +</p> +<p>A “Combined Work” is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the “Linked +Version”. +</p> +<p>The “Minimal Corresponding Source” for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. +</p> +<p>The “Corresponding Application Code” for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. +</p> +</li><li> Exception to Section 3 of the GNU GPL. + +<p>You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. 
+</p> +</li><li> Conveying Modified Versions. + +<p>If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: +</p> +<ol> +<li> +under this License, provided that you make a good faith effort to +ensure that, in the event an Application does not supply the +function or data, the facility still operates, and performs +whatever part of its purpose remains meaningful, or + +</li><li> +under the GNU GPL, with none of the additional permissions of +this License applicable to that copy. +</li></ol> + +</li><li> Object Code Incorporating Material from Library Header Files. + +<p>The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: +</p> +<ol> +<li> +Give prominent notice with each copy of the object code that the +Library is used in it and that the Library and its use are +covered by this License. +</li><li> +Accompany the object code with a copy of the GNU GPL and this license +document. +</li></ol> + +</li><li> Combined Works. + +<p>You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: +</p> +<ol> +<li> +Give prominent notice with each copy of the Combined Work that +the Library is used in it and that the Library and its use are +covered by this License. 
+</li><li> +Accompany the Combined Work with a copy of the GNU GPL and this license +document. +</li><li> +For a Combined Work that displays copyright notices during +execution, include the copyright notice for the Library among +these notices, as well as a reference directing the user to the +copies of the GNU GPL and this license document. +</li><li> +Do one of the following: + +<ol> +<li> +Convey the Minimal Corresponding Source under the terms of this +License, and the Corresponding Application Code in a form +suitable for, and under terms that permit, the user to +recombine or relink the Application with a modified version of +the Linked Version to produce a modified Combined Work, in the +manner specified by section 6 of the GNU GPL for conveying +Corresponding Source. +</li><li> +Use a suitable shared library mechanism for linking with the +Library. A suitable mechanism is one that (a) uses at run time +a copy of the Library already present on the user's computer +system, and (b) will operate properly with a modified version +of the Library that is interface-compatible with the Linked +Version. +</li></ol> + +</li><li> +Provide Installation Information, but only if you would otherwise +be required to provide such information under section 6 of the +GNU GPL, and only to the extent that such information is +necessary to install and execute a modified version of the +Combined Work produced by recombining or relinking the +Application with a modified version of the Linked Version. (If +you use option 4d0, the Installation Information must accompany +the Minimal Corresponding Source and Corresponding Application +Code. If you use option 4d1, you must provide the Installation +Information in the manner specified by section 6 of the GNU GPL +for conveying Corresponding Source.) +</li></ol> + +</li><li> Combined Libraries. 
+ +<p>You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: +</p> +<ol> +<li> +Accompany the combined library with a copy of the same work based +on the Library, uncombined with any other library facilities, +conveyed under the terms of this License. +</li><li> +Give prominent notice with the combined library that part of it +is a work based on the Library, and explaining where to find the +accompanying uncombined form of the same work. +</li></ol> + +</li><li> Revised Versions of the GNU Lesser General Public License. + +<p>The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. +</p> +<p>Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License “or any later version” +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. +</p> +<p>If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
+</p> +</li></ol> + +<hr size="6"> +<a name="GNU-FDL"></a> +<a name="SEC69"></a> +<h2 class="appendixsec"> <a href="libunistring.html#TOC65">A.3 GNU Free Documentation License</a> </h2> +<p align="center"> Version 1.3, 3 November 2008 +</p> + +<table><tr><td> </td><td><pre class="display">Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. +<a href="http://fsf.org/">http://fsf.org/</a> + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +</pre></td></tr></table> + +<ol> +<li> +PREAMBLE + +<p>The purpose of this License is to make a manual, textbook, or other +functional and useful document <em>free</em> in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. +</p> +<p>This License is a kind of “copyleft”, which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. +</p> +<p>We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. 
+</p> +</li><li> +APPLICABILITY AND DEFINITIONS + +<p>This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The “Document”, below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as “you”. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. +</p> +<p>A “Modified Version” of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. +</p> +<p>A “Secondary Section” is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. +</p> +<p>The “Invariant Sections” are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. 
+</p> +<p>The “Cover Texts” are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. +</p> +<p>A “Transparent” copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not “Transparent” is called “Opaque”. +</p> +<p>Examples of suitable formats for Transparent copies include plain +<small>ASCII</small> without markup, Texinfo input format, LaTeX input +format, <acronym>SGML</acronym> or <acronym>XML</acronym> using a publicly available +<acronym>DTD</acronym>, and standard-conforming simple <acronym>HTML</acronym>, +PostScript or <acronym>PDF</acronym> designed for human modification. Examples +of transparent image formats include <acronym>PNG</acronym>, <acronym>XCF</acronym> and +<acronym>JPG</acronym>. 
Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, <acronym>SGML</acronym> or +<acronym>XML</acronym> for which the <acronym>DTD</acronym> and/or processing tools are +not generally available, and the machine-generated <acronym>HTML</acronym>, +PostScript or <acronym>PDF</acronym> produced by some word processors for +output purposes only. +</p> +<p>The “Title Page” means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, “Title Page” means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. +</p> +<p>The “publisher” means any person or entity that distributes copies +of the Document to the public. +</p> +<p>A section “Entitled XYZ” means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as “Acknowledgements”, +“Dedications”, “Endorsements”, or “History”.) To “Preserve the Title” +of such a section when you modify the Document means that it remains a +section “Entitled XYZ” according to this definition. +</p> +<p>The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. 
+</p> +</li><li> +VERBATIM COPYING + +<p>You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. +</p> +<p>You may also lend copies, under the same conditions stated above, and +you may publicly display copies. +</p> +</li><li> +COPYING IN QUANTITY + +<p>If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. +</p> +<p>If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. 
+</p> +<p>If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. +</p> +<p>It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. +</p> +</li><li> +MODIFICATIONS + +<p>You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: +</p> +<ol> +<li> +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. 
+ +</li><li> +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +</li><li> +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +</li><li> +Preserve all the copyright notices of the Document. + +</li><li> +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +</li><li> +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +</li><li> +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +</li><li> +Include an unaltered copy of this License. + +</li><li> +Preserve the section Entitled “History”, Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled “History” in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +</li><li> +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the “History” section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. 
+ +</li><li> +For any section Entitled “Acknowledgements” or “Dedications”, Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +</li><li> +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +</li><li> +Delete any section Entitled “Endorsements”. Such a section +may not be included in the Modified Version. + +</li><li> +Do not retitle any existing section to be Entitled “Endorsements” or +to conflict in title with any Invariant Section. + +</li><li> +Preserve any Warranty Disclaimers. +</li></ol> + +<p>If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. +</p> +<p>You may add a section Entitled “Endorsements”, provided it contains +nothing but endorsements of your Modified Version by various +parties—for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. +</p> +<p>You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. 
If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. +</p> +<p>The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. +</p> +</li><li> +COMBINING DOCUMENTS + +<p>You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. +</p> +<p>The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. +</p> +<p>In the combination, you must combine any sections Entitled “History” +in the various original documents, forming one section Entitled +“History”; likewise combine any sections Entitled “Acknowledgements”, +and any sections Entitled “Dedications”. 
You must delete all +sections Entitled “Endorsements.” +</p> +</li><li> +COLLECTIONS OF DOCUMENTS + +<p>You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. +</p> +<p>You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. +</p> +</li><li> +AGGREGATION WITH INDEPENDENT WORKS + +<p>A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an “aggregate” if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. +</p> +<p>If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. +</p> +</li><li> +TRANSLATION + +<p>Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. 
+Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. +</p> +<p>If a section in the Document is Entitled “Acknowledgements”, +“Dedications”, or “History”, the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. +</p> +</li><li> +TERMINATION + +<p>You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. +</p> +<p>However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. +</p> +<p>Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+</p> +<p>Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. +</p> +</li><li> +FUTURE REVISIONS OF THIS LICENSE + +<p>The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +<a href="http://www.gnu.org/copyleft/">http://www.gnu.org/copyleft/</a>. +</p> +<p>Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License “or any later version” applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. +</p> +</li><li> +RELICENSING + +<p>“Massive Multiauthor Collaboration Site” (or “MMC Site”) means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +“Massive Multiauthor Collaboration” (or “MMC”) contained in the +site means any set of copyrightable works thus published on the MMC +site. 
+</p> +<p>“CC-BY-SA” means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. +</p> +<p>“Incorporate” means to publish or republish a Document, in whole or +in part, as part of another Document. +</p> +<p>An MMC is “eligible for relicensing” if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. +</p> +<p>The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. +</p> +</li></ol> + + +<a name="SEC70"></a> +<h2 class="heading"> ADDENDUM: How to use this License for your documents </h2> + +<p>To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: +</p> +<table><tr><td> </td><td><pre class="smallexample"> Copyright (C) <var>year</var> <var>your name</var>. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. 
+</pre></td></tr></table> + +<p>If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the “with…Texts.” line with this: +</p> +<table><tr><td> </td><td><pre class="smallexample"> with the Invariant Sections being <var>list their titles</var>, with + the Front-Cover Texts being <var>list</var>, and with the Back-Cover Texts + being <var>list</var>. +</pre></td></tr></table> + +<p>If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. +</p> +<p>If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. +</p> + + +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC62" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_18.html b/doc/libunistring_18.html new file mode 100644 index 00000000..8237bf6a --- /dev/null +++ b/doc/libunistring_18.html @@ -0,0 +1,770 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: Index</title> + +<meta name="description" content="GNU libunistring: Index"> +<meta name="keywords" content="GNU libunistring: Index"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_17.html#SEC62" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_19.html#INDEX0" 
title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="Index"></a> +<a name="SEC71"></a> +<h1 class="unnumbered"> <a href="libunistring.html#TOC66">Index</a> </h1> + +<table><tr><th valign="top">Jump to: </th><td><a href="#SEC71_0" class="summary-letter"><b>A</b></a> + +<a href="#SEC71_1" class="summary-letter"><b>B</b></a> + +<a href="#SEC71_2" class="summary-letter"><b>C</b></a> + +<a href="#SEC71_3" class="summary-letter"><b>D</b></a> + +<a href="#SEC71_4" class="summary-letter"><b>E</b></a> + +<a href="#SEC71_5" class="summary-letter"><b>F</b></a> + +<a href="#SEC71_6" class="summary-letter"><b>G</b></a> + +<a href="#SEC71_7" class="summary-letter"><b>H</b></a> + +<a href="#SEC71_8" class="summary-letter"><b>I</b></a> + +<a href="#SEC71_9" class="summary-letter"><b>J</b></a> + +<a href="#SEC71_10" class="summary-letter"><b>L</b></a> + +<a href="#SEC71_11" class="summary-letter"><b>M</b></a> + +<a href="#SEC71_12" class="summary-letter"><b>N</b></a> + +<a href="#SEC71_13" class="summary-letter"><b>O</b></a> + +<a href="#SEC71_14" class="summary-letter"><b>P</b></a> + +<a href="#SEC71_15" class="summary-letter"><b>R</b></a> + +<a href="#SEC71_16" class="summary-letter"><b>S</b></a> + +<a href="#SEC71_17" class="summary-letter"><b>T</b></a> + +<a href="#SEC71_18" class="summary-letter"><b>U</b></a> + +<a 
href="libunistring_19.html#INDEX0_0" class="summary-letter"><b>V</b></a> + +<a href="libunistring_19.html#INDEX0_1" class="summary-letter"><b>W</b></a> + +</td></tr></table> +<table border="0" class="index-cp"> +<tr><td></td><th align="left">Index Entry</th><th align="left"> Section</th></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_0">A</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX604">ambiguous width</a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_2.html#IDX14">argument conventions</a></td><td valign="top"><a href="libunistring_2.html#SEC9">2. Conventions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#SEC59">autoconf macro</a></td><td valign="top"><a href="libunistring_15.html#SEC59">15.4 Autoconf macro</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_1">B</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC25">bidirectional category</a></td><td valign="top"><a href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_16.html#SEC61">bidirectional reordering</a></td><td valign="top"><a href="libunistring_16.html#SEC61">16. More advanced functionality</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC34">block</a></td><td valign="top"><a href="libunistring_8.html#SEC34">8.10 Blocks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#SEC41">breaks, line</a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#SEC38">breaks, word</a></td><td valign="top"><a href="libunistring_10.html#SEC38">10. 
Word breaks in strings <code><uniwbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#SEC60">bug reports</a></td><td valign="top"><a href="libunistring_15.html#SEC60">15.5 Reporting problems</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#SEC60">bug tracker</a></td><td valign="top"><a href="libunistring_15.html#SEC60">15.5 Reporting problems</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_2">C</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#SEC6">C string functions</a></td><td valign="top"><a href="libunistring_1.html#SEC6">1.5 ‘<samp>char *</samp>’ strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC35">C, programming language</a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC36">C-like API</a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC24">canonical combining class</a></td><td valign="top"><a href="libunistring_8.html#SEC24">8.2 Canonical combining class</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC53">case detection</a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC50">case mappings</a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX708"><code>casing_prefix_context_t</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX716"><code>casing_suffix_context_t</code></a></td><td 
valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX10">char, type</a></td><td valign="top"><a href="libunistring_1.html#SEC6">1.5 ‘<samp>char *</samp>’ strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC44">combining, Unicode characters</a></td><td valign="top"><a href="libunistring_12.html#SEC44">12.2 Composition of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX54">comparing</a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX110">comparing</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC52">comparing, ignoring case</a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX743">comparing, ignoring case, with collation rules</a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC46">comparing, ignoring normalization</a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC52">comparing, ignoring normalization and case</a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX745">comparing, ignoring normalization and case, with collation rules</a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> 
+<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX681">comparing, ignoring normalization, with collation rules</a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX114">comparing, with collation rules</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX744">comparing, with collation rules, ignoring case</a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX682">comparing, with collation rules, ignoring normalization</a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX746">comparing, with collation rules, ignoring normalization and case</a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#IDX771">compiler options</a></td><td valign="top"><a href="libunistring_15.html#SEC57">15.2 Compiler options</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC44">composing, Unicode characters</a></td><td valign="top"><a href="libunistring_12.html#SEC44">12.2 Composition of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#SEC13">converting</a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX160">converting</a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX44">copying</a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX91">copying</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX65">counting</a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_3">D</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC43">decomposing</a></td><td valign="top"><a href="libunistring_12.html#SEC43">12.1 Decomposition of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#SEC56">dependencies</a></td><td valign="top"><a href="libunistring_15.html#SEC56">15.1 Installation</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC53">detecting case</a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#SEC15">duplicating</a></td><td valign="top"><a href="libunistring_4.html#SEC15">4.4 Elementary string functions with memory allocation</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX121">duplicating</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_4">E</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX156"><code>enum iconv_ilseq_handler</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code>&lt;uniconv.h&gt;</code></a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_5">F</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC69">FDL, GNU Free Documentation License</a></td><td valign="top"><a href="libunistring_17.html#SEC69">A.3 GNU Free Documentation License</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#SEC18">formatted output</a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code>&lt;unistdio.h&gt;</code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX607">fullwidth</a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code>&lt;uniwidth.h&gt;</code></a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_6">G</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC21">general category</a></td><td valign="top"><a href="libunistring_8.html#SEC21">8.1 General category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#IDX774"><code>gl_LIBUNISTRING</code></a></td><td valign="top"><a href="libunistring_15.html#SEC59">15.4 Autoconf macro</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC63">GPL, GNU General Public License</a></td><td valign="top"><a href="libunistring_17.html#SEC63">A.1 GNU GENERAL PUBLIC LICENSE</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_7">H</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX606">halfwidth</a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. 
Display width <code>&lt;uniwidth.h&gt;</code></a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_8">I</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC35">identifiers</a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#IDX770">installation</a></td><td valign="top"><a href="libunistring_15.html#SEC56">15.1 Installation</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#SEC3">internationalization</a></td><td valign="top"><a href="libunistring_1.html#SEC3">1.2 Unicode and Internationalization</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#SEC14">iterating</a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX75">iterating</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_9">J</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC35">Java, programming language</a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_10">L</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC68">LGPL, GNU Lesser General Public License</a></td><td valign="top"><a href="libunistring_17.html#SEC68">A.2 GNU LESSER GENERAL PUBLIC LICENSE</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC69">License, GNU FDL</a></td><td valign="top"><a href="libunistring_17.html#SEC69">A.3 GNU Free Documentation License</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC63">License, GNU 
GPL</a></td><td valign="top"><a href="libunistring_17.html#SEC63">A.1 GNU GENERAL PUBLIC LICENSE</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC68">License, GNU LGPL</a></td><td valign="top"><a href="libunistring_17.html#SEC68">A.2 GNU LESSER GENERAL PUBLIC LICENSE</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_17.html#SEC62">Licenses</a></td><td valign="top"><a href="libunistring_17.html#SEC62">A. Licenses</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#SEC41">line breaks</a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#SEC4">locale</a></td><td valign="top"><a href="libunistring_1.html#SEC4">1.3 Locale encodings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX7">locale categories</a></td><td valign="top"><a href="libunistring_1.html#SEC4">1.3 Locale encodings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX8">locale encoding</a></td><td valign="top"><a href="libunistring_1.html#SEC4">1.3 Locale encodings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX154">locale encoding</a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX697">locale language</a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX9">locale, multibyte</a></td><td valign="top"><a href="libunistring_1.html#SEC6">1.5 ‘<samp>char *</samp>’ strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX155"><code>locale_charset</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC50">lowercasing</a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_11">M</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_15.html#SEC60">mailing list</a></td><td valign="top"><a href="libunistring_15.html#SEC60">15.5 Reporting problems</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC29">mirroring, of Unicode character</a></td><td valign="top"><a href="libunistring_8.html#SEC29">8.7 Mirrored character</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_12">N</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC42">normal forms</a></td><td valign="top"><a href="libunistring_12.html#SEC42">12. Normalization forms (composition and decomposition) <code><uninorm.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC42">normalizing</a></td><td valign="top"><a href="libunistring_12.html#SEC42">12. Normalization forms (composition and decomposition) <code><uninorm.h></code></a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_13">O</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#SEC18">output, formatted</a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code>&lt;unistdio.h&gt;</code></a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_14">P</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC30">properties, of Unicode character</a></td><td valign="top"><a href="libunistring_8.html#SEC30">8.8 Properties</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_15">R</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_14.html#SEC54">regular expression</a></td><td valign="top"><a href="libunistring_14.html#SEC54">14. Regular expressions <code>&lt;uniregex.h&gt;</code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_16.html#IDX775">rendering</a></td><td valign="top"><a href="libunistring_16.html#SEC61">16. More advanced functionality</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_2.html#IDX15">return value conventions</a></td><td valign="top"><a href="libunistring_2.html#SEC9">2. 
Conventions</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_16">S</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC33">scripts</a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX61">searching, for a character</a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX125">searching, for a character</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX141">searching, for a substring</a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#SEC47">stream, normalizing a</a></td><td valign="top"><a href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX689"><code>struct uninorm_filter</code></a></td><td valign="top"><a href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_17">T</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC50">titlecasing</a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="SEC71_18">U</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX206"><code>u16_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX205"><code>u16_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX740"><code>u16_casecmp</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX752"><code>u16_casecoll</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX734"><code>u16_casefold</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX748"><code>u16_casexfrm</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX711"><code>u16_casing_prefix_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX714"><code>u16_casing_prefixes_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX719"><code>u16_casing_suffix_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX722"><code>u16_casing_suffixes_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td 
valign="top"><a href="libunistring_4.html#IDX21"><code>u16_check</code></a></td><td valign="top"><a href="libunistring_4.html#SEC12">4.1 Elementary string checks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX63"><code>u16_chr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX56"><code>u16_cmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX59"><code>u16_cmp2</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX162"><code>u16_conv_from_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX165"><code>u16_conv_to_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX46"><code>u16_cpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX70"><code>u16_cpy_alloc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC15">4.4 Elementary string functions with memory allocation</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX737"><code>u16_ct_casefold</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX728"><code>u16_ct_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX731"><code>u16_ct_totitle</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX725"><code>u16_ct_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX149"><code>u16_endswith</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX768"><code>u16_is_cased</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX765"><code>u16_is_casefolded</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_13.html#IDX759"><code>u16_is_lowercase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX762"><code>u16_is_titlecase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX756"><code>u16_is_uppercase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX30"><code>u16_mblen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX67"><code>u16_mbsnlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX36"><code>u16_mbtouc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX33"><code>u16_mbtouc_unsafe</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX39"><code>u16_mbtoucr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX49"><code>u16_move</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX80"><code>u16_next</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td 
valign="top"><a href="libunistring_12.html#IDX676"><code>u16_normalize</code></a></td><td valign="top"><a href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX679"><code>u16_normcmp</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX687"><code>u16_normcoll</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX684"><code>u16_normxfrm</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX639"><code>u16_possible_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX83"><code>u16_prev</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX52"><code>u16_set</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX204"><code>u16_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX203"><code>u16_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX146"><code>u16_startswith</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX96"><code>u16_stpcpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX102"><code>u16_stpncpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX105"><code>u16_strcat</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX127"><code>u16_strchr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX112"><code>u16_strcmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX116"><code>u16_strcoll</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX168"><code>u16_strconv_from_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX174"><code>u16_strconv_from_locale</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX171"><code>u16_strconv_to_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX177"><code>u16_strconv_to_locale</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX93"><code>u16_strcpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX133"><code>u16_strcspn</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX123"><code>u16_strdup</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX86"><code>u16_strlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX73"><code>u16_strmblen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_4.html#IDX77"><code>u16_strmbtouc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX108"><code>u16_strncat</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX119"><code>u16_strncmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX99"><code>u16_strncpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX89"><code>u16_strnlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX139"><code>u16_strpbrk</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX130"><code>u16_strrchr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX136"><code>u16_strspn</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX143"><code>u16_strstr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> 
+<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX152"><code>u16_strtok</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX613"><code>u16_strwidth</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX26"><code>u16_to_u32</code></a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX25"><code>u16_to_u8</code></a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX703"><code>u16_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX706"><code>u16_totitle</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX700"><code>u16_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX214"><code>u16_u16_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX213"><code>u16_u16_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX212"><code>u16_u16_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX211"><code>u16_u16_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX218"><code>u16_u16_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX217"><code>u16_u16_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX216"><code>u16_u16_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX215"><code>u16_u16_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX42"><code>u16_uctomb</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX210"><code>u16_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX209"><code>u16_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX208"><code>u16_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX207"><code>u16_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX610"><code>u16_width</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX643"><code>u16_width_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#IDX616"><code>u16_wordbreaks</code></a></td><td valign="top"><a href="libunistring_10.html#SEC39">10.1 Word breaks in a string</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX222"><code>u32_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX221"><code>u32_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX741"><code>u32_casecmp</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX753"><code>u32_casecoll</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX735"><code>u32_casefold</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX749"><code>u32_casexfrm</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX712"><code>u32_casing_prefix_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX715"><code>u32_casing_prefixes_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX720"><code>u32_casing_suffix_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX723"><code>u32_casing_suffixes_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX22"><code>u32_check</code></a></td><td valign="top"><a href="libunistring_4.html#SEC12">4.1 Elementary string checks</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_4.html#IDX64"><code>u32_chr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX57"><code>u32_cmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX60"><code>u32_cmp2</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX163"><code>u32_conv_from_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX166"><code>u32_conv_to_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX47"><code>u32_cpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX71"><code>u32_cpy_alloc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC15">4.4 Elementary string functions with memory allocation</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX738"><code>u32_ct_casefold</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX729"><code>u32_ct_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX732"><code>u32_ct_totitle</code></a></td><td 
valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX726"><code>u32_ct_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX150"><code>u32_endswith</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX769"><code>u32_is_cased</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX766"><code>u32_is_casefolded</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX760"><code>u32_is_lowercase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX763"><code>u32_is_titlecase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX757"><code>u32_is_uppercase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX31"><code>u32_mblen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX68"><code>u32_mbsnlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_4.html#IDX37"><code>u32_mbtouc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX34"><code>u32_mbtouc_unsafe</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX40"><code>u32_mbtoucr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX50"><code>u32_move</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX81"><code>u32_next</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX677"><code>u32_normalize</code></a></td><td valign="top"><a href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX680"><code>u32_normcmp</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX688"><code>u32_normcoll</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX685"><code>u32_normxfrm</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX640"><code>u32_possible_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. 
Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX84"><code>u32_prev</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX53"><code>u32_set</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX220"><code>u32_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX219"><code>u32_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX147"><code>u32_startswith</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX97"><code>u32_stpcpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX103"><code>u32_stpncpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX106"><code>u32_strcat</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX128"><code>u32_strchr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string 
functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX113"><code>u32_strcmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX117"><code>u32_strcoll</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX169"><code>u32_strconv_from_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX175"><code>u32_strconv_from_locale</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX172"><code>u32_strconv_to_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX178"><code>u32_strconv_to_locale</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX94"><code>u32_strcpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX134"><code>u32_strcspn</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX124"><code>u32_strdup</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX87"><code>u32_strlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX74"><code>u32_strmblen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX78"><code>u32_strmbtouc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX109"><code>u32_strncat</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX120"><code>u32_strncmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX100"><code>u32_strncpy</code></a></td><td 
valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX90"><code>u32_strnlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX140"><code>u32_strpbrk</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX131"><code>u32_strrchr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX137"><code>u32_strspn</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX144"><code>u32_strstr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX153"><code>u32_strtok</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX614"><code>u32_strwidth</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. 
Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX28"><code>u32_to_u16</code></a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX27"><code>u32_to_u8</code></a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX704"><code>u32_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX707"><code>u32_totitle</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX701"><code>u32_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX230"><code>u32_u32_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX229"><code>u32_u32_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX228"><code>u32_u32_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX227"><code>u32_u32_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX234"><code>u32_u32_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX233"><code>u32_u32_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX232"><code>u32_u32_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX231"><code>u32_u32_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX43"><code>u32_uctomb</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX226"><code>u32_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX225"><code>u32_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX224"><code>u32_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX223"><code>u32_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX611"><code>u32_width</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX644"><code>u32_width_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#IDX617"><code>u32_wordbreaks</code></a></td><td valign="top"><a href="libunistring_10.html#SEC39">10.1 Word breaks in a string</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX190"><code>u8_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX189"><code>u8_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX739"><code>u8_casecmp</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX751"><code>u8_casecoll</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX733"><code>u8_casefold</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX747"><code>u8_casexfrm</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX710"><code>u8_casing_prefix_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX713"><code>u8_casing_prefixes_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX718"><code>u8_casing_suffix_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX721"><code>u8_casing_suffixes_context</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX20"><code>u8_check</code></a></td><td valign="top"><a href="libunistring_4.html#SEC12">4.1 Elementary string checks</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_4.html#IDX62"><code>u8_chr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX55"><code>u8_cmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX58"><code>u8_cmp2</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX161"><code>u8_conv_from_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX164"><code>u8_conv_to_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX45"><code>u8_cpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX69"><code>u8_cpy_alloc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC15">4.4 Elementary string functions with memory allocation</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX736"><code>u8_ct_casefold</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX727"><code>u8_ct_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX730"><code>u8_ct_totitle</code></a></td><td valign="top"><a 
href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX724"><code>u8_ct_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX148"><code>u8_endswith</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX767"><code>u8_is_cased</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX764"><code>u8_is_casefolded</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX758"><code>u8_is_lowercase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX761"><code>u8_is_titlecase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX755"><code>u8_is_uppercase</code></a></td><td valign="top"><a href="libunistring_13.html#SEC53">13.5 Case detection</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX29"><code>u8_mblen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX66"><code>u8_mbsnlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX35"><code>u8_mbtouc</code></a></td><td valign="top"><a 
href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX32"><code>u8_mbtouc_unsafe</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX38"><code>u8_mbtoucr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX48"><code>u8_move</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX79"><code>u8_next</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX675"><code>u8_normalize</code></a></td><td valign="top"><a href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX678"><code>u8_normcmp</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX686"><code>u8_normcoll</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX683"><code>u8_normxfrm</code></a></td><td valign="top"><a href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX638"><code>u8_possible_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. 
Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX82"><code>u8_prev</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX51"><code>u8_set</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX188"><code>u8_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX187"><code>u8_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX145"><code>u8_startswith</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX95"><code>u8_stpcpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX101"><code>u8_stpncpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX104"><code>u8_strcat</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX126"><code>u8_strchr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string 
functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX111"><code>u8_strcmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX115"><code>u8_strcoll</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX167"><code>u8_strconv_from_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX173"><code>u8_strconv_from_locale</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX170"><code>u8_strconv_to_encoding</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_5.html#IDX176"><code>u8_strconv_to_locale</code></a></td><td valign="top"><a href="libunistring_5.html#SEC17">5. 
Conversions between Unicode and encodings <code><uniconv.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX92"><code>u8_strcpy</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX132"><code>u8_strcspn</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX122"><code>u8_strdup</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX85"><code>u8_strlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX72"><code>u8_strmblen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX76"><code>u8_strmbtouc</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX107"><code>u8_strncat</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX118"><code>u8_strncmp</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX98"><code>u8_strncpy</code></a></td><td valign="top"><a 
href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX88"><code>u8_strnlen</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX138"><code>u8_strpbrk</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX129"><code>u8_strrchr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX135"><code>u8_strspn</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX142"><code>u8_strstr</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX151"><code>u8_strtok</code></a></td><td valign="top"><a href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX612"><code>u8_strwidth</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. 
Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX23"><code>u8_to_u16</code></a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX24"><code>u8_to_u32</code></a></td><td valign="top"><a href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX702"><code>u8_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX705"><code>u8_totitle</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX699"><code>u8_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX198"><code>u8_u8_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX197"><code>u8_u8_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX196"><code>u8_u8_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX195"><code>u8_u8_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX202"><code>u8_u8_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX201"><code>u8_u8_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX200"><code>u8_u8_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX199"><code>u8_u8_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#IDX41"><code>u8_uctomb</code></a></td><td valign="top"><a href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX194"><code>u8_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX193"><code>u8_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX192"><code>u8_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX191"><code>u8_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX609"><code>u8_width</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX642"><code>u8_width_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#IDX615"><code>u8_wordbreaks</code></a></td><td valign="top"><a href="libunistring_10.html#SEC39">10.1 Word breaks in a string</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX581"><code>uc_all_blocks</code></a></td><td valign="top"><a href="libunistring_8.html#SEC34">8.10 Blocks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX576"><code>uc_all_scripts</code></a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX402"><code>uc_bidi_category</code></a></td><td valign="top"><a href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX401"><code>uc_bidi_category_byname</code></a></td><td valign="top"><a href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX400"><code>uc_bidi_category_name</code></a></td><td valign="top"><a href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX579"><code>uc_block</code></a></td><td valign="top"><a href="libunistring_8.html#SEC34">8.10 Blocks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX577"><code>uc_block_t</code></a></td><td valign="top"><a href="libunistring_8.html#SEC34">8.10 Blocks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX589"><code>uc_c_ident_category</code></a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX665"><code>uc_canonical_decomposition</code></a></td><td valign="top"><a href="libunistring_12.html#SEC43">12.1 Decomposition of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX380"><code>uc_combining_class</code></a></td><td valign="top"><a href="libunistring_8.html#SEC24">8.2 Canonical combining class</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX666"><code>uc_composition</code></a></td><td valign="top"><a href="libunistring_12.html#SEC44">12.2 Composition of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX404"><code>uc_decimal_value</code></a></td><td valign="top"><a href="libunistring_8.html#SEC26">8.4 Decimal digit value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX664"><code>uc_decomposition</code></a></td><td valign="top"><a href="libunistring_12.html#SEC43">12.1 Decomposition of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX405"><code>uc_digit_value</code></a></td><td valign="top"><a href="libunistring_8.html#SEC27">8.5 Digit value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX406"><code>uc_fraction_t</code></a></td><td valign="top"><a href="libunistring_8.html#SEC28">8.6 Numeric value</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX321"><code>uc_general_category</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX317"><code>uc_general_category_and</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX318"><code>uc_general_category_and_not</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX320"><code>uc_general_category_byname</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX319"><code>uc_general_category_name</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX316"><code>uc_general_category_or</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX241"><code>uc_general_category_t</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX592"><code>uc_is_alnum</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX593"><code>uc_is_alpha</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> 
+<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX403"><code>uc_is_bidi_category</code></a></td><td valign="top"><a href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX603"><code>uc_is_blank</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX580"><code>uc_is_block</code></a></td><td valign="top"><a href="libunistring_8.html#SEC34">8.10 Blocks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX582"><code>uc_is_c_whitespace</code></a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX594"><code>uc_is_cntrl</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX595"><code>uc_is_digit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX322"><code>uc_is_general_category</code></a></td><td valign="top"><a href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX360"><code>uc_is_general_category_withtable</code></a></td><td valign="top"><a href="libunistring_8.html#SEC23">8.1.2 The bit mask API for general category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX596"><code>uc_is_graph</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX583"><code>uc_is_java_whitespace</code></a></td><td valign="top"><a 
href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX597"><code>uc_is_lower</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX598"><code>uc_is_print</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX491"><code>uc_is_property</code></a></td><td valign="top"><a href="libunistring_8.html#SEC31">8.8.1 Properties as objects – the object oriented API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX493"><code>uc_is_property_alphabetic</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX540"><code>uc_is_property_ascii_hex_digit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX529"><code>uc_is_property_bidi_arabic_digit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX525"><code>uc_is_property_bidi_arabic_right_to_left</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX531"><code>uc_is_property_bidi_block_separator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX535"><code>uc_is_property_bidi_boundary_neutral</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX530"><code>uc_is_property_bidi_common_separator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX522"><code>uc_is_property_bidi_control</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX537"><code>uc_is_property_bidi_embedding_or_override</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX527"><code>uc_is_property_bidi_eur_num_separator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX528"><code>uc_is_property_bidi_eur_num_terminator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX526"><code>uc_is_property_bidi_european_digit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX524"><code>uc_is_property_bidi_hebrew_right_to_left</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX523"><code>uc_is_property_bidi_left_to_right</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX534"><code>uc_is_property_bidi_non_spacing_mark</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX538"><code>uc_is_property_bidi_other_neutral</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX536"><code>uc_is_property_bidi_pdf</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX532"><code>uc_is_property_bidi_segment_separator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX533"><code>uc_is_property_bidi_whitespace</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX564"><code>uc_is_property_combining</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX565"><code>uc_is_property_composite</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX559"><code>uc_is_property_currency_symbol</code></a></td><td 
valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX551"><code>uc_is_property_dash</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX566"><code>uc_is_property_decimal_digit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX496"><code>uc_is_property_default_ignorable_code_point</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX498"><code>uc_is_property_deprecated</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX568"><code>uc_is_property_diacritic</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX569"><code>uc_is_property_extender</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX550"><code>uc_is_property_format_control</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX518"><code>uc_is_property_grapheme_base</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> 
+<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX519"><code>uc_is_property_grapheme_extend</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX521"><code>uc_is_property_grapheme_link</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX539"><code>uc_is_property_hex_digit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX552"><code>uc_is_property_hyphen</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX511"><code>uc_is_property_id_continue</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX509"><code>uc_is_property_id_start</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX541"><code>uc_is_property_ideographic</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX544"><code>uc_is_property_ids_binary_operator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX545"><code>uc_is_property_ids_trinary_operator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX570"><code>uc_is_property_ignorable_control</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX549"><code>uc_is_property_iso_control</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX517"><code>uc_is_property_join_control</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX563"><code>uc_is_property_left_of_pair</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX554"><code>uc_is_property_line_separator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX499"><code>uc_is_property_logical_order_exception</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX505"><code>uc_is_property_lowercase</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX560"><code>uc_is_property_math</code></a></td><td valign="top"><a 
href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX548"><code>uc_is_property_non_break</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX495"><code>uc_is_property_not_a_character</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX567"><code>uc_is_property_numeric</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX494"><code>uc_is_property_other_alphabetic</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX497"><code>uc_is_property_other_default_ignorable_code_point</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX520"><code>uc_is_property_other_grapheme_extend</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX512"><code>uc_is_property_other_id_continue</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX510"><code>uc_is_property_other_id_start</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional 
API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX506"><code>uc_is_property_other_lowercase</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX561"><code>uc_is_property_other_math</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX504"><code>uc_is_property_other_uppercase</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX562"><code>uc_is_property_paired_punctuation</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX555"><code>uc_is_property_paragraph_separator</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX516"><code>uc_is_property_pattern_syntax</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX515"><code>uc_is_property_pattern_white_space</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX501"><code>uc_is_property_private_use</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX553"><code>uc_is_property_punctuation</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX556"><code>uc_is_property_quotation_mark</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX543"><code>uc_is_property_radical</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX557"><code>uc_is_property_sentence_terminal</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX508"><code>uc_is_property_soft_dotted</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX547"><code>uc_is_property_space</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX558"><code>uc_is_property_terminal_punctuation</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX507"><code>uc_is_property_titlecase</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX502"><code>uc_is_property_unassigned_code_value</code></a></td><td valign="top"><a 
href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX542"><code>uc_is_property_unified_ideograph</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX503"><code>uc_is_property_uppercase</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX500"><code>uc_is_property_variation_selector</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX492"><code>uc_is_property_white_space</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX514"><code>uc_is_property_xid_continue</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX513"><code>uc_is_property_xid_start</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX546"><code>uc_is_property_zero_width</code></a></td><td valign="top"><a href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX599"><code>uc_is_punct</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_8.html#IDX575"><code>uc_is_script</code></a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX600"><code>uc_is_space</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX601"><code>uc_is_upper</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX602"><code>uc_is_xdigit</code></a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX591"><code>uc_java_ident_category</code></a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX698"><code>uc_locale_language</code></a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX408"><code>uc_mirror_char</code></a></td><td valign="top"><a href="libunistring_8.html#SEC29">8.7 Mirrored character</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX407"><code>uc_numeric_value</code></a></td><td valign="top"><a href="libunistring_8.html#SEC28">8.6 Numeric value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX489"><code>uc_property_byname</code></a></td><td valign="top"><a href="libunistring_8.html#SEC31">8.8.1 Properties as objects – the object oriented API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX490"><code>uc_property_is_valid</code></a></td><td valign="top"><a href="libunistring_8.html#SEC31">8.8.1 Properties as objects – the object oriented 
API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX409"><code>uc_property_t</code></a></td><td valign="top"><a href="libunistring_8.html#SEC31">8.8.1 Properties as objects – the object oriented API</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX573"><code>uc_script</code></a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX574"><code>uc_script_byname</code></a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX571"><code>uc_script_t</code></a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX695"><code>uc_tolower</code></a></td><td valign="top"><a href="libunistring_13.html#SEC49">13.1 Case mappings of characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX696"><code>uc_totitle</code></a></td><td valign="top"><a href="libunistring_13.html#SEC49">13.1 Case mappings of characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX694"><code>uc_toupper</code></a></td><td valign="top"><a href="libunistring_13.html#SEC49">13.1 Case mappings of characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX608"><code>uc_width</code></a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. 
Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#IDX632"><code>uc_wordbreak_property</code></a></td><td valign="top"><a href="libunistring_10.html#SEC40">10.2 Word break property</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX6">UCS-4</a></td><td valign="top"><a href="libunistring_1.html#SEC2">1.1 Unicode</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_3.html#IDX19"><code>ucs4_t</code></a></td><td valign="top"><a href="libunistring_3.html#SEC10">3. Elementary types <code><unitypes.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_3.html#IDX17"><code>uint16_t</code></a></td><td valign="top"><a href="libunistring_3.html#SEC10">3. Elementary types <code><unitypes.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_3.html#IDX18"><code>uint32_t</code></a></td><td valign="top"><a href="libunistring_3.html#SEC10">3. Elementary types <code><unitypes.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_3.html#IDX16"><code>uint8_t</code></a></td><td valign="top"><a href="libunistring_3.html#SEC10">3. Elementary types <code><unitypes.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX182"><code>ulc_asnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX181"><code>ulc_asprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX742"><code>ulc_casecmp</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX754"><code>ulc_casecoll</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#IDX750"><code>ulc_casexfrm</code></a></td><td valign="top"><a href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX235"><code>ulc_fprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX641"><code>ulc_possible_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX180"><code>ulc_snprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX179"><code>ulc_sprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX186"><code>ulc_vasnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX185"><code>ulc_vasprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. 
Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX236"><code>ulc_vfprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX184"><code>ulc_vsnprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_6.html#IDX183"><code>ulc_vsprintf</code></a></td><td valign="top"><a href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#IDX645"><code>ulc_width_linebreaks</code></a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#IDX618"><code>ulc_wordbreaks</code></a></td><td valign="top"><a href="libunistring_10.html#SEC39">10.1 Word breaks in a string</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#SEC2">Unicode</a></td><td valign="top"><a href="libunistring_1.html#SEC2">1.1 Unicode</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC25">Unicode character, bidirectional category</a></td><td valign="top"><a href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX578">Unicode character, block</a></td><td valign="top"><a href="libunistring_8.html#SEC34">8.10 Blocks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC24">Unicode character, canonical combining class</a></td><td valign="top"><a href="libunistring_8.html#SEC24">8.2 Canonical combining class</a></td></tr> +<tr><td></td><td valign="top"><a 
href="libunistring_13.html#SEC49">Unicode character, case mappings</a></td><td valign="top"><a href="libunistring_13.html#SEC49">13.1 Case mappings of characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC21">Unicode character, classification</a></td><td valign="top"><a href="libunistring_8.html#SEC21">8.1 General category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC36">Unicode character, classification like in C</a></td><td valign="top"><a href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC21">Unicode character, general category</a></td><td valign="top"><a href="libunistring_8.html#SEC21">8.1 General category</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC29">Unicode character, mirroring</a></td><td valign="top"><a href="libunistring_8.html#SEC29">8.7 Mirrored character</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_7.html#SEC19">Unicode character, name</a></td><td valign="top"><a href="libunistring_7.html#SEC19">7. 
Names of Unicode characters <code><uniname.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC30">Unicode character, properties</a></td><td valign="top"><a href="libunistring_8.html#SEC30">8.8 Properties</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX572">Unicode character, script</a></td><td valign="top"><a href="libunistring_8.html#SEC33">8.9 Scripts</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX588">Unicode character, validity in C identifiers</a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#IDX590">Unicode character, validity in Java identifiers</a></td><td valign="top"><a href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC26">Unicode character, value</a></td><td valign="top"><a href="libunistring_8.html#SEC26">8.4 Decimal digit value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC27">Unicode character, value</a></td><td valign="top"><a href="libunistring_8.html#SEC27">8.5 Digit value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC28">Unicode character, value</a></td><td valign="top"><a href="libunistring_8.html#SEC28">8.6 Numeric value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#IDX605">Unicode character, width</a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_7.html#IDX238"><code>unicode_character_name</code></a></td><td valign="top"><a href="libunistring_7.html#SEC19">7. 
Names of Unicode characters <code><uniname.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_7.html#IDX239"><code>unicode_name_character</code></a></td><td valign="top"><a href="libunistring_7.html#SEC19">7. Names of Unicode characters <code><uniname.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX674"><code>uninorm_decomposing_form</code></a></td><td valign="top"><a href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX690"><code>uninorm_filter_create</code></a></td><td valign="top"><a href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX692"><code>uninorm_filter_flush</code></a></td><td valign="top"><a href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX693"><code>uninorm_filter_free</code></a></td><td valign="top"><a href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX691"><code>uninorm_filter_write</code></a></td><td valign="top"><a href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX672"><code>uninorm_is_compat_decomposing</code></a></td><td valign="top"><a href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX673"><code>uninorm_is_composing</code></a></td><td valign="top"><a href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_12.html#IDX667"><code>uninorm_t</code></a></td><td valign="top"><a 
href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_13.html#SEC50">uppercasing</a></td><td valign="top"><a href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX1">use cases</a></td><td valign="top"><a href="libunistring_1.html#SEC1">1. Introduction</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX4">UTF-16</a></td><td valign="top"><a href="libunistring_1.html#SEC2">1.1 Unicode</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX12">UTF-16, strings</a></td><td valign="top"><a href="libunistring_1.html#SEC8">1.7 Unicode strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX5">UTF-32</a></td><td valign="top"><a href="libunistring_1.html#SEC2">1.1 Unicode</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX13">UTF-32, strings</a></td><td valign="top"><a href="libunistring_1.html#SEC8">1.7 Unicode strings</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX3">UTF-8</a></td><td valign="top"><a href="libunistring_1.html#SEC2">1.1 Unicode</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX11">UTF-8, strings</a></td><td valign="top"><a href="libunistring_1.html#SEC8">1.7 Unicode strings</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +</table> +<table><tr><th valign="top">Jump to: </th><td><a href="#SEC71_0" class="summary-letter"><b>A</b></a> + +<a href="#SEC71_1" class="summary-letter"><b>B</b></a> + +<a href="#SEC71_2" class="summary-letter"><b>C</b></a> + +<a href="#SEC71_3" class="summary-letter"><b>D</b></a> + +<a href="#SEC71_4" class="summary-letter"><b>E</b></a> + +<a href="#SEC71_5" class="summary-letter"><b>F</b></a> + +<a href="#SEC71_6" class="summary-letter"><b>G</b></a> + +<a href="#SEC71_7" class="summary-letter"><b>H</b></a> + +<a 
href="#SEC71_8" class="summary-letter"><b>I</b></a> + +<a href="#SEC71_9" class="summary-letter"><b>J</b></a> + +<a href="#SEC71_10" class="summary-letter"><b>L</b></a> + +<a href="#SEC71_11" class="summary-letter"><b>M</b></a> + +<a href="#SEC71_12" class="summary-letter"><b>N</b></a> + +<a href="#SEC71_13" class="summary-letter"><b>O</b></a> + +<a href="#SEC71_14" class="summary-letter"><b>P</b></a> + +<a href="#SEC71_15" class="summary-letter"><b>R</b></a> + +<a href="#SEC71_16" class="summary-letter"><b>S</b></a> + +<a href="#SEC71_17" class="summary-letter"><b>T</b></a> + +<a href="#SEC71_18" class="summary-letter"><b>U</b></a> + +<a href="libunistring_19.html#INDEX0_0" class="summary-letter"><b>V</b></a> + +<a href="libunistring_19.html#INDEX0_1" class="summary-letter"><b>W</b></a> + +</td></tr></table> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_17.html#SEC62" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_19.html#INDEX0" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_19.html b/doc/libunistring_19.html new file mode 100644 index 00000000..5974e2f6 --- /dev/null +++ b/doc/libunistring_19.html @@ -0,0 +1,188 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: Index: V – W</title> + +<meta name="description" content="GNU libunistring: Index: V – W"> +<meta name="keywords" content="GNU libunistring: Index: V – W"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[ >> ]</td> +<td 
valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="INDEX0"></a> +<h1 class="unnumbered"> Index: V – W </h1> +<table><tr><th valign="top">Jump to: </th><td><a href="libunistring_18.html#SEC71_0" class="summary-letter"><b>A</b></a> + +<a href="libunistring_18.html#SEC71_1" class="summary-letter"><b>B</b></a> + +<a href="libunistring_18.html#SEC71_2" class="summary-letter"><b>C</b></a> + +<a href="libunistring_18.html#SEC71_3" class="summary-letter"><b>D</b></a> + +<a href="libunistring_18.html#SEC71_4" class="summary-letter"><b>E</b></a> + +<a href="libunistring_18.html#SEC71_5" class="summary-letter"><b>F</b></a> + +<a href="libunistring_18.html#SEC71_6" class="summary-letter"><b>G</b></a> + +<a href="libunistring_18.html#SEC71_7" class="summary-letter"><b>H</b></a> + +<a href="libunistring_18.html#SEC71_8" class="summary-letter"><b>I</b></a> + +<a href="libunistring_18.html#SEC71_9" class="summary-letter"><b>J</b></a> + +<a href="libunistring_18.html#SEC71_10" class="summary-letter"><b>L</b></a> + +<a href="libunistring_18.html#SEC71_11" class="summary-letter"><b>M</b></a> + +<a href="libunistring_18.html#SEC71_12" class="summary-letter"><b>N</b></a> + +<a href="libunistring_18.html#SEC71_13" class="summary-letter"><b>O</b></a> + +<a href="libunistring_18.html#SEC71_14" class="summary-letter"><b>P</b></a> + +<a 
href="libunistring_18.html#SEC71_15" class="summary-letter"><b>R</b></a> + +<a href="libunistring_18.html#SEC71_16" class="summary-letter"><b>S</b></a> + +<a href="libunistring_18.html#SEC71_17" class="summary-letter"><b>T</b></a> + +<a href="libunistring_18.html#SEC71_18" class="summary-letter"><b>U</b></a> + +<a href="#INDEX0_0" class="summary-letter"><b>V</b></a> + +<a href="#INDEX0_1" class="summary-letter"><b>W</b></a> + +</td></tr></table> +<table border="0" class="index-cp"> +<tr><td></td><th align="left">Index Entry</th><th align="left"> Section</th></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="INDEX0_0">V</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#SEC12">validity</a></td><td valign="top"><a href="libunistring_4.html#SEC12">4.1 Elementary string checks</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#IDX2">value, of libunistring</a></td><td valign="top"><a href="libunistring_1.html#SEC1">1. Introduction</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC26">value, of Unicode character</a></td><td valign="top"><a href="libunistring_8.html#SEC26">8.4 Decimal digit value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC27">value, of Unicode character</a></td><td valign="top"><a href="libunistring_8.html#SEC27">8.5 Digit value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_8.html#SEC28">value, of Unicode character</a></td><td valign="top"><a href="libunistring_8.html#SEC28">8.6 Numeric value</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_4.html#SEC12">verification</a></td><td valign="top"><a href="libunistring_4.html#SEC12">4.1 Elementary string checks</a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +<tr><th><a name="INDEX0_1">W</a></th><td></td><td></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_1.html#SEC7">wchar_t, type</a></td><td valign="top"><a 
href="libunistring_1.html#SEC7">1.6 The <code>wchar_t</code> mess</a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_9.html#SEC37">width</a></td><td valign="top"><a href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_10.html#SEC38">word breaks</a></td><td valign="top"><a href="libunistring_10.html#SEC38">10. Word breaks in strings <code><uniwbrk.h></code></a></td></tr> +<tr><td></td><td valign="top"><a href="libunistring_11.html#SEC41">wrapping</a></td><td valign="top"><a href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></td></tr> +<tr><td colspan="3"> <hr></td></tr> +</table> +<table><tr><th valign="top">Jump to: </th><td><a href="libunistring_18.html#SEC71_0" class="summary-letter"><b>A</b></a> + +<a href="libunistring_18.html#SEC71_1" class="summary-letter"><b>B</b></a> + +<a href="libunistring_18.html#SEC71_2" class="summary-letter"><b>C</b></a> + +<a href="libunistring_18.html#SEC71_3" class="summary-letter"><b>D</b></a> + +<a href="libunistring_18.html#SEC71_4" class="summary-letter"><b>E</b></a> + +<a href="libunistring_18.html#SEC71_5" class="summary-letter"><b>F</b></a> + +<a href="libunistring_18.html#SEC71_6" class="summary-letter"><b>G</b></a> + +<a href="libunistring_18.html#SEC71_7" class="summary-letter"><b>H</b></a> + +<a href="libunistring_18.html#SEC71_8" class="summary-letter"><b>I</b></a> + +<a href="libunistring_18.html#SEC71_9" class="summary-letter"><b>J</b></a> + +<a href="libunistring_18.html#SEC71_10" class="summary-letter"><b>L</b></a> + +<a href="libunistring_18.html#SEC71_11" class="summary-letter"><b>M</b></a> + +<a href="libunistring_18.html#SEC71_12" class="summary-letter"><b>N</b></a> + +<a href="libunistring_18.html#SEC71_13" class="summary-letter"><b>O</b></a> + +<a href="libunistring_18.html#SEC71_14" class="summary-letter"><b>P</b></a> + +<a href="libunistring_18.html#SEC71_15" 
class="summary-letter"><b>R</b></a> + +<a href="libunistring_18.html#SEC71_16" class="summary-letter"><b>S</b></a> + +<a href="libunistring_18.html#SEC71_17" class="summary-letter"><b>T</b></a> + +<a href="libunistring_18.html#SEC71_18" class="summary-letter"><b>U</b></a> + +<a href="#INDEX0_0" class="summary-letter"><b>V</b></a> + +<a href="#INDEX0_1" class="summary-letter"><b>W</b></a> + +</td></tr></table> + +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[ >> ]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_2.html b/doc/libunistring_2.html new file mode 100644 index 00000000..6d63b632 --- /dev/null +++ b/doc/libunistring_2.html @@ -0,0 +1,141 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 2. Conventions</title> + +<meta name="description" content="GNU libunistring: 2. Conventions"> +<meta name="keywords" content="GNU libunistring: 2. Conventions"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_1.html#SEC1" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a 
href="libunistring_3.html#SEC10" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="Conventions"></a> +<a name="SEC9"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC9">2. Conventions</a> </h1> + +<p>This chapter explains conventions valid throughout the libunistring library. +</p> +<a name="IDX14"></a> +<p>Variables of type <code>char *</code> denote C strings in locale encoding. +See <a href="libunistring_1.html#SEC4">Locale encodings</a>. +</p> +<p>Variables of type <code>uint8_t *</code> denote UTF-8 strings. Their units +are bytes. +</p> +<p>Variables of type <code>uint16_t *</code> denote UTF-16 strings, without byte +order mark. Their units are 2-byte words. +</p> +<p>Variables of type <code>uint32_t *</code> denote UTF-32 strings, without byte +order mark. Their units are 4-byte words. +</p> +<p>Argument pairs <code>(<var>s</var>, <var>n</var>)</code> denote a string +<code><var>s</var>[0..<var>n</var>-1]</code> with exactly <var>n</var> units. +</p> +<p>All functions with prefix ‘<samp>ulc_</samp>’ operate on C strings in locale +encoding. +</p> +<p>All functions with prefix ‘<samp>u8_</samp>’ operate on UTF-8 strings. +</p> +<p>All functions with prefix ‘<samp>u16_</samp>’ operate on UTF-16 strings. 
+</p> +<p>All functions with prefix ‘<samp>u32_</samp>’ operate on UTF-32 strings. +</p> +<p>For every function with prefix ‘<samp>u8_</samp>’, operating on UTF-8 strings, +there is also a corresponding function with prefix ‘<samp>u16_</samp>’, +operating on UTF-16 strings, and a corresponding function with prefix +‘<samp>u32_</samp>’, operating on UTF-32 strings. Their description is +analogous; in this documentation we describe only the function that +operates on UTF-8 strings, for brevity. +</p> +<p>A declaration with a variable <var>n</var> denotes the three concrete +declarations with <var>n</var> = 8, <var>n</var> = 16, <var>n</var> = 32. +</p> +<p>All parameters starting with ‘<samp>str</samp>’ and the parameters of +functions starting with <code>u8_str</code>/<code>u16_str</code>/<code>u32_str</code> +denote a NUL terminated string. +</p> +<a name="IDX15"></a> +<p>Error values are always returned through the <code>errno</code> variable, +usually with a return value that indicates the presence of an error +(NULL for functions that return a pointer, or -1 for functions that +return an <code>int</code>). +</p> +<p>Functions returning a string result take a +<code>(<var>resultbuf</var>, <var>lengthp</var>)</code> +argument pair. If <var>resultbuf</var> is not NULL and the result fits +into <code>*<var>lengthp</var></code> units, it is put in <var>resultbuf</var>, and +<var>resultbuf</var> is returned. Otherwise, a freshly allocated string +is returned. In both cases, <code>*<var>lengthp</var></code> is set to the +length (number of units) of the returned string. In case of error, +NULL is returned and <code>errno</code> is set. 
+</p> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_1.html#SEC1" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_3.html#SEC10" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_3.html b/doc/libunistring_3.html new file mode 100644 index 00000000..cc446970 --- /dev/null +++ b/doc/libunistring_3.html @@ -0,0 +1,107 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 3. Elementary types <unitypes.h></title> + +<meta name="description" content="GNU libunistring: 3. 
Elementary types <unitypes.h>"> +<meta name="keywords" content="GNU libunistring: 3. Elementary types <unitypes.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_2.html#SEC9" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_4.html#SEC11" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="unitypes_002eh"></a> +<a name="SEC10"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC10">3. Elementary types <code><unitypes.h></code></a> </h1> + +<p>The include file <code><unitypes.h></code> provides the following basic types. +</p> +<dl> +<dt><u>Type:</u> <b>uint8_t</b> +<a name="IDX16"></a> +</dt> +<dt><u>Type:</u> <b>uint16_t</b> +<a name="IDX17"></a> +</dt> +<dt><u>Type:</u> <b>uint32_t</b> +<a name="IDX18"></a> +</dt> +<dd><p>These are the storage units of UTF-8/16/32 strings, respectively. The definitions are +taken from <code><stdint.h></code>, on platforms where this include file is present. +</p></dd></dl> + +<dl> +<dt><u>Type:</u> <b>ucs4_t</b> +<a name="IDX19"></a> +</dt> +<dd><p>This type represents a single Unicode character, outside of an UTF-32 string. +</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_2.html#SEC9" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_4.html#SEC11" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_4.html b/doc/libunistring_4.html new file mode 100644 index 00000000..60992cd0 --- /dev/null +++ b/doc/libunistring_4.html @@ -0,0 +1,864 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 4. Elementary Unicode string functions <unistr.h></title> + +<meta name="description" content="GNU libunistring: 4. Elementary Unicode string functions <unistr.h>"> +<meta name="keywords" content="GNU libunistring: 4. 
Elementary Unicode string functions <unistr.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_3.html#SEC10" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_5.html#SEC17" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="unistr_002eh"></a> +<a name="SEC11"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC11">4. Elementary Unicode string functions <code><unistr.h></code></a> </h1> + +<p>This include file declares elementary functions for Unicode strings. It is +essentially the equivalent of what <code><string.h></code> is for C strings. +</p> + +<hr size="6"> +<a name="Elementary-string-checks"></a> +<a name="SEC12"></a> +<h2 class="section"> <a href="libunistring.html#TOC12">4.1 Elementary string checks</a> </h2> + +<p>The following function is available to verify the integrity of a Unicode string. +</p> +<dl> +<dt><u>Function:</u> const uint8_t * <b>u8_check</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX20"></a> +</dt> +<dt><u>Function:</u> const uint16_t * <b>u16_check</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX21"></a> +</dt> +<dt><u>Function:</u> const uint32_t * <b>u32_check</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX22"></a> +</dt> +<dd><p>This function checks whether a Unicode string is well-formed. +It returns NULL if valid, or a pointer to the first invalid unit otherwise. +</p></dd></dl> + +<hr size="6"> +<a name="Elementary-string-conversions"></a> +<a name="SEC13"></a> +<h2 class="section"> <a href="libunistring.html#TOC13">4.2 Elementary string conversions</a> </h2> + +<p>The following functions perform conversions between the different forms of Unicode strings. +</p> +<dl> +<dt><u>Function:</u> uint16_t * <b>u8_to_u16</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX23"></a> +</dt> +<dd><p>Converts an UTF-8 string to an UTF-16 string. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint32_t * <b>u8_to_u32</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX24"></a> +</dt> +<dd><p>Converts an UTF-8 string to an UTF-32 string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u16_to_u8</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX25"></a> +</dt> +<dd><p>Converts an UTF-16 string to an UTF-8 string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint32_t * <b>u16_to_u32</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX26"></a> +</dt> +<dd><p>Converts an UTF-16 string to an UTF-32 string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u32_to_u8</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX27"></a> +</dt> +<dd><p>Converts an UTF-32 string to an UTF-8 string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint16_t * <b>u32_to_u16</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX28"></a> +</dt> +<dd><p>Converts an UTF-32 string to an UTF-16 string. +</p></dd></dl> + +<hr size="6"> +<a name="Elementary-string-functions"></a> +<a name="SEC14"></a> +<h2 class="section"> <a href="libunistring.html#TOC14">4.3 Elementary string functions</a> </h2> + +<p>The following functions inspect and return details about the first character +in a Unicode string. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_mblen</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX29"></a> +</dt> +<dt><u>Function:</u> int <b>u16_mblen</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX30"></a> +</dt> +<dt><u>Function:</u> int <b>u32_mblen</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX31"></a> +</dt> +<dd><p>Returns the length (number of units) of the first character in <var>s</var>, which +is no longer than <var>n</var>. Returns 0 if it is the NUL character. Returns -1 +upon failure. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/mblen.html"><code>mblen</code></a>, except that it operates on a +Unicode string and that <var>s</var> must not be NULL. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_mbtouc_unsafe</b><i> (ucs4_t *<var>puc</var>, const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX32"></a> +</dt> +<dt><u>Function:</u> int <b>u16_mbtouc_unsafe</b><i> (ucs4_t *<var>puc</var>, const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX33"></a> +</dt> +<dt><u>Function:</u> int <b>u32_mbtouc_unsafe</b><i> (ucs4_t *<var>puc</var>, const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX34"></a> +</dt> +<dd><p>Returns the length (number of units) of the first character in <var>s</var>, +putting its <code>ucs4_t</code> representation in <code>*<var>puc</var></code>. Upon failure, +<code>*<var>puc</var></code> is set to <code>0xfffd</code>, and an appropriate number of units +is returned. +</p> +<p>The number of available units, <var>n</var>, must be > 0. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/mbtowc.html"><code>mbtowc</code></a>, except that it operates on a +Unicode string, <var>puc</var> and <var>s</var> must not be NULL, <var>n</var> must be > 0, +and the NUL character is not treated specially. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_mbtouc</b><i> (ucs4_t *<var>puc</var>, const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX35"></a> +</dt> +<dt><u>Function:</u> int <b>u16_mbtouc</b><i> (ucs4_t *<var>puc</var>, const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX36"></a> +</dt> +<dt><u>Function:</u> int <b>u32_mbtouc</b><i> (ucs4_t *<var>puc</var>, const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX37"></a> +</dt> +<dd><p>This function is like <code>u8_mbtouc_unsafe</code>, except that it will detect an +invalid UTF-8 character, even if the library is compiled without +‘<samp>--enable-safety</samp>’. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_mbtoucr</b><i> (ucs4_t *<var>puc</var>, const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX38"></a> +</dt> +<dt><u>Function:</u> int <b>u16_mbtoucr</b><i> (ucs4_t *<var>puc</var>, const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX39"></a> +</dt> +<dt><u>Function:</u> int <b>u32_mbtoucr</b><i> (ucs4_t *<var>puc</var>, const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX40"></a> +</dt> +<dd><p>Returns the length (number of units) of the first character in <var>s</var>, +putting its <code>ucs4_t</code> representation in <code>*<var>puc</var></code>. Upon failure, +<code>*<var>puc</var></code> is set to <code>0xfffd</code>, and -1 is returned for an invalid +sequence of units, -2 is returned for an incomplete sequence of units. +</p> +<p>The number of available units, <var>n</var>, must be > 0. +</p> +<p>This function is similar to <code>u8_mbtouc</code>, except that the return value +gives more details about the failure, similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html"><code>mbrtowc</code></a>. +</p></dd></dl> + +<p>The following function stores a Unicode character as a Unicode string in +memory. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_uctomb</b><i> (uint8_t *<var>s</var>, ucs4_t <var>uc</var>, int <var>n</var>)</i> +<a name="IDX41"></a> +</dt> +<dt><u>Function:</u> int <b>u16_uctomb</b><i> (uint16_t *<var>s</var>, ucs4_t <var>uc</var>, int <var>n</var>)</i> +<a name="IDX42"></a> +</dt> +<dt><u>Function:</u> int <b>u32_uctomb</b><i> (uint32_t *<var>s</var>, ucs4_t <var>uc</var>, int <var>n</var>)</i> +<a name="IDX43"></a> +</dt> +<dd><p>Puts the multibyte character represented by <var>uc</var> in <var>s</var>, returning its +length. Returns -1 upon failure, -2 if the number of available units, <var>n</var>, +is too small. The latter case cannot occur if <var>n</var> >= 6/2/1, respectively. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wctomb.html"><code>wctomb</code></a>, except that it operates on a +Unicode string, <var>s</var> must not be NULL, and the argument <var>n</var> must be +specified. +</p></dd></dl> + +<a name="IDX44"></a> +<p>The following functions copy Unicode strings in memory. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_cpy</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX45"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_cpy</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX46"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_cpy</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX47"></a> +</dt> +<dd><p>Copies <var>n</var> units from <var>src</var> to <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/memcpy.html"><code>memcpy</code></a>, except that it operates on +Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_move</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX48"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_move</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX49"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_move</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX50"></a> +</dt> +<dd><p>Copies <var>n</var> units from <var>src</var> to <var>dest</var>, guaranteeing correct +behavior for overlapping memory areas. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/memmove.html"><code>memmove</code></a>, except that it operates on +Unicode strings. +</p></dd></dl> + +<p>The following function fills a Unicode string. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_set</b><i> (uint8_t *<var>s</var>, ucs4_t <var>uc</var>, size_t <var>n</var>)</i> +<a name="IDX51"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_set</b><i> (uint16_t *<var>s</var>, ucs4_t <var>uc</var>, size_t <var>n</var>)</i> +<a name="IDX52"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_set</b><i> (uint32_t *<var>s</var>, ucs4_t <var>uc</var>, size_t <var>n</var>)</i> +<a name="IDX53"></a> +</dt> +<dd><p>Sets the first <var>n</var> characters of <var>s</var> to <var>uc</var>. <var>uc</var> should be +a character that occupies only 1 unit. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/memset.html"><code>memset</code></a>, except that it operates on +Unicode strings. +</p></dd></dl> + +<a name="IDX54"></a> +<p>The following function compares two Unicode strings of the same length. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_cmp</b><i> (const uint8_t *<var>s1</var>, const uint8_t *<var>s2</var>, size_t <var>n</var>)</i> +<a name="IDX55"></a> +</dt> +<dt><u>Function:</u> int <b>u16_cmp</b><i> (const uint16_t *<var>s1</var>, const uint16_t *<var>s2</var>, size_t <var>n</var>)</i> +<a name="IDX56"></a> +</dt> +<dt><u>Function:</u> int <b>u32_cmp</b><i> (const uint32_t *<var>s1</var>, const uint32_t *<var>s2</var>, size_t <var>n</var>)</i> +<a name="IDX57"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, each of length <var>n</var>, lexicographically. +Returns a negative value if <var>s1</var> compares smaller than <var>s2</var>, +a positive value if <var>s1</var> compares larger than <var>s2</var>, or 0 if +they compare equal. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/memcmp.html"><code>memcmp</code></a>, except that it operates on +Unicode strings. +</p></dd></dl> + +<p>The following function compares two Unicode strings of possibly different +lengths. +</p> +<dl> +<dt><u>Function:</u> int <b>u8_cmp2</b><i> (const uint8_t *<var>s1</var>, size_t <var>n1</var>, const uint8_t *<var>s2</var>, size_t <var>n2</var>)</i> +<a name="IDX58"></a> +</dt> +<dt><u>Function:</u> int <b>u16_cmp2</b><i> (const uint16_t *<var>s1</var>, size_t <var>n1</var>, const uint16_t *<var>s2</var>, size_t <var>n2</var>)</i> +<a name="IDX59"></a> +</dt> +<dt><u>Function:</u> int <b>u32_cmp2</b><i> (const uint32_t *<var>s1</var>, size_t <var>n1</var>, const uint32_t *<var>s2</var>, size_t <var>n2</var>)</i> +<a name="IDX60"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, lexicographically. +Returns a negative value if <var>s1</var> compares smaller than <var>s2</var>, +a positive value if <var>s1</var> compares larger than <var>s2</var>, or 0 if +they compare equal. 
+</p> +<p>This function is similar to the gnulib function <code>memcmp2</code>, except that it +operates on Unicode strings. +</p></dd></dl> + +<a name="IDX61"></a> +<p>The following function searches for a given Unicode character. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_chr</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX62"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_chr</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX63"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_chr</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX64"></a> +</dt> +<dd><p>Searches the string at <var>s</var> for <var>uc</var>. Returns a pointer to the first +occurrence of <var>uc</var> in <var>s</var>, or NULL if <var>uc</var> does not occur in +<var>s</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/memchr.html"><code>memchr</code></a>, except that it operates on +Unicode strings. +</p></dd></dl> + +<a name="IDX65"></a> +<p>The following function counts the number of Unicode characters. +</p> +<dl> +<dt><u>Function:</u> size_t <b>u8_mbsnlen</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX66"></a> +</dt> +<dt><u>Function:</u> size_t <b>u16_mbsnlen</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX67"></a> +</dt> +<dt><u>Function:</u> size_t <b>u32_mbsnlen</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX68"></a> +</dt> +<dd><p>Counts and returns the number of Unicode characters in the <var>n</var> units +from <var>s</var>. +</p> +<p>This function is similar to the gnulib function <code>mbsnlen</code>, except that +it operates on Unicode strings. 
+</p></dd></dl> + +<hr size="6"> +<a name="Elementary-string-functions-with-memory-allocation"></a> +<a name="SEC15"></a> +<h2 class="section"> <a href="libunistring.html#TOC15">4.4 Elementary string functions with memory allocation</a> </h2> + +<p>The following function copies a Unicode string. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_cpy_alloc</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX69"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_cpy_alloc</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX70"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_cpy_alloc</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>)</i> +<a name="IDX71"></a> +</dt> +<dd><p>Makes a freshly allocated copy of <var>s</var>, of length <var>n</var>. +</p></dd></dl> + +<hr size="6"> +<a name="Elementary-string-functions-on-NUL-terminated-strings"></a> +<a name="SEC16"></a> +<h2 class="section"> <a href="libunistring.html#TOC16">4.5 Elementary string functions on NUL terminated strings</a> </h2> + +<p>The following functions inspect and return details about the first character +in a Unicode string. +</p> +<dl> +<dt><u>Function:</u> int <b>u8_strmblen</b><i> (const uint8_t *<var>s</var>)</i> +<a name="IDX72"></a> +</dt> +<dt><u>Function:</u> int <b>u16_strmblen</b><i> (const uint16_t *<var>s</var>)</i> +<a name="IDX73"></a> +</dt> +<dt><u>Function:</u> int <b>u32_strmblen</b><i> (const uint32_t *<var>s</var>)</i> +<a name="IDX74"></a> +</dt> +<dd><p>Returns the length (number of units) of the first character in <var>s</var>. +Returns 0 if it is the NUL character. Returns -1 upon failure. 
+</p></dd></dl> + +<a name="IDX75"></a> +<dl> +<dt><u>Function:</u> int <b>u8_strmbtouc</b><i> (ucs4_t *<var>puc</var>, const uint8_t *<var>s</var>)</i> +<a name="IDX76"></a> +</dt> +<dt><u>Function:</u> int <b>u16_strmbtouc</b><i> (ucs4_t *<var>puc</var>, const uint16_t *<var>s</var>)</i> +<a name="IDX77"></a> +</dt> +<dt><u>Function:</u> int <b>u32_strmbtouc</b><i> (ucs4_t *<var>puc</var>, const uint32_t *<var>s</var>)</i> +<a name="IDX78"></a> +</dt> +<dd><p>Returns the length (number of units) of the first character in <var>s</var>, +putting its <code>ucs4_t</code> representation in <code>*<var>puc</var></code>. Returns 0 +if it is the NUL character. Returns -1 upon failure. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> const uint8_t * <b>u8_next</b><i> (ucs4_t *<var>puc</var>, const uint8_t *<var>s</var>)</i> +<a name="IDX79"></a> +</dt> +<dt><u>Function:</u> const uint16_t * <b>u16_next</b><i> (ucs4_t *<var>puc</var>, const uint16_t *<var>s</var>)</i> +<a name="IDX80"></a> +</dt> +<dt><u>Function:</u> const uint32_t * <b>u32_next</b><i> (ucs4_t *<var>puc</var>, const uint32_t *<var>s</var>)</i> +<a name="IDX81"></a> +</dt> +<dd><p>Forward iteration step. Advances the pointer past the next character, +or returns NULL if the end of the string has been reached. Puts the +character's <code>ucs4_t</code> representation in <code>*<var>puc</var></code>. +</p></dd></dl> + +<p>The following function inspects and returns details about the previous +character in a Unicode string. 
+</p> +<dl> +<dt><u>Function:</u> const uint8_t * <b>u8_prev</b><i> (ucs4_t *<var>puc</var>, const uint8_t *<var>s</var>, const uint8_t *<var>start</var>)</i> +<a name="IDX82"></a> +</dt> +<dt><u>Function:</u> const uint16_t * <b>u16_prev</b><i> (ucs4_t *<var>puc</var>, const uint16_t *<var>s</var>, const uint16_t *<var>start</var>)</i> +<a name="IDX83"></a> +</dt> +<dt><u>Function:</u> const uint32_t * <b>u32_prev</b><i> (ucs4_t *<var>puc</var>, const uint32_t *<var>s</var>, const uint32_t *<var>start</var>)</i> +<a name="IDX84"></a> +</dt> +<dd><p>Backward iteration step. Advances the pointer to point to the previous +character, or returns NULL if the beginning of the string has been reached. +Puts the character's <code>ucs4_t</code> representation in <code>*<var>puc</var></code>. +</p></dd></dl> + +<p>The following functions determine the length of a Unicode string. +</p> +<dl> +<dt><u>Function:</u> size_t <b>u8_strlen</b><i> (const uint8_t *<var>s</var>)</i> +<a name="IDX85"></a> +</dt> +<dt><u>Function:</u> size_t <b>u16_strlen</b><i> (const uint16_t *<var>s</var>)</i> +<a name="IDX86"></a> +</dt> +<dt><u>Function:</u> size_t <b>u32_strlen</b><i> (const uint32_t *<var>s</var>)</i> +<a name="IDX87"></a> +</dt> +<dd><p>Returns the number of units in <var>s</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strlen.html"><code>strlen</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcslen.html"><code>wcslen</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> size_t <b>u8_strnlen</b><i> (const uint8_t *<var>s</var>, size_t <var>maxlen</var>)</i> +<a name="IDX88"></a> +</dt> +<dt><u>Function:</u> size_t <b>u16_strnlen</b><i> (const uint16_t *<var>s</var>, size_t <var>maxlen</var>)</i> +<a name="IDX89"></a> +</dt> +<dt><u>Function:</u> size_t <b>u32_strnlen</b><i> (const uint32_t *<var>s</var>, size_t <var>maxlen</var>)</i> +<a name="IDX90"></a> +</dt> +<dd><p>Returns the number of units in <var>s</var>, but at most <var>maxlen</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strnlen.html"><code>strnlen</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsnlen.html"><code>wcsnlen</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<a name="IDX91"></a> +<p>The following functions copy portions of Unicode strings in memory. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strcpy</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>)</i> +<a name="IDX92"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strcpy</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>)</i> +<a name="IDX93"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strcpy</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>)</i> +<a name="IDX94"></a> +</dt> +<dd><p>Copies <var>src</var> to <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcpy.html"><code>strcpy</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcscpy.html"><code>wcscpy</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_stpcpy</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>)</i> +<a name="IDX95"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_stpcpy</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>)</i> +<a name="IDX96"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_stpcpy</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>)</i> +<a name="IDX97"></a> +</dt> +<dd><p>Copies <var>src</var> to <var>dest</var>, returning the address of the terminating NUL +in <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html"><code>stpcpy</code></a>, except that it operates on +Unicode strings. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strncpy</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX98"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strncpy</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX99"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strncpy</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX100"></a> +</dt> +<dd><p>Copies no more than <var>n</var> units of <var>src</var> to <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strncpy.html"><code>strncpy</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsncpy.html"><code>wcsncpy</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_stpncpy</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX101"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_stpncpy</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX102"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_stpncpy</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX103"></a> +</dt> +<dd><p>Copies no more than <var>n</var> units of <var>src</var> to <var>dest</var>, returning the +address of the last unit written into <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/stpncpy.html"><code>stpncpy</code></a>, except that it operates on +Unicode strings. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strcat</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>)</i> +<a name="IDX104"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strcat</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>)</i> +<a name="IDX105"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strcat</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>)</i> +<a name="IDX106"></a> +</dt> +<dd><p>Appends <var>src</var> onto <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcat.html"><code>strcat</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcscat.html"><code>wcscat</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strncat</b><i> (uint8_t *<var>dest</var>, const uint8_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX107"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strncat</b><i> (uint16_t *<var>dest</var>, const uint16_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX108"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strncat</b><i> (uint32_t *<var>dest</var>, const uint32_t *<var>src</var>, size_t <var>n</var>)</i> +<a name="IDX109"></a> +</dt> +<dd><p>Appends no more than <var>n</var> units of <var>src</var> onto <var>dest</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strncat.html"><code>strncat</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsncat.html"><code>wcsncat</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<a name="IDX110"></a> +<p>The following functions compare two Unicode strings. +</p> +<dl> +<dt><u>Function:</u> int <b>u8_strcmp</b><i> (const uint8_t *<var>s1</var>, const uint8_t *<var>s2</var>)</i> +<a name="IDX111"></a> +</dt> +<dt><u>Function:</u> int <b>u16_strcmp</b><i> (const uint16_t *<var>s1</var>, const uint16_t *<var>s2</var>)</i> +<a name="IDX112"></a> +</dt> +<dt><u>Function:</u> int <b>u32_strcmp</b><i> (const uint32_t *<var>s1</var>, const uint32_t *<var>s2</var>)</i> +<a name="IDX113"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var>, lexicographically. +Returns a negative value if <var>s1</var> compares smaller than <var>s2</var>, +a positive value if <var>s1</var> compares larger than <var>s2</var>, or 0 if +they compare equal. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcmp.html"><code>strcmp</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcscmp.html"><code>wcscmp</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<a name="IDX114"></a> +<dl> +<dt><u>Function:</u> int <b>u8_strcoll</b><i> (const uint8_t *<var>s1</var>, const uint8_t *<var>s2</var>)</i> +<a name="IDX115"></a> +</dt> +<dt><u>Function:</u> int <b>u16_strcoll</b><i> (const uint16_t *<var>s1</var>, const uint16_t *<var>s2</var>)</i> +<a name="IDX116"></a> +</dt> +<dt><u>Function:</u> int <b>u32_strcoll</b><i> (const uint32_t *<var>s1</var>, const uint32_t *<var>s2</var>)</i> +<a name="IDX117"></a> +</dt> +<dd><p>Compares <var>s1</var> and <var>s2</var> using the collation rules of the current +locale. +Returns -1 if <var>s1</var> &lt; <var>s2</var>, 0 if <var>s1</var> = <var>s2</var>, 1 if +<var>s1</var> &gt; <var>s2</var>. Upon failure, sets <code>errno</code> and returns any value. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcoll.html"><code>strcoll</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcscoll.html"><code>wcscoll</code></a>, except +that it operates on Unicode strings. +</p> +<p>Note that this function may consider different canonical normalizations +of the same string as having a large distance. It is therefore better to +use the function <code>u8_normcoll</code> instead of this one; see <a href="libunistring_12.html#SEC42">Normalization forms (composition and decomposition) <code>&lt;uninorm.h&gt;</code></a>. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_strncmp</b><i> (const uint8_t *<var>s1</var>, const uint8_t *<var>s2</var>, size_t <var>n</var>)</i> +<a name="IDX118"></a> +</dt> +<dt><u>Function:</u> int <b>u16_strncmp</b><i> (const uint16_t *<var>s1</var>, const uint16_t *<var>s2</var>, size_t <var>n</var>)</i> +<a name="IDX119"></a> +</dt> +<dt><u>Function:</u> int <b>u32_strncmp</b><i> (const uint32_t *<var>s1</var>, const uint32_t *<var>s2</var>, size_t <var>n</var>)</i> +<a name="IDX120"></a> +</dt> +<dd><p>Compares no more than <var>n</var> units of <var>s1</var> and <var>s2</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strncmp.html"><code>strncmp</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsncmp.html"><code>wcsncmp</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<a name="IDX121"></a> +<p>The following function allocates a duplicate of a Unicode string. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strdup</b><i> (const uint8_t *<var>s</var>)</i> +<a name="IDX122"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strdup</b><i> (const uint16_t *<var>s</var>)</i> +<a name="IDX123"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strdup</b><i> (const uint32_t *<var>s</var>)</i> +<a name="IDX124"></a> +</dt> +<dd><p>Duplicates <var>s</var>, returning an identical malloc'd string. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strdup.html"><code>strdup</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsdup.html"><code>wcsdup</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<a name="IDX125"></a> +<p>The following functions search for a given Unicode character. 
+</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strchr</b><i> (const uint8_t *<var>str</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX126"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strchr</b><i> (const uint16_t *<var>str</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX127"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strchr</b><i> (const uint32_t *<var>str</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX128"></a> +</dt> +<dd><p>Finds the first occurrence of <var>uc</var> in <var>str</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strchr.html"><code>strchr</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcschr.html"><code>wcschr</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strrchr</b><i> (const uint8_t *<var>str</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX129"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strrchr</b><i> (const uint16_t *<var>str</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX130"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strrchr</b><i> (const uint32_t *<var>str</var>, ucs4_t <var>uc</var>)</i> +<a name="IDX131"></a> +</dt> +<dd><p>Finds the last occurrence of <var>uc</var> in <var>str</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strrchr.html"><code>strrchr</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsrchr.html"><code>wcsrchr</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<p>The following functions search for the first occurrence of some Unicode +character in or outside a given set of Unicode characters. 
+</p> +<dl> +<dt><u>Function:</u> size_t <b>u8_strcspn</b><i> (const uint8_t *<var>str</var>, const uint8_t *<var>reject</var>)</i> +<a name="IDX132"></a> +</dt> +<dt><u>Function:</u> size_t <b>u16_strcspn</b><i> (const uint16_t *<var>str</var>, const uint16_t *<var>reject</var>)</i> +<a name="IDX133"></a> +</dt> +<dt><u>Function:</u> size_t <b>u32_strcspn</b><i> (const uint32_t *<var>str</var>, const uint32_t *<var>reject</var>)</i> +<a name="IDX134"></a> +</dt> +<dd><p>Returns the length of the initial segment of <var>str</var> which consists entirely +of Unicode characters not in <var>reject</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strcspn.html"><code>strcspn</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcscspn.html"><code>wcscspn</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> size_t <b>u8_strspn</b><i> (const uint8_t *<var>str</var>, const uint8_t *<var>accept</var>)</i> +<a name="IDX135"></a> +</dt> +<dt><u>Function:</u> size_t <b>u16_strspn</b><i> (const uint16_t *<var>str</var>, const uint16_t *<var>accept</var>)</i> +<a name="IDX136"></a> +</dt> +<dt><u>Function:</u> size_t <b>u32_strspn</b><i> (const uint32_t *<var>str</var>, const uint32_t *<var>accept</var>)</i> +<a name="IDX137"></a> +</dt> +<dd><p>Returns the length of the initial segment of <var>str</var> which consists entirely +of Unicode characters in <var>accept</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strspn.html"><code>strspn</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsspn.html"><code>wcsspn</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strpbrk</b><i> (const uint8_t *<var>str</var>, const uint8_t *<var>accept</var>)</i> +<a name="IDX138"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strpbrk</b><i> (const uint16_t *<var>str</var>, const uint16_t *<var>accept</var>)</i> +<a name="IDX139"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strpbrk</b><i> (const uint32_t *<var>str</var>, const uint32_t *<var>accept</var>)</i> +<a name="IDX140"></a> +</dt> +<dd><p>Finds the first occurrence in <var>str</var> of any character in <var>accept</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strpbrk.html"><code>strpbrk</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcspbrk.html"><code>wcspbrk</code></a>, except +that it operates on Unicode strings. +</p></dd></dl> + +<a name="IDX141"></a> +<p>The following functions search whether a given Unicode string is a substring +of another Unicode string. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strstr</b><i> (const uint8_t *<var>haystack</var>, const uint8_t *<var>needle</var>)</i> +<a name="IDX142"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strstr</b><i> (const uint16_t *<var>haystack</var>, const uint16_t *<var>needle</var>)</i> +<a name="IDX143"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strstr</b><i> (const uint32_t *<var>haystack</var>, const uint32_t *<var>needle</var>)</i> +<a name="IDX144"></a> +</dt> +<dd><p>Finds the first occurrence of <var>needle</var> in <var>haystack</var>. +</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strstr.html"><code>strstr</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcsstr.html"><code>wcsstr</code></a>, except +that it operates on Unicode strings. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>u8_startswith</b><i> (const uint8_t *<var>str</var>, const uint8_t *<var>prefix</var>)</i> +<a name="IDX145"></a> +</dt> +<dt><u>Function:</u> bool <b>u16_startswith</b><i> (const uint16_t *<var>str</var>, const uint16_t *<var>prefix</var>)</i> +<a name="IDX146"></a> +</dt> +<dt><u>Function:</u> bool <b>u32_startswith</b><i> (const uint32_t *<var>str</var>, const uint32_t *<var>prefix</var>)</i> +<a name="IDX147"></a> +</dt> +<dd><p>Tests whether <var>str</var> starts with <var>prefix</var>. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>u8_endswith</b><i> (const uint8_t *<var>str</var>, const uint8_t *<var>suffix</var>)</i> +<a name="IDX148"></a> +</dt> +<dt><u>Function:</u> bool <b>u16_endswith</b><i> (const uint16_t *<var>str</var>, const uint16_t *<var>suffix</var>)</i> +<a name="IDX149"></a> +</dt> +<dt><u>Function:</u> bool <b>u32_endswith</b><i> (const uint32_t *<var>str</var>, const uint32_t *<var>suffix</var>)</i> +<a name="IDX150"></a> +</dt> +<dd><p>Tests whether <var>str</var> ends with <var>suffix</var>. +</p></dd></dl> + +<p>The following function does one step in tokenizing a Unicode string. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strtok</b><i> (uint8_t *<var>str</var>, const uint8_t *<var>delim</var>, uint8_t **<var>ptr</var>)</i> +<a name="IDX151"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strtok</b><i> (uint16_t *<var>str</var>, const uint16_t *<var>delim</var>, uint16_t **<var>ptr</var>)</i> +<a name="IDX152"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strtok</b><i> (uint32_t *<var>str</var>, const uint32_t *<var>delim</var>, uint32_t **<var>ptr</var>)</i> +<a name="IDX153"></a> +</dt> +<dd><p>Divides <var>str</var> into tokens separated by characters in <var>delim</var>. 
+</p> +<p>This function is similar to <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/strtok_r.html"><code>strtok_r</code></a> and <a href="http://www.opengroup.org/onlinepubs/9699919799/functions/wcstok.html"><code>wcstok</code></a>, except +that it operates on Unicode strings. Its interface is actually more similar to +<code>wcstok</code> than to <code>strtok</code>. +</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC11" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_5.html#SEC17" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_5.html b/doc/libunistring_5.html new file mode 100644 index 00000000..92e115f9 --- /dev/null +++ b/doc/libunistring_5.html @@ -0,0 +1,296 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 5. Conversions between Unicode and encodings <uniconv.h></title> + +<meta name="description" content="GNU libunistring: 5. Conversions between Unicode and encodings <uniconv.h>"> +<meta name="keywords" content="GNU libunistring: 5. Conversions between Unicode and encodings <uniconv.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a 
href="libunistring_4.html#SEC11" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_6.html#SEC18" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="uniconv_002eh"></a> +<a name="SEC17"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a> </h1> + +<p>This include file declares functions for converting between Unicode strings +and <code>char *</code> strings in locale encoding or in other specified encodings. +</p> +<a name="IDX154"></a> +<p>The following function returns the locale encoding. +</p> +<dl> +<dt><u>Function:</u> const char * <b>locale_charset</b><i> ()</i> +<a name="IDX155"></a> +</dt> +<dd><p>Determines the current locale's character encoding, and canonicalizes it +into one of the canonical names listed in ‘<tt>config.charset</tt>’. +If the canonical name cannot be determined, the result is a non-canonical +name. +</p> +<p>The result must not be freed; it is statically allocated. +</p> +<p>The result of this function can be used as an argument to the <code>iconv_open</code> +function in GNU libc, in GNU libiconv, or in the gnulib provided wrapper +around the native <code>iconv_open</code> function. 
It may not work as an argument +to the native <code>iconv_open</code> function directly. +</p></dd></dl> + +<p>The handling of unconvertible characters during the conversions can be +parametrized through the following enumeration type: +</p> +<dl> +<dt><u>Type:</u> <b>enum iconv_ilseq_handler</b> +<a name="IDX156"></a> +</dt> +<dd><p>This type specifies how unconvertible characters in the input are handled. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> enum iconv_ilseq_handler <b>iconveh_error</b> +<a name="IDX157"></a> +</dt> +<dd><p>This handler causes the function to return with <code>errno</code> set to +<code>EILSEQ</code>. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> enum iconv_ilseq_handler <b>iconveh_question_mark</b> +<a name="IDX158"></a> +</dt> +<dd><p>This handler produces one question mark ‘<samp>?</samp>’ per unconvertible character. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> enum iconv_ilseq_handler <b>iconveh_escape_sequence</b> +<a name="IDX159"></a> +</dt> +<dd><p>This handler produces an escape sequence <code>\u<var>xxxx</var></code> or +<code>\U<var>xxxxxxxx</var></code> for each unconvertible character. +</p></dd></dl> + +<a name="IDX160"></a> +<p>The following functions convert between strings in a specified encoding and +Unicode strings. 
+</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_conv_from_encoding</b><i> (const char *<var>fromcode</var>, enum iconv_ilseq_handler <var>handler</var>, const char *<var>src</var>, size_t <var>srclen</var>, size_t *<var>offsets</var>, uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX161"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_conv_from_encoding</b><i> (const char *<var>fromcode</var>, enum iconv_ilseq_handler <var>handler</var>, const char *<var>src</var>, size_t <var>srclen</var>, size_t *<var>offsets</var>, uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX162"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_conv_from_encoding</b><i> (const char *<var>fromcode</var>, enum iconv_ilseq_handler <var>handler</var>, const char *<var>src</var>, size_t <var>srclen</var>, size_t *<var>offsets</var>, uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX163"></a> +</dt> +<dd><p>Converts an entire string, possibly including NUL bytes, from one encoding +to UTF-8, UTF-16, or UTF-32 encoding, respectively. +</p> +<p>Converts a memory region given in encoding <var>fromcode</var>. <var>fromcode</var> is +as for the <code>iconv_open</code> function. +</p> +<p>The input is in the memory region between <var>src</var> (inclusive) and +<code><var>src</var> + <var>srclen</var></code> (exclusive). +</p> +<p>If <var>offsets</var> is not NULL, it should point to an array of <var>srclen</var> +integers; this array is filled with offsets into the result, i.e. the +character starting at <code><var>src</var>[i]</code> corresponds to the character starting +at <code><var>result</var>[<var>offsets</var>[i]]</code>, and other offsets are set to +<code>(size_t)(-1)</code>. +</p> +<p><code><var>resultbuf</var></code> and <code>*<var>lengthp</var></code> should be a scratch +buffer and its size, or <code><var>resultbuf</var></code> can be NULL. +</p> +<p>May erase the contents of the memory at <code><var>resultbuf</var></code>. 
+</p> +<p>If successful: The resulting Unicode string (non-NULL) is returned and +its length stored in <code>*<var>lengthp</var></code>. The resulting string is +<code><var>resultbuf</var></code> if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. +</p> +<p>In case of error: NULL is returned and <code>errno</code> is set. +Particular <code>errno</code> values: <code>EINVAL</code>, <code>EILSEQ</code>, <code>ENOMEM</code>. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> char * <b>u8_conv_to_encoding</b><i> (const char *<var>tocode</var>, enum iconv_ilseq_handler <var>handler</var>, const uint8_t *<var>src</var>, size_t <var>srclen</var>, size_t *<var>offsets</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX164"></a> +</dt> +<dt><u>Function:</u> char * <b>u16_conv_to_encoding</b><i> (const char *<var>tocode</var>, enum iconv_ilseq_handler <var>handler</var>, const uint16_t *<var>src</var>, size_t <var>srclen</var>, size_t *<var>offsets</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX165"></a> +</dt> +<dt><u>Function:</u> char * <b>u32_conv_to_encoding</b><i> (const char *<var>tocode</var>, enum iconv_ilseq_handler <var>handler</var>, const uint32_t *<var>src</var>, size_t <var>srclen</var>, size_t *<var>offsets</var>, char *<var>resultbuf</var>, size_t *<var>lengthp</var>)</i> +<a name="IDX166"></a> +</dt> +<dd><p>Converts an entire Unicode string, possibly including NUL units, from UTF-8, +UTF-16, or UTF-32 encoding, respectively, to a given encoding. +</p> +<p>Converts a memory region to encoding <var>tocode</var>. <var>tocode</var> is as for +the <code>iconv_open</code> function. +</p> +<p>The input is in the memory region between <var>src</var> (inclusive) and +<code><var>src</var> + <var>srclen</var></code> (exclusive). +</p> +<p>If <var>offsets</var> is not NULL, it should point to an array of <var>srclen</var> +integers; this array is filled with offsets into the result, i.e. 
the +character starting at <code><var>src</var>[i]</code> corresponds to the character starting +at <code><var>result</var>[<var>offsets</var>[i]]</code>, and other offsets are set to +<code>(size_t)(-1)</code>. +</p> +<p><code><var>resultbuf</var></code> and <code>*<var>lengthp</var></code> should be a scratch +buffer and its size, or <code><var>resultbuf</var></code> can be NULL. +</p> +<p>May erase the contents of the memory at <code><var>resultbuf</var></code>. +</p> +<p>If successful: The resulting string (non-NULL) is returned and +its length stored in <code>*<var>lengthp</var></code>. The resulting string is +<code><var>resultbuf</var></code> if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. +</p> +<p>In case of error: NULL is returned and <code>errno</code> is set. +Particular <code>errno</code> values: <code>EINVAL</code>, <code>EILSEQ</code>, <code>ENOMEM</code>. +</p></dd></dl> + +<p>The following functions convert between NUL terminated strings in a specified +encoding and NUL terminated Unicode strings. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strconv_from_encoding</b><i> (const char *<var>string</var>, const char *<var>fromcode</var>, enum iconv_ilseq_handler <var>handler</var>)</i> +<a name="IDX167"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strconv_from_encoding</b><i> (const char *<var>string</var>, const char *<var>fromcode</var>, enum iconv_ilseq_handler <var>handler</var>)</i> +<a name="IDX168"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strconv_from_encoding</b><i> (const char *<var>string</var>, const char *<var>fromcode</var>, enum iconv_ilseq_handler <var>handler</var>)</i> +<a name="IDX169"></a> +</dt> +<dd><p>Converts a NUL terminated string from a given encoding. +</p> +<p>The result is <code>malloc</code> allocated, or NULL (with <code>errno</code> set) in case of error. +</p> +<p>Particular <code>errno</code> values: <code>EILSEQ</code>, <code>ENOMEM</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> char * <b>u8_strconv_to_encoding</b><i> (const uint8_t *<var>string</var>, const char *<var>tocode</var>, enum iconv_ilseq_handler <var>handler</var>)</i> +<a name="IDX170"></a> +</dt> +<dt><u>Function:</u> char * <b>u16_strconv_to_encoding</b><i> (const uint16_t *<var>string</var>, const char *<var>tocode</var>, enum iconv_ilseq_handler <var>handler</var>)</i> +<a name="IDX171"></a> +</dt> +<dt><u>Function:</u> char * <b>u32_strconv_to_encoding</b><i> (const uint32_t *<var>string</var>, const char *<var>tocode</var>, enum iconv_ilseq_handler <var>handler</var>)</i> +<a name="IDX172"></a> +</dt> +<dd><p>Converts a NUL terminated string to a given encoding. +</p> +<p>The result is <code>malloc</code> allocated, or NULL (with <code>errno</code> set) in case of error. +</p> +<p>Particular <code>errno</code> values: <code>EILSEQ</code>, <code>ENOMEM</code>. +</p></dd></dl> + +<p>The following functions are shorthands that convert between NUL terminated +strings in locale encoding and NUL terminated Unicode strings. +</p> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_strconv_from_locale</b><i> (const char *<var>string</var>)</i> +<a name="IDX173"></a> +</dt> +<dt><u>Function:</u> uint16_t * <b>u16_strconv_from_locale</b><i> (const char *<var>string</var>)</i> +<a name="IDX174"></a> +</dt> +<dt><u>Function:</u> uint32_t * <b>u32_strconv_from_locale</b><i> (const char *<var>string</var>)</i> +<a name="IDX175"></a> +</dt> +<dd><p>Converts a NUL terminated string from the locale encoding. +</p> +<p>The result is <code>malloc</code> allocated, or NULL (with <code>errno</code> set) in case of error. +</p> +<p>Particular <code>errno</code> values: <code>ENOMEM</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> char * <b>u8_strconv_to_locale</b><i> (const uint8_t *<var>string</var>)</i> +<a name="IDX176"></a> +</dt> +<dt><u>Function:</u> char * <b>u16_strconv_to_locale</b><i> (const uint16_t *<var>string</var>)</i> +<a name="IDX177"></a> +</dt> +<dt><u>Function:</u> char * <b>u32_strconv_to_locale</b><i> (const uint32_t *<var>string</var>)</i> +<a name="IDX178"></a> +</dt> +<dd><p>Converts a NUL terminated string to the locale encoding. +</p> +<p>The result is <code>malloc</code> allocated, or NULL (with <code>errno</code> set) in case of error. +</p> +<p>Particular <code>errno</code> values: <code>ENOMEM</code>. +</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_4.html#SEC11" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_6.html#SEC18" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_6.html b/doc/libunistring_6.html new file mode 100644 index 00000000..33cda594 --- /dev/null +++ b/doc/libunistring_6.html @@ -0,0 +1,451 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 6. Output with Unicode strings <unistdio.h></title> + +<meta name="description" content="GNU libunistring: 6. Output with Unicode strings <unistdio.h>"> +<meta name="keywords" content="GNU libunistring: 6. Output with Unicode strings <unistdio.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_5.html#SEC17" title="Beginning of this chapter 
or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_7.html#SEC19" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="unistdio_002eh"></a> +<a name="SEC18"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC18">6. Output with Unicode strings <code>&lt;unistdio.h&gt;</code></a> </h1> + +<p>This include file declares functions for doing formatted output with Unicode +strings. It defines a set of functions similar to <code>fprintf</code> and +<code>sprintf</code>, which are declared in <code>&lt;stdio.h&gt;</code>. +</p> +<p>These functions work like the <code>printf</code> function family. +In the format string: +</p><ul class="toc"> +<li> +The format directive ‘<samp>U</samp>’ takes a UTF-8 string (<code>const uint8_t *</code>). +</li><li> +The format directive ‘<samp>lU</samp>’ takes a UTF-16 string +(<code>const uint16_t *</code>). +</li><li> +The format directive ‘<samp>llU</samp>’ takes a UTF-32 string +(<code>const uint32_t *</code>). +</li></ul> + +<p>A function name with an infix ‘<samp>v</samp>’ indicates that a <code>va_list</code> is +passed instead of multiple arguments. +</p> +<p>The functions <code>*sprintf</code> have a <var>buf</var> argument that is assumed to be +large enough. +(<em>DANGEROUS! 
Overflowing the buffer will crash the program.</em>) +</p> +<p>The functions <code>*snprintf</code> have a <var>buf</var> argument that is assumed to be +<var>size</var> units large. (<em>DANGEROUS! The resulting string might be +truncated in the middle of a multibyte character.</em>) +</p> +<p>The functions <code>*asprintf</code> have a <var>resultp</var> argument. The result will +be freshly allocated and stored in <code>*resultp</code>. +</p> +<p>The functions <code>*asnprintf</code> have a (<var>resultbuf</var>, <var>lengthp</var>) +argument pair. If <var>resultbuf</var> is not NULL and the result fits into +<code>*<var>lengthp</var></code> units, it is put in <var>resultbuf</var>, and <var>resultbuf</var> +is returned. Otherwise, a freshly allocated string is returned. In both +cases, <code>*<var>lengthp</var></code> is set to the length (number of units) of the +returned string. In case of error, NULL is returned and <code>errno</code> is set. +</p> +<p>The following functions take an ASCII format string and return a result that +is a <code>char *</code> string in locale encoding. 
+</p> +<dl> +<dt><u>Function:</u> int <b>ulc_sprintf</b><i> (char *<var>buf</var>, const char *<var>format</var>, ...)</i> +<a name="IDX179"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> int <b>ulc_snprintf</b><i> (char *<var>buf</var>, size_t size, const char *<var>format</var>, ...)</i> +<a name="IDX180"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> int <b>ulc_asprintf</b><i> (char **<var>resultp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX181"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> char * <b>ulc_asnprintf</b><i> (char *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX182"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> int <b>ulc_vsprintf</b><i> (char *<var>buf</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX183"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> int <b>ulc_vsnprintf</b><i> (char *<var>buf</var>, size_t size, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX184"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> int <b>ulc_vasprintf</b><i> (char **<var>resultp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX185"></a> +</dt> +</dl> + +<dl> +<dt><u>Function:</u> char * <b>ulc_vasnprintf</b><i> (char *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX186"></a> +</dt> +</dl> + +<p>The following functions take an ASCII format string and return a result in +UTF-8 format. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_sprintf</b><i> (uint8_t *<var>buf</var>, const char *<var>format</var>, ...)</i> +<a name="IDX187"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_snprintf</b><i> (uint8_t *<var>buf</var>, size_t <var>size</var>, const char *<var>format</var>, ...)</i> +<a name="IDX188"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_asprintf</b><i> (uint8_t **<var>resultp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX189"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_asnprintf</b><i> (uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX190"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_vsprintf</b><i> (uint8_t *<var>buf</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX191"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_vsnprintf</b><i> (uint8_t *<var>buf</var>, size_t <var>size</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX192"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_vasprintf</b><i> (uint8_t **<var>resultp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX193"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_vasnprintf</b><i> (uint8_t *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX194"></a> +</dt> +</dl> + +<p>The following functions take a UTF-8 format string and return a result in +UTF-8 format. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u8_u8_sprintf</b><i> (uint8_t *<var>buf</var>, const uint8_t *<var>format</var>, ...)</i> +<a name="IDX195"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_u8_snprintf</b><i> (uint8_t *<var>buf</var>, size_t <var>size</var>, const uint8_t *<var>format</var>, ...)</i> +<a name="IDX196"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_u8_asprintf</b><i> (uint8_t **<var>resultp</var>, const uint8_t *<var>format</var>, ...)</i> +<a name="IDX197"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_u8_asnprintf</b><i> (uint8_t *resultbuf, size_t *<var>lengthp</var>, const uint8_t *<var>format</var>, ...)</i> +<a name="IDX198"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_u8_vsprintf</b><i> (uint8_t *<var>buf</var>, const uint8_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX199"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_u8_vsnprintf</b><i> (uint8_t *<var>buf</var>, size_t <var>size</var>, const uint8_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX200"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u8_u8_vasprintf</b><i> (uint8_t **<var>resultp</var>, const uint8_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX201"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint8_t * <b>u8_u8_vasnprintf</b><i> (uint8_t *resultbuf, size_t *<var>lengthp</var>, const uint8_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX202"></a> +</dt> +</dl> + +<p>The following functions take an ASCII format string and return a result in +UTF-16 format. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u16_sprintf</b><i> (uint16_t *<var>buf</var>, const char *<var>format</var>, ...)</i> +<a name="IDX203"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_snprintf</b><i> (uint16_t *<var>buf</var>, size_t <var>size</var>, const char *<var>format</var>, ...)</i> +<a name="IDX204"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_asprintf</b><i> (uint16_t **<var>resultp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX205"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint16_t * <b>u16_asnprintf</b><i> (uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX206"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_vsprintf</b><i> (uint16_t *<var>buf</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX207"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_vsnprintf</b><i> (uint16_t *<var>buf</var>, size_t <var>size</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX208"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_vasprintf</b><i> (uint16_t **<var>resultp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX209"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint16_t * <b>u16_vasnprintf</b><i> (uint16_t *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX210"></a> +</dt> +</dl> + +<p>The following functions take a UTF-16 format string and return a result in +UTF-16 format. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u16_u16_sprintf</b><i> (uint16_t *<var>buf</var>, const uint16_t *<var>format</var>, ...)</i> +<a name="IDX211"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_u16_snprintf</b><i> (uint16_t *<var>buf</var>, size_t <var>size</var>, const uint16_t *<var>format</var>, ...)</i> +<a name="IDX212"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_u16_asprintf</b><i> (uint16_t **<var>resultp</var>, const uint16_t *<var>format</var>, ...)</i> +<a name="IDX213"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint16_t * <b>u16_u16_asnprintf</b><i> (uint16_t *resultbuf, size_t *<var>lengthp</var>, const uint16_t *<var>format</var>, ...)</i> +<a name="IDX214"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_u16_vsprintf</b><i> (uint16_t *<var>buf</var>, const uint16_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX215"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_u16_vsnprintf</b><i> (uint16_t *<var>buf</var>, size_t <var>size</var>, const uint16_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX216"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u16_u16_vasprintf</b><i> (uint16_t **<var>resultp</var>, const uint16_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX217"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint16_t * <b>u16_u16_vasnprintf</b><i> (uint16_t *resultbuf, size_t *<var>lengthp</var>, const uint16_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX218"></a> +</dt> +</dl> + +<p>The following functions take an ASCII format string and return a result in +UTF-32 format. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u32_sprintf</b><i> (uint32_t *<var>buf</var>, const char *<var>format</var>, ...)</i> +<a name="IDX219"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_snprintf</b><i> (uint32_t *<var>buf</var>, size_t <var>size</var>, const char *<var>format</var>, ...)</i> +<a name="IDX220"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_asprintf</b><i> (uint32_t **<var>resultp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX221"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint32_t * <b>u32_asnprintf</b><i> (uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, ...)</i> +<a name="IDX222"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_vsprintf</b><i> (uint32_t *<var>buf</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX223"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_vsnprintf</b><i> (uint32_t *<var>buf</var>, size_t <var>size</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX224"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_vasprintf</b><i> (uint32_t **<var>resultp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX225"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint32_t * <b>u32_vasnprintf</b><i> (uint32_t *<var>resultbuf</var>, size_t *<var>lengthp</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX226"></a> +</dt> +</dl> + +<p>The following functions take a UTF-32 format string and return a result in +UTF-32 format. 
+</p> +<dl> +<dt><u>Function:</u> int <b>u32_u32_sprintf</b><i> (uint32_t *<var>buf</var>, const uint32_t *<var>format</var>, ...)</i> +<a name="IDX227"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_u32_snprintf</b><i> (uint32_t *<var>buf</var>, size_t <var>size</var>, const uint32_t *<var>format</var>, ...)</i> +<a name="IDX228"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_u32_asprintf</b><i> (uint32_t **<var>resultp</var>, const uint32_t *<var>format</var>, ...)</i> +<a name="IDX229"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint32_t * <b>u32_u32_asnprintf</b><i> (uint32_t *resultbuf, size_t *<var>lengthp</var>, const uint32_t *<var>format</var>, ...)</i> +<a name="IDX230"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_u32_vsprintf</b><i> (uint32_t *<var>buf</var>, const uint32_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX231"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_u32_vsnprintf</b><i> (uint32_t *<var>buf</var>, size_t <var>size</var>, const uint32_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX232"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>u32_u32_vasprintf</b><i> (uint32_t **<var>resultp</var>, const uint32_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX233"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> uint32_t * <b>u32_u32_vasnprintf</b><i> (uint32_t *resultbuf, size_t *<var>lengthp</var>, const uint32_t *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX234"></a> +</dt> +</dl> + +<p>The following functions take an ASCII format string and produce output in +locale encoding to a <code>FILE</code> stream. 
+</p> +<dl> +<dt><u>Function:</u> int <b>ulc_fprintf</b><i> (FILE *<var>stream</var>, const char *<var>format</var>, ...)</i> +<a name="IDX235"></a> +</dt> +</dl> +<dl> +<dt><u>Function:</u> int <b>ulc_vfprintf</b><i> (FILE *<var>stream</var>, const char *<var>format</var>, va_list <var>ap</var>)</i> +<a name="IDX236"></a> +</dt> +</dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_5.html#SEC17" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_7.html#SEC19" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_7.html b/doc/libunistring_7.html new file mode 100644 index 00000000..6fe526d9 --- /dev/null +++ b/doc/libunistring_7.html @@ -0,0 +1,125 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 7. Names of Unicode characters <uniname.h></title> + +<meta name="description" content="GNU libunistring: 7. Names of Unicode characters <uniname.h>"> +<meta name="keywords" content="GNU libunistring: 7. Names of Unicode characters <uniname.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_6.html#SEC18" title="Beginning of this chapter or 
previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_8.html#SEC20" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="uniname_002eh"></a> +<a name="SEC19"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC19">7. Names of Unicode characters <code><uniname.h></code></a> </h1> + +<p>This include file implements the association between a Unicode character and +its name. +</p> +<p>The name of a Unicode character makes it possible to distinguish it from other, similar +looking characters. For example, the character ‘<samp>x</samp>’ has the name +<code>"LATIN SMALL LETTER X"</code> and is therefore different from the character +named <code>"MULTIPLICATION SIGN"</code>. +</p> +<dl> +<dt><u>Macro:</u> unsigned int <b>UNINAME_MAX</b> +<a name="IDX237"></a> +</dt> +<dd><p>This macro expands to a constant that is the required size of the buffer for a +Unicode character name. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> char * <b>unicode_character_name</b><i> (ucs4_t <var>uc</var>, char *<var>buf</var>)</i> +<a name="IDX238"></a> +</dt> +<dd><p>Looks up the name of a Unicode character, in uppercase ASCII. +<var>buf</var> must point to a buffer, at least <code>UNINAME_MAX</code> bytes in size. +Returns the filled <var>buf</var>, or NULL if the character does not have a name. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> ucs4_t <b>unicode_name_character</b><i> (const char *<var>name</var>)</i> +<a name="IDX239"></a> +</dt> +<dd><p>Looks up the Unicode character with a given name, in upper- or lowercase +ASCII. Returns the character if found, or <code>UNINAME_INVALID</code> if not found. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> ucs4_t <b>UNINAME_INVALID</b> +<a name="IDX240"></a> +</dt> +<dd><p>This macro expands to a constant that is a special return value of the +<code>unicode_name_character</code> function. +</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_6.html#SEC18" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_8.html#SEC20" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_8.html b/doc/libunistring_8.html new file mode 100644 index 00000000..def5e04a --- /dev/null +++ b/doc/libunistring_8.html @@ -0,0 +1,2071 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 8. Unicode character classification and properties <unictype.h></title> + +<meta name="description" content="GNU libunistring: 8. Unicode character classification and properties <unictype.h>"> +<meta name="keywords" content="GNU libunistring: 8. Unicode character classification and properties <unictype.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a 
href="libunistring_7.html#SEC19" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_9.html#SEC37" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> + +<hr size="2"> +<a name="unictype_002eh"></a> +<a name="SEC20"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC20">8. Unicode character classification and properties <code><unictype.h></code></a> </h1> + +<p>This include file declares functions that classify Unicode characters +and that test whether Unicode characters have specific properties. +</p> +<p>The classification assigns a “general category” to every Unicode +character. This is similar to the classification provided by ISO C in +<code><wctype.h></code>. +</p> +<p>Properties are the data that guides various text processing algorithms +in the presence of specific Unicode characters. +</p> + +<hr size="6"> +<a name="General-category"></a> +<a name="SEC21"></a> +<h2 class="section"> <a href="libunistring.html#TOC21">8.1 General category</a> </h2> + +<p>Every Unicode character or code point has a <em>general category</em> assigned +to it. This classification is important for most algorithms that work on +Unicode text. +</p> +<p>The GNU libunistring library provides two kinds of API for working with +general categories. 
The object oriented API uses a variable to denote +every predefined general category value or combinations thereof. The +low-level API uses a bit mask instead. The advantage of the object oriented +API is that if only a few predefined general category values are used, +the data tables are relatively small. When you combine general category +values (using <code>uc_general_category_or</code>, <code>uc_general_category_and</code>, +or <code>uc_general_category_and_not</code>), or when you use the low level +bit masks, a big table is used that holds the complete general category +information for all Unicode characters. +</p> + +<hr size="6"> +<a name="Object-oriented-API"></a> +<a name="SEC22"></a> +<h3 class="subsection"> <a href="libunistring.html#TOC22">8.1.1 The object oriented API for general category</a> </h3> + +<dl> +<dt><u>Type:</u> <b>uc_general_category_t</b> +<a name="IDX241"></a> +</dt> +<dd><p>This data type denotes a general category value. It is an immediate type that +can be copied by simple assignment, without involving memory allocation. It is +not an array type. +</p></dd></dl> + +<p>The following are the predefined general category values. Additional general +categories may be added in the future. 
+</p> +<dl> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_L</b> +<a name="IDX242"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Lu</b> +<a name="IDX243"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Ll</b> +<a name="IDX244"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Lt</b> +<a name="IDX245"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Lm</b> +<a name="IDX246"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Lo</b> +<a name="IDX247"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_M</b> +<a name="IDX248"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Mn</b> +<a name="IDX249"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Mc</b> +<a name="IDX250"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Me</b> +<a name="IDX251"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_N</b> +<a name="IDX252"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Nd</b> +<a name="IDX253"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Nl</b> +<a name="IDX254"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_No</b> +<a name="IDX255"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_P</b> +<a name="IDX256"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Pc</b> +<a name="IDX257"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Pd</b> +<a name="IDX258"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Ps</b> +<a name="IDX259"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Pe</b> +<a name="IDX260"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Pi</b> +<a name="IDX261"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Pf</b> +<a 
name="IDX262"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Po</b> +<a name="IDX263"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_S</b> +<a name="IDX264"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Sm</b> +<a name="IDX265"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Sc</b> +<a name="IDX266"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Sk</b> +<a name="IDX267"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_So</b> +<a name="IDX268"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Z</b> +<a name="IDX269"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Zs</b> +<a name="IDX270"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Zl</b> +<a name="IDX271"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Zp</b> +<a name="IDX272"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_C</b> +<a name="IDX273"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Cc</b> +<a name="IDX274"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Cf</b> +<a name="IDX275"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Cs</b> +<a name="IDX276"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Co</b> +<a name="IDX277"></a> +</dt> +<dt><u>Constant:</u> uc_general_category_t <b>UC_CATEGORY_Cn</b> +<a name="IDX278"></a> +</dt> +</dl> + +<p>The following are alias names for predefined General category values. +</p> +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_LETTER</b> +<a name="IDX279"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_L</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_UPPERCASE_LETTER</b> +<a name="IDX280"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Lu</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_LOWERCASE_LETTER</b> +<a name="IDX281"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Ll</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_TITLECASE_LETTER</b> +<a name="IDX282"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Lt</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_MODIFIER_LETTER</b> +<a name="IDX283"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Lm</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_OTHER_LETTER</b> +<a name="IDX284"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Lo</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_MARK</b> +<a name="IDX285"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_M</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_NON_SPACING_MARK</b> +<a name="IDX286"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Mn</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_COMBINING_SPACING_MARK</b> +<a name="IDX287"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Mc</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_ENCLOSING_MARK</b> +<a name="IDX288"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Me</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_NUMBER</b> +<a name="IDX289"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_N</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_DECIMAL_DIGIT_NUMBER</b> +<a name="IDX290"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Nd</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_LETTER_NUMBER</b> +<a name="IDX291"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Nl</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_OTHER_NUMBER</b> +<a name="IDX292"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_No</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_PUNCTUATION</b> +<a name="IDX293"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_P</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_CONNECTOR_PUNCTUATION</b> +<a name="IDX294"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Pc</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_DASH_PUNCTUATION</b> +<a name="IDX295"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Pd</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_OPEN_PUNCTUATION</b> +<a name="IDX296"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Ps</code> (“start punctuation”). +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_CLOSE_PUNCTUATION</b> +<a name="IDX297"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Pe</code> (“end punctuation”). +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_INITIAL_QUOTE_PUNCTUATION</b> +<a name="IDX298"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Pi</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_FINAL_QUOTE_PUNCTUATION</b> +<a name="IDX299"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Pf</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_OTHER_PUNCTUATION</b> +<a name="IDX300"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Po</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_SYMBOL</b> +<a name="IDX301"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_S</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_MATH_SYMBOL</b> +<a name="IDX302"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Sm</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_CURRENCY_SYMBOL</b> +<a name="IDX303"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Sc</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_MODIFIER_SYMBOL</b> +<a name="IDX304"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Sk</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_OTHER_SYMBOL</b> +<a name="IDX305"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_So</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_SEPARATOR</b> +<a name="IDX306"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Z</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_SPACE_SEPARATOR</b> +<a name="IDX307"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Zs</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_LINE_SEPARATOR</b> +<a name="IDX308"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Zl</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_PARAGRAPH_SEPARATOR</b> +<a name="IDX309"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Zp</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_OTHER</b> +<a name="IDX310"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_C</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_CONTROL</b> +<a name="IDX311"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Cc</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_FORMAT</b> +<a name="IDX312"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Cf</code>. 
+</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_SURROGATE</b> +<a name="IDX313"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Cs</code>. All code points in this +category are invalid characters. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_PRIVATE_USE</b> +<a name="IDX314"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Co</code>. +</p></dd></dl> + +<dl> +<dt><u>Macro:</u> uc_general_category_t <b>UC_UNASSIGNED</b> +<a name="IDX315"></a> +</dt> +<dd><p>This is another name for <code>UC_CATEGORY_Cn</code>. Some code points in this +category are invalid characters. +</p></dd></dl> + +<p>The following functions combine general categories, as in a boolean algebra, +except that there is no ‘<samp>not</samp>’ operation. +</p> +<dl> +<dt><u>Function:</u> uc_general_category_t <b>uc_general_category_or</b><i> (uc_general_category_t <var>category1</var>, uc_general_category_t <var>category2</var>)</i> +<a name="IDX316"></a> +</dt> +<dd><p>Returns the union of two general categories. +This corresponds to the union of the two sets of characters. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uc_general_category_t <b>uc_general_category_and</b><i> (uc_general_category_t <var>category1</var>, uc_general_category_t <var>category2</var>)</i> +<a name="IDX317"></a> +</dt> +<dd><p>Returns the intersection of two general categories as bit masks. +This <em>does not</em> correspond to the intersection of the two sets of +characters. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uc_general_category_t <b>uc_general_category_and_not</b><i> (uc_general_category_t <var>category1</var>, uc_general_category_t <var>category2</var>)</i> +<a name="IDX318"></a> +</dt> +<dd><p>Returns the intersection of a general category with the complement of a +second general category, as bit masks. +This <em>does not</em> correspond to the intersection with complement, when +viewing the categories as sets of characters. 
+</p></dd></dl> + +<p>The following functions associate general categories with their names. +</p> +<dl> +<dt><u>Function:</u> const char * <b>uc_general_category_name</b><i> (uc_general_category_t <var>category</var>)</i> +<a name="IDX319"></a> +</dt> +<dd><p>Returns the name of a general category. +Returns NULL if the general category corresponds to a bit mask that does not +have a name. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> uc_general_category_t <b>uc_general_category_byname</b><i> (const char *<var>category_name</var>)</i> +<a name="IDX320"></a> +</dt> +<dd><p>Returns the general category given by name, e.g. <code>"Lu"</code>. +</p></dd></dl> + +<p>The following functions view general categories as sets of Unicode characters. +</p> +<dl> +<dt><u>Function:</u> uc_general_category_t <b>uc_general_category</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX321"></a> +</dt> +<dd><p>Returns the general category of a Unicode character. +</p> +<p>This function uses a big table. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_general_category</b><i> (ucs4_t <var>uc</var>, uc_general_category_t <var>category</var>)</i> +<a name="IDX322"></a> +</dt> +<dd><p>Tests whether a Unicode character belongs to a given category. +The <var>category</var> argument can be a predefined general category or the +combination of several predefined general categories. +</p></dd></dl> + +<hr size="6"> +<a name="Bit-mask-API"></a> +<a name="SEC23"></a> +<h3 class="subsection"> <a href="libunistring.html#TOC23">8.1.2 The bit mask API for general category</a> </h3> + +<p>The following are the predefined general category values as bit masks. +Additional general categories may be added in the future. 
+</p> +<dl> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_L</b> +<a name="IDX323"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Lu</b> +<a name="IDX324"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Ll</b> +<a name="IDX325"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Lt</b> +<a name="IDX326"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Lm</b> +<a name="IDX327"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Lo</b> +<a name="IDX328"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_M</b> +<a name="IDX329"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Mn</b> +<a name="IDX330"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Mc</b> +<a name="IDX331"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Me</b> +<a name="IDX332"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_N</b> +<a name="IDX333"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Nd</b> +<a name="IDX334"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Nl</b> +<a name="IDX335"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_No</b> +<a name="IDX336"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_P</b> +<a name="IDX337"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Pc</b> +<a name="IDX338"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Pd</b> +<a name="IDX339"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Ps</b> +<a name="IDX340"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Pe</b> +<a name="IDX341"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Pi</b> +<a name="IDX342"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Pf</b> +<a name="IDX343"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Po</b> +<a name="IDX344"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_S</b> +<a name="IDX345"></a> +</dt> +<dt><u>Macro:</u> uint32_t 
<b>UC_CATEGORY_MASK_Sm</b> +<a name="IDX346"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Sc</b> +<a name="IDX347"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Sk</b> +<a name="IDX348"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_So</b> +<a name="IDX349"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Z</b> +<a name="IDX350"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Zs</b> +<a name="IDX351"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Zl</b> +<a name="IDX352"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Zp</b> +<a name="IDX353"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_C</b> +<a name="IDX354"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Cc</b> +<a name="IDX355"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Cf</b> +<a name="IDX356"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Cs</b> +<a name="IDX357"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Co</b> +<a name="IDX358"></a> +</dt> +<dt><u>Macro:</u> uint32_t <b>UC_CATEGORY_MASK_Cn</b> +<a name="IDX359"></a> +</dt> +</dl> + +<p>The following function views general categories as sets of Unicode characters. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_general_category_withtable</b><i> (ucs4_t <var>uc</var>, uint32_t <var>bitmask</var>)</i> +<a name="IDX360"></a> +</dt> +<dd><p>Tests whether a Unicode character belongs to a given category. +The <var>bitmask</var> argument can be a predefined general category bitmask or the +combination of several predefined general category bitmasks. +</p> +<p>This function uses a big table comprising all general categories. 
+</p></dd></dl> + +<hr size="6"> +<a name="Canonical-combining-class"></a> +<a name="SEC24"></a> +<h2 class="section"> <a href="libunistring.html#TOC24">8.2 Canonical combining class</a> </h2> + +<p>Every Unicode character or code point has a <em>canonical combining class</em> +assigned to it. +</p> +<p>What is the meaning of the canonical combining class? Essentially, it +indicates the priority with which a combining character is attached to its +base character. The characters for which the canonical combining class is 0 +are the base characters, and the characters for which it is greater than 0 are +the combining characters. Combining characters are rendered +near/attached/around their base character, and combining characters with small +combining classes are attached "first" or "closer" to the base character. +</p> +<p>The canonical combining class of a character is a number in the range +0..255. The possible values are described in the Unicode Character Database +<a href="http://www.unicode.org/Public/UNIDATA/UCD.html">http://www.unicode.org/Public/UNIDATA/UCD.html</a>. The list here is +not definitive; more values can be added in future versions. +</p> +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_NR</b> +<a name="IDX361"></a> +</dt> +<dd><p>The canonical combining class value for “Not Reordered” characters. +The value is 0. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_OV</b> +<a name="IDX362"></a> +</dt> +<dd><p>The canonical combining class value for “Overlay” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_NK</b> +<a name="IDX363"></a> +</dt> +<dd><p>The canonical combining class value for “Nukta” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_KV</b> +<a name="IDX364"></a> +</dt> +<dd><p>The canonical combining class value for “Kana Voicing” characters. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_VR</b> +<a name="IDX365"></a> +</dt> +<dd><p>The canonical combining class value for “Virama” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_ATBL</b> +<a name="IDX366"></a> +</dt> +<dd><p>The canonical combining class value for “Attached Below Left” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_ATB</b> +<a name="IDX367"></a> +</dt> +<dd><p>The canonical combining class value for “Attached Below” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_ATAR</b> +<a name="IDX368"></a> +</dt> +<dd><p>The canonical combining class value for “Attached Above Right” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_BL</b> +<a name="IDX369"></a> +</dt> +<dd><p>The canonical combining class value for “Below Left” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_B</b> +<a name="IDX370"></a> +</dt> +<dd><p>The canonical combining class value for “Below” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_BR</b> +<a name="IDX371"></a> +</dt> +<dd><p>The canonical combining class value for “Below Right” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_L</b> +<a name="IDX372"></a> +</dt> +<dd><p>The canonical combining class value for “Left” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_R</b> +<a name="IDX373"></a> +</dt> +<dd><p>The canonical combining class value for “Right” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_AL</b> +<a name="IDX374"></a> +</dt> +<dd><p>The canonical combining class value for “Above Left” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_A</b> +<a name="IDX375"></a> +</dt> +<dd><p>The canonical combining class value for “Above” characters. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_AR</b> +<a name="IDX376"></a> +</dt> +<dd><p>The canonical combining class value for “Above Right” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_DB</b> +<a name="IDX377"></a> +</dt> +<dd><p>The canonical combining class value for “Double Below” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_DA</b> +<a name="IDX378"></a> +</dt> +<dd><p>The canonical combining class value for “Double Above” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_CCC_IS</b> +<a name="IDX379"></a> +</dt> +<dd><p>The canonical combining class value for “Iota Subscript” characters. +</p></dd></dl> + +<p>The following function looks up the canonical combining class of a character. +</p> +<dl> +<dt><u>Function:</u> int <b>uc_combining_class</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX380"></a> +</dt> +<dd><p>Returns the canonical combining class of a Unicode character. +</p></dd></dl> + +<hr size="6"> +<a name="Bidirectional-category"></a> +<a name="SEC25"></a> +<h2 class="section"> <a href="libunistring.html#TOC25">8.3 Bidirectional category</a> </h2> + +<p>Every Unicode character or code point has a <em>bidirectional category</em> +assigned to it. +</p> +<p>The bidirectional category guides the bidirectional algorithm +(<a href="http://www.unicode.org/reports/tr9/">http://www.unicode.org/reports/tr9/</a>). The possible values are +the following. +</p> +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_L</b> +<a name="IDX381"></a> +</dt> +<dd><p>The bidirectional category for “Left-to-Right” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_LRE</b> +<a name="IDX382"></a> +</dt> +<dd><p>The bidirectional category for “Left-to-Right Embedding” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_LRO</b> +<a name="IDX383"></a> +</dt> +<dd><p>The bidirectional category for “Left-to-Right Override” characters. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_R</b> +<a name="IDX384"></a> +</dt> +<dd><p>The bidirectional category for “Right-to-Left” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_AL</b> +<a name="IDX385"></a> +</dt> +<dd><p>The bidirectional category for “Right-to-Left Arabic” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_RLE</b> +<a name="IDX386"></a> +</dt> +<dd><p>The bidirectional category for “Right-to-Left Embedding” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_RLO</b> +<a name="IDX387"></a> +</dt> +<dd><p>The bidirectional category for “Right-to-Left Override” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_PDF</b> +<a name="IDX388"></a> +</dt> +<dd><p>The bidirectional category for “Pop Directional Format” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_EN</b> +<a name="IDX389"></a> +</dt> +<dd><p>The bidirectional category for “European Number” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_ES</b> +<a name="IDX390"></a> +</dt> +<dd><p>The bidirectional category for “European Number Separator” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_ET</b> +<a name="IDX391"></a> +</dt> +<dd><p>The bidirectional category for “European Number Terminator” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_AN</b> +<a name="IDX392"></a> +</dt> +<dd><p>The bidirectional category for “Arabic Number” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_CS</b> +<a name="IDX393"></a> +</dt> +<dd><p>The bidirectional category for “Common Number Separator” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_NSM</b> +<a name="IDX394"></a> +</dt> +<dd><p>The bidirectional category for “Non-Spacing Mark” characters. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_BN</b> +<a name="IDX395"></a> +</dt> +<dd><p>The bidirectional category for “Boundary Neutral” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_B</b> +<a name="IDX396"></a> +</dt> +<dd><p>The bidirectional category for “Paragraph Separator” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_S</b> +<a name="IDX397"></a> +</dt> +<dd><p>The bidirectional category for “Segment Separator” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_WS</b> +<a name="IDX398"></a> +</dt> +<dd><p>The bidirectional category for “Whitespace” characters. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_BIDI_ON</b> +<a name="IDX399"></a> +</dt> +<dd><p>The bidirectional category for “Other Neutral” characters. +</p></dd></dl> + +<p>The following functions implement the association between a bidirectional +category and its name. +</p> +<dl> +<dt><u>Function:</u> const char * <b>uc_bidi_category_name</b><i> (int <var>category</var>)</i> +<a name="IDX400"></a> +</dt> +<dd><p>Returns the name of a bidirectional category. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>uc_bidi_category_byname</b><i> (const char *<var>category_name</var>)</i> +<a name="IDX401"></a> +</dt> +<dd><p>Returns the bidirectional category given by name, e.g. <code>"LRE"</code>. +</p></dd></dl> + +<p>The following functions view bidirectional categories as sets of Unicode +characters. +</p> +<dl> +<dt><u>Function:</u> int <b>uc_bidi_category</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX402"></a> +</dt> +<dd><p>Returns the bidirectional category of a Unicode character. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_bidi_category</b><i> (ucs4_t <var>uc</var>, int <var>category</var>)</i> +<a name="IDX403"></a> +</dt> +<dd><p>Tests whether a Unicode character belongs to a given bidirectional category. 
+</p></dd></dl> + +<hr size="6"> +<a name="Decimal-digit-value"></a> +<a name="SEC26"></a> +<h2 class="section"> <a href="libunistring.html#TOC26">8.4 Decimal digit value</a> </h2> + +<p>Decimal digits (like the digits from ‘<samp>0</samp>’ to ‘<samp>9</samp>’) exist in many +scripts. The following function converts a decimal digit character to its +numerical value. +</p> +<dl> +<dt><u>Function:</u> int <b>uc_decimal_value</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX404"></a> +</dt> +<dd><p>Returns the decimal digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a decimal digit. +</p></dd></dl> + +<hr size="6"> +<a name="Digit-value"></a> +<a name="SEC27"></a> +<h2 class="section"> <a href="libunistring.html#TOC27">8.5 Digit value</a> </h2> + +<p>Digit characters are like decimal digit characters, possibly in special forms, +such as superscript, subscript, or circled. The following function converts a +digit character to its numerical value. +</p> +<dl> +<dt><u>Function:</u> int <b>uc_digit_value</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX405"></a> +</dt> +<dd><p>Returns the digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a digit. +</p></dd></dl> + +<hr size="6"> +<a name="Numeric-value"></a> +<a name="SEC28"></a> +<h2 class="section"> <a href="libunistring.html#TOC28">8.6 Numeric value</a> </h2> + +<p>There are also characters that represent numbers without a digit system, like +the Roman numerals, and fractional numbers, like 1/4 or 3/4. +</p> +<p>The following type represents the numeric value of a Unicode character.
+</p><dl> +<dt><u>Type:</u> <b>uc_fraction_t</b> +<a name="IDX406"></a> +</dt> +<dd><p>This is a structure type with the following fields: +</p><table><tr><td> </td><td><pre class="smallexample">int numerator; +int denominator; +</pre></td></tr></table> +<p>An integer <var>n</var> is represented by <code>numerator = <var>n</var></code>, +<code>denominator = 1</code>. +</p></dd></dl> + +<p>The following function converts a number character to its numerical value. +</p> +<dl> +<dt><u>Function:</u> uc_fraction_t <b>uc_numeric_value</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX407"></a> +</dt> +<dd><p>Returns the numeric value of a Unicode character. +The return value is a fraction, or the pseudo-fraction <code>{ 0, 0 }</code> for +characters that do not represent a number. +</p></dd></dl> + +<hr size="6"> +<a name="Mirrored-character"></a> +<a name="SEC29"></a> +<h2 class="section"> <a href="libunistring.html#TOC29">8.7 Mirrored character</a> </h2> + +<p>Character mirroring is used to associate the closing parenthesis character +with the opening parenthesis character, the closing brace character with the +opening brace character, and so on. +</p> +<p>The following function looks up the mirrored character of a Unicode character. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_mirror_char</b><i> (ucs4_t <var>uc</var>, ucs4_t *<var>puc</var>)</i> +<a name="IDX408"></a> +</dt> +<dd><p>Stores the mirrored character of a Unicode character <var>uc</var> in +<code>*<var>puc</var></code> and returns <code>true</code>, if it exists. Otherwise it +stores <var>uc</var> unmodified in <code>*<var>puc</var></code> and returns <code>false</code>. +</p></dd></dl> + +<hr size="6"> +<a name="Properties"></a> +<a name="SEC30"></a> +<h2 class="section"> <a href="libunistring.html#TOC30">8.8 Properties</a> </h2> + +<p>This section defines boolean properties of Unicode characters. This +means, a character either has the given property or does not have it.
+In other words, the property can be viewed as a subset of the set of +Unicode characters. +</p> +<p>The GNU libunistring library provides two kinds of API for working with +properties. The object oriented API uses a type <code>uc_property_t</code> +to designate a property. In the function-based API, which is a bit more +low level, a property is merely a function. +</p> + +<hr size="6"> +<a name="Properties-as-objects"></a> +<a name="SEC31"></a> +<h3 class="subsection"> <a href="libunistring.html#TOC31">8.8.1 Properties as objects – the object oriented API</a> </h3> + +<p>The following type designates a property on Unicode characters. +</p> +<dl> +<dt><u>Type:</u> <b>uc_property_t</b> +<a name="IDX409"></a> +</dt> +<dd><p>This data type denotes a boolean property on Unicode characters. It is an +immediate type that can be copied by simple assignment, without involving +memory allocation. It is not an array type. +</p></dd></dl> + +<p>Many Unicode properties are predefined. +</p> +<p>The following are general properties. 
+</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_WHITE_SPACE</b> +<a name="IDX410"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_ALPHABETIC</b> +<a name="IDX411"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_ALPHABETIC</b> +<a name="IDX412"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_NOT_A_CHARACTER</b> +<a name="IDX413"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT</b> +<a name="IDX414"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT</b> +<a name="IDX415"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_DEPRECATED</b> +<a name="IDX416"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_LOGICAL_ORDER_EXCEPTION</b> +<a name="IDX417"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_VARIATION_SELECTOR</b> +<a name="IDX418"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_PRIVATE_USE</b> +<a name="IDX419"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_UNASSIGNED_CODE_VALUE</b> +<a name="IDX420"></a> +</dt> +</dl> + +<p>The following properties are related to case folding. +</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_UPPERCASE</b> +<a name="IDX421"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_UPPERCASE</b> +<a name="IDX422"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_LOWERCASE</b> +<a name="IDX423"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_LOWERCASE</b> +<a name="IDX424"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_TITLECASE</b> +<a name="IDX425"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_SOFT_DOTTED</b> +<a name="IDX426"></a> +</dt> +</dl> + +<p>The following properties are related to identifiers. 
+</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_ID_START</b> +<a name="IDX427"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_ID_START</b> +<a name="IDX428"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_ID_CONTINUE</b> +<a name="IDX429"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_ID_CONTINUE</b> +<a name="IDX430"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_XID_START</b> +<a name="IDX431"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_XID_CONTINUE</b> +<a name="IDX432"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_PATTERN_WHITE_SPACE</b> +<a name="IDX433"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_PATTERN_SYNTAX</b> +<a name="IDX434"></a> +</dt> +</dl> + +<p>The following properties have an influence on shaping and rendering. +</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_JOIN_CONTROL</b> +<a name="IDX435"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_GRAPHEME_BASE</b> +<a name="IDX436"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_GRAPHEME_EXTEND</b> +<a name="IDX437"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_GRAPHEME_EXTEND</b> +<a name="IDX438"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_GRAPHEME_LINK</b> +<a name="IDX439"></a> +</dt> +</dl> + +<p>The following properties relate to bidirectional reordering. 
+</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_CONTROL</b> +<a name="IDX440"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_LEFT_TO_RIGHT</b> +<a name="IDX441"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT</b> +<a name="IDX442"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT</b> +<a name="IDX443"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_EUROPEAN_DIGIT</b> +<a name="IDX444"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR</b> +<a name="IDX445"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR</b> +<a name="IDX446"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_ARABIC_DIGIT</b> +<a name="IDX447"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_COMMON_SEPARATOR</b> +<a name="IDX448"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_BLOCK_SEPARATOR</b> +<a name="IDX449"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_SEGMENT_SEPARATOR</b> +<a name="IDX450"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_WHITESPACE</b> +<a name="IDX451"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_NON_SPACING_MARK</b> +<a name="IDX452"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL</b> +<a name="IDX453"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_PDF</b> +<a name="IDX454"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE</b> +<a name="IDX455"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_BIDI_OTHER_NEUTRAL</b> +<a name="IDX456"></a> +</dt> +</dl> + +<p>The following properties deal with number representations. 
+</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_HEX_DIGIT</b> +<a name="IDX457"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_ASCII_HEX_DIGIT</b> +<a name="IDX458"></a> +</dt> +</dl> + +<p>The following properties deal with CJK. +</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_IDEOGRAPHIC</b> +<a name="IDX459"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_UNIFIED_IDEOGRAPH</b> +<a name="IDX460"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_RADICAL</b> +<a name="IDX461"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_IDS_BINARY_OPERATOR</b> +<a name="IDX462"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_IDS_TRINARY_OPERATOR</b> +<a name="IDX463"></a> +</dt> +</dl> + +<p>Other miscellaneous properties are: +</p> +<dl> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_ZERO_WIDTH</b> +<a name="IDX464"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_SPACE</b> +<a name="IDX465"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_NON_BREAK</b> +<a name="IDX466"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_ISO_CONTROL</b> +<a name="IDX467"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_FORMAT_CONTROL</b> +<a name="IDX468"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_DASH</b> +<a name="IDX469"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_HYPHEN</b> +<a name="IDX470"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_PUNCTUATION</b> +<a name="IDX471"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_LINE_SEPARATOR</b> +<a name="IDX472"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_PARAGRAPH_SEPARATOR</b> +<a name="IDX473"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_QUOTATION_MARK</b> +<a name="IDX474"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_SENTENCE_TERMINAL</b> +<a name="IDX475"></a> 
+</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_TERMINAL_PUNCTUATION</b> +<a name="IDX476"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_CURRENCY_SYMBOL</b> +<a name="IDX477"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_MATH</b> +<a name="IDX478"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_OTHER_MATH</b> +<a name="IDX479"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_PAIRED_PUNCTUATION</b> +<a name="IDX480"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_LEFT_OF_PAIR</b> +<a name="IDX481"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_COMBINING</b> +<a name="IDX482"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_COMPOSITE</b> +<a name="IDX483"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_DECIMAL_DIGIT</b> +<a name="IDX484"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_NUMERIC</b> +<a name="IDX485"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_DIACRITIC</b> +<a name="IDX486"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_EXTENDER</b> +<a name="IDX487"></a> +</dt> +<dt><u>Constant:</u> uc_property_t <b>UC_PROPERTY_IGNORABLE_CONTROL</b> +<a name="IDX488"></a> +</dt> +</dl> + +<p>The following function looks up a property by its name. +</p> +<dl> +<dt><u>Function:</u> uc_property_t <b>uc_property_byname</b><i> (const char *<var>property_name</var>)</i> +<a name="IDX489"></a> +</dt> +<dd><p>Returns the property given by name, e.g. <code>"White space"</code>. If a property +with the given name exists, the result will satisfy the +<code>uc_property_is_valid</code> predicate. Otherwise the result will not satisfy +this predicate and must not be passed to functions that expect an +<code>uc_property_t</code> argument. +</p> +<p>This function references a big table of all predefined properties. Its use +can significantly increase the size of your application. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_property_is_valid</b><i> (uc_property_t <var>property</var>)</i> +<a name="IDX490"></a> +</dt> +<dd><p>Returns <code>true</code> when the given property is valid, or <code>false</code> +otherwise. +</p></dd></dl> + +<p>The following function views a property as a set of Unicode characters. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property</b><i> (ucs4_t <var>uc</var>, uc_property_t <var>property</var>)</i> +<a name="IDX491"></a> +</dt> +<dd><p>Tests whether the Unicode character <var>uc</var> has the given property. +</p></dd></dl> + +<hr size="6"> +<a name="Properties-as-functions"></a> +<a name="SEC32"></a> +<h3 class="subsection"> <a href="libunistring.html#TOC32">8.8.2 Properties as functions – the functional API</a> </h3> + +<p>The following are general properties. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_white_space</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX492"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_alphabetic</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX493"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_alphabetic</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX494"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_not_a_character</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX495"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_default_ignorable_code_point</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX496"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_default_ignorable_code_point</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX497"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_deprecated</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX498"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_logical_order_exception</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX499"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_variation_selector</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX500"></a>
+</dt> +<dt><u>Function:</u> bool <b>uc_is_property_private_use</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX501"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_unassigned_code_value</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX502"></a> +</dt> +</dl> + +<p>The following properties are related to case folding. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_uppercase</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX503"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_uppercase</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX504"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_lowercase</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX505"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_lowercase</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX506"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_titlecase</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX507"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_soft_dotted</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX508"></a> +</dt> +</dl> + +<p>The following properties are related to identifiers. 
+</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_id_start</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX509"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_id_start</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX510"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_id_continue</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX511"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_id_continue</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX512"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_xid_start</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX513"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_xid_continue</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX514"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_pattern_white_space</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX515"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_pattern_syntax</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX516"></a> +</dt> +</dl> + +<p>The following properties have an influence on shaping and rendering. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_join_control</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX517"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_grapheme_base</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX518"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_grapheme_extend</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX519"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_grapheme_extend</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX520"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_grapheme_link</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX521"></a> +</dt> +</dl> + +<p>The following properties relate to bidirectional reordering. 
+</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_control</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX522"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_left_to_right</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX523"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_hebrew_right_to_left</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX524"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_arabic_right_to_left</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX525"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_european_digit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX526"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_eur_num_separator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX527"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_eur_num_terminator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX528"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_arabic_digit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX529"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_common_separator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX530"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_block_separator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX531"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_segment_separator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX532"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_whitespace</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX533"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_non_spacing_mark</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX534"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_boundary_neutral</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX535"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_pdf</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX536"></a> +</dt> +<dt><u>Function:</u> bool 
<b>uc_is_property_bidi_embedding_or_override</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX537"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_bidi_other_neutral</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX538"></a> +</dt> +</dl> + +<p>The following properties deal with number representations. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_hex_digit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX539"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_ascii_hex_digit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX540"></a> +</dt> +</dl> + +<p>The following properties deal with CJK. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_ideographic</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX541"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_unified_ideograph</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX542"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_radical</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX543"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_ids_binary_operator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX544"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_ids_trinary_operator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX545"></a> +</dt> +</dl> + +<p>Other miscellaneous properties are: +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_property_zero_width</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX546"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_space</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX547"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_non_break</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX548"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_iso_control</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX549"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_format_control</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX550"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_dash</b><i> (ucs4_t <var>uc</var>)</i> 
+<a name="IDX551"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_hyphen</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX552"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_punctuation</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX553"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_line_separator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX554"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_paragraph_separator</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX555"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_quotation_mark</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX556"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_sentence_terminal</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX557"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_terminal_punctuation</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX558"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_currency_symbol</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX559"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_math</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX560"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_other_math</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX561"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_paired_punctuation</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX562"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_left_of_pair</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX563"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_combining</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX564"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_composite</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX565"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_decimal_digit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX566"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_numeric</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX567"></a> +</dt> 
+<dt><u>Function:</u> bool <b>uc_is_property_diacritic</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX568"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_extender</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX569"></a> +</dt> +<dt><u>Function:</u> bool <b>uc_is_property_ignorable_control</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX570"></a> +</dt> +</dl> + +<hr size="6"> +<a name="Scripts"></a> +<a name="SEC33"></a> +<h2 class="section"> <a href="libunistring.html#TOC33">8.9 Scripts</a> </h2> + +<p>The Unicode characters are subdivided into scripts. +</p> +<p>The following type is used to represent a script: +</p> +<dl> +<dt><u>Type:</u> <b>uc_script_t</b> +<a name="IDX571"></a> +</dt> +<dd><p>This data type is a structure type that refers to statically allocated +read-only data. It contains the following fields: +</p><table><tr><td> </td><td><pre class="smallexample">const char *name; +</pre></td></tr></table> + +<p>The <code>name</code> field contains the name of the script. +</p></dd></dl> + +<a name="IDX572"></a> +<p>The following functions look up a script. +</p> +<dl> +<dt><u>Function:</u> const uc_script_t * <b>uc_script</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX573"></a> +</dt> +<dd><p>Returns the script of a Unicode character. Returns NULL if <var>uc</var> does not +belong to any script. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> const uc_script_t * <b>uc_script_byname</b><i> (const char *<var>script_name</var>)</i> +<a name="IDX574"></a> +</dt> +<dd><p>Returns the script given by its name, e.g. <code>"HAN"</code>. Returns NULL if a +script with the given name does not exist. +</p></dd></dl> + +<p>The following function views a script as a set of Unicode characters. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_script</b><i> (ucs4_t <var>uc</var>, const uc_script_t *<var>script</var>)</i> +<a name="IDX575"></a> +</dt> +<dd><p>Tests whether a Unicode character belongs to a given script. 
+</p></dd></dl> + +<p>The following gives a global picture of all scripts. +</p> +<dl> +<dt><u>Function:</u> void <b>uc_all_scripts</b><i> (const uc_script_t **<var>scripts</var>, size_t *<var>count</var>)</i> +<a name="IDX576"></a> +</dt> +<dd><p>Get the list of all scripts. Stores a pointer to an array of all scripts in +<code>*<var>scripts</var></code> and the length of this array in <code>*<var>count</var></code>. +</p></dd></dl> + +<hr size="6"> +<a name="Blocks"></a> +<a name="SEC34"></a> +<h2 class="section"> <a href="libunistring.html#TOC34">8.10 Blocks</a> </h2> + +<p>The Unicode characters are subdivided into blocks. A block is an interval of +Unicode code points. +</p> +<p>The following type is used to represent a block. +</p> +<dl> +<dt><u>Type:</u> <b>uc_block_t</b> +<a name="IDX577"></a> +</dt> +<dd><p>This data type is a structure type that refers to statically allocated data. +It contains the following fields: +</p><table><tr><td> </td><td><pre class="smallexample">ucs4_t start; +ucs4_t end; +const char *name; +</pre></td></tr></table> + +<p>The <code>start</code> field is the first Unicode code point in the block. +</p> +<p>The <code>end</code> field is the last Unicode code point in the block. +</p> +<p>The <code>name</code> field is the name of the block. +</p></dd></dl> + +<a name="IDX578"></a> +<p>The following function looks up a block. +</p> +<dl> +<dt><u>Function:</u> const uc_block_t * <b>uc_block</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX579"></a> +</dt> +<dd><p>Returns the block a character belongs to. +</p></dd></dl> + +<p>The following function views a block as a set of Unicode characters. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_block</b><i> (ucs4_t <var>uc</var>, const uc_block_t *<var>block</var>)</i> +<a name="IDX580"></a> +</dt> +<dd><p>Tests whether a Unicode character belongs to a given block. +</p></dd></dl> + +<p>The following gives a global picture of all blocks.
+</p> +<dl> +<dt><u>Function:</u> void <b>uc_all_blocks</b><i> (const uc_block_t **<var>blocks</var>, size_t *<var>count</var>)</i> +<a name="IDX581"></a> +</dt> +<dd><p>Get the list of all blocks. Stores a pointer to an array of all blocks in +<code>*<var>blocks</var></code> and the length of this array in <code>*<var>count</var></code>. +</p></dd></dl> + +<hr size="6"> +<a name="ISO-C-and-Java-syntax"></a> +<a name="SEC35"></a> +<h2 class="section"> <a href="libunistring.html#TOC35">8.11 ISO C and Java syntax</a> </h2> + +<p>The following properties are taken from language standards. The supported +language standards are ISO C 99 and Java. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_c_whitespace</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX582"></a> +</dt> +<dd><p>Tests whether a Unicode character is considered whitespace in ISO C 99. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_java_whitespace</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX583"></a> +</dt> +<dd><p>Tests whether a Unicode character is considered whitespace in Java. +</p></dd></dl> + +<p>The following enumerated values are the possible return values of the functions +<code>uc_c_ident_category</code> and <code>uc_java_ident_category</code>. +</p> +<dl> +<dt><u>Constant:</u> int <b>UC_IDENTIFIER_START</b> +<a name="IDX584"></a> +</dt> +<dd><p>This return value means that the given character is valid as first or +subsequent character in an identifier. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_IDENTIFIER_VALID</b> +<a name="IDX585"></a> +</dt> +<dd><p>This return value means that the given character is valid as subsequent +character only. +</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_IDENTIFIER_INVALID</b> +<a name="IDX586"></a> +</dt> +<dd><p>This return value means that the given character is not valid in an identifier. 
+</p></dd></dl> + +<dl> +<dt><u>Constant:</u> int <b>UC_IDENTIFIER_IGNORABLE</b> +<a name="IDX587"></a> +</dt> +<dd><p>This return value (only for Java) means that the given character is ignorable. +</p></dd></dl> + +<p>The following functions determine whether a given character can be a constituent +of an identifier in the given programming language. +</p> +<a name="IDX588"></a> +<dl> +<dt><u>Function:</u> int <b>uc_c_ident_category</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX589"></a> +</dt> +<dd><p>Returns the categorization of a Unicode character with respect to the ISO C 99 +identifier syntax. +</p></dd></dl> + +<a name="IDX590"></a> +<dl> +<dt><u>Function:</u> int <b>uc_java_ident_category</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX591"></a> +</dt> +<dd><p>Returns the categorization of a Unicode character with respect to the Java +identifier syntax. +</p></dd></dl> + +<hr size="6"> +<a name="Classifications-like-in-ISO-C"></a> +<a name="SEC36"></a> +<h2 class="section"> <a href="libunistring.html#TOC36">8.12 Classifications like in ISO C</a> </h2> + +<p>The following character classifications mimic those declared in the ISO C +header files <code>&lt;ctype.h&gt;</code> and <code>&lt;wctype.h&gt;</code>. These functions are +deprecated, because this set of functions was designed with ASCII in mind and +cannot reflect the more diverse reality of the Unicode character set. But +they can be a quick-and-dirty porting aid when migrating from <code>wchar_t</code> +APIs to Unicode strings. +</p> +<dl> +<dt><u>Function:</u> bool <b>uc_is_alnum</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX592"></a> +</dt> +<dd><p>Tests for any character for which <code>uc_is_alpha</code> or <code>uc_is_digit</code> is +true.
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_alpha</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX593"></a> +</dt> +<dd><p>Tests for any character for which <code>uc_is_upper</code> or <code>uc_is_lower</code> is +true, or any character that is one of a locale-specific set of characters for +which none of <code>uc_is_cntrl</code>, <code>uc_is_digit</code>, <code>uc_is_punct</code>, or +<code>uc_is_space</code> is true. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_cntrl</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX594"></a> +</dt> +<dd><p>Tests for any control character. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_digit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX595"></a> +</dt> +<dd><p>Tests for any character that corresponds to a decimal-digit character. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_graph</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX596"></a> +</dt> +<dd><p>Tests for any character for which <code>uc_is_print</code> is true and +<code>uc_is_space</code> is false. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_lower</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX597"></a> +</dt> +<dd><p>Tests for any character that corresponds to a lowercase letter or is one +of a locale-specific set of characters for which none of <code>uc_is_cntrl</code>, +<code>uc_is_digit</code>, <code>uc_is_punct</code>, or <code>uc_is_space</code> is true. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_print</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX598"></a> +</dt> +<dd><p>Tests for any printing character. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_punct</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX599"></a> +</dt> +<dd><p>Tests for any printing character that is one of a locale-specific set of +characters for which neither <code>uc_is_space</code> nor <code>uc_is_alnum</code> is true. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_space</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX600"></a> +</dt> +<dd><p>Tests for any character that corresponds to a locale-specific set of characters +for which none of <code>uc_is_alnum</code>, <code>uc_is_graph</code>, or <code>uc_is_punct</code> +is true. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_upper</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX601"></a> +</dt> +<dd><p>Tests for any character that corresponds to an uppercase letter or is one +of a locale-specific set of characters for which none of <code>uc_is_cntrl</code>, +<code>uc_is_digit</code>, <code>uc_is_punct</code>, or <code>uc_is_space</code> is true. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_xdigit</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX602"></a> +</dt> +<dd><p>Tests for any character that corresponds to a hexadecimal-digit character. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> bool <b>uc_is_blank</b><i> (ucs4_t <var>uc</var>)</i> +<a name="IDX603"></a> +</dt> +<dd><p>Tests for any character that corresponds to a standard blank character or +a locale-specific set of characters for which <code>uc_is_alnum</code> is false.
+</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC20" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_9.html#SEC37" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_9.html b/doc/libunistring_9.html new file mode 100644 index 00000000..0586e0f2 --- /dev/null +++ b/doc/libunistring_9.html @@ -0,0 +1,141 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. +Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: 9. Display width <uniwidth.h></title> + +<meta name="description" content="GNU libunistring: 9. 
Display width <uniwidth.h>"> +<meta name="keywords" content="GNU libunistring: 9. Display width <uniwidth.h>"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_8.html#SEC20" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_10.html#SEC38" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? 
</a>]</td> +</tr></table> + +<hr size="2"> +<a name="uniwidth_002eh"></a> +<a name="SEC37"></a> +<h1 class="chapter"> <a href="libunistring.html#TOC37">9. Display width <code><uniwidth.h></code></a> </h1> + +<p>This include file declares functions that return the display width, measured +in columns, of characters or strings, when output to a device that uses +non-proportional fonts. +</p> +<a name="IDX604"></a> +<p>Note that for some rarely used characters the actual fonts or terminal +emulators can use a different width. There is no mechanism for communicating +the display width of characters across a Unix pseudo-terminal (tty). Also, +there are scripts with complex rendering, like the Indic scripts. For these +scripts, there is no such concept as non-proportional fonts. Therefore +the results of these functions usually work fine on most scripts and on +most characters but can fail to represent the actual display width. +</p> +<p>These functions are locale dependent. The <var>encoding</var> argument identifies +the encoding (e.g. <code>"ISO-8859-2"</code> for Polish). +</p> +<a name="IDX605"></a> +<a name="IDX606"></a> +<a name="IDX607"></a> +<dl> +<dt><u>Function:</u> int <b>uc_width</b><i> (ucs4_t <var>uc</var>, const char *<var>encoding</var>)</i> +<a name="IDX608"></a> +</dt> +<dd><p>Determines and returns the number of column positions required for <var>uc</var>. +Returns -1 if <var>uc</var> is a control character that has an influence on the +column position when output. 
+</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_width</b><i> (const uint8_t *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>)</i> +<a name="IDX609"></a> +</dt> +<dt><u>Function:</u> int <b>u16_width</b><i> (const uint16_t *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>)</i> +<a name="IDX610"></a> +</dt> +<dt><u>Function:</u> int <b>u32_width</b><i> (const uint32_t *<var>s</var>, size_t <var>n</var>, const char *<var>encoding</var>)</i> +<a name="IDX611"></a> +</dt> +<dd><p>Determines and returns the number of column positions required for the first +<var>n</var> units (or fewer if <var>s</var> ends before this) in <var>s</var>. This +function ignores control characters in the string. +</p></dd></dl> + +<dl> +<dt><u>Function:</u> int <b>u8_strwidth</b><i> (const uint8_t *<var>s</var>, const char *<var>encoding</var>)</i> +<a name="IDX612"></a> +</dt> +<dt><u>Function:</u> int <b>u16_strwidth</b><i> (const uint16_t *<var>s</var>, const char *<var>encoding</var>)</i> +<a name="IDX613"></a> +</dt> +<dt><u>Function:</u> int <b>u32_strwidth</b><i> (const uint32_t *<var>s</var>, const char *<var>encoding</var>)</i> +<a name="IDX614"></a> +</dt> +<dd><p>Determines and returns the number of column positions required for <var>s</var>. +This function ignores control characters in the string.
+</p></dd></dl> +<hr size="6"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring_8.html#SEC20" title="Beginning of this chapter or previous chapter"> << </a>]</td> +<td valign="middle" align="left">[<a href="libunistring_10.html#SEC38" title="Next chapter"> >> </a>]</td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left"> </td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_abt.html b/doc/libunistring_abt.html new file mode 100644 index 00000000..47d71bac --- /dev/null +++ b/doc/libunistring_abt.html @@ -0,0 +1,167 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. 
+Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: About This Document</title> + +<meta name="description" content="GNU libunistring: About This Document"> +<meta name="keywords" content="GNU libunistring: About This Document"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<a name="SEC_About"></a> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<h1>About This Document</h1> +<p> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+</p> +<p> + The buttons in the navigation panels have the following meaning: +</p> +<table border="1"> + <tr> + <th> Button </th> + <th> Name </th> + <th> Go to </th> + <th> From 1.2.3 go to</th> + </tr> + <tr> + <td align="center"> [ < ] </td> + <td align="center">Back</td> + <td>Previous section in reading order</td> + <td>1.2.2</td> + </tr> + <tr> + <td align="center"> [ > ] </td> + <td align="center">Forward</td> + <td>Next section in reading order</td> + <td>1.2.4</td> + </tr> + <tr> + <td align="center"> [ << ] </td> + <td align="center">FastBack</td> + <td>Beginning of this chapter or previous chapter</td> + <td>1</td> + </tr> + <tr> + <td align="center"> [ Up ] </td> + <td align="center">Up</td> + <td>Up section</td> + <td>1.2</td> + </tr> + <tr> + <td align="center"> [ >> ] </td> + <td align="center">FastForward</td> + <td>Next chapter</td> + <td>2</td> + </tr> + <tr> + <td align="center"> [Top] </td> + <td align="center">Top</td> + <td>Cover (top) of document</td> + <td> </td> + </tr> + <tr> + <td align="center"> [Contents] </td> + <td align="center">Contents</td> + <td>Table of contents</td> + <td> </td> + </tr> + <tr> + <td align="center"> [Index] </td> + <td align="center">Index</td> + <td>Index</td> + <td> </td> + </tr> + <tr> + <td align="center"> [ ? ] </td> + <td align="center">About</td> + <td>About (help)</td> + <td> </td> + </tr> +</table> + +<p> + where the <strong> Example </strong> assumes that the current position is at <strong> Subsubsection One-Two-Three </strong> of a document of the following structure: +</p> + +<ul> + <li> 1. 
Section One + <ul> + <li>1.1 Subsection One-One + <ul> + <li>...</li> + </ul> + </li> + <li>1.2 Subsection One-Two + <ul> + <li>1.2.1 Subsubsection One-Two-One</li> + <li>1.2.2 Subsubsection One-Two-Two</li> + <li>1.2.3 Subsubsection One-Two-Three + <strong><== Current Position </strong></li> + <li>1.2.4 Subsubsection One-Two-Four</li> + </ul> + </li> + <li>1.3 Subsection One-Three + <ul> + <li>...</li> + </ul> + </li> + <li>1.4 Subsection One-Four</li> + </ul> + </li> +</ul> + +<hr size="1"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="libunistring.html#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="libunistring.html#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. + </font> + <br> + +</p> +</body> +</html> diff --git a/doc/libunistring_toc.html b/doc/libunistring_toc.html new file mode 100644 index 00000000..e7e4e51e --- /dev/null +++ b/doc/libunistring_toc.html @@ -0,0 +1,164 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd"> +<html> +<!-- Created on July, 1 2009 by texi2html 1.78a --> +<!-- +Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author) + Karl Berry <karl@freefriends.org> + Olaf Bachmann <obachman@mathematik.uni-kl.de> + and many others. +Maintained by: Many creative people. 
+Send bugs and suggestions to <texi2html-bug@nongnu.org> + +--> +<head> +<title>GNU libunistring: GNU libunistring</title> + +<meta name="description" content="GNU libunistring: GNU libunistring"> +<meta name="keywords" content="GNU libunistring: GNU libunistring"> +<meta name="resource-type" content="document"> +<meta name="distribution" content="global"> +<meta name="Generator" content="texi2html 1.78a"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<style type="text/css"> +<!-- +a.summary-letter {text-decoration: none} +pre.display {font-family: serif} +pre.format {font-family: serif} +pre.menu-comment {font-family: serif} +pre.menu-preformatted {font-family: serif} +pre.smalldisplay {font-family: serif; font-size: smaller} +pre.smallexample {font-size: smaller} +pre.smallformat {font-family: serif; font-size: smaller} +pre.smalllisp {font-size: smaller} +span.roman {font-family:serif; font-weight:normal;} +span.sansserif {font-family:sans-serif; font-weight:normal;} +ul.toc {list-style: none} +--> +</style> + + +</head> + +<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> + +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<h1 class="settitle">GNU libunistring</h1> +<a name="SEC_Contents"></a> +<h1>Table of Contents</h1> +<div class="contents"> + +<ul class="toc"> + <li><a name="TOC1" href="libunistring_1.html#SEC1">1. 
Introduction</a> + <ul class="toc"> + <li><a name="TOC2" href="libunistring_1.html#SEC2">1.1 Unicode</a></li> + <li><a name="TOC3" href="libunistring_1.html#SEC3">1.2 Unicode and Internationalization</a></li> + <li><a name="TOC4" href="libunistring_1.html#SEC4">1.3 Locale encodings</a></li> + <li><a name="TOC5" href="libunistring_1.html#SEC5">1.4 Choice of in-memory representation of strings</a></li> + <li><a name="TOC6" href="libunistring_1.html#SEC6">1.5 ‘<samp>char *</samp>’ strings</a></li> + <li><a name="TOC7" href="libunistring_1.html#SEC7">1.6 The <code>wchar_t</code> mess</a></li> + <li><a name="TOC8" href="libunistring_1.html#SEC8">1.7 Unicode strings</a></li> + </ul></li> + <li><a name="TOC9" href="libunistring_2.html#SEC9">2. Conventions</a></li> + <li><a name="TOC10" href="libunistring_3.html#SEC10">3. Elementary types <code><unitypes.h></code></a></li> + <li><a name="TOC11" href="libunistring_4.html#SEC11">4. Elementary Unicode string functions <code><unistr.h></code></a> + <ul class="toc"> + <li><a name="TOC12" href="libunistring_4.html#SEC12">4.1 Elementary string checks</a></li> + <li><a name="TOC13" href="libunistring_4.html#SEC13">4.2 Elementary string conversions</a></li> + <li><a name="TOC14" href="libunistring_4.html#SEC14">4.3 Elementary string functions</a></li> + <li><a name="TOC15" href="libunistring_4.html#SEC15">4.4 Elementary string functions with memory allocation</a></li> + <li><a name="TOC16" href="libunistring_4.html#SEC16">4.5 Elementary string functions on NUL terminated strings</a></li> + </ul></li> + <li><a name="TOC17" href="libunistring_5.html#SEC17">5. Conversions between Unicode and encodings <code><uniconv.h></code></a></li> + <li><a name="TOC18" href="libunistring_6.html#SEC18">6. Output with Unicode strings <code><unistdio.h></code></a></li> + <li><a name="TOC19" href="libunistring_7.html#SEC19">7. Names of Unicode characters <code><uniname.h></code></a></li> + <li><a name="TOC20" href="libunistring_8.html#SEC20">8. 
Unicode character classification and properties <code><unictype.h></code></a> + <ul class="toc"> + <li><a name="TOC21" href="libunistring_8.html#SEC21">8.1 General category</a> + <ul class="toc"> + <li><a name="TOC22" href="libunistring_8.html#SEC22">8.1.1 The object oriented API for general category</a></li> + <li><a name="TOC23" href="libunistring_8.html#SEC23">8.1.2 The bit mask API for general category</a></li> + </ul></li> + <li><a name="TOC24" href="libunistring_8.html#SEC24">8.2 Canonical combining class</a></li> + <li><a name="TOC25" href="libunistring_8.html#SEC25">8.3 Bidirectional category</a></li> + <li><a name="TOC26" href="libunistring_8.html#SEC26">8.4 Decimal digit value</a></li> + <li><a name="TOC27" href="libunistring_8.html#SEC27">8.5 Digit value</a></li> + <li><a name="TOC28" href="libunistring_8.html#SEC28">8.6 Numeric value</a></li> + <li><a name="TOC29" href="libunistring_8.html#SEC29">8.7 Mirrored character</a></li> + <li><a name="TOC30" href="libunistring_8.html#SEC30">8.8 Properties</a> + <ul class="toc"> + <li><a name="TOC31" href="libunistring_8.html#SEC31">8.8.1 Properties as objects – the object oriented API</a></li> + <li><a name="TOC32" href="libunistring_8.html#SEC32">8.8.2 Properties as functions – the functional API</a></li> + </ul></li> + <li><a name="TOC33" href="libunistring_8.html#SEC33">8.9 Scripts</a></li> + <li><a name="TOC34" href="libunistring_8.html#SEC34">8.10 Blocks</a></li> + <li><a name="TOC35" href="libunistring_8.html#SEC35">8.11 ISO C and Java syntax</a></li> + <li><a name="TOC36" href="libunistring_8.html#SEC36">8.12 Classifications like in ISO C</a></li> + </ul></li> + <li><a name="TOC37" href="libunistring_9.html#SEC37">9. Display width <code><uniwidth.h></code></a></li> + <li><a name="TOC38" href="libunistring_10.html#SEC38">10. 
Word breaks in strings <code><uniwbrk.h></code></a> + <ul class="toc"> + <li><a name="TOC39" href="libunistring_10.html#SEC39">10.1 Word breaks in a string</a></li> + <li><a name="TOC40" href="libunistring_10.html#SEC40">10.2 Word break property</a></li> + </ul></li> + <li><a name="TOC41" href="libunistring_11.html#SEC41">11. Line breaking <code><unilbrk.h></code></a></li> + <li><a name="TOC42" href="libunistring_12.html#SEC42">12. Normalization forms (composition and decomposition) <code><uninorm.h></code></a> + <ul class="toc"> + <li><a name="TOC43" href="libunistring_12.html#SEC43">12.1 Decomposition of Unicode characters</a></li> + <li><a name="TOC44" href="libunistring_12.html#SEC44">12.2 Composition of Unicode characters</a></li> + <li><a name="TOC45" href="libunistring_12.html#SEC45">12.3 Normalization of strings</a></li> + <li><a name="TOC46" href="libunistring_12.html#SEC46">12.4 Normalizing comparisons</a></li> + <li><a name="TOC47" href="libunistring_12.html#SEC47">12.5 Normalization of streams of Unicode characters</a></li> + </ul></li> + <li><a name="TOC48" href="libunistring_13.html#SEC48">13. Case mappings <code><unicase.h></code></a> + <ul class="toc"> + <li><a name="TOC49" href="libunistring_13.html#SEC49">13.1 Case mappings of characters</a></li> + <li><a name="TOC50" href="libunistring_13.html#SEC50">13.2 Case mappings of strings</a></li> + <li><a name="TOC51" href="libunistring_13.html#SEC51">13.3 Case mappings of substrings</a></li> + <li><a name="TOC52" href="libunistring_13.html#SEC52">13.4 Case insensitive comparison</a></li> + <li><a name="TOC53" href="libunistring_13.html#SEC53">13.5 Case detection</a></li> + </ul></li> + <li><a name="TOC54" href="libunistring_14.html#SEC54">14. Regular expressions <code><uniregex.h></code></a></li> + <li><a name="TOC55" href="libunistring_15.html#SEC55">15. 
Using the library</a> + <ul class="toc"> + <li><a name="TOC56" href="libunistring_15.html#SEC56">15.1 Installation</a></li> + <li><a name="TOC57" href="libunistring_15.html#SEC57">15.2 Compiler options</a></li> + <li><a name="TOC58" href="libunistring_15.html#SEC58">15.3 Include files</a></li> + <li><a name="TOC59" href="libunistring_15.html#SEC59">15.4 Autoconf macro</a></li> + <li><a name="TOC60" href="libunistring_15.html#SEC60">15.5 Reporting problems</a></li> + </ul></li> + <li><a name="TOC61" href="libunistring_16.html#SEC61">16. More advanced functionality</a></li> + <li><a name="TOC62" href="libunistring_17.html#SEC62">A. Licenses</a> + <ul class="toc"> + <li><a name="TOC63" href="libunistring_17.html#SEC63">A.1 GNU GENERAL PUBLIC LICENSE</a></li> + <li><a name="TOC64" href="libunistring_17.html#SEC68">A.2 GNU LESSER GENERAL PUBLIC LICENSE</a></li> + <li><a name="TOC65" href="libunistring_17.html#SEC69">A.3 GNU Free Documentation License</a></li> + </ul></li> + <li><a name="TOC66" href="libunistring_18.html#SEC71">Index</a></li> +</ul> +</div> + +<a name="Top"></a> +<a name="SEC_Top"></a> + + +<hr size="1"> +<table cellpadding="1" cellspacing="1" border="0"> +<tr><td valign="middle" align="left">[<a href="#SEC_Top" title="Cover (top) of document">Top</a>]</td> +<td valign="middle" align="left">[<a href="#SEC_Contents" title="Table of contents">Contents</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_18.html#SEC71" title="Index">Index</a>]</td> +<td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td> +</tr></table> +<p> + <font size="-1"> + This document was generated by <em>Bruno Haible</em> on <em>July, 1 2009</em> using <a href="http://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>. 
+ </font> + <br> + +</p> +</body> +</html> diff --git a/doc/stamp-vti b/doc/stamp-vti new file mode 100644 index 00000000..ee4ba906 --- /dev/null +++ b/doc/stamp-vti @@ -0,0 +1,4 @@ +@set UPDATED 29 June 2009 +@set UPDATED-MONTH June 2009 +@set EDITION 0.9.1 +@set VERSION 0.9.1 diff --git a/doc/unicase.texi b/doc/unicase.texi new file mode 100644 index 00000000..14b46be0 --- /dev/null +++ b/doc/unicase.texi @@ -0,0 +1,364 @@ +@node unicase.h +@chapter Case mappings @code{<unicase.h>} + +This include file defines functions for case mapping for Unicode strings and +case insensitive comparison of Unicode strings and C strings. + +These string functions fix the problems that were mentioned in +@ref{char * strings}, namely, they handle the Croatian +@sc{LETTER DZ WITH CARON}, the German @sc{LATIN SMALL LETTER SHARP S}, the +Greek sigma and the Lithuanian i correctly. + +@menu +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: +@end menu + +@node Case mappings of characters +@section Case mappings of characters + +@cindex Unicode character, case mappings +The following functions implement case mappings on Unicode characters --- +for those cases only where the result of the mapping is again a single +Unicode character. + +These mappings are locale and context independent. + +@cartouche +@strong{WARNING!} These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that treat an +entire string at once and are language aware. +@end cartouche + +@deftypefun ucs4_t uc_toupper (ucs4_t @var{uc}) +Returns the uppercase mapping of the Unicode character @var{uc}. +@end deftypefun + +@deftypefun ucs4_t uc_tolower (ucs4_t @var{uc}) +Returns the lowercase mapping of the Unicode character @var{uc}. +@end deftypefun + +@deftypefun ucs4_t uc_totitle (ucs4_t @var{uc}) +Returns the titlecase mapping of the Unicode character @var{uc}.
+ +The titlecase mapping of a character is to be used when the character should +look like upper case and the following characters are lower cased. + +For most characters, this is the same as the uppercase mapping. There are +only a few characters where the title case variant and the upper case variant +are different. These characters occur in the Latin writing of the Croatian, +Bosnian, and Serbian languages. + +@c Normally we would use .33 space for each column, but this is too much in +@c TeX mode, see +@c <http://lists.gnu.org/archive/html/bug-texinfo/2009-05/msg00016.html>. +@multitable @columnfractions .31 .31 .31 +@headitem Lower case @tab Title case @tab Upper case +@item LATIN SMALL LETTER LJ + @tab LATIN CAPITAL LETTER L WITH SMALL LETTER J + @tab LATIN CAPITAL LETTER LJ +@item LATIN SMALL LETTER NJ + @tab LATIN CAPITAL LETTER N WITH SMALL LETTER J + @tab LATIN CAPITAL LETTER NJ +@item LATIN SMALL LETTER DZ + @tab LATIN CAPITAL LETTER D WITH SMALL LETTER Z + @tab LATIN CAPITAL LETTER DZ +@item LATIN SMALL LETTER DZ WITH CARON + @tab LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + @tab LATIN CAPITAL LETTER DZ WITH CARON +@end multitable +@end deftypefun + +@node Case mappings of strings +@section Case mappings of strings + +@cindex case mappings +@cindex uppercasing +@cindex lowercasing +@cindex titlecasing +Case mapping should always be performed on entire strings, not on individual +characters. The functions in this section do so. + +These functions allow to apply a normalization after the case mapping. The +reason is that if you want to treat @samp{@"{a}} and @samp{@"{A}} the same, +you most often also want to treat the composed and decomposed forms of such +a character, U+00C4 @sc{LATIN CAPITAL LETTER A WITH DIAERESIS} and +U+0041 @sc{LATIN CAPITAL LETTER A} U+0308 @sc{COMBINING DIAERESIS} the same. +The @var{nf} argument designates the normalization. + +@cindex locale language +These functions are locale dependent.
The @var{iso639_language} argument +identifies the language (e.g. @code{"tr"} for Turkish). NULL means to use +locale independent case mappings. + +@deftypefun {const char *} uc_locale_language () +Returns the ISO 639 language code of the current locale. +Returns @code{""} if it is unknown, or in the "C" locale. +@end deftypefun + +@deftypefun {uint8_t *} u8_toupper (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_toupper (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_toupper (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the uppercase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@deftypefun {uint8_t *} u8_tolower (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_tolower (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_tolower (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the lowercase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_totitle (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_totitle (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_totitle (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the titlecase mapping of a string. + +Mapping to title case means that, in each word, the first cased character +is being mapped to title case and the remaining characters of the word +are being mapped to lower case. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@node Case mappings of substrings +@section Case mappings of substrings + +Case mapping of a substring cannot simply be performed by extracting the +substring and then applying the case mapping function to it. This does not +work because case mapping requires some information about the surrounding +characters. The following functions allow to apply case mappings to +substrings of a given string, while taking into account the characters that +precede it (the ``prefix'') and the characters that follow it (the ``suffix''). + +@deftp Type casing_prefix_context_t +This data type denotes the case-mapping context that is given by a prefix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +@end deftp + +@deftypevr Constant casing_prefix_context_t unicase_empty_prefix_context +This constant is the case-mapping context that corresponds to an empty prefix +string. 
+@end deftypevr + +The following functions return @code{casing_prefix_context_t} objects: + +@deftypefun casing_prefix_context_t u8_casing_prefix_context (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx casing_prefix_context_t u16_casing_prefix_context (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx casing_prefix_context_t u32_casing_prefix_context (const uint32_t *@var{s}, size_t @var{n}) +Returns the case-mapping context of a given prefix string. +@end deftypefun + +@deftypefun casing_prefix_context_t u8_casing_prefixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +@deftypefunx casing_prefix_context_t u16_casing_prefixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +@deftypefunx casing_prefix_context_t u32_casing_prefixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +Returns the case-mapping context of the prefix concat(@var{a}, @var{s}), +given the case-mapping context of the prefix @var{a}. +@end deftypefun + +@deftp Type casing_suffix_context_t +This data type denotes the case-mapping context that is given by a suffix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +@end deftp + +@deftypevr Constant casing_suffix_context_t unicase_empty_suffix_context +This constant is the case-mapping context that corresponds to an empty suffix +string. +@end deftypevr + +The following functions return @code{casing_suffix_context_t} objects: + +@deftypefun casing_suffix_context_t u8_casing_suffix_context (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx casing_suffix_context_t u16_casing_suffix_context (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx casing_suffix_context_t u32_casing_suffix_context (const uint32_t *@var{s}, size_t @var{n}) +Returns the case-mapping context of a given suffix string. 
+@end deftypefun + +@deftypefun casing_suffix_context_t u8_casing_suffixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +@deftypefunx casing_suffix_context_t u16_casing_suffixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +@deftypefunx casing_suffix_context_t u32_casing_suffixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +Returns the case-mapping context of the suffix concat(@var{s}, @var{a}), +given the case-mapping context of the suffix @var{a}. +@end deftypefun + +The following functions perform a case mapping, considering the +prefix context and the suffix context. + +@deftypefun {uint8_t *} u8_ct_toupper (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_toupper (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_toupper (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the uppercase mapping of a string that is surrounded by a prefix +and a suffix. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_ct_tolower (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_tolower (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_tolower (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the lowercase mapping of a string that is surrounded by a prefix +and a suffix. +@end deftypefun + +@deftypefun {uint8_t *} u8_ct_totitle (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_totitle (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_totitle (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the titlecase mapping of a string that is surrounded by a prefix +and a suffix. 
+@end deftypefun + +For example, to uppercase the UTF-8 substring between @code{s + start_index} +and @code{s + end_index} of a string that extends from @code{s} to +@code{s + u8_strlen (s)}, you can use the statements + +@smallexample +size_t result_length; +uint8_t *result = + u8_ct_toupper (s + start_index, end_index - start_index, + u8_casing_prefix_context (s, start_index), + u8_casing_suffix_context (s + end_index, + u8_strlen (s) - end_index), + iso639_language, NULL, NULL, &result_length); +@end smallexample + +@node Case insensitive comparison +@section Case insensitive comparison + +@cindex comparing, ignoring case +@cindex comparing, ignoring normalization and case +The following functions implement comparison that ignores differences in case +and normalization. + +@deftypefun {uint8_t *} u8_casefold (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_casefold (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_casefold (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the case folded string. + +Comparing @code{u8_casefold (@var{s1})} and @code{u8_casefold (@var{s2})} +with the @code{u8_cmp2} function is equivalent to comparing @var{s1} and +@var{s2} with @code{u8_casecmp}. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_ct_casefold (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_casefold (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_casefold (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the case folded string. The case folding takes into account the +case mapping contexts of the prefix and suffix strings. +@end deftypefun + +@deftypefun int u8_casecmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_casecmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_casecmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int ulc_casecmp (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in case and normalization. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+ +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@cindex comparing, ignoring case, with collation rules +@cindex comparing, with collation rules, ignoring case +@cindex comparing, ignoring normalization and case, with collation rules +@cindex comparing, with collation rules, ignoring normalization and case +The following functions additionally take into account the sorting rules of the +current locale. + +@deftypefun {char *} u8_casexfrm (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_casexfrm (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_casexfrm (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} ulc_casexfrm (const char *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts the string @var{s} of length @var{n} to a NUL-terminated byte +sequence, in such a way that comparing @code{u8_casexfrm (@var{s1})} and +@code{u8_casexfrm (@var{s2})} with the gnulib function @code{memcmp2} is +equivalent to comparing @var{s1} and @var{s2} with @code{u8_casecoll}. + +@var{nf} must be either @code{UNINORM_NFC}, @code{UNINORM_NFKC}, or NULL for +no normalization. 
+@end deftypefun + +@deftypefun int u8_casecoll (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_casecoll (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_casecoll (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int ulc_casecoll (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in case and normalization, +using the collation rules of the current locale. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. +It can also be NULL, for no normalization. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@node Case detection +@section Case detection + +@cindex case detection +@cindex detecting case +The following functions determine whether a Unicode string is entirely in +upper case, or entirely in lower case, or entirely in title case, or already +case-folded. 
+ +@deftypefun int u8_is_uppercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_uppercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_uppercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to upper case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +@deftypefun int u8_is_lowercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_lowercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_lowercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to lower case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +@deftypefun int u8_is_titlecase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_titlecase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_titlecase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to title case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. 
+@end deftypefun + +@deftypefun int u8_is_casefolded (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_casefolded (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_casefolded (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if applying case folding to NFD(@var{s}) is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +The following functions determine whether case mappings have any effect on a +Unicode string. + +@deftypefun int u8_is_cased (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_cased (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_cased (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if case matters for @var{s}, that is, if +mapping NFD(@var{s}) to either upper case or lower case or title case is not +a no-op. Sets @code{*@var{resultp}} to false if NFD(@var{s}) maps to itself +under the upper case mapping, under the lower case mapping, and under the title +case mapping; in other words, when NFD(@var{s}) consists entirely of caseless +characters. Upon failure, returns -1 with @code{errno} set. +@end deftypefun diff --git a/doc/uniconv.texi b/doc/uniconv.texi new file mode 100644 index 00000000..07cfa1be --- /dev/null +++ b/doc/uniconv.texi @@ -0,0 +1,157 @@ +@node uniconv.h +@chapter Conversions between Unicode and encodings @code{<uniconv.h>} + +This include file declares functions for converting between Unicode strings +and @code{char *} strings in locale encoding or in other specified encodings. 
+ +@cindex locale encoding +The following function returns the locale encoding. + +@deftypefun {const char *} locale_charset () +Determines the current locale's character encoding, and canonicalizes it +into one of the canonical names listed in @file{config.charset}. +If the canonical name cannot be determined, the result is a non-canonical +name. + +The result must not be freed; it is statically allocated. + +The result of this function can be used as an argument to the @code{iconv_open} +function in GNU libc, in GNU libiconv, or in the gnulib provided wrapper +around the native @code{iconv_open} function. It may not work as an argument +to the native @code{iconv_open} function directly. +@end deftypefun + +The handling of unconvertible characters during the conversions can be +parametrized through the following enumeration type: + +@deftp Type {enum iconv_ilseq_handler} +This type specifies how unconvertible characters in the input are handled. +@end deftp + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_error +This handler causes the function to return with @code{errno} set to +@code{EILSEQ}. +@end deftypevr + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_question_mark +This handler produces one question mark @samp{?} per unconvertible character. +@end deftypevr + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_escape_sequence +This handler produces an escape sequence @code{\u@var{xxxx}} or +@code{\U@var{xxxxxxxx}} for each unconvertible character. +@end deftypevr + +@cindex converting +The following functions convert between strings in a specified encoding and +Unicode strings. 
+ +@deftypefun {uint8_t *} u8_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an entire string, possibly including NUL bytes, from one encoding +to UTF-8 encoding. + +Converts a memory region given in encoding @var{fromcode}. @var{fromcode} is +as for the @code{iconv_open} function. + +The input is in the memory region between @var{src} (inclusive) and +@code{@var{src} + @var{srclen}} (exclusive). + +If @var{offsets} is not NULL, it should point to an array of @var{srclen} +integers; this array is filled with offsets into the result, i.e@. the +character starting at @code{@var{src}[i]} corresponds to the character starting +at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to +@code{(size_t)(-1)}. + +@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch +buffer and its size, or @code{@var{resultbuf}} can be NULL. + +May erase the contents of the memory at @code{@var{resultbuf}}. + +If successful: The resulting Unicode string (non-NULL) is returned and +its length stored in @code{*@var{lengthp}}. The resulting string is +@code{@var{resultbuf}} if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. + +In case of error: NULL is returned and @code{errno} is set. +Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}. 
+@end deftypefun + +@deftypefun {char *} u8_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint8_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint16_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint32_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts an entire Unicode string, possibly including NUL units, from UTF-8 +encoding to a given encoding. + +Converts a memory region to encoding @var{tocode}. @var{tocode} is as for +the @code{iconv_open} function. + +The input is in the memory region between @var{src} (inclusive) and +@code{@var{src} + @var{srclen}} (exclusive). + +If @var{offsets} is not NULL, it should point to an array of @var{srclen} +integers; this array is filled with offsets into the result, i.e@. the +character starting at @code{@var{src}[i]} corresponds to the character starting +at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to +@code{(size_t)(-1)}. + +@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch +buffer and its size, or @code{@var{resultbuf}} can be NULL. + +May erase the contents of the memory at @code{@var{resultbuf}}. + +If successful: The resulting string (non-NULL) is returned and +its length stored in @code{*@var{lengthp}}. The resulting string is +@code{@var{resultbuf}} if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. + +In case of error: NULL is returned and @code{errno} is set. +Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}. 
+@end deftypefun + +The following functions convert between NUL terminated strings in a specified +encoding and NUL terminated Unicode strings. + +@deftypefun {uint8_t *} u8_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {uint16_t *} u16_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {uint32_t *} u32_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +Converts a NUL terminated string from a given encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_strconv_to_encoding (const uint8_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {char *} u16_strconv_to_encoding (const uint16_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {char *} u32_strconv_to_encoding (const uint32_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +Converts a NUL terminated string to a given encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +The following functions are shorthands that convert between NUL terminated +strings in locale encoding and NUL terminated Unicode strings. + +@deftypefun {uint8_t *} u8_strconv_from_locale (const char *@var{string}) +@deftypefunx {uint16_t *} u16_strconv_from_locale (const char *@var{string}) +@deftypefunx {uint32_t *} u32_strconv_from_locale (const char *@var{string}) +Converts a NUL terminated string from the locale encoding. 
+ +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_strconv_to_locale (const uint8_t *@var{string}) +@deftypefunx {char *} u16_strconv_to_locale (const uint16_t *@var{string}) +@deftypefunx {char *} u32_strconv_to_locale (const uint32_t *@var{string}) +Converts a NUL terminated string to the locale encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{ENOMEM}. +@end deftypefun diff --git a/doc/unictype.texi b/doc/unictype.texi new file mode 100644 index 00000000..129159c7 --- /dev/null +++ b/doc/unictype.texi @@ -0,0 +1,1145 @@ +@node unictype.h +@chapter Unicode character classification and properties @code{<unictype.h>} + +This include file declares functions that classify Unicode characters +and that test whether Unicode characters have specific properties. + +The classification assigns a ``general category'' to every Unicode +character. This is similar to the classification provided by ISO C in +@code{<wctype.h>}. + +Properties are the data that guides various text processing algorithms +in the presence of specific Unicode characters. + +@menu +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: +@end menu + +@node General category +@section General category + +@cindex general category +@cindex Unicode character, general category +@cindex Unicode character, classification +Every Unicode character or code point has a @emph{general category} assigned +to it. This classification is important for most algorithms that work on +Unicode text. + +The GNU libunistring library provides two kinds of API for working with +general categories. 
The object oriented API uses a variable to denote +every predefined general category value or combinations thereof. The +low-level API uses a bit mask instead. The advantage of the object oriented +API is that if only a few predefined general category values are used, +the data tables are relatively small. When you combine general category +values (using @code{uc_general_category_or}, @code{uc_general_category_and}, +or @code{uc_general_category_and_not}), or when you use the low level +bit masks, a big table is used that holds the complete general category +information for all Unicode characters. + +@menu +* Object oriented API:: +* Bit mask API:: +@end menu + +@node Object oriented API +@subsection The object oriented API for general category + +@deftp Type uc_general_category_t +This data type denotes a general category value. It is an immediate type that +can be copied by simple assignment, without involving memory allocation. It is +not an array type. +@end deftp + +The following are the predefined general category values. Additional general +categories may be added in the future. 
+ +@deftypevr Constant uc_general_category_t UC_CATEGORY_L +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lu +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Ll +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lt +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lm +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lo +@deftypevrx Constant uc_general_category_t UC_CATEGORY_M +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Mn +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Mc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Me +@deftypevrx Constant uc_general_category_t UC_CATEGORY_N +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Nd +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Nl +@deftypevrx Constant uc_general_category_t UC_CATEGORY_No +@deftypevrx Constant uc_general_category_t UC_CATEGORY_P +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pd +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Ps +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pe +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pi +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pf +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Po +@deftypevrx Constant uc_general_category_t UC_CATEGORY_S +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Sm +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Sc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Sk +@deftypevrx Constant uc_general_category_t UC_CATEGORY_So +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Z +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Zs +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Zl +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Zp +@deftypevrx Constant uc_general_category_t UC_CATEGORY_C +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cf 
+@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cs +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Co +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cn +@end deftypevr + +The following are alias names for predefined General category values. + +@deftypevr Macro uc_general_category_t UC_LETTER +This is another name for @code{UC_CATEGORY_L}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_UPPERCASE_LETTER +This is another name for @code{UC_CATEGORY_Lu}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_LOWERCASE_LETTER +This is another name for @code{UC_CATEGORY_Ll}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_TITLECASE_LETTER +This is another name for @code{UC_CATEGORY_Lt}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MODIFIER_LETTER +This is another name for @code{UC_CATEGORY_Lm}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_LETTER +This is another name for @code{UC_CATEGORY_Lo}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MARK +This is another name for @code{UC_CATEGORY_M}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_NON_SPACING_MARK +This is another name for @code{UC_CATEGORY_Mn}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_COMBINING_SPACING_MARK +This is another name for @code{UC_CATEGORY_Mc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_ENCLOSING_MARK +This is another name for @code{UC_CATEGORY_Me}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_NUMBER +This is another name for @code{UC_CATEGORY_N}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_DECIMAL_DIGIT_NUMBER +This is another name for @code{UC_CATEGORY_Nd}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_LETTER_NUMBER +This is another name for @code{UC_CATEGORY_Nl}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_NUMBER +This is another name for @code{UC_CATEGORY_No}. 
+@end deftypevr + +@deftypevr Macro uc_general_category_t UC_PUNCTUATION +This is another name for @code{UC_CATEGORY_P}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CONNECTOR_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_DASH_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pd}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OPEN_PUNCTUATION +This is another name for @code{UC_CATEGORY_Ps} (``start punctuation''). +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CLOSE_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pe} (``end punctuation''). +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_INITIAL_QUOTE_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pi}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_FINAL_QUOTE_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pf}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_PUNCTUATION +This is another name for @code{UC_CATEGORY_Po}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SYMBOL +This is another name for @code{UC_CATEGORY_S}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MATH_SYMBOL +This is another name for @code{UC_CATEGORY_Sm}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CURRENCY_SYMBOL +This is another name for @code{UC_CATEGORY_Sc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MODIFIER_SYMBOL +This is another name for @code{UC_CATEGORY_Sk}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_SYMBOL +This is another name for @code{UC_CATEGORY_So}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SEPARATOR +This is another name for @code{UC_CATEGORY_Z}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SPACE_SEPARATOR +This is another name for @code{UC_CATEGORY_Zs}. 
+@end deftypevr + +@deftypevr Macro uc_general_category_t UC_LINE_SEPARATOR +This is another name for @code{UC_CATEGORY_Zl}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_PARAGRAPH_SEPARATOR +This is another name for @code{UC_CATEGORY_Zp}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER +This is another name for @code{UC_CATEGORY_C}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CONTROL +This is another name for @code{UC_CATEGORY_Cc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_FORMAT +This is another name for @code{UC_CATEGORY_Cf}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SURROGATE +This is another name for @code{UC_CATEGORY_Cs}. All code points in this +category are invalid characters. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_PRIVATE_USE +This is another name for @code{UC_CATEGORY_Co}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_UNASSIGNED +This is another name for @code{UC_CATEGORY_Cn}. Some code points in this +category are invalid characters. +@end deftypevr + +The following functions combine general categories, like in a boolean algebra, +except that there is no @samp{not} operation. + +@deftypefun uc_general_category_t uc_general_category_or (uc_general_category_t @var{category1}, uc_general_category_t @var{category2}) +Returns the union of two general categories. +This corresponds to the union of the two sets of characters. +@end deftypefun + +@deftypefun uc_general_category_t uc_general_category_and (uc_general_category_t @var{category1}, uc_general_category_t @var{category2}) +Returns the intersection of two general categories as bit masks. +This @emph{does not} correspond to the intersection of the two sets of +characters. +@c Really?? 
+@end deftypefun + +@deftypefun uc_general_category_t uc_general_category_and_not (uc_general_category_t @var{category1}, uc_general_category_t @var{category2}) +Returns the intersection of a general category with the complement of a +second general category, as bit masks. +This @emph{does not} correspond to the intersection with complement, when +viewing the categories as sets of characters. +@c Really?? +@end deftypefun + +The following functions associate general categories with their name. + +@deftypefun {const char *} uc_general_category_name (uc_general_category_t @var{category}) +Returns the name of a general category. +Returns NULL if the general category corresponds to a bit mask that does not +have a name. +@end deftypefun + +@deftypefun uc_general_category_t uc_general_category_byname (const char *@var{category_name}) +Returns the general category given by name, e.g@. @code{"Lu"}. +@end deftypefun + +The following functions view general categories as sets of Unicode characters. + +@deftypefun uc_general_category_t uc_general_category (ucs4_t @var{uc}) +Returns the general category of a Unicode character. + +This function uses a big table. +@end deftypefun + +@deftypefun bool uc_is_general_category (ucs4_t @var{uc}, uc_general_category_t @var{category}) +Tests whether a Unicode character belongs to a given category. +The @var{category} argument can be a predefined general category or the +combination of several predefined general categories. +@end deftypefun + +@node Bit mask API +@subsection The bit mask API for general category + +The following are the predefined general category values as bit masks. +Additional general categories may be added in the future. 
+ +@deftypevr Macro uint32_t UC_CATEGORY_MASK_L +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lu +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Ll +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lt +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lm +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lo +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_M +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Mn +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Mc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Me +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_N +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Nd +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Nl +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_No +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_P +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pd +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Ps +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pe +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pi +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pf +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Po +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_S +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Sm +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Sc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Sk +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_So +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Z +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Zs +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Zl +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Zp +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_C +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cf +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cs +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Co +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cn +@end deftypevr + +The following function views general categories as sets of Unicode characters. 
+ +@deftypefun bool uc_is_general_category_withtable (ucs4_t @var{uc}, uint32_t @var{bitmask}) +Tests whether a Unicode character belongs to a given category. +The @var{bitmask} argument can be a predefined general category bitmask or the +combination of several predefined general category bitmasks. + +This function uses a big table comprising all general categories. +@end deftypefun + +@node Canonical combining class +@section Canonical combining class + +@cindex canonical combining class +@cindex Unicode character, canonical combining class +Every Unicode character or code point has a @emph{canonical combining class} +assigned to it. + +What is the meaning of the canonical combining class? Essentially, it +indicates the priority with which a combining character is attached to its +base character. The characters for which the canonical combining class is 0 +are the base characters, and the characters for which it is greater than 0 are +the combining characters. Combining characters are rendered +near/attached/around their base character, and combining characters with small +combining classes are attached "first" or "closer" to the base character. + +The canonical combining class of a character is a number in the range +0..255. The possible values are described in the Unicode Character Database +@texnl{}@url{http://www.unicode.org/Public/UNIDATA/UCD.html}. The list here is +not definitive; more values can be added in future versions. + +@deftypevr Constant int UC_CCC_NR +The canonical combining class value for ``Not Reordered'' characters. +The value is 0. +@end deftypevr + +@deftypevr Constant int UC_CCC_OV +The canonical combining class value for ``Overlay'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_NK +The canonical combining class value for ``Nukta'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_KV +The canonical combining class value for ``Kana Voicing'' characters. 
+@end deftypevr + +@deftypevr Constant int UC_CCC_VR +The canonical combining class value for ``Virama'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_ATBL +The canonical combining class value for ``Attached Below Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_ATB +The canonical combining class value for ``Attached Below'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_ATAR +The canonical combining class value for ``Attached Above Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_BL +The canonical combining class value for ``Below Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_B +The canonical combining class value for ``Below'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_BR +The canonical combining class value for ``Below Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_L +The canonical combining class value for ``Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_R +The canonical combining class value for ``Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_AL +The canonical combining class value for ``Above Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_A +The canonical combining class value for ``Above'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_AR +The canonical combining class value for ``Above Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_DB +The canonical combining class value for ``Double Below'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_DA +The canonical combining class value for ``Double Above'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_IS +The canonical combining class value for ``Iota Subscript'' characters. +@end deftypevr + +The following function looks up the canonical combining class of a character. 
+ +@deftypefun int uc_combining_class (ucs4_t @var{uc}) +Returns the canonical combining class of a Unicode character. +@end deftypefun + +@node Bidirectional category +@section Bidirectional category + +@cindex bidirectional category +@cindex Unicode character, bidirectional category +Every Unicode character or code point has a @emph{bidirectional category} +assigned to it. + +The bidirectional category guides the bidirectional algorithm@texnl{} +(@url{http://www.unicode.org/reports/tr9/}). The possible values are +the following. + +@deftypevr Constant int UC_BIDI_L +The bidirectional category for ``Left-to-Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_LRE +The bidirectional category for ``Left-to-Right Embedding'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_LRO +The bidirectional category for ``Left-to-Right Override'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_R +The bidirectional category for ``Right-to-Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_AL +The bidirectional category for ``Right-to-Left Arabic'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_RLE +The bidirectional category for ``Right-to-Left Embedding'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_RLO +The bidirectional category for ``Right-to-Left Override'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_PDF +The bidirectional category for ``Pop Directional Format'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_EN +The bidirectional category for ``European Number'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_ES +The bidirectional category for ``European Number Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_ET +The bidirectional category for ``European Number Terminator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_AN +The bidirectional category for ``Arabic Number'' characters. 
+@end deftypevr + +@deftypevr Constant int UC_BIDI_CS +The bidirectional category for ``Common Number Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_NSM +The bidirectional category for ``Non-Spacing Mark'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_BN +The bidirectional category for ``Boundary Neutral'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_B +The bidirectional category for ``Paragraph Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_S +The bidirectional category for ``Segment Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_WS +The bidirectional category for ``Whitespace'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_ON +The bidirectional category for ``Other Neutral'' characters. +@end deftypevr + +The following functions implement the association between a bidirectional +category and its name. + +@deftypefun {const char *} uc_bidi_category_name (int @var{category}) +Returns the name of a bidirectional category. +@end deftypefun + +@deftypefun int uc_bidi_category_byname (const char *@var{category_name}) +Returns the bidirectional category given by name, e.g@. @code{"LRE"}. +@end deftypefun + +The following functions view bidirectional categories as sets of Unicode +characters. + +@deftypefun int uc_bidi_category (ucs4_t @var{uc}) +Returns the bidirectional category of a Unicode character. +@end deftypefun + +@deftypefun bool uc_is_bidi_category (ucs4_t @var{uc}, int @var{category}) +Tests whether a Unicode character belongs to a given bidirectional category. +@end deftypefun + +@node Decimal digit value +@section Decimal digit value + +@cindex value, of Unicode character +@cindex Unicode character, value +Decimal digits (like the digits from @samp{0} to @samp{9}) exist in many +scripts. The following function converts a decimal digit character to its +numerical value. 
+ +@deftypefun int uc_decimal_value (ucs4_t @var{uc}) +Returns the decimal digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a decimal digit. +@end deftypefun + +@node Digit value +@section Digit value + +@cindex value, of Unicode character +@cindex Unicode character, value +Digit characters are like decimal digit characters, possibly in special forms, +such as superscript, subscript, or circled. The following function converts a +digit character to its numerical value. + +@deftypefun int uc_digit_value (ucs4_t @var{uc}) +Returns the digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a digit. +@end deftypefun + +@node Numeric value +@section Numeric value + +@cindex value, of Unicode character +@cindex Unicode character, value +There are also characters that represent numbers without a digit system, like +the Roman numerals, and fractional numbers, like 1/4 or 3/4. + +The following type represents the numeric value of a Unicode character. +@deftp Type uc_fraction_t +This is a structure type with the following fields: +@smallexample +int numerator; +int denominator; +@end smallexample +An integer @var{n} is represented by @code{numerator = @var{n}}, +@code{denominator = 1}. +@end deftp + +The following function converts a number character to its numerical value. + +@deftypefun uc_fraction_t uc_numeric_value (ucs4_t @var{uc}) +Returns the numeric value of a Unicode character. +The return value is a fraction, or the pseudo-fraction @code{@{ 0, 0 @}} for +characters that do not represent a number. 
+@end deftypefun + +@node Mirrored character +@section Mirrored character + +@cindex mirroring, of Unicode character +@cindex Unicode character, mirroring +Character mirroring is used to associate the closing parenthesis character +with the opening parenthesis character, the closing brace character with the +opening brace character, and so on. + +The following function looks up the mirrored character of a Unicode character. + +@deftypefun bool uc_mirror_char (ucs4_t @var{uc}, ucs4_t *@var{puc}) +Stores the mirrored character of a Unicode character @var{uc} in +@code{*@var{puc}} and returns @code{true}, if it exists. Otherwise it +stores @var{uc} unmodified in @code{*@var{puc}} and returns @code{false}. +@end deftypefun + +@node Properties +@section Properties + +@cindex properties, of Unicode character +@cindex Unicode character, properties +This section defines boolean properties of Unicode characters. This +means, a character either has the given property or does not have it. +In other words, the property can be viewed as a subset of the set of +Unicode characters. + +The GNU libunistring library provides two kinds of API for working with +properties. The object oriented API uses a type @code{uc_property_t} +to designate a property. In the function-based API, which is a bit more +low level, a property is merely a function. + +@menu +* Properties as objects:: +* Properties as functions:: +@end menu + +@node Properties as objects +@subsection Properties as objects -- the object oriented API + +The following type designates a property on Unicode characters. + +@deftp Type uc_property_t +This data type denotes a boolean property on Unicode characters. It is an +immediate type that can be copied by simple assignment, without involving +memory allocation. It is not an array type. +@end deftp + +Many Unicode properties are predefined. + +The following are general properties. 
+ +@deftypevr Constant uc_property_t UC_PROPERTY_WHITE_SPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_ALPHABETIC +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_ALPHABETIC +@deftypevrx Constant uc_property_t UC_PROPERTY_NOT_A_CHARACTER +@deftypevrx Constant uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT +@deftypevrx Constant uc_property_t UC_PROPERTY_DEPRECATED +@deftypevrx Constant uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION +@deftypevrx Constant uc_property_t UC_PROPERTY_VARIATION_SELECTOR +@deftypevrx Constant uc_property_t UC_PROPERTY_PRIVATE_USE +@deftypevrx Constant uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE +@end deftypevr + +The following properties are related to case folding. + +@deftypevr Constant uc_property_t UC_PROPERTY_UPPERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_UPPERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_LOWERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_LOWERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_TITLECASE +@deftypevrx Constant uc_property_t UC_PROPERTY_SOFT_DOTTED +@end deftypevr + +The following properties are related to identifiers. + +@deftypevr Constant uc_property_t UC_PROPERTY_ID_START +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_ID_START +@deftypevrx Constant uc_property_t UC_PROPERTY_ID_CONTINUE +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE +@deftypevrx Constant uc_property_t UC_PROPERTY_XID_START +@deftypevrx Constant uc_property_t UC_PROPERTY_XID_CONTINUE +@deftypevrx Constant uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_PATTERN_SYNTAX +@end deftypevr + +The following properties have an influence on shaping and rendering. 
+ +@deftypevr Constant uc_property_t UC_PROPERTY_JOIN_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_GRAPHEME_BASE +@deftypevrx Constant uc_property_t UC_PROPERTY_GRAPHEME_EXTEND +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND +@deftypevrx Constant uc_property_t UC_PROPERTY_GRAPHEME_LINK +@end deftypevr + +The following properties relate to bidirectional reordering. + +@deftypevr Constant uc_property_t UC_PROPERTY_BIDI_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_WHITESPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_PDF +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL +@end deftypevr + +The following properties deal with number representations. + +@deftypevr Constant uc_property_t UC_PROPERTY_HEX_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT +@end deftypevr + +The following properties deal with CJK. 
+ +@deftypevr Constant uc_property_t UC_PROPERTY_IDEOGRAPHIC +@deftypevrx Constant uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH +@deftypevrx Constant uc_property_t UC_PROPERTY_RADICAL +@deftypevrx Constant uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR +@end deftypevr + +Other miscellaneous properties are: + +@deftypevr Constant uc_property_t UC_PROPERTY_ZERO_WIDTH +@deftypevrx Constant uc_property_t UC_PROPERTY_SPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_NON_BREAK +@deftypevrx Constant uc_property_t UC_PROPERTY_ISO_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_FORMAT_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_DASH +@deftypevrx Constant uc_property_t UC_PROPERTY_HYPHEN +@deftypevrx Constant uc_property_t UC_PROPERTY_PUNCTUATION +@deftypevrx Constant uc_property_t UC_PROPERTY_LINE_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_QUOTATION_MARK +@deftypevrx Constant uc_property_t UC_PROPERTY_SENTENCE_TERMINAL +@deftypevrx Constant uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION +@deftypevrx Constant uc_property_t UC_PROPERTY_CURRENCY_SYMBOL +@deftypevrx Constant uc_property_t UC_PROPERTY_MATH +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_MATH +@deftypevrx Constant uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION +@deftypevrx Constant uc_property_t UC_PROPERTY_LEFT_OF_PAIR +@deftypevrx Constant uc_property_t UC_PROPERTY_COMBINING +@deftypevrx Constant uc_property_t UC_PROPERTY_COMPOSITE +@deftypevrx Constant uc_property_t UC_PROPERTY_DECIMAL_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_NUMERIC +@deftypevrx Constant uc_property_t UC_PROPERTY_DIACRITIC +@deftypevrx Constant uc_property_t UC_PROPERTY_EXTENDER +@deftypevrx Constant uc_property_t UC_PROPERTY_IGNORABLE_CONTROL +@end deftypevr + +The following function looks up a property by its name. 
+ +@deftypefun uc_property_t uc_property_byname (const char *@var{property_name}) +Returns the property given by name, e.g. @code{"White space"}. If a property +with the given name exists, the result will satisfy the +@code{uc_property_is_valid} predicate. Otherwise the result will not satisfy +this predicate and must not be passed to functions that expect an +@code{uc_property_t} argument. + +This function references a big table of all predefined properties. Its use +can significantly increase the size of your application. +@end deftypefun + +@deftypefun bool uc_property_is_valid (uc_property_t @var{property}) +Returns @code{true} when the given property is valid, or @code{false} +otherwise. +@end deftypefun + +The following function views a property as a set of Unicode characters. + +@deftypefun bool uc_is_property (ucs4_t @var{uc}, uc_property_t @var{property}) +Tests whether the Unicode character @var{uc} has the given property. +@end deftypefun + +@node Properties as functions +@subsection Properties as functions -- the functional API + +The following are general properties. + +@deftypefun bool uc_is_property_white_space (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_alphabetic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_alphabetic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_not_a_character (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_default_ignorable_code_point (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_default_ignorable_code_point (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_deprecated (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_logical_order_exception (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_variation_selector (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_private_use (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_unassigned_code_value (ucs4_t @var{uc}) +@end deftypefun + +The following properties are related to case folding. 
+ +@deftypefun bool uc_is_property_uppercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_uppercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_lowercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_lowercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_titlecase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_soft_dotted (ucs4_t @var{uc}) +@end deftypefun + +The following properties are related to identifiers. + +@deftypefun bool uc_is_property_id_start (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_id_start (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_id_continue (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_id_continue (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_xid_start (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_xid_continue (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_pattern_white_space (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_pattern_syntax (ucs4_t @var{uc}) +@end deftypefun + +The following properties have an influence on shaping and rendering. + +@deftypefun bool uc_is_property_join_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_grapheme_base (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_grapheme_extend (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_grapheme_extend (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_grapheme_link (ucs4_t @var{uc}) +@end deftypefun + +The following properties relate to bidirectional reordering. 
+ +@deftypefun bool uc_is_property_bidi_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_left_to_right (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_arabic_right_to_left (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_european_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_eur_num_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_eur_num_terminator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_arabic_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_common_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_block_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_segment_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_whitespace (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_non_spacing_mark (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_boundary_neutral (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_pdf (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_embedding_or_override (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_other_neutral (ucs4_t @var{uc}) +@end deftypefun + +The following properties deal with number representations. + +@deftypefun bool uc_is_property_hex_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ascii_hex_digit (ucs4_t @var{uc}) +@end deftypefun + +The following properties deal with CJK. 
+ +@deftypefun bool uc_is_property_ideographic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_unified_ideograph (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_radical (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ids_binary_operator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ids_trinary_operator (ucs4_t @var{uc}) +@end deftypefun + +Other miscellaneous properties are: + +@deftypefun bool uc_is_property_zero_width (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_space (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_non_break (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_iso_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_format_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_dash (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_hyphen (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_punctuation (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_line_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_paragraph_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_quotation_mark (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_sentence_terminal (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_terminal_punctuation (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_currency_symbol (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_math (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_math (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_paired_punctuation (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_left_of_pair (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_combining (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_composite (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_decimal_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_numeric (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_diacritic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_extender (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ignorable_control (ucs4_t 
@var{uc}) +@end deftypefun + +@node Scripts +@section Scripts + +@cindex scripts +The Unicode characters are subdivided into scripts. + +The following type is used to represent a script: + +@deftp Type uc_script_t +This data type is a structure type that refers to statically allocated +read-only data. It contains the following fields: +@smallexample +const char *name; +@end smallexample + +The @code{name} field contains the name of the script. +@end deftp + +@cindex Unicode character, script +The following functions look up a script. + +@deftypefun {const uc_script_t *} uc_script (ucs4_t @var{uc}) +Returns the script of a Unicode character. Returns NULL if @var{uc} does not +belong to any script. +@end deftypefun + +@deftypefun {const uc_script_t *} uc_script_byname (const char *@var{script_name}) +Returns the script given by its name, e.g@. @code{"HAN"}. Returns NULL if a +script with the given name does not exist. +@end deftypefun + +The following function views a script as a set of Unicode characters. + +@deftypefun bool uc_is_script (ucs4_t @var{uc}, const uc_script_t *@var{script}) +Tests whether a Unicode character belongs to a given script. +@end deftypefun + +The following gives a global picture of all scripts. + +@deftypefun void uc_all_scripts (const uc_script_t **@var{scripts}, size_t *@var{count}) +Get the list of all scripts. Stores a pointer to an array of all scripts in +@code{*@var{scripts}} and the length of this array in @code{*@var{count}}. +@end deftypefun + +@node Blocks +@section Blocks + +@cindex block +The Unicode characters are subdivided into blocks. A block is an interval of +Unicode code points. + +The following type is used to represent a block. + +@deftp Type uc_block_t +This data type is a structure type that refers to statically allocated data. +It contains the following fields: +@smallexample +ucs4_t start; +ucs4_t end; +const char *name; +@end smallexample + +The @code{start} field is the first Unicode code point in the block. 
+ +The @code{end} field is the last Unicode code point in the block. + +The @code{name} field is the name of the block. +@end deftp + +@cindex Unicode character, block +The following function looks up a block. + +@deftypefun {const uc_block_t *} uc_block (ucs4_t @var{uc}) +Returns the block a character belongs to. +@end deftypefun + +The following function views a block as a set of Unicode characters. + +@deftypefun bool uc_is_block (ucs4_t @var{uc}, const uc_block_t *@var{block}) +Tests whether a Unicode character belongs to a given block. +@end deftypefun + +The following gives a global picture of all blocks. + +@deftypefun void uc_all_blocks (const uc_block_t **@var{blocks}, size_t *@var{count}) +Get the list of all blocks. Stores a pointer to an array of all blocks in +@code{*@var{blocks}} and the length of this array in @code{*@var{count}}. +@end deftypefun + +@node ISO C and Java syntax +@section ISO C and Java syntax + +@cindex C, programming language +@cindex Java, programming language +@cindex identifiers +The following properties are taken from language standards. The supported +language standards are ISO C 99 and Java. + +@deftypefun bool uc_is_c_whitespace (ucs4_t @var{uc}) +Tests whether a Unicode character is considered whitespace in ISO C 99. +@end deftypefun + +@deftypefun bool uc_is_java_whitespace (ucs4_t @var{uc}) +Tests whether a Unicode character is considered whitespace in Java. +@end deftypefun + +The following enumerated values are the possible return values of the functions +@code{uc_c_ident_category} and @code{uc_java_ident_category}. + +@deftypevr Constant int UC_IDENTIFIER_START +This return value means that the given character is valid as first or +subsequent character in an identifier. +@end deftypevr + +@deftypevr Constant int UC_IDENTIFIER_VALID +This return value means that the given character is valid as subsequent +character only. 
+@end deftypevr + +@deftypevr Constant int UC_IDENTIFIER_INVALID +This return value means that the given character is not valid in an identifier. +@end deftypevr + +@deftypevr Constant int UC_IDENTIFIER_IGNORABLE +This return value (only for Java) means that the given character is ignorable. +@end deftypevr + +The following functions determine whether a given character can be a constituent +of an identifier in the given programming language. + +@cindex Unicode character, validity in C identifiers +@deftypefun int uc_c_ident_category (ucs4_t @var{uc}) +Returns the categorization of a Unicode character with respect to the ISO C 99 +identifier syntax. +@end deftypefun + +@cindex Unicode character, validity in Java identifiers +@deftypefun int uc_java_ident_category (ucs4_t @var{uc}) +Returns the categorization of a Unicode character with respect to the Java +identifier syntax. +@end deftypefun + +@node Classifications like in ISO C +@section Classifications like in ISO C + +@cindex C-like API +@cindex Unicode character, classification like in C +The following character classifications mimic those declared in the ISO C +header files @code{<ctype.h>} and @code{<wctype.h>}. These functions are +deprecated, because this set of functions was designed with ASCII in mind and +cannot reflect the more diverse reality of the Unicode character set. But +they can be a quick-and-dirty porting aid when migrating from @code{wchar_t} +APIs to Unicode strings. + +@deftypefun bool uc_is_alnum (ucs4_t @var{uc}) +Tests for any character for which @code{uc_is_alpha} or @code{uc_is_digit} is +true. +@end deftypefun + +@deftypefun bool uc_is_alpha (ucs4_t @var{uc}) +Tests for any character for which @code{uc_is_upper} or @code{uc_is_lower} is +true, or any character that is one of a locale-specific set of characters for +which none of @code{uc_is_cntrl}, @code{uc_is_digit}, @code{uc_is_punct}, or +@code{uc_is_space} is true. 
+@end deftypefun + +@deftypefun bool uc_is_cntrl (ucs4_t @var{uc}) +Tests for any control character. +@end deftypefun + +@deftypefun bool uc_is_digit (ucs4_t @var{uc}) +Tests for any character that corresponds to a decimal-digit character. +@end deftypefun + +@deftypefun bool uc_is_graph (ucs4_t @var{uc}) +Tests for any character for which @code{uc_is_print} is true and +@code{uc_is_space} is false. +@end deftypefun + +@deftypefun bool uc_is_lower (ucs4_t @var{uc}) +Tests for any character that corresponds to a lowercase letter or is one +of a locale-specific set of characters for which none of @code{uc_is_cntrl}, +@code{uc_is_digit}, @code{uc_is_punct}, or @code{uc_is_space} is true. +@end deftypefun + +@deftypefun bool uc_is_print (ucs4_t @var{uc}) +Tests for any printing character. +@end deftypefun + +@deftypefun bool uc_is_punct (ucs4_t @var{uc}) +Tests for any printing character that is one of a locale-specific set of +characters for which neither @code{uc_is_space} nor @code{uc_is_alnum} is true. +@end deftypefun + +@deftypefun bool uc_is_space (ucs4_t @var{uc}) +Tests for any character that corresponds to a locale-specific set of characters +for which none of @code{uc_is_alnum}, @code{uc_is_graph}, or @code{uc_is_punct} +is true. +@end deftypefun + +@deftypefun bool uc_is_upper (ucs4_t @var{uc}) +Tests for any character that corresponds to an uppercase letter or is one +of a locale-specific set of characters for which none of @code{uc_is_cntrl}, +@code{uc_is_digit}, @code{uc_is_punct}, or @code{uc_is_space} is true. +@end deftypefun + +@deftypefun bool uc_is_xdigit (ucs4_t @var{uc}) +Tests for any character that corresponds to a hexadecimal-digit character. +@end deftypefun + +@deftypefun bool uc_is_blank (ucs4_t @var{uc}) +Tests for any character that corresponds to a standard blank character or +a locale-specific set of characters for which @code{uc_is_alnum} is false. 
+@end deftypefun diff --git a/doc/unilbrk.texi b/doc/unilbrk.texi new file mode 100644 index 00000000..5441f317 --- /dev/null +++ b/doc/unilbrk.texi @@ -0,0 +1,88 @@ +@node unilbrk.h +@chapter Line breaking @code{<unilbrk.h>} + +@cindex line breaks +@cindex breaks, line +@cindex wrapping +This include file declares functions for determining where in a string +line breaks could or should be introduced, in order to make the displayed +string fit into a column of given width. + +These functions are locale dependent. The @var{encoding} argument identifies +the encoding (e.g@. @code{"ISO-8859-2"} for Polish). + +The following enumerated values indicate whether, at a given position, a line +break is possible or not. Given a string @var{s} as an array +@code{@var{s}[0..@var{n}-1]} and a position @var{i}, the values have the +following meanings: + +@deftypevr Constant int UC_BREAK_MANDATORY +This value indicates that @code{@var{s}[@var{i}]} is a line break character. +@end deftypevr + +@deftypevr Constant int UC_BREAK_POSSIBLE +This value indicates that a line break may be inserted between +@code{@var{s}[@var{i}-1]} and @code{@var{s}[@var{i}]}. +@end deftypevr + +@deftypevr Constant int UC_BREAK_HYPHENATION +This value indicates that a hyphen and a line break may be inserted between +@code{@var{s}[@var{i}-1]} and @code{@var{s}[@var{i}]}. But beware of language +dependent hyphenation rules. +@end deftypevr + +@deftypevr Constant int UC_BREAK_PROHIBITED +This value indicates that @code{@var{s}[@var{i}-1]} and @code{@var{s}[@var{i}]} +must not be separated. +@end deftypevr + +@deftypevr Constant int UC_BREAK_UNDEFINED +This value is not used as a return value; rather, in the overriding argument of +the @code{u*_width_linebreaks} functions, it indicates the absence of an +override. +@end deftypevr + +The following functions determine the positions at which line breaks are +possible. 
+ +@deftypefun void u8_possible_linebreaks (const uint8_t *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +@deftypefunx void u16_possible_linebreaks (const uint16_t *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +@deftypefunx void u32_possible_linebreaks (const uint32_t *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +@deftypefunx void ulc_possible_linebreaks (const char *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +Determines the line break points in @var{s}, and stores the result at +@code{@var{p}[0..@var{n}-1]}. Every @code{@var{p}[@var{i}]} is assigned one of +the values @code{UC_BREAK_MANDATORY}, @code{UC_BREAK_POSSIBLE}, +@code{UC_BREAK_HYPHENATION}, @code{UC_BREAK_PROHIBITED}. +@end deftypefun + +The following functions determine where line breaks should be inserted so that +each line fits in a given width, when output to a device that uses +non-proportional fonts. + +@deftypefun int u8_width_linebreaks (const uint8_t *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +@deftypefunx int u16_width_linebreaks (const uint16_t *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +@deftypefunx int u32_width_linebreaks (const uint32_t *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +@deftypefunx int ulc_width_linebreaks (const char *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +Chooses the best line breaks, assuming that every character occupies a width +given by the @code{uc_width} function (see @ref{uniwidth.h}). 
+ +The string is @code{@var{s}[0..@var{n}-1]}. + +The maximum number of columns per line is given as @var{width}. +The starting column of the string is given as @var{start_column}. +If the algorithm shall keep room after the last piece, this amount of room can +be given as @var{at_end_columns}. + +@var{override} is an optional override; if +@code{@var{override}[@var{i}] != UC_BREAK_UNDEFINED}, +@code{@var{override}[@var{i}]} takes precedence over @code{@var{p}[@var{i}]} +as returned by the @code{u*_possible_linebreaks} function. + +The given @var{encoding} is used for disambiguating widths in @code{uc_width}. + +Returns the column after the end of the string, and stores the result at +@code{@var{p}[0..@var{n}-1]}. Every @code{@var{p}[@var{i}]} is assigned one of +the values @code{UC_BREAK_MANDATORY}, @code{UC_BREAK_POSSIBLE}, +@code{UC_BREAK_HYPHENATION}, @code{UC_BREAK_PROHIBITED}. Here the value +@code{UC_BREAK_POSSIBLE} indicates that a line break @emph{should} be inserted. +@end deftypefun diff --git a/doc/uniname.texi b/doc/uniname.texi new file mode 100644 index 00000000..66461be5 --- /dev/null +++ b/doc/uniname.texi @@ -0,0 +1,32 @@ +@node uniname.h +@chapter Names of Unicode characters @code{<uniname.h>} + +@cindex Unicode character, name +This include file implements the association between a Unicode character and +its name. + +The name of a Unicode character allows one to distinguish it from other, similar +looking characters. For example, the character @samp{x} has the name +@code{"LATIN SMALL LETTER X"} and is therefore different from the character +named @code{"MULTIPLICATION SIGN"}. + +@deftypevr Macro {unsigned int} UNINAME_MAX +This macro expands to a constant that is the required size of buffer for a +Unicode character name. +@end deftypevr + +@deftypefun {char *} unicode_character_name (ucs4_t @var{uc}, char *@var{buf}) +Looks up the name of a Unicode character, in uppercase ASCII. 
+@var{buf} must point to a buffer, at least @code{UNINAME_MAX} bytes in size. +Returns the filled @var{buf}, or NULL if the character does not have a name. +@end deftypefun + +@deftypefun ucs4_t unicode_name_character (const char *@var{name}) +Looks up the Unicode character with a given name, in upper- or lowercase +ASCII. Returns the character if found, or @code{UNINAME_INVALID} if not found. +@end deftypefun + +@deftypevr Macro ucs4_t UNINAME_INVALID +This macro expands to a constant that is a special return value of the +@code{unicode_name_character} function. +@end deftypevr diff --git a/doc/uninorm.texi b/doc/uninorm.texi new file mode 100644 index 00000000..d4206d50 --- /dev/null +++ b/doc/uninorm.texi @@ -0,0 +1,299 @@ +@node uninorm.h +@chapter Normalization forms (composition and decomposition) @code{<uninorm.h>} + +@cindex normal forms +@cindex normalizing +This include file defines functions for transforming Unicode strings to one +of the four normal forms, known as NFC, NFD, NFKC, NFKD. These +transformations involve decomposition and --- for NFC and NFKC --- composition +of Unicode characters. + +@menu +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: +@end menu + +@node Decomposition of characters +@section Decomposition of Unicode characters + +@cindex decomposing +The following enumerated values are the possible types of decomposition of a +Unicode character. + +@deftypevr Constant int UC_DECOMP_CANONICAL +Denotes canonical decomposition. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_FONT +UCD marker: @code{<font>}. Denotes a font variant (e.g. a blackletter form). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_NOBREAK +UCD marker: @code{<noBreak>}. +Denotes a no-break version of a space or hyphen. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_INITIAL +UCD marker: @code{<initial>}. +Denotes an initial presentation form (Arabic). 
+@end deftypevr + +@deftypevr Constant int UC_DECOMP_MEDIAL +UCD marker: @code{<medial>}. +Denotes a medial presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_FINAL +UCD marker: @code{<final>}. +Denotes a final presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_ISOLATED +UCD marker: @code{<isolated>}. +Denotes an isolated presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_CIRCLE +UCD marker: @code{<circle>}. +Denotes an encircled form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SUPER +UCD marker: @code{<super>}. +Denotes a superscript form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SUB +UCD marker: @code{<sub>}. +Denotes a subscript form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_VERTICAL +UCD marker: @code{<vertical>}. +Denotes a vertical layout presentation form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_WIDE +UCD marker: @code{<wide>}. +Denotes a wide (or zenkaku) compatibility character. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_NARROW +UCD marker: @code{<narrow>}. +Denotes a narrow (or hankaku) compatibility character. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SMALL +UCD marker: @code{<small>}. +Denotes a small variant form (CNS compatibility). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SQUARE +UCD marker: @code{<square>}. +Denotes a CJK squared font variant. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_FRACTION +UCD marker: @code{<fraction>}. +Denotes a vulgar fraction form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_COMPAT +UCD marker: @code{<compat>}. +Denotes an otherwise unspecified compatibility character. +@end deftypevr + +The following constant denotes the maximum size of decomposition of a single +Unicode character. 
+ +@deftypevr Macro {unsigned int} UC_DECOMPOSITION_MAX_LENGTH +This macro expands to a constant that is the required size of buffer passed to +the @code{uc_decomposition} and @code{uc_canonical_decomposition} functions. +@end deftypevr + +The following functions decompose a Unicode character. + +@deftypefun int uc_decomposition (ucs4_t @var{uc}, int *@var{decomp_tag}, ucs4_t *@var{decomposition}) +Returns the character decomposition mapping of the Unicode character @var{uc}. +@var{decomposition} must point to an array of at least +@code{UC_DECOMPOSITION_MAX_LENGTH} @code{ucs4_t} elements. + +When a decomposition exists, @code{@var{decomposition}[0..@var{n}-1]} and +@code{*@var{decomp_tag}} are filled and @var{n} is returned. Otherwise -1 is +returned. +@end deftypefun + +@deftypefun int uc_canonical_decomposition (ucs4_t @var{uc}, ucs4_t *@var{decomposition}) +Returns the canonical character decomposition mapping of the Unicode character +@var{uc}. @var{decomposition} must point to an array of at least +@code{UC_DECOMPOSITION_MAX_LENGTH} @code{ucs4_t} elements. + +When a decomposition exists, @code{@var{decomposition}[0..@var{n}-1]} is filled +and @var{n} is returned. Otherwise -1 is returned. +@end deftypefun + +@node Composition of characters +@section Composition of Unicode characters + +@cindex composing, Unicode characters +@cindex combining, Unicode characters +The following function composes a Unicode character from two Unicode +characters. + +@deftypefun ucs4_t uc_composition (ucs4_t @var{uc1}, ucs4_t @var{uc2}) +Attempts to combine the Unicode characters @var{uc1}, @var{uc2}. +@var{uc1} is known to have canonical combining class 0. + +Returns the combination of @var{uc1} and @var{uc2}, if it exists. +Returns 0 otherwise. + +Not all decompositions can be recombined using this function. See the Unicode +file @file{CompositionExclusions.txt} for details. 
+@end deftypefun + +@node Normalization of strings +@section Normalization of strings + +The Unicode standard defines four normalization forms for Unicode strings. +The following type is used to denote a normalization form. + +@deftp Type uninorm_t +An object of type @code{uninorm_t} denotes a Unicode normalization form. +This is a scalar type; its values can be compared with @code{==}. +@end deftp + +The following constants denote the four normalization forms. + +@deftypevr Macro uninorm_t UNINORM_NFD +Denotes Normalization form D: canonical decomposition. +@end deftypevr + +@deftypevr Macro uninorm_t UNINORM_NFC +Normalization form C: canonical decomposition, then canonical composition. +@end deftypevr + +@deftypevr Macro uninorm_t UNINORM_NFKD +Normalization form KD: compatibility decomposition. +@end deftypevr + +@deftypevr Macro uninorm_t UNINORM_NFKC +Normalization form KC: compatibility decomposition, then canonical composition. +@end deftypevr + +The following functions operate on @code{uninorm_t} objects. + +@deftypefun bool uninorm_is_compat_decomposing (uninorm_t @var{nf}) +Tests whether the normalization form @var{nf} does compatibility decomposition. +@end deftypefun + +@deftypefun bool uninorm_is_composing (uninorm_t @var{nf}) +Tests whether the normalization form @var{nf} includes canonical composition. +@end deftypefun + +@deftypefun uninorm_t uninorm_decomposing_form (uninorm_t @var{nf}) +Returns the decomposing variant of the normalization form @var{nf}. +This maps NFC,NFD @arrow{} NFD and NFKC,NFKD @arrow{} NFKD. +@end deftypefun + +The following functions apply a Unicode normalization form to a Unicode string. 
+ +@deftypefun {uint8_t *} u8_normalize (uninorm_t @var{nf}, const uint8_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_normalize (uninorm_t @var{nf}, const uint16_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_normalize (uninorm_t @var{nf}, const uint32_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the specified normalization form of a string. +@end deftypefun + +@node Normalizing comparisons +@section Normalizing comparisons + +@cindex comparing, ignoring normalization +The following functions compare Unicode strings, ignoring differences in +normalization. + +@deftypefun int u8_normcmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_normcmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_normcmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in normalization. + +@var{nf} must be either @code{UNINORM_NFD} or @code{UNINORM_NFKD}. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. 
+@end deftypefun + +@cindex comparing, ignoring normalization, with collation rules +@cindex comparing, with collation rules, ignoring normalization +@deftypefun {char *} u8_normxfrm (const uint8_t *@var{s}, size_t @var{n}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_normxfrm (const uint16_t *@var{s}, size_t @var{n}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_normxfrm (const uint32_t *@var{s}, size_t @var{n}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts the string @var{s} of length @var{n} to a NUL-terminated byte +sequence, in such a way that comparing @code{u8_normxfrm (@var{s1})} and +@code{u8_normxfrm (@var{s2})} with the @code{u8_cmp2} function is equivalent to +comparing @var{s1} and @var{s2} with the @code{u8_normcoll} function. + +@var{nf} must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. +@end deftypefun + +@deftypefun int u8_normcoll (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_normcoll (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_normcoll (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in normalization, using +the collation rules of the current locale. + +@var{nf} must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. 
+@end deftypefun + +@node Normalization of streams +@section Normalization of streams of Unicode characters + +@cindex stream, normalizing a +A ``stream of Unicode characters'' is essentially a function that accepts an +@code{ucs4_t} argument repeatedly, optionally combined with a function that +``flushes'' the stream. + +@deftp Type {struct uninorm_filter} +This is the data type of a stream of Unicode characters that normalizes its +input according to a given normalization form and passes the normalized +character sequence to the encapsulated stream of Unicode characters. +@end deftp + +@deftypefun {struct uninorm_filter *} uninorm_filter_create (uninorm_t @var{nf}, int (*@var{stream_func}) (void *@var{stream_data}, ucs4_t @var{uc}), void *@var{stream_data}) +Creates and returns a normalization filter for Unicode characters. + +The pair (@var{stream_func}, @var{stream_data}) is the encapsulated stream. +@code{@var{stream_func} (@var{stream_data}, @var{uc})} receives the Unicode +character @var{uc} and returns 0 if successful, or -1 with @code{errno} set +upon failure. + +Returns the new filter, or NULL with @code{errno} set upon failure. +@end deftypefun + +@deftypefun int uninorm_filter_write (struct uninorm_filter *@var{filter}, ucs4_t @var{uc}) +Stuffs a Unicode character into a normalizing filter. +Returns 0 if successful, or -1 with @code{errno} set upon failure. +@end deftypefun + +@deftypefun int uninorm_filter_flush (struct uninorm_filter *@var{filter}) +Brings data buffered in the filter to its destination, the encapsulated stream. + +Returns 0 if successful, or -1 with @code{errno} set upon failure. + +Note! If after calling this function, additional characters are written +into the filter, the resulting character sequence in the encapsulated stream +will not necessarily be normalized. 
+@end deftypefun + +@deftypefun int uninorm_filter_free (struct uninorm_filter *@var{filter}) +Brings data buffered in the filter to its destination, the encapsulated stream, +then closes and frees the filter. + +Returns 0 if successful, or -1 with @code{errno} set upon failure. +@end deftypefun diff --git a/doc/uniregex.texi b/doc/uniregex.texi new file mode 100644 index 00000000..ae290ffa --- /dev/null +++ b/doc/uniregex.texi @@ -0,0 +1,5 @@ +@node uniregex.h +@chapter Regular expressions @code{<uniregex.h>} + +@cindex regular expression +This include file is not yet implemented. diff --git a/doc/unistdio.texi b/doc/unistdio.texi new file mode 100644 index 00000000..e1fb9cfa --- /dev/null +++ b/doc/unistdio.texi @@ -0,0 +1,197 @@ +@node unistdio.h +@chapter Output with Unicode strings @code{<unistdio.h>} + +@cindex formatted output +@cindex output, formatted +This include file declares functions for doing formatted output with Unicode +strings. It defines a set of functions similar to @code{fprintf} and +@code{sprintf}, which are declared in @code{<stdio.h>}. + +These functions work like the @code{printf} function family. +In the format string: +@itemize +@item +The format directive @samp{U} takes an UTF-8 string (@code{const uint8_t *}). +@item +The format directive @samp{lU} takes an UTF-16 string +(@code{const uint16_t *}). +@item +The format directive @samp{llU} takes an UTF-32 string +(@code{const uint32_t *}). +@end itemize + +A function name with an infix @samp{v} indicates that a @code{va_list} is +passed instead of multiple arguments. + +The functions @code{*sprintf} have a @var{buf} argument that is assumed to be +large enough. +(@emph{DANGEROUS! Overflowing the buffer will crash the program.}) + +The functions @code{*snprintf} have a @var{buf} argument that is assumed to be +@var{size} units large. (@emph{DANGEROUS! 
The resulting string might be +truncated in the middle of a multibyte character.}) + +The functions @code{*asprintf} have a @var{resultp} argument. The result will +be freshly allocated and stored in @code{*@var{resultp}}. + +The functions @code{*asnprintf} have a (@var{resultbuf}, @var{lengthp}) +argument pair. If @var{resultbuf} is not NULL and the result fits into +@code{*@var{lengthp}} units, it is put in @var{resultbuf}, and @var{resultbuf} +is returned. Otherwise, a freshly allocated string is returned. In both +cases, @code{*@var{lengthp}} is set to the length (number of units) of the +returned string. In case of error, NULL is returned and @code{errno} is set. + +The following functions take an ASCII format string and return a result that +is a @code{char *} string in locale encoding. + +@deftypefun int ulc_sprintf (char *@var{buf}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun int ulc_snprintf (char *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun int ulc_asprintf (char **@var{resultp}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun {char *} ulc_asnprintf (char *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun int ulc_vsprintf (char *@var{buf}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +@deftypefun int ulc_vsnprintf (char *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +@deftypefun int ulc_vasprintf (char **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +@deftypefun {char *} ulc_vasnprintf (char *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and return a result in +UTF-8 format. + +@deftypefun int u8_sprintf (uint8_t *@var{buf}, const char *@var{format}, ...) 
+@end deftypefun +@deftypefun int u8_snprintf (uint8_t *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u8_asprintf (uint8_t **@var{resultp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun {uint8_t *} u8_asnprintf (uint8_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u8_vsprintf (uint8_t *@var{buf}, const char *@var{format}, va_list ap) +@end deftypefun +@deftypefun int u8_vsnprintf (uint8_t *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u8_vasprintf (uint8_t **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint8_t *} u8_vasnprintf (uint8_t *resultbuf, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an UTF-8 format string and return a result in +UTF-8 format. + +@deftypefun int u8_u8_sprintf (uint8_t *@var{buf}, const uint8_t *@var{format}, ...) +@end deftypefun +@deftypefun int u8_u8_snprintf (uint8_t *@var{buf}, size_t @var{size}, const uint8_t *@var{format}, ...) +@end deftypefun +@deftypefun int u8_u8_asprintf (uint8_t **@var{resultp}, const uint8_t *@var{format}, ...) +@end deftypefun +@deftypefun {uint8_t *} u8_u8_asnprintf (uint8_t *resultbuf, size_t *@var{lengthp}, const uint8_t *@var{format}, ...) 
+@end deftypefun +@deftypefun int u8_u8_vsprintf (uint8_t *@var{buf}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u8_u8_vsnprintf (uint8_t *@var{buf}, size_t @var{size}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u8_u8_vasprintf (uint8_t **@var{resultp}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint8_t *} u8_u8_vasnprintf (uint8_t *resultbuf, size_t *@var{lengthp}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and return a result in +UTF-16 format. + +@deftypefun int u16_sprintf (uint16_t *@var{buf}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u16_snprintf (uint16_t *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u16_asprintf (uint16_t **@var{resultp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun {uint16_t *} u16_asnprintf (uint16_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u16_vsprintf (uint16_t *@var{buf}, const char *@var{format}, va_list ap) +@end deftypefun +@deftypefun int u16_vsnprintf (uint16_t *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u16_vasprintf (uint16_t **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint16_t *} u16_vasnprintf (uint16_t *resultbuf, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an UTF-16 format string and return a result in +UTF-16 format. + +@deftypefun int u16_u16_sprintf (uint16_t *@var{buf}, const uint16_t *@var{format}, ...) +@end deftypefun +@deftypefun int u16_u16_snprintf (uint16_t *@var{buf}, size_t @var{size}, const uint16_t *@var{format}, ...) 
+@end deftypefun +@deftypefun int u16_u16_asprintf (uint16_t **@var{resultp}, const uint16_t *@var{format}, ...) +@end deftypefun +@deftypefun {uint16_t *} u16_u16_asnprintf (uint16_t *@var{resultbuf}, size_t *@var{lengthp}, const uint16_t *@var{format}, ...) +@end deftypefun +@deftypefun int u16_u16_vsprintf (uint16_t *@var{buf}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u16_u16_vsnprintf (uint16_t *@var{buf}, size_t @var{size}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u16_u16_vasprintf (uint16_t **@var{resultp}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint16_t *} u16_u16_vasnprintf (uint16_t *@var{resultbuf}, size_t *@var{lengthp}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and return a result in +UTF-32 format. + +@deftypefun int u32_sprintf (uint32_t *@var{buf}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u32_snprintf (uint32_t *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u32_asprintf (uint32_t **@var{resultp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun {uint32_t *} u32_asnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u32_vsprintf (uint32_t *@var{buf}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_vsnprintf (uint32_t *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_vasprintf (uint32_t **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint32_t *} u32_vasnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an UTF-32 format string and return a result in +UTF-32 format. 
+ +@deftypefun int u32_u32_sprintf (uint32_t *@var{buf}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun int u32_u32_snprintf (uint32_t *@var{buf}, size_t @var{size}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun int u32_u32_asprintf (uint32_t **@var{resultp}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun {uint32_t *} u32_u32_asnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun int u32_u32_vsprintf (uint32_t *@var{buf}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_u32_vsnprintf (uint32_t *@var{buf}, size_t @var{size}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_u32_vasprintf (uint32_t **@var{resultp}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint32_t *} u32_u32_vasnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and produce output in +locale encoding to a @code{FILE} stream. + +@deftypefun int ulc_fprintf (FILE *@var{stream}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int ulc_vfprintf (FILE *@var{stream}, const char *@var{format}, va_list @var{ap}) +@end deftypefun diff --git a/doc/unistr.texi b/doc/unistr.texi new file mode 100644 index 00000000..9c6261b2 --- /dev/null +++ b/doc/unistr.texi @@ -0,0 +1,493 @@ +@node unistr.h +@chapter Elementary Unicode string functions @code{<unistr.h>} + +This include file declares elementary functions for Unicode strings. It is +essentially the equivalent of what @code{<string.h>} is for C strings. 
+ +@menu +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: +@end menu + +@node Elementary string checks +@section Elementary string checks + +@cindex validity +@cindex verification +The following function is available to verify the integrity of a Unicode string. + +@deftypefun {const uint8_t *} u8_check (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx {const uint16_t *} u16_check (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx {const uint32_t *} u32_check (const uint32_t *@var{s}, size_t @var{n}) +This function checks whether a Unicode string is well-formed. +It returns NULL if valid, or a pointer to the first invalid unit otherwise. +@end deftypefun + +@node Elementary string conversions +@section Elementary string conversions + +@cindex converting +The following functions perform conversions between the different forms of Unicode strings. + +@deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-8 string to an UTF-16 string. +@end deftypefun + +@deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-8 string to an UTF-32 string. +@end deftypefun + +@deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-16 string to an UTF-8 string. +@end deftypefun + +@deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-16 string to an UTF-32 string. +@end deftypefun + +@deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-32 string to an UTF-8 string. 
+@end deftypefun + +@deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-32 string to an UTF-16 string. +@end deftypefun + +@node Elementary string functions +@section Elementary string functions + +@cindex iterating +The following functions inspect and return details about the first character +in a Unicode string. + +@deftypefun int u8_mblen (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mblen (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mblen (const uint32_t *@var{s}, size_t @var{n}) +Returns the length (number of units) of the first character in @var{s}, which +is no longer than @var{n}. Returns 0 if it is the NUL character. Returns -1 +upon failure. + +This function is similar to @posixfunc{mblen}, except that it operates on a +Unicode string and that @var{s} must not be NULL. +@end deftypefun + +@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +Returns the length (number of units) of the first character in @var{s}, +putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, +@code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units +is returned. + +The number of available units, @var{n}, must be > 0. + +This function is similar to @posixfunc{mbtowc}, except that it operates on a +Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0, +and the NUL character is not treated specially. 
+@end deftypefun + +@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +This function is like @code{u8_mbtouc_unsafe}, except that it will detect an +invalid UTF-8 character, even if the library is compiled without +@option{--enable-safety}. +@end deftypefun + +@deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtoucr (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtoucr (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +Returns the length (number of units) of the first character in @var{s}, +putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, +@code{*@var{puc}} is set to @code{0xfffd}, and -1 is returned for an invalid +sequence of units, -2 is returned for an incomplete sequence of units. + +The number of available units, @var{n}, must be > 0. + +This function is similar to @code{u8_mbtouc}, except that the return value +gives more details about the failure, similar to @posixfunc{mbrtowc}. +@end deftypefun + +The following function stores a Unicode character as a Unicode string in +memory. + +@deftypefun int u8_uctomb (uint8_t *@var{s}, ucs4_t @var{uc}, int @var{n}) +@deftypefunx int u16_uctomb (uint16_t *@var{s}, ucs4_t @var{uc}, int @var{n}) +@deftypefunx int u32_uctomb (uint32_t *@var{s}, ucs4_t @var{uc}, int @var{n}) +Puts the multibyte character represented by @var{uc} in @var{s}, returning its +length. Returns -1 upon failure, -2 if the number of available units, @var{n}, +is too small. The latter case cannot occur if @var{n} >= 6/2/1, respectively. + +This function is similar to @posixfunc{wctomb}, except that it operates on a +Unicode string, @var{s} must not be NULL, and the argument @var{n} must be +specified. 
+@end deftypefun + +@cindex copying +The following functions copy Unicode strings in memory. + +@deftypefun {uint8_t *} u8_cpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_cpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_cpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies @var{n} units from @var{src} to @var{dest}. + +This function is similar to @posixfunc{memcpy}, except that it operates on +Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_move (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_move (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_move (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies @var{n} units from @var{src} to @var{dest}, guaranteeing correct +behavior for overlapping memory areas. + +This function is similar to @posixfunc{memmove}, except that it operates on +Unicode strings. +@end deftypefun + +The following function fills a Unicode string. + +@deftypefun {uint8_t *} u8_set (uint8_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_set (uint16_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_set (uint32_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) +Sets the first @var{n} characters of @var{s} to @var{uc}. @var{uc} should be +a character that occupies only 1 unit. + +This function is similar to @posixfunc{memset}, except that it operates on +Unicode strings. +@end deftypefun + +@cindex comparing +The following function compares two Unicode strings of the same length. 
+ +@deftypefun int u8_cmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) +@deftypefunx int u16_cmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) +@deftypefunx int u32_cmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) +Compares @var{s1} and @var{s2}, each of length @var{n}, lexicographically. +Returns a negative value if @var{s1} compares smaller than @var{s2}, +a positive value if @var{s1} compares larger than @var{s2}, or 0 if +they compare equal. + +This function is similar to @posixfunc{memcmp}, except that it operates on +Unicode strings. +@end deftypefun + +The following function compares two Unicode strings of possibly different +lengths. + +@deftypefun int u8_cmp2 (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}) +@deftypefunx int u16_cmp2 (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}) +@deftypefunx int u32_cmp2 (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}) +Compares @var{s1} and @var{s2}, lexicographically. +Returns a negative value if @var{s1} compares smaller than @var{s2}, +a positive value if @var{s1} compares larger than @var{s2}, or 0 if +they compare equal. + +This function is similar to the gnulib function @func{memcmp2}, except that it +operates on Unicode strings. +@end deftypefun + +@cindex searching, for a character +The following function searches for a given Unicode character. + +@deftypefun {uint8_t *} u8_chr (const uint8_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) +@deftypefunx {uint16_t *} u16_chr (const uint16_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) +@deftypefunx {uint32_t *} u32_chr (const uint32_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) +Searches the string at @var{s} for @var{uc}. Returns a pointer to the first +occurrence of @var{uc} in @var{s}, or NULL if @var{uc} does not occur in +@var{s}. 
+ +This function is similar to @posixfunc{memchr}, except that it operates on +Unicode strings. +@end deftypefun + +@cindex counting +The following function counts the number of Unicode characters. + +@deftypefun size_t u8_mbsnlen (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx size_t u16_mbsnlen (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx size_t u32_mbsnlen (const uint32_t *@var{s}, size_t @var{n}) +Counts and returns the number of Unicode characters in the @var{n} units +from @var{s}. + +This function is similar to the gnulib function @func{mbsnlen}, except that +it operates on Unicode strings. +@end deftypefun + +@node Elementary string functions with memory allocation +@section Elementary string functions with memory allocation + +@cindex duplicating +The following function copies a Unicode string. + +@deftypefun {uint8_t *} u8_cpy_alloc (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_cpy_alloc (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_cpy_alloc (const uint32_t *@var{s}, size_t @var{n}) +Makes a freshly allocated copy of @var{s}, of length @var{n}. +@end deftypefun + +@node Elementary string functions on NUL terminated strings +@section Elementary string functions on NUL terminated strings + +The following functions inspect and return details about the first character +in a Unicode string. + +@deftypefun int u8_strmblen (const uint8_t *@var{s}) +@deftypefunx int u16_strmblen (const uint16_t *@var{s}) +@deftypefunx int u32_strmblen (const uint32_t *@var{s}) +Returns the length (number of units) of the first character in @var{s}. +Returns 0 if it is the NUL character. Returns -1 upon failure. 
+@end deftypefun + +@cindex iterating +@deftypefun int u8_strmbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}) +@deftypefunx int u16_strmbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}) +@deftypefunx int u32_strmbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}) +Returns the length (number of units) of the first character in @var{s}, +putting its @code{ucs4_t} representation in @code{*@var{puc}}. Returns 0 +if it is the NUL character. Returns -1 upon failure. +@end deftypefun + +@deftypefun {const uint8_t *} u8_next (ucs4_t *@var{puc}, const uint8_t *@var{s}) +@deftypefunx {const uint16_t *} u16_next (ucs4_t *@var{puc}, const uint16_t *@var{s}) +@deftypefunx {const uint32_t *} u32_next (ucs4_t *@var{puc}, const uint32_t *@var{s}) +Forward iteration step. Advances the pointer past the next character, +or returns NULL if the end of the string has been reached. Puts the +character's @code{ucs4_t} representation in @code{*@var{puc}}. +@end deftypefun + +The following function inspects and returns details about the previous +character in a Unicode string. + +@deftypefun {const uint8_t *} u8_prev (ucs4_t *@var{puc}, const uint8_t *@var{s}, const uint8_t *@var{start}) +@deftypefunx {const uint16_t *} u16_prev (ucs4_t *@var{puc}, const uint16_t *@var{s}, const uint16_t *@var{start}) +@deftypefunx {const uint32_t *} u32_prev (ucs4_t *@var{puc}, const uint32_t *@var{s}, const uint32_t *@var{start}) +Backward iteration step. Advances the pointer to point to the previous +character, or returns NULL if the beginning of the string has been reached. +Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}. +@end deftypefun + +The following functions determine the length of a Unicode string. + +@deftypefun size_t u8_strlen (const uint8_t *@var{s}) +@deftypefunx size_t u16_strlen (const uint16_t *@var{s}) +@deftypefunx size_t u32_strlen (const uint32_t *@var{s}) +Returns the number of units in @var{s}. 
+ +This function is similar to @posixfunc{strlen} and @posixfunc{wcslen}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun size_t u8_strnlen (const uint8_t *@var{s}, size_t @var{maxlen}) +@deftypefunx size_t u16_strnlen (const uint16_t *@var{s}, size_t @var{maxlen}) +@deftypefunx size_t u32_strnlen (const uint32_t *@var{s}, size_t @var{maxlen}) +Returns the number of units in @var{s}, but at most @var{maxlen}. + +This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex copying +The following functions copy portions of Unicode strings in memory. + +@deftypefun {uint8_t *} u8_strcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) +@deftypefunx {uint16_t *} u16_strcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) +@deftypefunx {uint32_t *} u32_strcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) +Copies @var{src} to @var{dest}. + +This function is similar to @posixfunc{strcpy} and @posixfunc{wcscpy}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_stpcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) +@deftypefunx {uint16_t *} u16_stpcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) +@deftypefunx {uint32_t *} u32_stpcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) +Copies @var{src} to @var{dest}, returning the address of the terminating NUL +in @var{dest}. + +This function is similar to @posixfunc{stpcpy}, except that it operates on +Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_strncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_strncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies no more than @var{n} units of @var{src} to @var{dest}. 
+ +This function is similar to @posixfunc{strncpy} and @posixfunc{wcsncpy}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_stpncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_stpncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_stpncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies no more than @var{n} units of @var{src} to @var{dest}, returning the +address of the last unit written into @var{dest}. + +This function is similar to @posixfunc{stpncpy}, except that it operates on +Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strcat (uint8_t *@var{dest}, const uint8_t *@var{src}) +@deftypefunx {uint16_t *} u16_strcat (uint16_t *@var{dest}, const uint16_t *@var{src}) +@deftypefunx {uint32_t *} u32_strcat (uint32_t *@var{dest}, const uint32_t *@var{src}) +Appends @var{src} onto @var{dest}. + +This function is similar to @posixfunc{strcat} and @posixfunc{wcscat}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strncat (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_strncat (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_strncat (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Appends no more than @var{n} units of @var{src} onto @var{dest}. + +This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex comparing +The following functions compare two Unicode strings. 
+ +@deftypefun int u8_strcmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}) +@deftypefunx int u16_strcmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}) +@deftypefunx int u32_strcmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}) +Compares @var{s1} and @var{s2}, lexicographically. +Returns a negative value if @var{s1} compares smaller than @var{s2}, +a positive value if @var{s1} compares larger than @var{s2}, or 0 if +they compare equal. + +This function is similar to @posixfunc{strcmp} and @posixfunc{wcscmp}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex comparing, with collation rules +@deftypefun int u8_strcoll (const uint8_t *@var{s1}, const uint8_t *@var{s2}) +@deftypefunx int u16_strcoll (const uint16_t *@var{s1}, const uint16_t *@var{s2}) +@deftypefunx int u32_strcoll (const uint32_t *@var{s1}, const uint32_t *@var{s2}) +Compares @var{s1} and @var{s2} using the collation rules of the current +locale. +Returns -1 if @var{s1} < @var{s2}, 0 if @var{s1} = @var{s2}, 1 if +@var{s1} > @var{s2}. Upon failure, sets @code{errno} and returns any value. + +This function is similar to @posixfunc{strcoll} and @posixfunc{wcscoll}, except +that it operates on Unicode strings. + +Note that this function may consider different canonical normalizations +of the same string as having a large distance. It is therefore better to +use the function @code{u8_normcoll} instead of this one; see @ref{uninorm.h}. +@end deftypefun + +@deftypefun int u8_strncmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) +@deftypefunx int u16_strncmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) +@deftypefunx int u32_strncmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) +Compares no more than @var{n} units of @var{s1} and @var{s2}. + +This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except +that it operates on Unicode strings. 
+@end deftypefun + +@cindex duplicating +The following function allocates a duplicate of a Unicode string. + +@deftypefun {uint8_t *} u8_strdup (const uint8_t *@var{s}) +@deftypefunx {uint16_t *} u16_strdup (const uint16_t *@var{s}) +@deftypefunx {uint32_t *} u32_strdup (const uint32_t *@var{s}) +Duplicates @var{s}, returning an identical malloc'd string. + +This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex searching, for a character +The following functions search for a given Unicode character. + +@deftypefun {uint8_t *} u8_strchr (const uint8_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint16_t *} u16_strchr (const uint16_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint32_t *} u32_strchr (const uint32_t *@var{str}, ucs4_t @var{uc}) +Finds the first occurrence of @var{uc} in @var{str}. + +This function is similar to @posixfunc{strchr} and @posixfunc{wcschr}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strrchr (const uint8_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint16_t *} u16_strrchr (const uint16_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint32_t *} u32_strrchr (const uint32_t *@var{str}, ucs4_t @var{uc}) +Finds the last occurrence of @var{uc} in @var{str}. + +This function is similar to @posixfunc{strrchr} and @posixfunc{wcsrchr}, except +that it operates on Unicode strings. +@end deftypefun + +The following functions search for the first occurrence of some Unicode +character in or outside a given set of Unicode characters. 
+ +@deftypefun size_t u8_strcspn (const uint8_t *@var{str}, const uint8_t *@var{reject}) +@deftypefunx size_t u16_strcspn (const uint16_t *@var{str}, const uint16_t *@var{reject}) +@deftypefunx size_t u32_strcspn (const uint32_t *@var{str}, const uint32_t *@var{reject}) +Returns the length of the initial segment of @var{str} which consists entirely +of Unicode characters not in @var{reject}. + +This function is similar to @posixfunc{strcspn} and @posixfunc{wcscspn}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun size_t u8_strspn (const uint8_t *@var{str}, const uint8_t *@var{accept}) +@deftypefunx size_t u16_strspn (const uint16_t *@var{str}, const uint16_t *@var{accept}) +@deftypefunx size_t u32_strspn (const uint32_t *@var{str}, const uint32_t *@var{accept}) +Returns the length of the initial segment of @var{str} which consists entirely +of Unicode characters in @var{accept}. + +This function is similar to @posixfunc{strspn} and @posixfunc{wcsspn}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strpbrk (const uint8_t *@var{str}, const uint8_t *@var{accept}) +@deftypefunx {uint16_t *} u16_strpbrk (const uint16_t *@var{str}, const uint16_t *@var{accept}) +@deftypefunx {uint32_t *} u32_strpbrk (const uint32_t *@var{str}, const uint32_t *@var{accept}) +Finds the first occurrence in @var{str} of any character in @var{accept}. + +This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex searching, for a substring +The following functions search whether a given Unicode string is a substring +of another Unicode string. 
+ +@deftypefun {uint8_t *} u8_strstr (const uint8_t *@var{haystack}, const uint8_t *@var{needle}) +@deftypefunx {uint16_t *} u16_strstr (const uint16_t *@var{haystack}, const uint16_t *@var{needle}) +@deftypefunx {uint32_t *} u32_strstr (const uint32_t *@var{haystack}, const uint32_t *@var{needle}) +Finds the first occurrence of @var{needle} in @var{haystack}. + +This function is similar to @posixfunc{strstr} and @posixfunc{wcsstr}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun bool u8_startswith (const uint8_t *@var{str}, const uint8_t *@var{prefix}) +@deftypefunx bool u16_startswith (const uint16_t *@var{str}, const uint16_t *@var{prefix}) +@deftypefunx bool u32_startswith (const uint32_t *@var{str}, const uint32_t *@var{prefix}) +Tests whether @var{str} starts with @var{prefix}. +@end deftypefun + +@deftypefun bool u8_endswith (const uint8_t *@var{str}, const uint8_t *@var{suffix}) +@deftypefunx bool u16_endswith (const uint16_t *@var{str}, const uint16_t *@var{suffix}) +@deftypefunx bool u32_endswith (const uint32_t *@var{str}, const uint32_t *@var{suffix}) +Tests whether @var{str} ends with @var{suffix}. +@end deftypefun + +The following function does one step in tokenizing a Unicode string. + +@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr}) +@deftypefunx {uint16_t *} u16_strtok (uint16_t *@var{str}, const uint16_t *@var{delim}, uint16_t **@var{ptr}) +@deftypefunx {uint32_t *} u32_strtok (uint32_t *@var{str}, const uint32_t *@var{delim}, uint32_t **@var{ptr}) +Divides @var{str} into tokens separated by characters in @var{delim}. + +This function is similar to @posixfunc{strtok_r} and @posixfunc{wcstok}, except +that it operates on Unicode strings. Its interface is actually more similar to +@code{wcstok} than to @code{strtok}. 
+@end deftypefun diff --git a/doc/unitypes.texi b/doc/unitypes.texi new file mode 100644 index 00000000..696ba881 --- /dev/null +++ b/doc/unitypes.texi @@ -0,0 +1,15 @@ +@node unitypes.h +@chapter Elementary types @code{<unitypes.h>} + +The include file @code{<unitypes.h>} provides the following basic types. + +@deftp Type uint8_t +@deftpx Type uint16_t +@deftpx Type uint32_t +These are the storage units of UTF-8/16/32 strings, respectively. The definitions are +taken from @code{<stdint.h>}, on platforms where this include file is present. +@end deftp + +@deftp Type ucs4_t +This type represents a single Unicode character, outside of an UTF-32 string. +@end deftp diff --git a/doc/uniwbrk.texi b/doc/uniwbrk.texi new file mode 100644 index 00000000..6f06b926 --- /dev/null +++ b/doc/uniwbrk.texi @@ -0,0 +1,71 @@ +@node uniwbrk.h +@chapter Word breaks in strings @code{<uniwbrk.h>} + +@cindex word breaks +@cindex breaks, word +This include file declares functions for determining where in a string +``words'' start and end. Here ``words'' are not necessarily the same as +entities that can be looked up in dictionaries, but rather groups of +consecutive characters that should not be split by text processing +operations. + +@menu +* Word breaks in a string:: +* Word break property:: +@end menu + +@node Word breaks in a string +@section Word breaks in a string + +The following functions determine the word breaks in a string. + +@deftypefun void u8_wordbreaks (const uint8_t *@var{s}, size_t @var{n}, char *@var{p}) +@deftypefunx void u16_wordbreaks (const uint16_t *@var{s}, size_t @var{n}, char *@var{p}) +@deftypefunx void u32_wordbreaks (const uint32_t *@var{s}, size_t @var{n}, char *@var{p}) +@deftypefunx void ulc_wordbreaks (const char *@var{s}, size_t @var{n}, char *@var{p}) +Determines the word break points in @var{s}, an array of @var{n} units, and +stores the result at @code{@var{p}[0..@var{n}-1]}. 
+@table @asis +@item @code{@var{p}[i] = 1} +means that there is a word boundary between @code{@var{s}[i-1]} and +@code{@var{s}[i]}. +@item @code{@var{p}[i] = 0} +means that @code{@var{s}[i-1]} and @code{@var{s}[i]} must not be separated. +@end table +@code{@var{p}[0]} is always set to 0. If an application wants to consider a +word break to be present at the beginning of the string (before +@code{@var{s}[0]}) or at the end of the string (after +@code{@var{s}[0..@var{n}-1]}), it has to treat these cases explicitly. +@end deftypefun + +@node Word break property +@section Word break property + +This is a more low-level API. The word break property is a property defined +in Unicode Standard Annex #29, section ``Word Boundaries'', see +@url{http://www.unicode.org/reports/tr29/#Word_Boundaries}.@texnl{} It is +used for determining the word breaks in a string. + +The following are the possible values of the word break property. More values +may be added in the future. + +@deftypevr Constant int WBP_OTHER +@deftypevrx Constant int WBP_CR +@deftypevrx Constant int WBP_LF +@deftypevrx Constant int WBP_NEWLINE +@deftypevrx Constant int WBP_EXTEND +@deftypevrx Constant int WBP_FORMAT +@deftypevrx Constant int WBP_KATAKANA +@deftypevrx Constant int WBP_ALETTER +@deftypevrx Constant int WBP_MIDNUMLET +@deftypevrx Constant int WBP_MIDLETTER +@deftypevrx Constant int WBP_MIDNUM +@deftypevrx Constant int WBP_NUMERIC +@deftypevrx Constant int WBP_EXTENDNUMLET +@end deftypevr + +The following function looks up the word break property of a character. + +@deftypefun int uc_wordbreak_property (ucs4_t @var{uc}) +Returns the Word_Break property of a Unicode character. 
+@end deftypefun diff --git a/doc/uniwidth.texi b/doc/uniwidth.texi new file mode 100644 index 00000000..a05d101d --- /dev/null +++ b/doc/uniwidth.texi @@ -0,0 +1,43 @@ +@node uniwidth.h +@chapter Display width @code{<uniwidth.h>} + +@cindex width +This include file declares functions that return the display width, measured +in columns, of characters or strings, when output to a device that uses +non-proportional fonts. + +@cindex ambiguous width +Note that for some rarely used characters the actual fonts or terminal +emulators can use a different width. There is no mechanism for communicating +the display width of characters across a Unix pseudo-terminal (tty). Also, +there are scripts with complex rendering, like the Indic scripts. For these +scripts, there is no such concept as non-proportional fonts. Therefore +the results of these functions usually work fine on most scripts and on +most characters but can fail to represent the actual display width. + +These functions are locale dependent. The @var{encoding} argument identifies +the encoding (e.g@. @code{"ISO-8859-2"} for Polish). + +@cindex Unicode character, width +@cindex halfwidth +@cindex fullwidth +@deftypefun int uc_width (ucs4_t @var{uc}, const char *@var{encoding}) +Determines and returns the number of column positions required for @var{uc}. +Returns -1 if @var{uc} is a control character that has an influence on the +column position when output. +@end deftypefun + +@deftypefun int u8_width (const uint8_t *@var{s}, size_t @var{n}, const char *@var{encoding}) +@deftypefunx int u16_width (const uint16_t *@var{s}, size_t @var{n}, const char *@var{encoding}) +@deftypefunx int u32_width (const uint32_t *@var{s}, size_t @var{n}, const char *@var{encoding}) +Determines and returns the number of column positions required for the first +@var{n} units (or fewer if @var{s} ends before this) in @var{s}. This +function ignores control characters in the string. 
+@end deftypefun + +@deftypefun int u8_strwidth (const uint8_t *@var{s}, const char *@var{encoding}) +@deftypefunx int u16_strwidth (const uint16_t *@var{s}, const char *@var{encoding}) +@deftypefunx int u32_strwidth (const uint32_t *@var{s}, const char *@var{encoding}) +Determines and returns the number of column positions required for @var{s}. +This function ignores control characters in the string. +@end deftypefun diff --git a/doc/version.texi b/doc/version.texi new file mode 100644 index 00000000..ee4ba906 --- /dev/null +++ b/doc/version.texi @@ -0,0 +1,4 @@ +@set UPDATED 29 June 2009 +@set UPDATED-MONTH June 2009 +@set EDITION 0.9.1 +@set VERSION 0.9.1 |