From fa095a4504cbe668e4244547e2c141597bea4ecf Mon Sep 17 00:00:00 2001 From: Andreas Rottmann Date: Mon, 14 Sep 2009 12:32:44 +0200 Subject: Imported Upstream version 0.9.1 --- doc/Makefile.am | 185 ++ doc/Makefile.in | 1207 +++++++++ doc/fdl.texi | 506 ++++ doc/gpl.texi | 717 ++++++ doc/lgpl.texi | 190 ++ doc/libunistring.info | 6200 +++++++++++++++++++++++++++++++++++++++++++++ doc/libunistring.texi | 989 ++++++++ doc/libunistring_1.html | 531 ++++ doc/libunistring_10.html | 192 ++ doc/libunistring_11.html | 200 ++ doc/libunistring_12.html | 507 ++++ doc/libunistring_13.html | 611 +++++ doc/libunistring_14.html | 87 + doc/libunistring_15.html | 232 ++ doc/libunistring_16.html | 93 + doc/libunistring_17.html | 1526 +++++++++++ doc/libunistring_18.html | 770 ++++++ doc/libunistring_19.html | 188 ++ doc/libunistring_2.html | 141 ++ doc/libunistring_3.html | 107 + doc/libunistring_4.html | 864 +++++++ doc/libunistring_5.html | 296 +++ doc/libunistring_6.html | 451 ++++ doc/libunistring_7.html | 125 + doc/libunistring_8.html | 2071 +++++++++++++++ doc/libunistring_9.html | 141 ++ doc/libunistring_abt.html | 167 ++ doc/libunistring_toc.html | 164 ++ doc/stamp-vti | 4 + doc/unicase.texi | 364 +++ doc/uniconv.texi | 157 ++ doc/unictype.texi | 1145 +++++++++ doc/unilbrk.texi | 88 + doc/uniname.texi | 32 + doc/uninorm.texi | 299 +++ doc/uniregex.texi | 5 + doc/unistdio.texi | 197 ++ doc/unistr.texi | 493 ++++ doc/unitypes.texi | 15 + doc/uniwbrk.texi | 71 + doc/uniwidth.texi | 43 + doc/version.texi | 4 + 42 files changed, 22375 insertions(+) create mode 100644 doc/Makefile.am create mode 100644 doc/Makefile.in create mode 100644 doc/fdl.texi create mode 100644 doc/gpl.texi create mode 100644 doc/lgpl.texi create mode 100644 doc/libunistring.info create mode 100644 doc/libunistring.texi create mode 100644 doc/libunistring_1.html create mode 100644 doc/libunistring_10.html create mode 100644 doc/libunistring_11.html create mode 100644 doc/libunistring_12.html create mode 
100644 doc/libunistring_13.html create mode 100644 doc/libunistring_14.html create mode 100644 doc/libunistring_15.html create mode 100644 doc/libunistring_16.html create mode 100644 doc/libunistring_17.html create mode 100644 doc/libunistring_18.html create mode 100644 doc/libunistring_19.html create mode 100644 doc/libunistring_2.html create mode 100644 doc/libunistring_3.html create mode 100644 doc/libunistring_4.html create mode 100644 doc/libunistring_5.html create mode 100644 doc/libunistring_6.html create mode 100644 doc/libunistring_7.html create mode 100644 doc/libunistring_8.html create mode 100644 doc/libunistring_9.html create mode 100644 doc/libunistring_abt.html create mode 100644 doc/libunistring_toc.html create mode 100644 doc/stamp-vti create mode 100644 doc/unicase.texi create mode 100644 doc/uniconv.texi create mode 100644 doc/unictype.texi create mode 100644 doc/unilbrk.texi create mode 100644 doc/uniname.texi create mode 100644 doc/uninorm.texi create mode 100644 doc/uniregex.texi create mode 100644 doc/unistdio.texi create mode 100644 doc/unistr.texi create mode 100644 doc/unitypes.texi create mode 100644 doc/uniwbrk.texi create mode 100644 doc/uniwidth.texi create mode 100644 doc/version.texi (limited to 'doc') diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 00000000..a66c69da --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,185 @@ +## Makefile for the doc subdirectory of GNU libunistring. +## Copyright (C) 2009 Free Software Foundation, Inc. +## +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program. If not, see . + +## Process this file with automake to produce Makefile.in. + +AUTOMAKE_OPTIONS = 1.5 gnits +EXTRA_DIST = +MOSTLYCLEANFILES = + +# List of -I options referring to directories that contain texinfo sources +# used by this directory. +# Should contain at least one -I option, to work around a bug in texi2dvi 1.13, +# see . +TEXINCLUDES = -I . + +MAKEINFO = env LANG= LC_MESSAGES= LC_ALL= LANGUAGE= @MAKEINFO@ +MAKEINFOFLAGS = $(TEXINCLUDES) --no-split + +info_TEXINFOS = libunistring.texi +# List of texinfo sources @included by libunistring.texi, excluding version.texi. +libunistring_TEXINFOS = \ + unitypes.texi unistr.texi uniconv.texi unistdio.texi uniname.texi \ + unictype.texi uniwidth.texi uniwbrk.texi unilbrk.texi uninorm.texi \ + unicase.texi uniregex.texi \ + gpl.texi lgpl.texi fdl.texi + +# The dependencies of stamp-vti generated by automake are incomplete. +# So we have to duplicate the entire rule which would otherwise be generated +# by automake. +$(srcdir)/stamp-vti: $(info_TEXINFOS) $(libunistring_TEXINFOS) $(top_srcdir)/version.sh + (dir=.; test -f ./libunistring.texi || dir=$(srcdir); \ + set `$(SHELL) $(top_srcdir)/build-aux/mdate-sh $$dir/libunistring.texi`; \ + echo "@set UPDATED $$1 $$2 $$3"; \ + echo "@set UPDATED-MONTH $$2 $$3"; \ + echo "@set EDITION $(VERSION)"; \ + echo "@set VERSION $(VERSION)") > vti.tmp + cmp -s vti.tmp $(srcdir)/version.texi \ + || (echo "Updating $(srcdir)/version.texi"; \ + cp vti.tmp $(srcdir)/version.texi) + rm -f vti.tmp + cp $(srcdir)/version.texi $@ + +# We distribute only the split HTML documentation. 
+# The user can generate the others, via +# make libunistring.ps +# make libunistring.pdf +# make libunistring.html + +all-local: html-local +install-data-local: install-html +installdirs-local: installdirs-html +uninstall-local: uninstall-html +dist-hook: dist-html + +html-local: html-split +# Override of automake's definition. The HTML files we want to distribute are +# not the ones that automake knows about, and we cannot define HTMLS to a value +# containing wildcards. +install-html: install-html-split + @: +uninstall-html: uninstall-html-split +dist-html: dist-html-split + +# CLEANFILES: libunistring.{dvi,ps,pdf,html} are already known to automake. +MAINTAINERCLEANFILES = libunistring_*.html + + +# Documentation in DVI format. + +# Override of automake's definition: +#TEXI2DVI = @TEXI2DVI@ +TEXI2DVI = @TEXI2DVI@ $(TEXINCLUDES) + +# The install-dvi target is already defined by automake. + +installdirs-dvi: + $(mkdir_p) $(DESTDIR)$(dvidir) + +uninstall-dvi: + $(RM) $(DESTDIR)$(dvidir)/libunistring.dvi + + +# Documentation in Postscript format. + +# Override of automake's definition: +#DVIPS = @DVIPS@ +DVIPS = @DVIPS@ -D600 + +libunistring.ps: libunistring.dvi + $(DVIPS) -o $@ `if test -f libunistring.dvi; then echo libunistring.dvi; else echo $(srcdir)/libunistring.dvi; fi` + +# The install-ps target is already defined by automake. + +installdirs-ps: + $(mkdir_p) $(DESTDIR)$(psdir) + +uninstall-ps: + $(RM) $(DESTDIR)$(psdir)/libunistring.ps + + +# Documentation in Portable Document Format. + +# Override of automake's definition: +#TEXI2PDF = @TEXI2DVI@ --pdf +TEXI2PDF = @TEXI2DVI@ --pdf $(TEXINCLUDES) + +# The install-pdf target is already defined by automake. + +installdirs-pdf: + $(mkdir_p) $(DESTDIR)$(pdfdir) + +uninstall-pdf: + $(RM) $(DESTDIR)$(pdfdir)/libunistring.pdf + + +# Documentation in HTML format. 
+ +TEXI2HTML = @PERL@ $(top_srcdir)/build-aux/texi2html + +html-monolithic: libunistring.html +html-split: libunistring_toc.html + +# Override of automake's definition. +# We want to use texi2html, not makeinfo --html. +libunistring.html: libunistring.texi version.texi $(libunistring_TEXINFOS) + $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -monolithic `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` + +libunistring_toc.html: libunistring.texi version.texi $(libunistring_TEXINFOS) + case "@PERL@" in \ + *"/missing perl") \ + $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` || exit 0 ;; \ + *) $(RM) libunistring_*.html ; \ + $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` ;; \ + esac \ + && { mv libunistring/libunistring.html libunistring_toc.html; \ + mv libunistring/*.html .; \ + rmdir libunistring; \ + } + +install-html-monolithic: libunistring.html + $(mkdir_p) $(DESTDIR)$(htmldir) + $(INSTALL_DATA) `if test -f libunistring.html; then echo .; else echo $(srcdir); fi`/libunistring.html $(DESTDIR)$(htmldir)/libunistring.html + +install-html-split: libunistring_toc.html + $(mkdir_p) $(DESTDIR)$(htmldir) + for file in `if test -f libunistring_toc.html; then echo .; else echo $(srcdir); fi`/libunistring_*.html; do \ + $(INSTALL_DATA) $$file $(DESTDIR)$(htmldir)/`basename $$file`; \ + done + +installdirs-html: + $(mkdir_p) $(DESTDIR)$(htmldir) + +uninstall-html-monolithic: + $(RM) $(DESTDIR)$(htmldir)/libunistring.html + +uninstall-html-split: + $(RM) $(DESTDIR)$(htmldir)/libunistring_*.html + +dist-html-monolithic: + $(mkdir_p) $(distdir)/ + file=libunistring.html; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + 
cp -p $$d/$$file $(distdir)/$$file || exit 1 + +# We would like to put libunistring_*.html into EXTRA_DIST, but it doesn't work. +dist-html-split: + $(mkdir_p) $(distdir)/ + file=libunistring_toc.html; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + for file in `cd $$d && echo libunistring_*.html`; do \ + cp -p $$d/$$file $(distdir)/$$file || exit 1; \ + done diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 00000000..66a4a3bc --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,1207 @@ +# Makefile.in generated by automake 1.11 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc +DIST_COMMON = $(libunistring_TEXINFOS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/stamp-vti \ + $(srcdir)/version.texi +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/gnulib-m4/00gnulib.m4 \ + $(top_srcdir)/gnulib-m4/alloca.m4 \ + $(top_srcdir)/gnulib-m4/codeset.m4 \ + $(top_srcdir)/gnulib-m4/eealloc.m4 \ + $(top_srcdir)/gnulib-m4/environ.m4 \ + $(top_srcdir)/gnulib-m4/errno_h.m4 \ + $(top_srcdir)/gnulib-m4/error.m4 \ + $(top_srcdir)/gnulib-m4/exitfail.m4 \ + $(top_srcdir)/gnulib-m4/exponentd.m4 \ + $(top_srcdir)/gnulib-m4/exponentf.m4 \ + $(top_srcdir)/gnulib-m4/exponentl.m4 \ + $(top_srcdir)/gnulib-m4/extensions.m4 \ + $(top_srcdir)/gnulib-m4/float_h.m4 \ + $(top_srcdir)/gnulib-m4/fpieee.m4 \ + $(top_srcdir)/gnulib-m4/frexp.m4 \ + $(top_srcdir)/gnulib-m4/frexpl.m4 \ + $(top_srcdir)/gnulib-m4/getpagesize.m4 \ + $(top_srcdir)/gnulib-m4/glibc21.m4 \ + $(top_srcdir)/gnulib-m4/gnulib-common.m4 \ + $(top_srcdir)/gnulib-m4/gnulib-comp.m4 \ + $(top_srcdir)/gnulib-m4/iconv.m4 \ + $(top_srcdir)/gnulib-m4/iconv_h.m4 \ + $(top_srcdir)/gnulib-m4/iconv_open.m4 \ + $(top_srcdir)/gnulib-m4/include_next.m4 \ + $(top_srcdir)/gnulib-m4/inline.m4 \ + $(top_srcdir)/gnulib-m4/intlmacosx.m4 \ + $(top_srcdir)/gnulib-m4/intmax_t.m4 \ + $(top_srcdir)/gnulib-m4/inttypes_h.m4 \ + $(top_srcdir)/gnulib-m4/isnand.m4 \ + 
$(top_srcdir)/gnulib-m4/isnanf.m4 \ + $(top_srcdir)/gnulib-m4/isnanl.m4 \ + $(top_srcdir)/gnulib-m4/lcmessage.m4 \ + $(top_srcdir)/gnulib-m4/ldexpl.m4 \ + $(top_srcdir)/gnulib-m4/lib-ld.m4 \ + $(top_srcdir)/gnulib-m4/lib-link.m4 \ + $(top_srcdir)/gnulib-m4/lib-prefix.m4 \ + $(top_srcdir)/gnulib-m4/localcharset.m4 \ + $(top_srcdir)/gnulib-m4/locale-fr.m4 \ + $(top_srcdir)/gnulib-m4/locale-ja.m4 \ + $(top_srcdir)/gnulib-m4/locale-tr.m4 \ + $(top_srcdir)/gnulib-m4/locale-zh.m4 \ + $(top_srcdir)/gnulib-m4/locale_h.m4 \ + $(top_srcdir)/gnulib-m4/localename.m4 \ + $(top_srcdir)/gnulib-m4/longlong.m4 \ + $(top_srcdir)/gnulib-m4/malloc.m4 \ + $(top_srcdir)/gnulib-m4/malloca.m4 \ + $(top_srcdir)/gnulib-m4/math_h.m4 \ + $(top_srcdir)/gnulib-m4/mbchar.m4 \ + $(top_srcdir)/gnulib-m4/mbiter.m4 \ + $(top_srcdir)/gnulib-m4/mbrtowc.m4 \ + $(top_srcdir)/gnulib-m4/mbsinit.m4 \ + $(top_srcdir)/gnulib-m4/mbstate_t.m4 \ + $(top_srcdir)/gnulib-m4/memchr.m4 \ + $(top_srcdir)/gnulib-m4/minmax.m4 \ + $(top_srcdir)/gnulib-m4/mmap-anon.m4 \ + $(top_srcdir)/gnulib-m4/multiarch.m4 \ + $(top_srcdir)/gnulib-m4/nocrash.m4 \ + $(top_srcdir)/gnulib-m4/printf-frexp.m4 \ + $(top_srcdir)/gnulib-m4/printf-frexpl.m4 \ + $(top_srcdir)/gnulib-m4/printf.m4 \ + $(top_srcdir)/gnulib-m4/relocatable-lib.m4 \ + $(top_srcdir)/gnulib-m4/setenv.m4 \ + $(top_srcdir)/gnulib-m4/signbit.m4 \ + $(top_srcdir)/gnulib-m4/size_max.m4 \ + $(top_srcdir)/gnulib-m4/stdbool.m4 \ + $(top_srcdir)/gnulib-m4/stdint.m4 \ + $(top_srcdir)/gnulib-m4/stdint_h.m4 \ + $(top_srcdir)/gnulib-m4/stdlib_h.m4 \ + $(top_srcdir)/gnulib-m4/strerror.m4 \ + $(top_srcdir)/gnulib-m4/string_h.m4 \ + $(top_srcdir)/gnulib-m4/unistd_h.m4 \ + $(top_srcdir)/gnulib-m4/vasnprintf.m4 \ + $(top_srcdir)/gnulib-m4/wchar.m4 \ + $(top_srcdir)/gnulib-m4/wchar_t.m4 \ + $(top_srcdir)/gnulib-m4/wctob.m4 \ + $(top_srcdir)/gnulib-m4/wctype.m4 \ + $(top_srcdir)/gnulib-m4/wcwidth.m4 \ + $(top_srcdir)/gnulib-m4/wint_t.m4 \ + $(top_srcdir)/gnulib-m4/xalloc.m4 \ + 
$(top_srcdir)/gnulib-m4/xsize.m4 $(top_srcdir)/m4/exported.m4 \ + $(top_srcdir)/m4/init-package-version.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/woe32-dll.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_GEN = $(am__v_GEN_$(V)) +am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) +am__v_GEN_0 = @echo " GEN " $@; +AM_V_at = $(am__v_at_$(V)) +am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) +am__v_at_0 = @ +SOURCES = +DIST_SOURCES = +INFO_DEPS = $(srcdir)/libunistring.info +TEXINFO_TEX = $(top_srcdir)/build-aux/texinfo.tex +am__TEXINFO_TEX_DIR = $(top_srcdir)/build-aux +DVIS = libunistring.dvi +PDFS = libunistring.pdf +PSS = libunistring.ps +HTMLS = libunistring.html +TEXINFOS = libunistring.texi +MAKEINFOHTML = $(MAKEINFO) --html +AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS) +am__installdirs = "$(DESTDIR)$(infodir)" +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, 
files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALLOCA_H = @ALLOCA_H@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPLE_UNIVERSAL_BUILD = @APPLE_UNIVERSAL_BUILD@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BITSIZEOF_PTRDIFF_T = @BITSIZEOF_PTRDIFF_T@ +BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@ +BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@ +BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@ +BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLL_VARIABLE = @DLL_VARIABLE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ + +# Documentation in Postscript format. + +# Override of automake's definition: +#DVIPS = @DVIPS@ +DVIPS = @DVIPS@ -D600 +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EMULTIHOP_HIDDEN = @EMULTIHOP_HIDDEN@ +EMULTIHOP_VALUE = @EMULTIHOP_VALUE@ +ENOLINK_HIDDEN = @ENOLINK_HIDDEN@ +ENOLINK_VALUE = @ENOLINK_VALUE@ +EOVERFLOW_HIDDEN = @EOVERFLOW_HIDDEN@ +EOVERFLOW_VALUE = @EOVERFLOW_VALUE@ +ERRNO_H = @ERRNO_H@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLOAT_H = @FLOAT_H@ +GLIBC21 = @GLIBC21@ +GLOBAL_SYMBOL_PIPE = @GLOBAL_SYMBOL_PIPE@ +GNULIB_ATOLL = @GNULIB_ATOLL@ +GNULIB_BTOWC = @GNULIB_BTOWC@ +GNULIB_CALLOC_POSIX = @GNULIB_CALLOC_POSIX@ +GNULIB_CEILF = @GNULIB_CEILF@ +GNULIB_CEILL = @GNULIB_CEILL@ +GNULIB_CHOWN = @GNULIB_CHOWN@ +GNULIB_CLOSE = @GNULIB_CLOSE@ +GNULIB_DUP2 = @GNULIB_DUP2@ +GNULIB_ENVIRON = @GNULIB_ENVIRON@ +GNULIB_EUIDACCESS = @GNULIB_EUIDACCESS@ +GNULIB_FCHDIR = @GNULIB_FCHDIR@ +GNULIB_FLOORF = @GNULIB_FLOORF@ +GNULIB_FLOORL = @GNULIB_FLOORL@ +GNULIB_FREXP = @GNULIB_FREXP@ +GNULIB_FREXPL = @GNULIB_FREXPL@ 
+GNULIB_FSYNC = @GNULIB_FSYNC@ +GNULIB_FTRUNCATE = @GNULIB_FTRUNCATE@ +GNULIB_GETCWD = @GNULIB_GETCWD@ +GNULIB_GETDOMAINNAME = @GNULIB_GETDOMAINNAME@ +GNULIB_GETDTABLESIZE = @GNULIB_GETDTABLESIZE@ +GNULIB_GETHOSTNAME = @GNULIB_GETHOSTNAME@ +GNULIB_GETLOADAVG = @GNULIB_GETLOADAVG@ +GNULIB_GETLOGIN_R = @GNULIB_GETLOGIN_R@ +GNULIB_GETPAGESIZE = @GNULIB_GETPAGESIZE@ +GNULIB_GETSUBOPT = @GNULIB_GETSUBOPT@ +GNULIB_GETUSERSHELL = @GNULIB_GETUSERSHELL@ +GNULIB_ISFINITE = @GNULIB_ISFINITE@ +GNULIB_ISINF = @GNULIB_ISINF@ +GNULIB_ISNAN = @GNULIB_ISNAN@ +GNULIB_ISNAND = @GNULIB_ISNAND@ +GNULIB_ISNANF = @GNULIB_ISNANF@ +GNULIB_ISNANL = @GNULIB_ISNANL@ +GNULIB_LCHOWN = @GNULIB_LCHOWN@ +GNULIB_LDEXPL = @GNULIB_LDEXPL@ +GNULIB_LINK = @GNULIB_LINK@ +GNULIB_LSEEK = @GNULIB_LSEEK@ +GNULIB_MALLOC_POSIX = @GNULIB_MALLOC_POSIX@ +GNULIB_MATHL = @GNULIB_MATHL@ +GNULIB_MBRLEN = @GNULIB_MBRLEN@ +GNULIB_MBRTOWC = @GNULIB_MBRTOWC@ +GNULIB_MBSCASECMP = @GNULIB_MBSCASECMP@ +GNULIB_MBSCASESTR = @GNULIB_MBSCASESTR@ +GNULIB_MBSCHR = @GNULIB_MBSCHR@ +GNULIB_MBSCSPN = @GNULIB_MBSCSPN@ +GNULIB_MBSINIT = @GNULIB_MBSINIT@ +GNULIB_MBSLEN = @GNULIB_MBSLEN@ +GNULIB_MBSNCASECMP = @GNULIB_MBSNCASECMP@ +GNULIB_MBSNLEN = @GNULIB_MBSNLEN@ +GNULIB_MBSNRTOWCS = @GNULIB_MBSNRTOWCS@ +GNULIB_MBSPBRK = @GNULIB_MBSPBRK@ +GNULIB_MBSPCASECMP = @GNULIB_MBSPCASECMP@ +GNULIB_MBSRCHR = @GNULIB_MBSRCHR@ +GNULIB_MBSRTOWCS = @GNULIB_MBSRTOWCS@ +GNULIB_MBSSEP = @GNULIB_MBSSEP@ +GNULIB_MBSSPN = @GNULIB_MBSSPN@ +GNULIB_MBSSTR = @GNULIB_MBSSTR@ +GNULIB_MBSTOK_R = @GNULIB_MBSTOK_R@ +GNULIB_MEMCHR = @GNULIB_MEMCHR@ +GNULIB_MEMMEM = @GNULIB_MEMMEM@ +GNULIB_MEMPCPY = @GNULIB_MEMPCPY@ +GNULIB_MEMRCHR = @GNULIB_MEMRCHR@ +GNULIB_MKDTEMP = @GNULIB_MKDTEMP@ +GNULIB_MKSTEMP = @GNULIB_MKSTEMP@ +GNULIB_PUTENV = @GNULIB_PUTENV@ +GNULIB_RANDOM_R = @GNULIB_RANDOM_R@ +GNULIB_RAWMEMCHR = @GNULIB_RAWMEMCHR@ +GNULIB_READLINK = @GNULIB_READLINK@ +GNULIB_REALLOC_POSIX = @GNULIB_REALLOC_POSIX@ +GNULIB_ROUND = @GNULIB_ROUND@ +GNULIB_ROUNDF = 
@GNULIB_ROUNDF@ +GNULIB_ROUNDL = @GNULIB_ROUNDL@ +GNULIB_RPMATCH = @GNULIB_RPMATCH@ +GNULIB_SETENV = @GNULIB_SETENV@ +GNULIB_SIGNBIT = @GNULIB_SIGNBIT@ +GNULIB_SLEEP = @GNULIB_SLEEP@ +GNULIB_STPCPY = @GNULIB_STPCPY@ +GNULIB_STPNCPY = @GNULIB_STPNCPY@ +GNULIB_STRCASESTR = @GNULIB_STRCASESTR@ +GNULIB_STRCHRNUL = @GNULIB_STRCHRNUL@ +GNULIB_STRDUP = @GNULIB_STRDUP@ +GNULIB_STRERROR = @GNULIB_STRERROR@ +GNULIB_STRNDUP = @GNULIB_STRNDUP@ +GNULIB_STRNLEN = @GNULIB_STRNLEN@ +GNULIB_STRPBRK = @GNULIB_STRPBRK@ +GNULIB_STRSEP = @GNULIB_STRSEP@ +GNULIB_STRSIGNAL = @GNULIB_STRSIGNAL@ +GNULIB_STRSTR = @GNULIB_STRSTR@ +GNULIB_STRTOD = @GNULIB_STRTOD@ +GNULIB_STRTOK_R = @GNULIB_STRTOK_R@ +GNULIB_STRTOLL = @GNULIB_STRTOLL@ +GNULIB_STRTOULL = @GNULIB_STRTOULL@ +GNULIB_STRVERSCMP = @GNULIB_STRVERSCMP@ +GNULIB_TRUNC = @GNULIB_TRUNC@ +GNULIB_TRUNCF = @GNULIB_TRUNCF@ +GNULIB_TRUNCL = @GNULIB_TRUNCL@ +GNULIB_UNISTD_H_SIGPIPE = @GNULIB_UNISTD_H_SIGPIPE@ +GNULIB_UNSETENV = @GNULIB_UNSETENV@ +GNULIB_WCRTOMB = @GNULIB_WCRTOMB@ +GNULIB_WCSNRTOMBS = @GNULIB_WCSNRTOMBS@ +GNULIB_WCSRTOMBS = @GNULIB_WCSRTOMBS@ +GNULIB_WCTOB = @GNULIB_WCTOB@ +GNULIB_WCWIDTH = @GNULIB_WCWIDTH@ +GNULIB_WRITE = @GNULIB_WRITE@ +GREP = @GREP@ +HAVE_ATOLL = @HAVE_ATOLL@ +HAVE_BTOWC = @HAVE_BTOWC@ +HAVE_CALLOC_POSIX = @HAVE_CALLOC_POSIX@ +HAVE_DECL_ACOSL = @HAVE_DECL_ACOSL@ +HAVE_DECL_ASINL = @HAVE_DECL_ASINL@ +HAVE_DECL_ATANL = @HAVE_DECL_ATANL@ +HAVE_DECL_COSL = @HAVE_DECL_COSL@ +HAVE_DECL_ENVIRON = @HAVE_DECL_ENVIRON@ +HAVE_DECL_EXPL = @HAVE_DECL_EXPL@ +HAVE_DECL_FREXPL = @HAVE_DECL_FREXPL@ +HAVE_DECL_GETLOADAVG = @HAVE_DECL_GETLOADAVG@ +HAVE_DECL_GETLOGIN_R = @HAVE_DECL_GETLOGIN_R@ +HAVE_DECL_LDEXPL = @HAVE_DECL_LDEXPL@ +HAVE_DECL_LOGL = @HAVE_DECL_LOGL@ +HAVE_DECL_MEMMEM = @HAVE_DECL_MEMMEM@ +HAVE_DECL_MEMRCHR = @HAVE_DECL_MEMRCHR@ +HAVE_DECL_SINL = @HAVE_DECL_SINL@ +HAVE_DECL_SQRTL = @HAVE_DECL_SQRTL@ +HAVE_DECL_STRDUP = @HAVE_DECL_STRDUP@ +HAVE_DECL_STRERROR = @HAVE_DECL_STRERROR@ +HAVE_DECL_STRNDUP = 
@HAVE_DECL_STRNDUP@ +HAVE_DECL_STRNLEN = @HAVE_DECL_STRNLEN@ +HAVE_DECL_STRSIGNAL = @HAVE_DECL_STRSIGNAL@ +HAVE_DECL_STRTOK_R = @HAVE_DECL_STRTOK_R@ +HAVE_DECL_TANL = @HAVE_DECL_TANL@ +HAVE_DECL_TRUNC = @HAVE_DECL_TRUNC@ +HAVE_DECL_TRUNCF = @HAVE_DECL_TRUNCF@ +HAVE_DECL_WCTOB = @HAVE_DECL_WCTOB@ +HAVE_DECL_WCWIDTH = @HAVE_DECL_WCWIDTH@ +HAVE_DUP2 = @HAVE_DUP2@ +HAVE_EUIDACCESS = @HAVE_EUIDACCESS@ +HAVE_FSYNC = @HAVE_FSYNC@ +HAVE_FTRUNCATE = @HAVE_FTRUNCATE@ +HAVE_GETDOMAINNAME = @HAVE_GETDOMAINNAME@ +HAVE_GETDTABLESIZE = @HAVE_GETDTABLESIZE@ +HAVE_GETHOSTNAME = @HAVE_GETHOSTNAME@ +HAVE_GETPAGESIZE = @HAVE_GETPAGESIZE@ +HAVE_GETSUBOPT = @HAVE_GETSUBOPT@ +HAVE_GETUSERSHELL = @HAVE_GETUSERSHELL@ +HAVE_GLOBAL_SYMBOL_PIPE = @HAVE_GLOBAL_SYMBOL_PIPE@ +HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ +HAVE_ISNAND = @HAVE_ISNAND@ +HAVE_ISNANF = @HAVE_ISNANF@ +HAVE_ISNANL = @HAVE_ISNANL@ +HAVE_ISWCNTRL = @HAVE_ISWCNTRL@ +HAVE_LINK = @HAVE_LINK@ +HAVE_LONG_LONG_INT = @HAVE_LONG_LONG_INT@ +HAVE_MALLOC_POSIX = @HAVE_MALLOC_POSIX@ +HAVE_MBRLEN = @HAVE_MBRLEN@ +HAVE_MBRTOWC = @HAVE_MBRTOWC@ +HAVE_MBSINIT = @HAVE_MBSINIT@ +HAVE_MBSNRTOWCS = @HAVE_MBSNRTOWCS@ +HAVE_MBSRTOWCS = @HAVE_MBSRTOWCS@ +HAVE_MEMPCPY = @HAVE_MEMPCPY@ +HAVE_MKDTEMP = @HAVE_MKDTEMP@ +HAVE_OS_H = @HAVE_OS_H@ +HAVE_RANDOM_H = @HAVE_RANDOM_H@ +HAVE_RANDOM_R = @HAVE_RANDOM_R@ +HAVE_RAWMEMCHR = @HAVE_RAWMEMCHR@ +HAVE_READLINK = @HAVE_READLINK@ +HAVE_REALLOC_POSIX = @HAVE_REALLOC_POSIX@ +HAVE_RPMATCH = @HAVE_RPMATCH@ +HAVE_SETENV = @HAVE_SETENV@ +HAVE_SIGNED_SIG_ATOMIC_T = @HAVE_SIGNED_SIG_ATOMIC_T@ +HAVE_SIGNED_WCHAR_T = @HAVE_SIGNED_WCHAR_T@ +HAVE_SIGNED_WINT_T = @HAVE_SIGNED_WINT_T@ +HAVE_SLEEP = @HAVE_SLEEP@ +HAVE_STDINT_H = @HAVE_STDINT_H@ +HAVE_STPCPY = @HAVE_STPCPY@ +HAVE_STPNCPY = @HAVE_STPNCPY@ +HAVE_STRCASESTR = @HAVE_STRCASESTR@ +HAVE_STRCHRNUL = @HAVE_STRCHRNUL@ +HAVE_STRNDUP = @HAVE_STRNDUP@ +HAVE_STRPBRK = @HAVE_STRPBRK@ +HAVE_STRSEP = @HAVE_STRSEP@ +HAVE_STRTOD = @HAVE_STRTOD@ +HAVE_STRTOLL = @HAVE_STRTOLL@ 
+HAVE_STRTOULL = @HAVE_STRTOULL@ +HAVE_STRUCT_RANDOM_DATA = @HAVE_STRUCT_RANDOM_DATA@ +HAVE_STRVERSCMP = @HAVE_STRVERSCMP@ +HAVE_SYS_BITYPES_H = @HAVE_SYS_BITYPES_H@ +HAVE_SYS_INTTYPES_H = @HAVE_SYS_INTTYPES_H@ +HAVE_SYS_LOADAVG_H = @HAVE_SYS_LOADAVG_H@ +HAVE_SYS_PARAM_H = @HAVE_SYS_PARAM_H@ +HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HAVE_UNISTD_H = @HAVE_UNISTD_H@ +HAVE_UNSETENV = @HAVE_UNSETENV@ +HAVE_UNSIGNED_LONG_LONG_INT = @HAVE_UNSIGNED_LONG_LONG_INT@ +HAVE_WCHAR_H = @HAVE_WCHAR_H@ +HAVE_WCRTOMB = @HAVE_WCRTOMB@ +HAVE_WCSNRTOMBS = @HAVE_WCSNRTOMBS@ +HAVE_WCSRTOMBS = @HAVE_WCSRTOMBS@ +HAVE_WCTYPE_H = @HAVE_WCTYPE_H@ +HAVE_WINT_T = @HAVE_WINT_T@ +HAVE__BOOL = @HAVE__BOOL@ +HEXVERSION = @HEXVERSION@ +ICONV_H = @ICONV_H@ +INCLUDE_NEXT = @INCLUDE_NEXT@ +INCLUDE_NEXT_AS_FIRST_DIRECTIVE = @INCLUDE_NEXT_AS_FIRST_DIRECTIVE@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTESTS_LIBDEPS = @LIBTESTS_LIBDEPS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LOCALCHARSET_TESTS_ENVIRONMENT = @LOCALCHARSET_TESTS_ENVIRONMENT@ +LOCALE_FR = @LOCALE_FR@ +LOCALE_FR_UTF8 = @LOCALE_FR_UTF8@ +LOCALE_H = @LOCALE_H@ +LOCALE_JA = @LOCALE_JA@ +LOCALE_TR_UTF8 = @LOCALE_TR_UTF8@ +LOCALE_ZH_CN = @LOCALE_ZH_CN@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = env LANG= LC_MESSAGES= LC_ALL= LANGUAGE= @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NAMESPACING = @NAMESPACING@ +NEXT_AS_FIRST_DIRECTIVE_ERRNO_H = @NEXT_AS_FIRST_DIRECTIVE_ERRNO_H@ +NEXT_AS_FIRST_DIRECTIVE_FLOAT_H = @NEXT_AS_FIRST_DIRECTIVE_FLOAT_H@ +NEXT_AS_FIRST_DIRECTIVE_ICONV_H = @NEXT_AS_FIRST_DIRECTIVE_ICONV_H@ +NEXT_AS_FIRST_DIRECTIVE_LOCALE_H = @NEXT_AS_FIRST_DIRECTIVE_LOCALE_H@ +NEXT_AS_FIRST_DIRECTIVE_MATH_H = 
@NEXT_AS_FIRST_DIRECTIVE_MATH_H@ +NEXT_AS_FIRST_DIRECTIVE_STDINT_H = @NEXT_AS_FIRST_DIRECTIVE_STDINT_H@ +NEXT_AS_FIRST_DIRECTIVE_STDLIB_H = @NEXT_AS_FIRST_DIRECTIVE_STDLIB_H@ +NEXT_AS_FIRST_DIRECTIVE_STRING_H = @NEXT_AS_FIRST_DIRECTIVE_STRING_H@ +NEXT_AS_FIRST_DIRECTIVE_UNISTD_H = @NEXT_AS_FIRST_DIRECTIVE_UNISTD_H@ +NEXT_AS_FIRST_DIRECTIVE_WCHAR_H = @NEXT_AS_FIRST_DIRECTIVE_WCHAR_H@ +NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H = @NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H@ +NEXT_ERRNO_H = @NEXT_ERRNO_H@ +NEXT_FLOAT_H = @NEXT_FLOAT_H@ +NEXT_ICONV_H = @NEXT_ICONV_H@ +NEXT_LOCALE_H = @NEXT_LOCALE_H@ +NEXT_MATH_H = @NEXT_MATH_H@ +NEXT_STDINT_H = @NEXT_STDINT_H@ +NEXT_STDLIB_H = @NEXT_STDLIB_H@ +NEXT_STRING_H = @NEXT_STRING_H@ +NEXT_UNISTD_H = @NEXT_UNISTD_H@ +NEXT_WCHAR_H = @NEXT_WCHAR_H@ +NEXT_WCTYPE_H = @NEXT_WCTYPE_H@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PRAGMA_SYSTEM_HEADER = @PRAGMA_SYSTEM_HEADER@ +PTRDIFF_T_SUFFIX = @PTRDIFF_T_SUFFIX@ +RANLIB = @RANLIB@ +RC = @RC@ +RELOCATABLE = @RELOCATABLE@ +REPLACE_BTOWC = @REPLACE_BTOWC@ +REPLACE_CEILF = @REPLACE_CEILF@ +REPLACE_CEILL = @REPLACE_CEILL@ +REPLACE_CHOWN = @REPLACE_CHOWN@ +REPLACE_CLOSE = @REPLACE_CLOSE@ +REPLACE_FCHDIR = @REPLACE_FCHDIR@ +REPLACE_FLOORF = @REPLACE_FLOORF@ +REPLACE_FLOORL = @REPLACE_FLOORL@ +REPLACE_FREXP = @REPLACE_FREXP@ +REPLACE_FREXPL = @REPLACE_FREXPL@ +REPLACE_GETCWD = @REPLACE_GETCWD@ +REPLACE_GETPAGESIZE = @REPLACE_GETPAGESIZE@ +REPLACE_HUGE_VAL = @REPLACE_HUGE_VAL@ +REPLACE_ICONV = @REPLACE_ICONV@ +REPLACE_ICONV_OPEN = @REPLACE_ICONV_OPEN@ +REPLACE_ICONV_UTF = @REPLACE_ICONV_UTF@ +REPLACE_ISFINITE = @REPLACE_ISFINITE@ +REPLACE_ISINF = @REPLACE_ISINF@ +REPLACE_ISNAN = @REPLACE_ISNAN@ 
+REPLACE_ISWCNTRL = @REPLACE_ISWCNTRL@ +REPLACE_LCHOWN = @REPLACE_LCHOWN@ +REPLACE_LDEXPL = @REPLACE_LDEXPL@ +REPLACE_LSEEK = @REPLACE_LSEEK@ +REPLACE_MBRLEN = @REPLACE_MBRLEN@ +REPLACE_MBRTOWC = @REPLACE_MBRTOWC@ +REPLACE_MBSINIT = @REPLACE_MBSINIT@ +REPLACE_MBSNRTOWCS = @REPLACE_MBSNRTOWCS@ +REPLACE_MBSRTOWCS = @REPLACE_MBSRTOWCS@ +REPLACE_MBSTATE_T = @REPLACE_MBSTATE_T@ +REPLACE_MEMCHR = @REPLACE_MEMCHR@ +REPLACE_MEMMEM = @REPLACE_MEMMEM@ +REPLACE_MKSTEMP = @REPLACE_MKSTEMP@ +REPLACE_NAN = @REPLACE_NAN@ +REPLACE_PUTENV = @REPLACE_PUTENV@ +REPLACE_ROUND = @REPLACE_ROUND@ +REPLACE_ROUNDF = @REPLACE_ROUNDF@ +REPLACE_ROUNDL = @REPLACE_ROUNDL@ +REPLACE_SIGNBIT = @REPLACE_SIGNBIT@ +REPLACE_SIGNBIT_USING_GCC = @REPLACE_SIGNBIT_USING_GCC@ +REPLACE_STRCASESTR = @REPLACE_STRCASESTR@ +REPLACE_STRDUP = @REPLACE_STRDUP@ +REPLACE_STRERROR = @REPLACE_STRERROR@ +REPLACE_STRSIGNAL = @REPLACE_STRSIGNAL@ +REPLACE_STRSTR = @REPLACE_STRSTR@ +REPLACE_STRTOD = @REPLACE_STRTOD@ +REPLACE_TRUNCL = @REPLACE_TRUNCL@ +REPLACE_WCRTOMB = @REPLACE_WCRTOMB@ +REPLACE_WCSNRTOMBS = @REPLACE_WCSNRTOMBS@ +REPLACE_WCSRTOMBS = @REPLACE_WCSRTOMBS@ +REPLACE_WCTOB = @REPLACE_WCTOB@ +REPLACE_WCWIDTH = @REPLACE_WCWIDTH@ +REPLACE_WRITE = @REPLACE_WRITE@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@ +SIZE_T_SUFFIX = @SIZE_T_SUFFIX@ +STDBOOL_H = @STDBOOL_H@ +STDINT_H = @STDINT_H@ +STRIP = @STRIP@ + +# Documentation in DVI format. 
+ +# Override of automake's definition: +#TEXI2DVI = @TEXI2DVI@ +TEXI2DVI = @TEXI2DVI@ $(TEXINCLUDES) +UNISTD_H_HAVE_WINSOCK2_H = @UNISTD_H_HAVE_WINSOCK2_H@ +VERSION = @VERSION@ +VOID_UNSETENV = @VOID_UNSETENV@ +WCHAR_H = @WCHAR_H@ +WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@ +WCTYPE_H = @WCTYPE_H@ +WINT_T_SUFFIX = @WINT_T_SUFFIX@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +gl_LIBOBJS = @gl_LIBOBJS@ +gl_LTLIBOBJS = @gl_LTLIBOBJS@ +gltests_LIBOBJS = @gltests_LIBOBJS@ +gltests_LTLIBOBJS = @gltests_LTLIBOBJS@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = 1.5 gnits +EXTRA_DIST = +MOSTLYCLEANFILES = + +# List of -I options referring to directories that contain texinfo sources +# used by this directory. 
+# Should contain at least one -I option, to work around a bug in texi2dvi 1.13,
+# see .
+# NOTE(review): the link that followed "see" appears to have been lost when
+# this patch was extracted -- restore it from the upstream doc/Makefile.am.
+TEXINCLUDES = -I .
+MAKEINFOFLAGS = $(TEXINCLUDES) --no-split
+info_TEXINFOS = libunistring.texi
+# List of texinfo sources @included by libunistring.texi, excluding version.texi.
+libunistring_TEXINFOS = \
+  unitypes.texi unistr.texi uniconv.texi unistdio.texi uniname.texi \
+  unictype.texi uniwidth.texi uniwbrk.texi unilbrk.texi uninorm.texi \
+  unicase.texi uniregex.texi \
+  gpl.texi lgpl.texi fdl.texi
+
+
+# CLEANFILES: libunistring.{dvi,ps,pdf,html} are already known to automake.
+MAINTAINERCLEANFILES = libunistring_*.html
+
+# Documentation in Portable Document Format.
+
+# Override of automake's definition:
+#TEXI2PDF = @TEXI2DVI@ --pdf
+TEXI2PDF = @TEXI2DVI@ --pdf $(TEXINCLUDES)
+
+# Documentation in HTML format.
+TEXI2HTML = @PERL@ $(top_srcdir)/build-aux/texi2html
+# Default goal; all the work is delegated to all-am (defined further below).
+all: all-am
+
+# Clear the suffix list, then register only the suffixes used by the suffix
+# rules in this file.
+.SUFFIXES:
+.SUFFIXES: .dvi .html .info .pdf .ps .texi
+# Regenerate Makefile.in.  If one of the newer prerequisites ($?) is listed in
+# $(am__configure_deps), run the top-level am--refresh target instead;
+# otherwise rerun $(AUTOMAKE) for doc/Makefile only.
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnits doc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnits doc/Makefile
+.PRECIOUS: Makefile
+# Regenerate this Makefile: through am--refresh when config.status is among
+# the newer prerequisites, otherwise by rerunning config.status for this file.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+# The configure machinery itself is always rebuilt from the top-level
+# directory via am--refresh.
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+# Build an Info file.  When $(MAKEINFO) is runnable, existing Info output in
+# $(srcdir) is first moved into a scratch backup directory; if the $(MAKEINFO)
+# run then fails, the backup is moved back.  The backup directory is removed
+# in both cases and the recipe exits with makeinfo's status.
+.texi.info:
+	restore=: && backupdir="$(am__leading_dot)am$$$$" && \
+	am__cwd=`pwd` && $(am__cd) $(srcdir) && \
+	rm -rf $$backupdir && mkdir $$backupdir && \
+	if ($(MAKEINFO) --version) >/dev/null 2>&1; then \
+	  for f in $@ $@-[0-9] $@-[0-9][0-9] $(@:.info=).i[0-9] $(@:.info=).i[0-9][0-9]; do \
+	    if test -f $$f; then mv $$f $$backupdir; restore=mv; else :; fi; \
+	  done; \
+	else :; fi && \
+	cd "$$am__cwd"; \
+	if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \
+	 -o $@ $<; \
+	then \
+	  rc=0; \
+	  $(am__cd) $(srcdir); \
+	else \
+	  rc=$$?; \
+	  $(am__cd) $(srcdir) && \
+	  $$restore $$backupdir/* `echo "./$@" | sed 's|[^/]*$$||'`; \
+	fi; \
+	rm -rf $$backupdir; exit $$rc
+
+# DVI and PDF output: run $(TEXI2DVI) / $(TEXI2PDF) with TEXINPUTS extended
+# and the makeinfo command line passed through the MAKEINFO environment
+# variable.
+.texi.dvi:
+	TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
+	MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \
+	$(TEXI2DVI) $<
+
+.texi.pdf:
+	TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
+	MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \
+	$(TEXI2PDF) $<
+
+# HTML output: generate under a temporary .htp name and rename it to the
+# target on success; on failure remove whatever was produced and fail.
+.texi.html:
+	rm -rf $(@:.html=.htp)
+	if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \
+	 -o $(@:.html=.htp) $<; \
+	then \
+	  rm -rf $@; \
+	  if test ! -d $(@:.html=.htp) && test -d $(@:.html=); then \
+	    mv $(@:.html=) $@; else mv $(@:.html=.htp) $@; fi; \
+	else \
+	  if test ! -d $(@:.html=.htp) && test -d $(@:.html=); then \
+	    rm -rf $(@:.html=); else rm -Rf $(@:.html=.htp) $@; fi; \
+	  exit 1; \
+	fi
+# Every output format depends on the main source, version.texi and all
+# @included files; version.texi in turn depends on the stamp-vti stamp.
+$(srcdir)/libunistring.info: libunistring.texi $(srcdir)/version.texi $(libunistring_TEXINFOS)
+libunistring.dvi: libunistring.texi $(srcdir)/version.texi $(libunistring_TEXINFOS)
+libunistring.pdf: libunistring.texi $(srcdir)/version.texi $(libunistring_TEXINFOS)
+$(srcdir)/version.texi: $(srcdir)/stamp-vti
+
+mostlyclean-vti:
+	-rm -f vti.tmp
+
+maintainer-clean-vti:
+	-rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi
+.dvi.ps:
+	TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
+	$(DVIPS) -o $@ $<
+
+# The uninstall-<fmt>-am rules remove each file listed in the corresponding
+# list variable from its installation directory below $(DESTDIR); the list is
+# emptied when the directory variable is empty.
+uninstall-dvi-am:
+	@$(NORMAL_UNINSTALL)
+	@list='$(DVIS)'; test -n "$(dvidir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " rm -f '$(DESTDIR)$(dvidir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(dvidir)/$$f"; \
+	done
+
+uninstall-html-am:
+	@$(NORMAL_UNINSTALL)
+	@list='$(HTMLS)'; test -n "$(htmldir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " rm -rf '$(DESTDIR)$(htmldir)/$$f'"; \
+	  rm -rf "$(DESTDIR)$(htmldir)/$$f"; \
+	done
+
+# Two steps: first deregister each Info file with install-info (skipped when
+# the info directory is missing, install-info is unavailable, or its version
+# banner mentions "debian"), then delete the Info files and their split parts.
+uninstall-info-am:
+	@$(PRE_UNINSTALL)
+	@if test -d '$(DESTDIR)$(infodir)' && \
+	    (install-info --version && \
+	     install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+	  list='$(INFO_DEPS)'; \
+	  for file in $$list; do \
+	    relfile=`echo "$$file" | sed 's|^.*/||'`; \
+	    echo " install-info --info-dir='$(DESTDIR)$(infodir)' --remove '$(DESTDIR)$(infodir)/$$relfile'"; \
+	    if install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$$relfile"; \
+	    then :; else test ! -f "$(DESTDIR)$(infodir)/$$relfile" || exit 1; fi; \
+	  done; \
+	else :; fi
+	@$(NORMAL_UNINSTALL)
+	@list='$(INFO_DEPS)'; \
+	for file in $$list; do \
+	  relfile=`echo "$$file" | sed 's|^.*/||'`; \
+	  relfile_i=`echo "$$relfile" | sed 's|\.info$$||;s|$$|.i|'`; \
+	  (if test -d "$(DESTDIR)$(infodir)" && cd "$(DESTDIR)$(infodir)"; then \
+	     echo " cd '$(DESTDIR)$(infodir)' && rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]"; \
+	     rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]; \
+	   else :; fi); \
+	done
+
+uninstall-pdf-am:
+	@$(NORMAL_UNINSTALL)
+	@list='$(PDFS)'; test -n "$(pdfdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " rm -f '$(DESTDIR)$(pdfdir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(pdfdir)/$$f"; \
+	done
+
+uninstall-ps-am:
+	@$(NORMAL_UNINSTALL)
+	@list='$(PSS)'; test -n "$(psdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " rm -f '$(DESTDIR)$(psdir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(psdir)/$$f"; \
+	done
+
+# Copy each Info file and its split parts into $(distdir), taking it from the
+# current directory if present there and from $(srcdir) otherwise; files that
+# already exist in $(distdir) are left alone.
+dist-info: $(INFO_DEPS)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+	list='$(INFO_DEPS)'; \
+	for base in $$list; do \
+	  case $$base in \
+	    $(srcdir)/*) base=`echo "$$base" | sed "s|^$$srcdirstrip/||"`;; \
+	  esac; \
+	  if test -f $$base; then d=.; else d=$(srcdir); fi; \
+	  base_i=`echo "$$base" | sed 's|\.info$$||;s|$$|.i|'`; \
+	  for file in $$d/$$base $$d/$$base-[0-9] $$d/$$base-[0-9][0-9] $$d/$$base_i[0-9] $$d/$$base_i[0-9][0-9]; do \
+	    if test -f $$file; then \
+	      relfile=`expr "$$file" : "$$d/\(.*\)"`; \
+	      test -f "$(distdir)/$$relfile" || \
+	        cp -p $$file "$(distdir)/$$relfile"; \
+	    else :; fi; \
+	  done; \
+	done
+
+mostlyclean-aminfo:
+	-rm -rf libunistring.am libunistring.aux libunistring.cp libunistring.cps \
+	  libunistring.fn libunistring.ky libunistring.kys \
+	  libunistring.log libunistring.pg libunistring.pgs \
+	  libunistring.tmp libunistring.toc libunistring.tp \
+	  libunistring.vr libunistring.vrs
+
+clean-aminfo:
+	-test -z "libunistring.dvi libunistring.pdf libunistring.ps libunistring.html" \
+	|| rm -rf libunistring.dvi libunistring.pdf libunistring.ps libunistring.html
+
+maintainer-clean-aminfo:
+	@list='$(INFO_DEPS)'; for i in $$list; do \
+	  i_i=`echo "$$i" | sed 's|\.info$$||;s|$$|.i|'`; \
+	  echo " rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]"; \
+	  rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]; \
+	done
+# The tag targets have no recipe in this directory.
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+# Copy every file in $(DISTFILES) into $(distdir), creating the needed
+# subdirectories first, then run dist-info and dist-hook in a sub-make.
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	$(MAKE) $(AM_MAKEFLAGS) \
+	  top_distdir="$(top_distdir)" distdir="$(distdir)" \
+	  dist-info dist-hook
+check-am: all-am
+check: check-am
+all-am: Makefile $(INFO_DEPS) all-local
+installdirs: installdirs-local
+	for dir in "$(DESTDIR)$(infodir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+	-test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+clean: clean-am
+
+clean-am: clean-aminfo clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am: $(DVIS)
+
+html: html-am
+
+html-am: $(HTMLS) html-local
+
+info: info-am
+
+info-am: $(INFO_DEPS)
+
+install-data-am: install-data-local install-info-am
+
+install-dvi: install-dvi-am
+
+# The install-<fmt>-am rules create the destination directory below
+# $(DESTDIR) and install the listed files with $(INSTALL_DATA), looking for
+# each file in the current directory first and in $(srcdir) otherwise.
+install-dvi-am: $(DVIS)
+	@$(NORMAL_INSTALL)
+	test -z "$(dvidir)" || $(MKDIR_P) "$(DESTDIR)$(dvidir)"
+	@list='$(DVIS)'; test -n "$(dvidir)" || list=; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(dvidir)'"; \
+	  $(INSTALL_DATA) $$files "$(DESTDIR)$(dvidir)" || exit $$?; \
+	done
+install-exec-am:
+
+# Entries of $(HTMLS) that are directories are installed file by file into a
+# directory of the same name; plain files are collected in list2 and
+# installed afterwards.
+install-html-am: $(HTMLS)
+	@$(NORMAL_INSTALL)
+	test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
+	@list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \
+	for p in $$list; do \
+	  if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  $(am__strip_dir) \
+	  if test -d "$$d$$p"; then \
+	    echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \
+	    $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \
+	    echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \
+	    $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \
+	  else \
+	    list2="$$list2 $$d$$p"; \
+	  fi; \
+	done; \
+	test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \
+	  $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \
+	done; }
+install-info: install-info-am
+
+# Install the Info files and their split parts, then register each one with
+# install-info (same availability / "debian" check as in uninstall-info-am;
+# a failing install-info call is ignored here).
+install-info-am: $(INFO_DEPS)
+	@$(NORMAL_INSTALL)
+	test -z "$(infodir)" || $(MKDIR_P) "$(DESTDIR)$(infodir)"
+	@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+	list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
+	for file in $$list; do \
+	  case $$file in \
+	    $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+	  esac; \
+	  if test -f $$file; then d=.; else d=$(srcdir); fi; \
+	  file_i=`echo "$$file" | sed 's|\.info$$||;s|$$|.i|'`; \
+	  for ifile in $$d/$$file $$d/$$file-[0-9] $$d/$$file-[0-9][0-9] \
+	               $$d/$$file_i[0-9] $$d/$$file_i[0-9][0-9] ; do \
+	    if test -f $$ifile; then \
+	      echo "$$ifile"; \
+	    else : ; fi; \
+	  done; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \
+	  $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done
+	@$(POST_INSTALL)
+	@if (install-info --version && \
+	     install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+	  list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
+	  for file in $$list; do \
+	    relfile=`echo "$$file" | sed 's|^.*/||'`; \
+	    echo " install-info --info-dir='$(DESTDIR)$(infodir)' '$(DESTDIR)$(infodir)/$$relfile'";\
+	    install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$$relfile" || :;\
+	  done; \
+	else : ; fi
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am: $(PDFS)
+	@$(NORMAL_INSTALL)
+	test -z "$(pdfdir)" || $(MKDIR_P) "$(DESTDIR)$(pdfdir)"
+	@list='$(PDFS)'; test -n "$(pdfdir)" || list=; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pdfdir)'"; \
+	  $(INSTALL_DATA) $$files "$(DESTDIR)$(pdfdir)" || exit $$?; done
+install-ps: install-ps-am
+
+install-ps-am: $(PSS)
+	@$(NORMAL_INSTALL)
+	test -z "$(psdir)" || $(MKDIR_P) "$(DESTDIR)$(psdir)"
+	@list='$(PSS)'; test -n "$(psdir)" || list=; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(psdir)'"; \
+	  $(INSTALL_DATA) $$files "$(DESTDIR)$(psdir)" || exit $$?; done
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-aminfo \
+	maintainer-clean-generic maintainer-clean-vti
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-aminfo mostlyclean-generic \
+	mostlyclean-libtool mostlyclean-vti
+
+pdf: pdf-am
+
+pdf-am: $(PDFS)
+
+ps: ps-am
+
+ps-am: $(PSS)
+
+uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \
+	uninstall-local uninstall-pdf-am uninstall-ps-am
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am all-local check check-am clean clean-aminfo \
+	clean-generic clean-libtool dist-hook dist-info distclean \
+	distclean-generic distclean-libtool distdir dvi dvi-am html \
+	html-am html-local info info-am install install-am \
+	install-data install-data-am install-data-local install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	installdirs-local maintainer-clean maintainer-clean-aminfo \
+	maintainer-clean-generic maintainer-clean-vti mostlyclean \
+	mostlyclean-aminfo mostlyclean-generic mostlyclean-libtool \
+	mostlyclean-vti pdf pdf-am ps ps-am uninstall uninstall-am \
+	uninstall-dvi-am uninstall-html-am uninstall-info-am \
+	uninstall-local uninstall-pdf-am uninstall-ps-am
+
+
+# The dependencies of stamp-vti generated by automake are incomplete.
+# So we have to duplicate the entire rule which would otherwise be generated
+# by automake.
+# Regenerate version.texi and its stamp.  The recipe writes four @set lines
+# (UPDATED, UPDATED-MONTH, EDITION, VERSION) to vti.tmp, using the words
+# printed by mdate-sh for libunistring.texi and $(VERSION); version.texi is
+# overwritten only when its content would change, and the stamp is then
+# refreshed as a copy of version.texi.
+$(srcdir)/stamp-vti: $(info_TEXINFOS) $(libunistring_TEXINFOS) $(top_srcdir)/version.sh
+	(dir=.; test -f ./libunistring.texi || dir=$(srcdir); \
+	set `$(SHELL) $(top_srcdir)/build-aux/mdate-sh $$dir/libunistring.texi`; \
+	echo "@set UPDATED $$1 $$2 $$3"; \
+	echo "@set UPDATED-MONTH $$2 $$3"; \
+	echo "@set EDITION $(VERSION)"; \
+	echo "@set VERSION $(VERSION)") > vti.tmp
+	cmp -s vti.tmp $(srcdir)/version.texi \
+	  || (echo "Updating $(srcdir)/version.texi"; \
+	      cp vti.tmp $(srcdir)/version.texi)
+	rm -f vti.tmp
+	cp $(srcdir)/version.texi $@
+
+# We distribute only the split HTML documentation.
+# The user can generate the others, via
+#   make libunistring.ps
+#   make libunistring.pdf
+#   make libunistring.html
+
+# Hook the split-HTML targets into automake's standard *-local / dist-hook
+# extension points.
+all-local: html-local
+install-data-local: install-html
+installdirs-local: installdirs-html
+uninstall-local: uninstall-html
+dist-hook: dist-html
+
+html-local: html-split
+# Override of automake's definition. The HTML files we want to distribute are
+# not the ones that automake knows about, and we cannot define HTMLS to a value
+# containing wildcards.
+install-html: install-html-split
+	@:
+uninstall-html: uninstall-html-split
+dist-html: dist-html-split
+
+# In the hand-written rules below, installation paths are quoted so that a
+# DESTDIR or directory name containing whitespace still works, directories are
+# created with $(MKDIR_P) like in the automake-generated rules of this file,
+# and files are removed with a literal "rm -f": $(RM) is only predefined by
+# GNU make and would expand to nothing under other make implementations.
+
+# The install-dvi target is already defined by automake.
+
+installdirs-dvi:
+	$(MKDIR_P) "$(DESTDIR)$(dvidir)"
+
+uninstall-dvi:
+	rm -f "$(DESTDIR)$(dvidir)/libunistring.dvi"
+
+# Convert the DVI file, taken from the build directory if present there and
+# from $(srcdir) otherwise.
+libunistring.ps: libunistring.dvi
+	$(DVIPS) -o $@ `if test -f libunistring.dvi; then echo libunistring.dvi; else echo $(srcdir)/libunistring.dvi; fi`
+
+# The install-ps target is already defined by automake.
+
+installdirs-ps:
+	$(MKDIR_P) "$(DESTDIR)$(psdir)"
+
+uninstall-ps:
+	rm -f "$(DESTDIR)$(psdir)/libunistring.ps"
+
+# The install-pdf target is already defined by automake.
+
+installdirs-pdf:
+	$(MKDIR_P) "$(DESTDIR)$(pdfdir)"
+
+uninstall-pdf:
+	rm -f "$(DESTDIR)$(pdfdir)/libunistring.pdf"
+
+html-monolithic: libunistring.html
+html-split: libunistring_toc.html
+
+# Override of automake's definition.
+# We want to use texi2html, not makeinfo --html.
+# Single-file HTML manual.  The source is taken from the build directory if
+# present there and from $(srcdir) otherwise.
+libunistring.html: libunistring.texi version.texi $(libunistring_TEXINFOS)
+	$(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -monolithic `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi`
+
+# Split (one file per chapter) HTML manual.
+# - When @PERL@ is the "missing perl" wrapper, a failing texi2html run ends
+#   the recipe successfully (exit 0) without touching existing HTML files.
+# - Otherwise stale libunistring_*.html files are removed before regenerating.
+# After a successful run the files are moved out of the libunistring/
+# subdirectory; the table of contents is renamed to libunistring_toc.html.
+# The move steps are chained with && so that a failing mv is reported instead
+# of being masked by the exit status of the final rmdir.
+libunistring_toc.html: libunistring.texi version.texi $(libunistring_TEXINFOS)
+	case "@PERL@" in \
+	  *"/missing perl") \
+	     $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` || exit 0 ;; \
+	  *) rm -f libunistring_*.html ; \
+	     $(TEXI2HTML) $(TEXINCLUDES) -no-sec-nav -no-menu -toc-links -number -split_chapter `if test -f libunistring.texi; then echo libunistring.texi; else echo $(srcdir)/libunistring.texi; fi` ;; \
+	esac \
+	  && mv libunistring/libunistring.html libunistring_toc.html \
+	  && mv libunistring/*.html . \
+	  && rmdir libunistring
+
+# In the rules below, destination paths are quoted so that a DESTDIR or
+# directory name containing whitespace still works, directories are created
+# with $(MKDIR_P) like in the automake-generated rules of this file, and files
+# are removed with a literal "rm -f" because $(RM) is only predefined by
+# GNU make.
+
+install-html-monolithic: libunistring.html
+	$(MKDIR_P) "$(DESTDIR)$(htmldir)"
+	$(INSTALL_DATA) `if test -f libunistring.html; then echo .; else echo $(srcdir); fi`/libunistring.html "$(DESTDIR)$(htmldir)/libunistring.html"
+
+# Install every libunistring_*.html file; stop at the first file that cannot
+# be installed instead of silently continuing with the next one.
+install-html-split: libunistring_toc.html
+	$(MKDIR_P) "$(DESTDIR)$(htmldir)"
+	for file in `if test -f libunistring_toc.html; then echo .; else echo $(srcdir); fi`/libunistring_*.html; do \
+	  $(INSTALL_DATA) $$file "$(DESTDIR)$(htmldir)"/`basename $$file` || exit 1; \
+	done
+
+installdirs-html:
+	$(MKDIR_P) "$(DESTDIR)$(htmldir)"
+
+uninstall-html-monolithic:
+	rm -f "$(DESTDIR)$(htmldir)/libunistring.html"
+
+# The wildcard stays outside the quotes so that the shell still expands it.
+uninstall-html-split:
+	rm -f "$(DESTDIR)$(htmldir)"/libunistring_*.html
+
+dist-html-monolithic:
+	$(MKDIR_P) "$(distdir)/"
+	file=libunistring.html; \
+	if test -f $$file; then d=.; else d=$(srcdir); fi; \
+	cp -p $$d/$$file "$(distdir)/$$file" || exit 1
+
+# We would like to put libunistring_*.html into EXTRA_DIST, but it doesn't work.
+# Copy all split HTML files into the distribution directory.  They are taken
+# from the build directory when libunistring_toc.html exists there and from
+# $(srcdir) otherwise.  The $(distdir) paths are quoted and the directory is
+# created with $(MKDIR_P), like in the automake-generated distdir rule.
+dist-html-split:
+	$(MKDIR_P) "$(distdir)/"
+	file=libunistring_toc.html; \
+	if test -f $$file; then d=.; else d=$(srcdir); fi; \
+	for file in `cd $$d && echo libunistring_*.html`; do \
+	  cp -p $$d/$$file "$(distdir)/$$file" || exit 1; \
+	done
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/fdl.texi b/doc/fdl.texi
new file mode 100644
index 00000000..8805f1a4
--- /dev/null
+++ b/doc/fdl.texi
@@ -0,0 +1,506 @@
+@c The GNU Free Documentation License.
+@center Version 1.3, 3 November 2008
+
+@c This file is intended to be included within another document,
+@c hence no sectioning command or @node.
+
+@display
+Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+@uref{http://fsf.org/}
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@enumerate 0
+@item
+PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document @dfn{free} in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of ``copyleft'', which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does.
But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +@item +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. + +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. + +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. 
If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. + +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. + +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +@sc{ascii} without markup, Texinfo input format, La@TeX{} input +format, @acronym{SGML} or @acronym{XML} using a publicly available +@acronym{DTD}, and standard-conforming simple @acronym{HTML}, +PostScript or @acronym{PDF} designed for human modification. Examples +of transparent image formats include @acronym{PNG}, @acronym{XCF} and +@acronym{JPG}. 
Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, @acronym{SGML} or +@acronym{XML} for which the @acronym{DTD} and/or processing tools are +not generally available, and the machine-generated @acronym{HTML}, +PostScript or @acronym{PDF} produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +The ``publisher'' means any person or entity that distributes copies +of the Document to the public. + +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. 
+ +@item +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + +@item +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. 
+ +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. + +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + +@item +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +@enumerate A +@item +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. 
+ +@item +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +@item +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +@item +Preserve all the copyright notices of the Document. + +@item +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +@item +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +@item +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +@item +Include an unaltered copy of this License. + +@item +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +@item +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. 
+ +@item +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +@item +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +@item +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. + +@item +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. + +@item +Preserve any Warranty Disclaimers. +@end enumerate + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. 
If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. + +@item +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. 
You must delete all +sections Entitled ``Endorsements.'' + +@item +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. + +@item +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. + +@item +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. 
+Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. + +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. + +@item +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. + +@item +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +@uref{http://www.gnu.org/copyleft/}. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. + +@item +RELICENSING + +``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the +site means any set of copyrightable works thus published on the MMC +site. 
+ +``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. + +``Incorporate'' means to publish or republish a Document, in whole or +in part, as part of another Document. + +An MMC is ``eligible for relicensing'' if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. + +@end enumerate + +@page +@heading ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +@smallexample +@group + Copyright (C) @var{year} @var{your name}. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. 
+@end group +@end smallexample + +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with@dots{}Texts.'' line with this: + +@smallexample +@group + with the Invariant Sections being @var{list their titles}, with + the Front-Cover Texts being @var{list}, and with the Back-Cover Texts + being @var{list}. +@end group +@end smallexample + +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. + +@c Local Variables: +@c ispell-local-pdict: "ispell-dict" +@c End: + diff --git a/doc/gpl.texi b/doc/gpl.texi new file mode 100644 index 00000000..97a17e19 --- /dev/null +++ b/doc/gpl.texi @@ -0,0 +1,717 @@ +@c The GNU General Public License. +@center Version 3, 29 June 2007 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2007 Free Software Foundation, Inc. @url{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +@end display + +@heading Preamble + +The GNU General Public License is a free, copyleft license for +software and other kinds of works. + +The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom +to share and change all versions of a program---to make sure it remains +free software for all its users. We, the Free Software Foundation, +use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. 
You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the +software, or if you modify it: responsibilities to respect the freedom +of others. + +For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, +receive or can get the source code. And you must show them these +terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + +Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. 
+Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those +domains in future versions of the GPL, as needed to protect the +freedom of users. + +Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish +to avoid the special danger that patents applied to a free program +could make it effectively proprietary. To prevent this, the GPL +assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and +modification follow. + +@heading TERMS AND CONDITIONS + +@enumerate 0 +@item Definitions. + +``This License'' refers to version 3 of the GNU General Public License. + +``Copyright'' also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. + +``The Program'' refers to any copyrightable work licensed under this +License. Each licensee is addressed as ``you''. ``Licensees'' and +``recipients'' may be individuals or organizations. + +To ``modify'' a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. The resulting work is called a ``modified version'' of +the earlier work or a work ``based on'' the earlier work. + +A ``covered work'' means either the unmodified Program or a work based +on the Program. + +To ``propagate'' a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. 
Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + +To ``convey'' a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. + +An interactive user interface displays ``Appropriate Legal Notices'' to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +@item Source Code. + +The ``source code'' for a work means the preferred form of the work for +making modifications to it. ``Object code'' means any non-source form +of a work. + +A ``Standard Interface'' means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + +The ``System Libraries'' of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +``Major Component'', in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + +The ``Corresponding Source'' for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same +work. + +@item Basic Permissions. + +All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. 
+You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + +@item Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. + +@item Conveying Verbatim Copies. 
+ +You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +@item Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: + +@enumerate a +@item +The work must carry prominent notices stating that you modified it, +and giving a relevant date. + +@item +The work must carry prominent notices stating that it is released +under this License and any conditions added under section 7. This +requirement modifies the requirement in section 4 to ``keep intact all +notices''. + +@item +You must license the entire work, as a whole, under this License to +anyone who comes into possession of a copy. This License will +therefore apply, along with any applicable section 7 additional terms, +to the whole of the work, and all its parts, regardless of how they +are packaged. This License gives no permission to license the work in +any other way, but it does not invalidate such permission if you have +separately received it. + +@item +If the work has interactive user interfaces, each must display +Appropriate Legal Notices; however, if the Program has interactive +interfaces that do not display Appropriate Legal Notices, your work +need not make them do so. 
+@end enumerate + +A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +``aggregate'' if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +@item Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: + +@enumerate a +@item +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by the +Corresponding Source fixed on a durable physical medium customarily +used for software interchange. + +@item +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by a written +offer, valid for at least three years and valid for as long as you +offer spare parts or customer support for that product model, to give +anyone who possesses the object code either (1) a copy of the +Corresponding Source for all the software in the product that is +covered by this License, on a durable physical medium customarily used +for software interchange, for a price no more than your reasonable +cost of physically performing this conveying of source, or (2) access +to copy the Corresponding Source from a network server at no charge. + +@item +Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. 
This alternative is +allowed only occasionally and noncommercially, and only if you +received the object code with such an offer, in accord with subsection +6b. + +@item +Convey the object code by offering access from a designated place +(gratis or for a charge), and offer equivalent access to the +Corresponding Source in the same way through the same place at no +further charge. You need not require recipients to copy the +Corresponding Source along with the object code. If the place to copy +the object code is a network server, the Corresponding Source may be +on a different server (operated by you or a third party) that supports +equivalent copying facilities, provided you maintain clear directions +next to the object code saying where to find the Corresponding Source. +Regardless of what server hosts the Corresponding Source, you remain +obligated to ensure that it is available for as long as needed to +satisfy these requirements. + +@item +Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of +the work are being offered to the general public at no charge under +subsection 6d. + +@end enumerate + +A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + +A ``User Product'' is either (1) a ``consumer product'', which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. 
For a particular product received by a particular user, +``normally used'' refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. + +``Installation Information'' for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. 
Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. + +Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +@item Additional Terms. + +``Additional permissions'' are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: + +@enumerate a +@item +Disclaiming warranty or limiting liability differently from the terms +of sections 15 and 16 of this License; or + +@item +Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices +displayed by works containing it; or + +@item +Prohibiting misrepresentation of the origin of that material, or +requiring that modified versions of such material be marked in +reasonable ways as different from the original version; or + +@item +Limiting the use for publicity purposes of names of licensors or +authors of the material; or + +@item +Declining to grant rights under trademark law for use of some trade +names, trademarks, or service marks; or + +@item +Requiring indemnification of licensors and authors of that material by +anyone who conveys the material (or modified versions of it) with +contractual assumptions of liability to the recipient, for any +liability that these contractual assumptions directly impose on those +licensors and authors. +@end enumerate + +All other non-permissive additional terms are considered ``further +restrictions'' within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. + +@item Termination. + +You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +@item Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run +a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +@item Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + +An ``entity transaction'' is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +@item Patents. 
+ +A ``contributor'' is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's ``contributor version''. + +A contributor's ``essential patent claims'' are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, ``control'' includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + +In the following three paragraphs, a ``patent license'' is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To ``grant'' such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. ``Knowingly relying'' means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + +A patent license is ``discriminatory'' if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. 
You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +@item No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey +a covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree +to terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. + +@item Use with the GNU Affero General Public License. + +Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + +@item Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU General Public +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that numbered version or +of any later version published by the Free Software Foundation. If +the Program does not specify a version number of the GNU General +Public License, you may choose any version ever published by the Free +Software Foundation. + +If the Program specifies that a proxy can decide which future versions +of the GNU General Public License can be used, that proxy's public +statement of acceptance of a version permanently authorizes you to +choose that version for the Program. + +Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +@item Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS'' WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + +@item Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +@item Interpretation of Sections 15 and 16. + +If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +@end enumerate + +@heading END OF TERMS AND CONDITIONS + +@heading How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the ``copyright'' line and a pointer to where the full notice is found. 
+ +@smallexample +@var{one line to give the program's name and a brief idea of what it does.} +Copyright (C) @var{year} @var{name of author} + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see @url{http://www.gnu.org/licenses/}. +@end smallexample + +Also add information on how to contact you by electronic and paper mail. + +If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + +@smallexample +@var{program} Copyright (C) @var{year} @var{name of author} +This program comes with ABSOLUTELY NO WARRANTY; for details type @samp{show w}. +This is free software, and you are welcome to redistribute it +under certain conditions; type @samp{show c} for details. +@end smallexample + +The hypothetical commands @samp{show w} and @samp{show c} should show +the appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an ``about box''. + +You should also get your employer (if you work as a programmer) or school, +if any, to sign a ``copyright disclaimer'' for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +@url{http://www.gnu.org/licenses/}. + +The GNU General Public License does not permit incorporating your +program into proprietary programs. 
If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use +the GNU Lesser General Public License instead of this License. But +first, please read @url{http://www.gnu.org/philosophy/why-not-lgpl.html}. diff --git a/doc/lgpl.texi b/doc/lgpl.texi new file mode 100644 index 00000000..c29a6fb4 --- /dev/null +++ b/doc/lgpl.texi @@ -0,0 +1,190 @@ +@c The GNU Lesser General Public License. +@center Version 3, 29 June 2007 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2007 Free Software Foundation, Inc. @url{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +@end display + +This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + +@enumerate 0 +@item Additional Definitions. + +As used herein, ``this License'' refers to version 3 of the GNU Lesser +General Public License, and the ``GNU GPL'' refers to version 3 of the GNU +General Public License. + +``The Library'' refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + +An ``Application'' is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + +A ``Combined Work'' is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the ``Linked +Version''. 
+ +The ``Minimal Corresponding Source'' for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + +The ``Corresponding Application Code'' for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + +@item Exception to Section 3 of the GNU GPL. + +You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + +@item Conveying Modified Versions. + +If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + +@enumerate a +@item +under this License, provided that you make a good faith effort to +ensure that, in the event an Application does not supply the +function or data, the facility still operates, and performs +whatever part of its purpose remains meaningful, or + +@item +under the GNU GPL, with none of the additional permissions of +this License applicable to that copy. +@end enumerate + +@item Object Code Incorporating Material from Library Header Files. + +The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + +@enumerate a +@item +Give prominent notice with each copy of the object code that the +Library is used in it and that the Library and its use are +covered by this License. +@item +Accompany the object code with a copy of the GNU GPL and this license +document. +@end enumerate + +@item Combined Works. + +You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + +@enumerate a +@item +Give prominent notice with each copy of the Combined Work that +the Library is used in it and that the Library and its use are +covered by this License. +@item +Accompany the Combined Work with a copy of the GNU GPL and this license +document. +@item +For a Combined Work that displays copyright notices during +execution, include the copyright notice for the Library among +these notices, as well as a reference directing the user to the +copies of the GNU GPL and this license document. +@item +Do one of the following: + +@enumerate 0 +@item +Convey the Minimal Corresponding Source under the terms of this +License, and the Corresponding Application Code in a form +suitable for, and under terms that permit, the user to +recombine or relink the Application with a modified version of +the Linked Version to produce a modified Combined Work, in the +manner specified by section 6 of the GNU GPL for conveying +Corresponding Source. +@item +Use a suitable shared library mechanism for linking with the +Library. 
A suitable mechanism is one that (a) uses at run time +a copy of the Library already present on the user's computer +system, and (b) will operate properly with a modified version +of the Library that is interface-compatible with the Linked +Version. +@end enumerate + +@item +Provide Installation Information, but only if you would otherwise +be required to provide such information under section 6 of the +GNU GPL, and only to the extent that such information is +necessary to install and execute a modified version of the +Combined Work produced by recombining or relinking the +Application with a modified version of the Linked Version. (If +you use option 4d0, the Installation Information must accompany +the Minimal Corresponding Source and Corresponding Application +Code. If you use option 4d1, you must provide the Installation +Information in the manner specified by section 6 of the GNU GPL +for conveying Corresponding Source.) +@end enumerate + +@item Combined Libraries. + +You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + +@enumerate a +@item +Accompany the combined library with a copy of the same work based +on the Library, uncombined with any other library facilities, +conveyed under the terms of this License. +@item +Give prominent notice with the combined library that part of it +is a work based on the Library, and explaining where to find the +accompanying uncombined form of the same work. +@end enumerate + +@item Revised Versions of the GNU Lesser General Public License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. 
Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License ``or any later version'' +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + +If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + +@end enumerate diff --git a/doc/libunistring.info b/doc/libunistring.info new file mode 100644 index 00000000..2fad8fec --- /dev/null +++ b/doc/libunistring.info @@ -0,0 +1,6200 @@ +This is libunistring.info, produced by makeinfo version 4.13 from +libunistring.texi. + +INFO-DIR-SECTION Software development +START-INFO-DIR-ENTRY +* GNU libunistring: (libunistring). Unicode string library. +END-INFO-DIR-ENTRY + + This manual is for GNU libunistring. + + +File: libunistring.info, Node: Top, Next: Introduction, Up: (dir) + +GNU libunistring +**************** + +* Menu: + +* Introduction:: Who may need Unicode strings? 
+* Conventions:: Conventions used in this manual +* unitypes.h:: Elementary types +* unistr.h:: Elementary Unicode string functions +* uniconv.h:: Conversions between Unicode and encodings +* unistdio.h:: Output with Unicode strings +* uniname.h:: Names of Unicode characters +* unictype.h:: Unicode character classification and properties +* uniwidth.h:: Display width +* uniwbrk.h:: Word breaks in strings +* unilbrk.h:: Line breaking +* uninorm.h:: Normalization forms +* unicase.h:: Case mappings +* uniregex.h:: Regular expressions +* Using the library:: How to link with the library and use it? +* More functionality:: More advanced functionality +* Licenses:: Licenses + +* Index:: General Index + + --- The Detailed Node Listing --- + +Introduction + +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with `char *' strings +* The wchar_t mess:: Why `wchar_t *' strings are useless +* Unicode strings:: How are Unicode strings represented? 
+ +unistr.h + +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: + +unictype.h + +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: + +General category + +* Object oriented API:: +* Bit mask API:: + +Properties + +* Properties as objects:: +* Properties as functions:: + +uniwbrk.h + +* Word breaks in a string:: +* Word break property:: + +uninorm.h + +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: + +unicase.h + +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: + +Using the library + +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: + +Licenses + +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License + + +File: libunistring.info, Node: Introduction, Next: Conventions, Prev: Top, Up: Top + +1 Introduction +************** + + This library provides functions for manipulating Unicode strings and +for manipulating C strings according to the Unicode standard.
+ + It consists of the following parts: + +`<unistr.h>' + elementary string functions + +`<uniconv.h>' + conversion from/to legacy encodings + +`<unistdio.h>' + formatted output to strings + +`<uniname.h>' + character names + +`<unictype.h>' + character classification and properties + +`<uniwidth.h>' + string width when using nonproportional fonts + +`<uniwbrk.h>' + word breaks + +`<unilbrk.h>' + line breaking algorithm + +`<uninorm.h>' + normalization (composition and decomposition) + +`<unicase.h>' + case folding + +`<uniregex.h>' + regular expressions (not yet implemented) + + libunistring is for you if your application involves non-trivial text +processing, such as upper/lower case conversions, line breaking, +operations on words, or more advanced analysis of text. Text provided +by the user can, in general, contain characters of all kinds of +scripts. The text processing functions provided by this library handle +all scripts and all languages. + + libunistring is for you if your application already uses the ISO C / +POSIX `<ctype.h>', `<wctype.h>' functions and the text it operates on is +provided by the user and can be in any language. + + libunistring is also for you if your application uses Unicode +strings as internal in-memory representation. + +* Menu: + +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with `char *' strings +* The wchar_t mess:: Why `wchar_t *' strings are useless +* Unicode strings:: How are Unicode strings represented? + + +File: libunistring.info, Node: Unicode, Next: Unicode and i18n, Up: Introduction + +1.1 Unicode +=========== + + Unicode is a standardized repertoire of characters that contains +characters from all scripts of the world, from Latin letters to Chinese +ideographs and Babylonian cuneiform glyphs.
It also specifies how +these characters are to be rendered on a screen or on paper, and how +common text processing (word selection, line breaking, uppercasing of +page titles etc.) is supposed to behave on Unicode text. + + Unicode also specifies three ways of storing sequences of Unicode +characters in a computer whose basic unit of data is an 8-bit byte: +UTF-8 + Every character is represented as 1 to 4 bytes. + +UTF-16 + Every character is represented as 1 to 2 units of 16 bits. + +UTF-32, a.k.a. UCS-4 + Every character is represented as 1 unit of 32 bits. + + For encoding Unicode text in a file, UTF-8 is usually used. For +encoding Unicode strings in memory for a program, either of the three +encoding forms can be reasonably used. + + Unicode is widely used on the web. Prior to the use of Unicode, web +pages were in many different encodings (ISO-8859-1 for English, French, +Spanish, ISO-8859-2 for Polish, ISO-8859-7 for Greek, KOI8-R for +Russian, GB2312 or BIG5 for Chinese, ISO-2022-JP-2 or EUC-JP or +Shift_JIS for Japanese, and many many others). It was next to +impossible to create a document that contained Chinese and Polish text +in the same document. Due to the many encodings for Japanese, even the +processing of pure Japanese text was error prone. + + References: + * The Unicode standard: `http://www.unicode.org/' + + * Definition of UTF-8: `http://www.rfc-editor.org/rfc/rfc3629.txt' + + * Definition of UTF-16: `http://www.rfc-editor.org/rfc/rfc2781.txt' + + * Markus Kuhn's UTF-8 and Unicode FAQ: + `http://www.cl.cam.ac.uk/~mgk25/unicode.html' + + +File: libunistring.info, Node: Unicode and i18n, Next: Locale encodings, Prev: Unicode, Up: Introduction + +1.2 Unicode and Internationalization +==================================== + + Internationalization is the process of changing the source code of a +program so that it can meet the expectations of users in any culture, +if culture specific data (translations, images etc.) are provided. 
+ + Use of Unicode is not strictly required for internationalization, +but it makes internationalization much easier, because operations that +need to look at specific characters (like hyphenation, spell checking, +or the automatic conversion of double-quotes to opening and closing +double-quote characters) don't need to consider multiple possible +encodings of the text. + + Use of Unicode also enables multilingualization: the ability of +having text in multiple languages present in the same document or even +in the same line of text. + + But use of Unicode is not everything. Internationalization usually +consists of three features: + * Use of Unicode where needed for text processing. This is what + this library is for. + + * Use of message catalogs for messages shown to the user. This is + what GNU gettext is about. + + * Use of locale specific conventions for date and time formats, for + numeric formatting, or for sorting of text. This can be done + adequately with the POSIX APIs and the implementation of locales + in the GNU C library. + + +File: libunistring.info, Node: Locale encodings, Next: In-memory representation, Prev: Unicode and i18n, Up: Introduction + +1.3 Locale encodings +==================== + + A locale is a set of cultural conventions. According to POSIX, for +a program, at any moment, there is one locale being designated as the +"current locale". (Actually, POSIX supports also one locale per +thread, but this feature is not yet universally implemented and not +widely used.) The locale is partitioned into several aspects, called +the "categories" of the locale. The main various aspects are: + * The character encoding and the character properties. This is the + `LC_CTYPE' category. + + * The sorting rules for text. This is the `LC_COLLATE' category. + + * The language specific translations of messages. This is the + `LC_MESSAGES' category. + + * The formatting rules for numbers, such as the decimal separator. + This is the `LC_NUMERIC' category.
+ + * The formatting rules for amounts of money. This is the + `LC_MONETARY' category. + + * The formatting of date and time. This is the `LC_TIME' category. + + In particular, the `LC_CTYPE' category of the current locale +determines the character encoding. This is the encoding of `char *' +strings. We also call it the "locale encoding". GNU libunistring has +a function, `locale_charset', that returns a standardized (platform +independent) name for this encoding. + + All locale encodings used on glibc systems are essentially ASCII +compatible: Most graphic ASCII characters have the same representation, +as a single byte, in that encoding as in ASCII. + + Among the possible locale encodings are UTF-8 and GB18030. Both +allow to represent any Unicode character as a sequence of bytes. UTF-8 +is used in most of the world, whereas GB18030 is used in the People's +Republic of China, because it is backward compatible with the GB2312 +encoding that was used in this country earlier. + + The legacy locale encodings, ISO-8859-15 (which supplanted +ISO-8859-1 in most of Europe), ISO-8859-2, KOI8-R, EUC-JP, etc., are +still in use in many places, though. + + UTF-16 and UTF-32 are not used as locale encodings, because they are +not ASCII compatible. + + +File: libunistring.info, Node: In-memory representation, Next: char * strings, Prev: Locale encodings, Up: Introduction + +1.4 Choice of in-memory representation of strings +================================================= + + There are three ways of representing strings in memory of a running +program. + * As `char *' strings. Such strings are represented in locale + encoding. This approach is employed when not much text processing + is done by the program. When some Unicode aware processing is to + be done, a string is converted to Unicode on the fly and back to + locale encoding afterwards. + + * As UTF-8 or UTF-16 or UTF-32 strings. 
This implies that + conversion from locale encoding to Unicode is performed on input, + and in the opposite direction on output. This approach is + employed when the program does a significant amount of text + processing, or when the program has multiple threads operating on + the same data but in different locales. + + * As `wchar_t *', a.k.a. "wide strings". This approach is misguided, + see *note The wchar_t mess::. + + +File: libunistring.info, Node: char * strings, Next: The wchar_t mess, Prev: In-memory representation, Up: Introduction + +1.5 `char *' strings +==================== + + The classical C strings, with its C library support standardized by +ISO C and POSIX, can be used in internationalized programs with some +precautions. The problem with this API is that many of the C library +functions for strings don't work correctly on strings in locale +encodings, leading to bugs that only people in some cultures of the +world will experience. + + The first problem with the C library API is the support of multibyte +locales. According to the locale encoding, in general, every character +is represented by one or more bytes (up to 4 bytes in practice -- but +use `MB_LEN_MAX' instead of the number 4 in the code). When every +character is represented by only 1 byte, we speak of an "unibyte +locale", otherwise of a "multibyte locale". It is important to realize +that the majority of Unix installations nowadays use UTF-8 or GB18030 +as locale encoding; therefore, the majority of users are using +multibyte locales. + + The important fact to remember is: _A `char' is a byte, not a +character._ + + As a consequence: + * The `<ctype.h>' API is useless in this context; it does not work in + multibyte locales. + + * The `strlen' function does not return the number of characters in + a string. Nor does it return the number of screen columns occupied + by a string after it is output. It merely returns the number of + _bytes_ occupied by a string.
+ + * Truncating a string, for example, with `strncpy', can have the + effect of truncating it in the middle of a multibyte character. + Such a string will, when output, have a garbled character at its + end, often represented by a hollow box. + + * `strchr' and `strrchr' do not work with multibyte strings if the + locale encoding is GB18030 and the character to be searched is a + digit. + + * `strstr' does not work with multibyte strings if the locale + encoding is different from UTF-8. + + * `strcspn', `strpbrk', `strspn' cannot work correctly in multibyte + locales: they assume the second argument is a list of single-byte + characters. Even in this simple case, they do not work with + multibyte strings if the locale encoding is GB18030 and one of the + characters to be searched is a digit. + + * `strsep' and `strtok_r' do not work with multibyte strings unless + all of the delimiter characters are ASCII characters < 0x30. + + * The `strcasecmp', `strncasecmp', and `strcasestr' functions do not + work with multibyte strings. + + The workarounds can be found in GNU gnulib +`http://www.gnu.org/software/gnulib/'. + * gnulib has modules `mbchar', `mbiter', `mbuiter' that represent + multibyte characters and allow to iterate across a multibyte + string with the same ease as through a unibyte string. + + * gnulib has functions `mbslen' and `mbswidth' that can be used + instead of `strlen' when the number of characters or the number of + screen columns of a string is requested. + + * gnulib has functions `mbschr' and `mbsrrchr' that are like + `strchr' and `strrchr', but work in multibyte locales. + + * gnulib has a function `mbsstr', like `strstr', but works in + multibyte locales. + + * gnulib has functions `mbscspn', `mbspbrk', `mbsspn' that are like + `strcspn', `strpbrk', `strspn', but work in multibyte locales. + + * gnulib has functions `mbssep' and `mbstok_r' that are like + `strsep' and `strtok_r' but work in multibyte locales. 
+ + * gnulib has functions `mbscasecmp', `mbsncasecmp', `mbspcasecmp', + and `mbscasestr' that are like `strcasecmp', `strncasecmp', and + `strcasestr', but work in multibyte locales. Still, the function + `ulc_casecmp' is preferable to these functions; see below. + + The second problem with the C library API is that it has some +assumptions built-in that are not valid in some languages: + * It assumes that there are only two forms of every character: + uppercase and lowercase. This is not true for Croatian, where the + character LETTER DZ WITH CARON comes in three forms: LATIN CAPITAL + LETTER DZ WITH CARON (DZ), LATIN CAPITAL LETTER D WITH SMALL + LETTER Z WITH CARON (Dz), LATIN SMALL LETTER DZ WITH CARON (dz). + + * It assumes that uppercasing of 1 character leads to 1 character. + This is not true for German, where the LATIN SMALL LETTER SHARP S, + when uppercased, becomes `SS'. + + * It assumes that there is 1:1 mapping between uppercase and + lowercase forms. This is not true for the Greek sigma: GREEK + CAPITAL LETTER SIGMA is the uppercase of both GREEK SMALL LETTER + SIGMA and GREEK SMALL LETTER FINAL SIGMA. + + * It assumes that the upper/lowercase mappings are position + independent. This is not true for the Greek sigma and the + Lithuanian i. + + The correct way to deal with this problem is + 1. to provide functions for titlecasing, as well as for upper- and + lowercasing, + + 2. to view case transformations as functions that operate on strings, + rather than on characters. + + This is implemented in this library, through the functions declared +in `<unicase.h>', see *note unicase.h::. + + +File: libunistring.info, Node: The wchar_t mess, Next: Unicode strings, Prev: char * strings, Up: Introduction + +1.6 The `wchar_t' mess +====================== + + The ISO C and POSIX standard creators made an attempt to fix the +first problem mentioned in the previous section.
They introduced + * a type `wchar_t', designed to encapsulate an entire character, + + * a "wide string" type `wchar_t *', and + + * functions declared in `<wctype.h>' that were meant to supplant the + ones in `<ctype.h>'. + + Unfortunately, this API and its implementation has numerous problems: + + * On AIX and Windows platforms, `wchar_t' is a 16-bit type. This + means that it can never accommodate an entire Unicode character. + Either the `wchar_t *' strings are limited to characters in UCS-2 + (the "Basic Multilingual Plane" of Unicode), or -- if `wchar_t *' + strings are encoded in UTF-16 -- a `wchar_t' represents only half + of a character in the worst case, making the `<wctype.h>' functions + pointless. + + * On Solaris and FreeBSD, the `wchar_t' encoding is locale dependent + and undocumented. This means, if you want to know any property of + a `wchar_t' character, other than the properties defined by + `<wctype.h>' -- such as whether it's a dash, currency symbol, + paragraph separator, or similar --, you have to convert it to + `char *' encoding first, by use of the function `wctomb'. + + * When you read a stream of wide characters, through the functions + `fgetwc' and `fgetws', and when the input stream/file is not in + the expected encoding, you have no way to determine the invalid + byte sequence and do some corrective action. If you use these + functions, your program becomes "garbage in - more garbage out" or + "garbage in - abort". + + As a consequence, it is better to use multibyte strings, as +explained in the previous section. Such multibyte strings can bypass +limitations of the `wchar_t' type, if you use functions defined in +gnulib and libunistring for text processing. They can also faithfully +transport malformed characters that were present in the input, without +requiring the program to produce garbage or abort.
+ + +File: libunistring.info, Node: Unicode strings, Prev: The wchar_t mess, Up: Introduction + +1.7 Unicode strings +=================== + + libunistring supports Unicode strings in three representations: + * UTF-8 strings, through the type `uint8_t *'. The units are bytes + (`uint8_t'). + + * UTF-16 strings, through the type `uint16_t *'. The units are + 16-bit memory words (`uint16_t'). + + * UTF-32 strings, through the type `uint32_t *'. The units are + 32-bit memory words (`uint32_t'). + + As with C strings, there are two variants: + * Unicode strings with a terminating NUL character are represented as + a pointer to the first unit of the string. There is a unit + containing a 0 value at the end. It is considered part of the + string for all memory allocation purposes, but is not considered + part of the string for all other logical purposes. + + * Unicode strings where embedded NUL characters are allowed. These + are represented by a pointer to the first unit and the number of + units (not bytes!) of the string. In this setting, there is no + trailing zero-valued unit used as "end marker". + + +File: libunistring.info, Node: Conventions, Next: unitypes.h, Prev: Introduction, Up: Top + +2 Conventions +************* + + This chapter explains conventions valid throughout the libunistring +library. + + Variables of type `char *' denote C strings in locale encoding. See +*note Locale encodings::. + + Variables of type `uint8_t *' denote UTF-8 strings. Their units are +bytes. + + Variables of type `uint16_t *' denote UTF-16 strings, without byte +order mark. Their units are 2-byte words. + + Variables of type `uint32_t *' denote UTF-32 strings, without byte +order mark. Their units are 4-byte words. + + Argument pairs `(S, N)' denote a string `S[0..N-1]' with exactly N +units. + + All functions with prefix `ulc_' operate on C strings in locale +encoding. + + All functions with prefix `u8_' operate on UTF-8 strings.
+ + All functions with prefix `u16_' operate on UTF-16 strings. + + All functions with prefix `u32_' operate on UTF-32 strings. + + For every function with prefix `u8_', operating on UTF-8 strings, +there is also a corresponding function with prefix `u16_', operating on +UTF-16 strings, and a corresponding function with prefix `u32_', +operating on UTF-32 strings. Their description is analogous; in this +documentation we describe only the function that operates on UTF-8 +strings, for brevity. + + A declaration with a variable N denotes the three concrete +declarations with N = 8, N = 16, N = 32. + + All parameters starting with `str' and the parameters of functions +starting with `u8_str'/`u16_str'/`u32_str' denote a NUL terminated +string. + + Error values are always returned through the `errno' variable, +usually with a return value that indicates the presence of an error +(NULL for functions that return a pointer, or -1 for functions that +return an `int'). + + Functions returning a string result take a `(RESULTBUF, LENGTHP)' +argument pair. If RESULTBUF is not NULL and the result fits into +`*LENGTHP' units, it is put in RESULTBUF, and RESULTBUF is returned. +Otherwise, a freshly allocated string is returned. In both cases, +`*LENGTHP' is set to the length (number of units) of the returned +string. In case of error, NULL is returned and `errno' is set. + + +File: libunistring.info, Node: unitypes.h, Next: unistr.h, Prev: Conventions, Up: Top + +3 Elementary types `<unitypes.h>' +********************************* + + The include file `<unitypes.h>' provides the following basic types. + + -- Type: uint8_t + -- Type: uint16_t + -- Type: uint32_t + These are the storage units of UTF-8/16/32 strings, respectively. + The definitions are taken from `<stdint.h>', on platforms where + this include file is present. + + -- Type: ucs4_t + This type represents a single Unicode character, outside of an + UTF-32 string.
+ + +File: libunistring.info, Node: unistr.h, Next: uniconv.h, Prev: unitypes.h, Up: Top + +4 Elementary Unicode string functions `<unistr.h>' +************************************************** + + This include file declares elementary functions for Unicode strings. +It is essentially the equivalent of what `<string.h>' is for C strings. + +* Menu: + +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: + + +File: libunistring.info, Node: Elementary string checks, Next: Elementary string conversions, Up: unistr.h + +4.1 Elementary string checks +============================ + + The following function is available to verify the integrity of a +Unicode string. + + -- Function: const uint8_t * u8_check (const uint8_t *S, size_t N) + -- Function: const uint16_t * u16_check (const uint16_t *S, size_t N) + -- Function: const uint32_t * u32_check (const uint32_t *S, size_t N) + This function checks whether a Unicode string is well-formed. It + returns NULL if valid, or a pointer to the first invalid unit + otherwise. + + +File: libunistring.info, Node: Elementary string conversions, Next: Elementary string functions, Prev: Elementary string checks, Up: unistr.h + +4.2 Elementary string conversions +================================= + + The following functions perform conversions between the different +forms of Unicode strings. + + -- Function: uint16_t * u8_to_u16 (const uint8_t *S, size_t N, + uint16_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-8 string to an UTF-16 string. + + -- Function: uint32_t * u8_to_u32 (const uint8_t *S, size_t N, + uint32_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-8 string to an UTF-32 string. + + -- Function: uint8_t * u16_to_u8 (const uint16_t *S, size_t N, uint8_t + *RESULTBUF, size_t *LENGTHP) + Converts an UTF-16 string to an UTF-8 string.
+ + -- Function: uint32_t * u16_to_u32 (const uint16_t *S, size_t N, + uint32_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-16 string to an UTF-32 string. + + -- Function: uint8_t * u32_to_u8 (const uint32_t *S, size_t N, uint8_t + *RESULTBUF, size_t *LENGTHP) + Converts an UTF-32 string to an UTF-8 string. + + -- Function: uint16_t * u32_to_u16 (const uint32_t *S, size_t N, + uint16_t *RESULTBUF, size_t *LENGTHP) + Converts an UTF-32 string to an UTF-16 string. + + +File: libunistring.info, Node: Elementary string functions, Next: Elementary string functions with memory allocation, Prev: Elementary string conversions, Up: unistr.h + +4.3 Elementary string functions +=============================== + + The following functions inspect and return details about the first +character in a Unicode string. + + -- Function: int u8_mblen (const uint8_t *S, size_t N) + -- Function: int u16_mblen (const uint16_t *S, size_t N) + -- Function: int u32_mblen (const uint32_t *S, size_t N) + Returns the length (number of units) of the first character in S, + which is no longer than N. Returns 0 if it is the NUL character. + Returns -1 upon failure. + + This function is similar to `mblen', except that it operates on a + Unicode string and that S must not be NULL. + + -- Function: int u8_mbtouc_unsafe (ucs4_t *PUC, const uint8_t *S, + size_t N) + -- Function: int u16_mbtouc_unsafe (ucs4_t *PUC, const uint16_t *S, + size_t N) + -- Function: int u32_mbtouc_unsafe (ucs4_t *PUC, const uint32_t *S, + size_t N) + Returns the length (number of units) of the first character in S, + putting its `ucs4_t' representation in `*PUC'. Upon failure, + `*PUC' is set to `0xfffd', and an appropriate number of units is + returned. + + The number of available units, N, must be > 0. + + This function is similar to `mbtowc', except that it operates on a + Unicode string, PUC and S must not be NULL, N must be > 0, and the + NUL character is not treated specially. 
+ + -- Function: int u8_mbtouc (ucs4_t *PUC, const uint8_t *S, size_t N) + -- Function: int u16_mbtouc (ucs4_t *PUC, const uint16_t *S, size_t N) + -- Function: int u32_mbtouc (ucs4_t *PUC, const uint32_t *S, size_t N) + This function is like `u8_mbtouc_unsafe', except that it will + detect an invalid UTF-8 character, even if the library is compiled + without `--enable-safety'. + + -- Function: int u8_mbtoucr (ucs4_t *PUC, const uint8_t *S, size_t N) + -- Function: int u16_mbtoucr (ucs4_t *PUC, const uint16_t *S, size_t N) + -- Function: int u32_mbtoucr (ucs4_t *PUC, const uint32_t *S, size_t N) + Returns the length (number of units) of the first character in S, + putting its `ucs4_t' representation in `*PUC'. Upon failure, + `*PUC' is set to `0xfffd', and -1 is returned for an invalid + sequence of units, -2 is returned for an incomplete sequence of + units. + + The number of available units, N, must be > 0. + + This function is similar to `u8_mbtouc', except that the return + value gives more details about the failure, similar to `mbrtowc'. + + The following function stores a Unicode character as a Unicode +string in memory. + + -- Function: int u8_uctomb (uint8_t *S, ucs4_t UC, int N) + -- Function: int u16_uctomb (uint16_t *S, ucs4_t UC, int N) + -- Function: int u32_uctomb (uint32_t *S, ucs4_t UC, int N) + Puts the multibyte character represented by UC in S, returning its + length. Returns -1 upon failure, -2 if the number of available + units, N, is too small. The latter case cannot occur if N >= + 6/2/1, respectively. + + This function is similar to `wctomb', except that it operates on + Unicode strings, S must not be NULL, and the argument N must be + specified. + + The following functions copy Unicode strings in memory.
+ + -- Function: uint8_t * u8_cpy (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_cpy (uint16_t *DEST, const uint16_t *SRC, + size_t N) + -- Function: uint32_t * u32_cpy (uint32_t *DEST, const uint32_t *SRC, + size_t N) + Copies N units from SRC to DEST. + + This function is similar to `memcpy', except that it operates on + Unicode strings. + + -- Function: uint8_t * u8_move (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_move (uint16_t *DEST, const uint16_t *SRC, + size_t N) + -- Function: uint32_t * u32_move (uint32_t *DEST, const uint32_t *SRC, + size_t N) + Copies N units from SRC to DEST, guaranteeing correct behavior for + overlapping memory areas. + + This function is similar to `memmove', except that it operates on + Unicode strings. + + The following function fills a Unicode string. + + -- Function: uint8_t * u8_set (uint8_t *S, ucs4_t UC, size_t N) + -- Function: uint16_t * u16_set (uint16_t *S, ucs4_t UC, size_t N) + -- Function: uint32_t * u32_set (uint32_t *S, ucs4_t UC, size_t N) + Sets the first N characters of S to UC. UC should be a character + that occupies only 1 unit. + + This function is similar to `memset', except that it operates on + Unicode strings. + + The following function compares two Unicode strings of the same +length. + + -- Function: int u8_cmp (const uint8_t *S1, const uint8_t *S2, size_t + N) + -- Function: int u16_cmp (const uint16_t *S1, const uint16_t *S2, + size_t N) + -- Function: int u32_cmp (const uint32_t *S1, const uint32_t *S2, + size_t N) + Compares S1 and S2, each of length N, lexicographically. Returns + a negative value if S1 compares smaller than S2, a positive value + if S1 compares larger than S2, or 0 if they compare equal. + + This function is similar to `memcmp', except that it operates on + Unicode strings. + + The following function compares two Unicode strings of possibly +different lengths. 
+ + -- Function: int u8_cmp2 (const uint8_t *S1, size_t N1, const uint8_t + *S2, size_t N2) + -- Function: int u16_cmp2 (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2) + -- Function: int u32_cmp2 (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2) + Compares S1 and S2, lexicographically. Returns a negative value + if S1 compares smaller than S2, a positive value if S1 compares + larger than S2, or 0 if they compare equal. + + This function is similar to the gnulib function `memcmp2', except + that it operates on Unicode strings. + + The following function searches for a given Unicode character. + + -- Function: uint8_t * u8_chr (const uint8_t *S, size_t N, ucs4_t UC) + -- Function: uint16_t * u16_chr (const uint16_t *S, size_t N, ucs4_t + UC) + -- Function: uint32_t * u32_chr (const uint32_t *S, size_t N, ucs4_t + UC) + Searches the string at S for UC. Returns a pointer to the first + occurrence of UC in S, or NULL if UC does not occur in S. + + This function is similar to `memchr', except that it operates on + Unicode strings. + + The following function counts the number of Unicode characters. + + -- Function: size_t u8_mbsnlen (const uint8_t *S, size_t N) + -- Function: size_t u16_mbsnlen (const uint16_t *S, size_t N) + -- Function: size_t u32_mbsnlen (const uint32_t *S, size_t N) + Counts and returns the number of Unicode characters in the N units + from S. + + This function is similar to the gnulib function `mbsnlen', except + that it operates on Unicode strings. + + +File: libunistring.info, Node: Elementary string functions with memory allocation, Next: Elementary string functions on NUL terminated strings, Prev: Elementary string functions, Up: unistr.h + +4.4 Elementary string functions with memory allocation +====================================================== + + The following function copies a Unicode string. 
+ + -- Function: uint8_t * u8_cpy_alloc (const uint8_t *S, size_t N) + -- Function: uint16_t * u16_cpy_alloc (const uint16_t *S, size_t N) + -- Function: uint32_t * u32_cpy_alloc (const uint32_t *S, size_t N) + Makes a freshly allocated copy of S, of length N. + + +File: libunistring.info, Node: Elementary string functions on NUL terminated strings, Prev: Elementary string functions with memory allocation, Up: unistr.h + +4.5 Elementary string functions on NUL terminated strings +========================================================= + + The following functions inspect and return details about the first +character in a Unicode string. + + -- Function: int u8_strmblen (const uint8_t *S) + -- Function: int u16_strmblen (const uint16_t *S) + -- Function: int u32_strmblen (const uint32_t *S) + Returns the length (number of units) of the first character in S. + Returns 0 if it is the NUL character. Returns -1 upon failure. + + -- Function: int u8_strmbtouc (ucs4_t *PUC, const uint8_t *S) + -- Function: int u16_strmbtouc (ucs4_t *PUC, const uint16_t *S) + -- Function: int u32_strmbtouc (ucs4_t *PUC, const uint32_t *S) + Returns the length (number of units) of the first character in S, + putting its `ucs4_t' representation in `*PUC'. Returns 0 if it is + the NUL character. Returns -1 upon failure. + + -- Function: const uint8_t * u8_next (ucs4_t *PUC, const uint8_t *S) + -- Function: const uint16_t * u16_next (ucs4_t *PUC, const uint16_t *S) + -- Function: const uint32_t * u32_next (ucs4_t *PUC, const uint32_t *S) + Forward iteration step. Advances the pointer past the next + character, or returns NULL if the end of the string has been + reached. Puts the character's `ucs4_t' representation in `*PUC'. + + The following function inspects and returns details about the +previous character in a Unicode string. 
+ + -- Function: const uint8_t * u8_prev (ucs4_t *PUC, const uint8_t *S, + const uint8_t *START) + -- Function: const uint16_t * u16_prev (ucs4_t *PUC, const uint16_t + *S, const uint16_t *START) + -- Function: const uint32_t * u32_prev (ucs4_t *PUC, const uint32_t + *S, const uint32_t *START) + Backward iteration step. Advances the pointer to point to the + previous character, or returns NULL if the beginning of the string + had been reached. Puts the character's `ucs4_t' representation in + `*PUC'. + + The following functions determine the length of a Unicode string. + + -- Function: size_t u8_strlen (const uint8_t *S) + -- Function: size_t u16_strlen (const uint16_t *S) + -- Function: size_t u32_strlen (const uint32_t *S) + Returns the number of units in S. + + This function is similar to `strlen' and `wcslen', except that it + operates on Unicode strings. + + -- Function: size_t u8_strnlen (const uint8_t *S, size_t MAXLEN) + -- Function: size_t u16_strnlen (const uint16_t *S, size_t MAXLEN) + -- Function: size_t u32_strnlen (const uint32_t *S, size_t MAXLEN) + Returns the number of units in S, but at most MAXLEN. + + This function is similar to `strnlen' and `wcsnlen', except that + it operates on Unicode strings. + + The following functions copy portions of Unicode strings in memory. + + -- Function: uint8_t * u8_strcpy (uint8_t *DEST, const uint8_t *SRC) + -- Function: uint16_t * u16_strcpy (uint16_t *DEST, const uint16_t + *SRC) + -- Function: uint32_t * u32_strcpy (uint32_t *DEST, const uint32_t + *SRC) + Copies SRC to DEST. + + This function is similar to `strcpy' and `wcscpy', except that it + operates on Unicode strings. + + -- Function: uint8_t * u8_stpcpy (uint8_t *DEST, const uint8_t *SRC) + -- Function: uint16_t * u16_stpcpy (uint16_t *DEST, const uint16_t + *SRC) + -- Function: uint32_t * u32_stpcpy (uint32_t *DEST, const uint32_t + *SRC) + Copies SRC to DEST, returning the address of the terminating NUL + in DEST. 
+ + This function is similar to `stpcpy', except that it operates on + Unicode strings. + + -- Function: uint8_t * u8_strncpy (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_strncpy (uint16_t *DEST, const uint16_t + *SRC, size_t N) + -- Function: uint32_t * u32_strncpy (uint32_t *DEST, const uint32_t + *SRC, size_t N) + Copies no more than N units of SRC to DEST. + + This function is similar to `strncpy' and `wcsncpy', except that + it operates on Unicode strings. + + -- Function: uint8_t * u8_stpncpy (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_stpncpy (uint16_t *DEST, const uint16_t + *SRC, size_t N) + -- Function: uint32_t * u32_stpncpy (uint32_t *DEST, const uint32_t + *SRC, size_t N) + Copies no more than N units of SRC to DEST, returning the address + of the last unit written into DEST. + + This function is similar to `stpncpy', except that it operates on + Unicode strings. + + -- Function: uint8_t * u8_strcat (uint8_t *DEST, const uint8_t *SRC) + -- Function: uint16_t * u16_strcat (uint16_t *DEST, const uint16_t + *SRC) + -- Function: uint32_t * u32_strcat (uint32_t *DEST, const uint32_t + *SRC) + Appends SRC onto DEST. + + This function is similar to `strcat' and `wcscat', except that it + operates on Unicode strings. + + -- Function: uint8_t * u8_strncat (uint8_t *DEST, const uint8_t *SRC, + size_t N) + -- Function: uint16_t * u16_strncat (uint16_t *DEST, const uint16_t + *SRC, size_t N) + -- Function: uint32_t * u32_strncat (uint32_t *DEST, const uint32_t + *SRC, size_t N) + Appends no more than N units of SRC onto DEST. + + This function is similar to `strncat' and `wcsncat', except that + it operates on Unicode strings. + + The following functions compare two Unicode strings. 
+ + -- Function: int u8_strcmp (const uint8_t *S1, const uint8_t *S2) + -- Function: int u16_strcmp (const uint16_t *S1, const uint16_t *S2) + -- Function: int u32_strcmp (const uint32_t *S1, const uint32_t *S2) + Compares S1 and S2, lexicographically. Returns a negative value + if S1 compares smaller than S2, a positive value if S1 compares + larger than S2, or 0 if they compare equal. + + This function is similar to `strcmp' and `wcscmp', except that it + operates on Unicode strings. + + -- Function: int u8_strcoll (const uint8_t *S1, const uint8_t *S2) + -- Function: int u16_strcoll (const uint16_t *S1, const uint16_t *S2) + -- Function: int u32_strcoll (const uint32_t *S1, const uint32_t *S2) + Compares S1 and S2 using the collation rules of the current locale. + Returns -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. Upon failure, + sets `errno' and returns any value. + + This function is similar to `strcoll' and `wcscoll', except that + it operates on Unicode strings. + + Note that this function may consider different canonical + normalizations of the same string as having a large distance. It + is therefore better to use the function `u8_normcoll' instead of + this one; see *note uninorm.h::. + + -- Function: int u8_strncmp (const uint8_t *S1, const uint8_t *S2, + size_t N) + -- Function: int u16_strncmp (const uint16_t *S1, const uint16_t *S2, + size_t N) + -- Function: int u32_strncmp (const uint32_t *S1, const uint32_t *S2, + size_t N) + Compares no more than N units of S1 and S2. + + This function is similar to `strncmp' and `wcsncmp', except that + it operates on Unicode strings. + + The following function allocates a duplicate of a Unicode string. + + -- Function: uint8_t * u8_strdup (const uint8_t *S) + -- Function: uint16_t * u16_strdup (const uint16_t *S) + -- Function: uint32_t * u32_strdup (const uint32_t *S) + Duplicates S, returning an identical malloc'd string. 
+ + This function is similar to `strdup' and `wcsdup', except that it + operates on Unicode strings. + + The following functions search for a given Unicode character. + + -- Function: uint8_t * u8_strchr (const uint8_t *STR, ucs4_t UC) + -- Function: uint16_t * u16_strchr (const uint16_t *STR, ucs4_t UC) + -- Function: uint32_t * u32_strchr (const uint32_t *STR, ucs4_t UC) + Finds the first occurrence of UC in STR. + + This function is similar to `strchr' and `wcschr', except that it + operates on Unicode strings. + + -- Function: uint8_t * u8_strrchr (const uint8_t *STR, ucs4_t UC) + -- Function: uint16_t * u16_strrchr (const uint16_t *STR, ucs4_t UC) + -- Function: uint32_t * u32_strrchr (const uint32_t *STR, ucs4_t UC) + Finds the last occurrence of UC in STR. + + This function is similar to `strrchr' and `wcsrchr', except that + it operates on Unicode strings. + + The following functions search for the first occurrence of some +Unicode character in or outside a given set of Unicode characters. + + -- Function: size_t u8_strcspn (const uint8_t *STR, const uint8_t + *REJECT) + -- Function: size_t u16_strcspn (const uint16_t *STR, const uint16_t + *REJECT) + -- Function: size_t u32_strcspn (const uint32_t *STR, const uint32_t + *REJECT) + Returns the length of the initial segment of STR which consists + entirely of Unicode characters not in REJECT. + + This function is similar to `strcspn' and `wcscspn', except that + it operates on Unicode strings. + + -- Function: size_t u8_strspn (const uint8_t *STR, const uint8_t + *ACCEPT) + -- Function: size_t u16_strspn (const uint16_t *STR, const uint16_t + *ACCEPT) + -- Function: size_t u32_strspn (const uint32_t *STR, const uint32_t + *ACCEPT) + Returns the length of the initial segment of STR which consists + entirely of Unicode characters in ACCEPT. + + This function is similar to `strspn' and `wcsspn', except that it + operates on Unicode strings. 
+ + -- Function: uint8_t * u8_strpbrk (const uint8_t *STR, const uint8_t + *ACCEPT) + -- Function: uint16_t * u16_strpbrk (const uint16_t *STR, const + uint16_t *ACCEPT) + -- Function: uint32_t * u32_strpbrk (const uint32_t *STR, const + uint32_t *ACCEPT) + Finds the first occurrence in STR of any character in ACCEPT. + + This function is similar to `strpbrk' and `wcspbrk', except that + it operates on Unicode strings. + + The following functions search whether a given Unicode string is a +substring of another Unicode string. + + -- Function: uint8_t * u8_strstr (const uint8_t *HAYSTACK, const + uint8_t *NEEDLE) + -- Function: uint16_t * u16_strstr (const uint16_t *HAYSTACK, const + uint16_t *NEEDLE) + -- Function: uint32_t * u32_strstr (const uint32_t *HAYSTACK, const + uint32_t *NEEDLE) + Finds the first occurrence of NEEDLE in HAYSTACK. + + This function is similar to `strstr' and `wcsstr', except that it + operates on Unicode strings. + + -- Function: bool u8_startswith (const uint8_t *STR, const uint8_t + *PREFIX) + -- Function: bool u16_startswith (const uint16_t *STR, const uint16_t + *PREFIX) + -- Function: bool u32_startswith (const uint32_t *STR, const uint32_t + *PREFIX) + Tests whether STR starts with PREFIX. + + -- Function: bool u8_endswith (const uint8_t *STR, const uint8_t + *SUFFIX) + -- Function: bool u16_endswith (const uint16_t *STR, const uint16_t + *SUFFIX) + -- Function: bool u32_endswith (const uint32_t *STR, const uint32_t + *SUFFIX) + Tests whether STR ends with SUFFIX. + + The following function does one step in tokenizing a Unicode string. + + -- Function: uint8_t * u8_strtok (uint8_t *STR, const uint8_t *DELIM, + uint8_t **PTR) + -- Function: uint16_t * u16_strtok (uint16_t *STR, const uint16_t + *DELIM, uint16_t **PTR) + -- Function: uint32_t * u32_strtok (uint32_t *STR, const uint32_t + *DELIM, uint32_t **PTR) + Divides STR into tokens separated by characters in DELIM. 
+ + This function is similar to `strtok_r' and `wcstok', except that + it operates on Unicode strings. Its interface is actually more + similar to `wcstok' than to `strtok'. + + +File: libunistring.info, Node: uniconv.h, Next: unistdio.h, Prev: unistr.h, Up: Top + +5 Conversions between Unicode and encodings `<uniconv.h>' +********************************************************* + + This include file declares functions for converting between Unicode +strings and `char *' strings in locale encoding or in other specified +encodings. + + The following function returns the locale encoding. + + -- Function: const char * locale_charset () + Determines the current locale's character encoding, and + canonicalizes it into one of the canonical names listed in + `config.charset'. If the canonical name cannot be determined, the + result is a non-canonical name. + + The result must not be freed; it is statically allocated. + + The result of this function can be used as an argument to the + `iconv_open' function in GNU libc, in GNU libiconv, or in the + gnulib provided wrapper around the native `iconv_open' function. + It may not work as an argument to the native `iconv_open' function + directly. + + The handling of unconvertible characters during the conversions can +be parametrized through the following enumeration type: + + -- Type: enum iconv_ilseq_handler + This type specifies how unconvertible characters in the input are + handled. + + -- Constant: enum iconv_ilseq_handler iconveh_error + This handler causes the function to return with `errno' set to + `EILSEQ'. + + -- Constant: enum iconv_ilseq_handler iconveh_question_mark + This handler produces one question mark `?' per unconvertible + character. + + -- Constant: enum iconv_ilseq_handler iconveh_escape_sequence + This handler produces an escape sequence `\uXXXX' or `\UXXXXXXXX' + for each unconvertible character. + + The following functions convert between strings in a specified +encoding and Unicode strings.
+ + -- Function: uint8_t * u8_conv_from_encoding (const char *FROMCODE, + enum iconv_ilseq_handler HANDLER, const char *SRC, size_t + SRCLEN, size_t *OFFSETS, uint8_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint16_t * u16_conv_from_encoding (const char *FROMCODE, + enum iconv_ilseq_handler HANDLER, const char *SRC, size_t + SRCLEN, size_t *OFFSETS, uint16_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_conv_from_encoding (const char *FROMCODE, + enum iconv_ilseq_handler HANDLER, const char *SRC, size_t + SRCLEN, size_t *OFFSETS, uint32_t *RESULTBUF, size_t *LENGTHP) + Converts an entire string, possibly including NUL bytes, from one + encoding to UTF-8, UTF-16, or UTF-32 encoding, respectively. + + Converts a memory region given in encoding FROMCODE. FROMCODE is + as for the `iconv_open' function. + + The input is in the memory region between SRC (inclusive) and `SRC + + SRCLEN' (exclusive). + + If OFFSETS is not NULL, it should point to an array of SRCLEN + integers; this array is filled with offsets into the result, i.e. + the character starting at `SRC[i]' corresponds to the character + starting at `RESULT[OFFSETS[i]]', and other offsets are set to + `(size_t)(-1)'. + + `RESULTBUF' and `*LENGTHP' should be a scratch buffer and its + size, or `RESULTBUF' can be NULL. + + May erase the contents of the memory at `RESULTBUF'. + + If successful: The resulting Unicode string (non-NULL) is returned + and its length stored in `*LENGTHP'. The resulting string is + `RESULTBUF' if no dynamic memory allocation was necessary, or a + freshly allocated memory block otherwise. + + In case of error: NULL is returned and `errno' is set. Particular + `errno' values: `EINVAL', `EILSEQ', `ENOMEM'.
+ + -- Function: char * u8_conv_to_encoding (const char *TOCODE, enum + iconv_ilseq_handler HANDLER, const uint8_t *SRC, size_t + SRCLEN, size_t *OFFSETS, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u16_conv_to_encoding (const char *TOCODE, enum + iconv_ilseq_handler HANDLER, const uint16_t *SRC, size_t + SRCLEN, size_t *OFFSETS, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u32_conv_to_encoding (const char *TOCODE, enum + iconv_ilseq_handler HANDLER, const uint32_t *SRC, size_t + SRCLEN, size_t *OFFSETS, char *RESULTBUF, size_t *LENGTHP) + Converts an entire Unicode string, possibly including NUL units, + from UTF-8, UTF-16, or UTF-32 encoding to a given encoding. + + Converts a memory region to encoding TOCODE. TOCODE is as for the + `iconv_open' function. + + The input is in the memory region between SRC (inclusive) and `SRC + + SRCLEN' (exclusive). + + If OFFSETS is not NULL, it should point to an array of SRCLEN + integers; this array is filled with offsets into the result, i.e. + the character starting at `SRC[i]' corresponds to the character + starting at `RESULT[OFFSETS[i]]', and other offsets are set to + `(size_t)(-1)'. + + `RESULTBUF' and `*LENGTHP' should be a scratch buffer and its + size, or `RESULTBUF' can be NULL. + + May erase the contents of the memory at `RESULTBUF'. + + If successful: The resulting string (non-NULL) is returned + and its length stored in `*LENGTHP'. The resulting string is + `RESULTBUF' if no dynamic memory allocation was necessary, or a + freshly allocated memory block otherwise. + + In case of error: NULL is returned and `errno' is set. Particular + `errno' values: `EINVAL', `EILSEQ', `ENOMEM'. + + The following functions convert between NUL terminated strings in a +specified encoding and NUL terminated Unicode strings.
+ + -- Function: uint8_t * u8_strconv_from_encoding (const char *STRING, + const char *FROMCODE, enum iconv_ilseq_handler HANDLER) + -- Function: uint16_t * u16_strconv_from_encoding (const char *STRING, + const char *FROMCODE, enum iconv_ilseq_handler HANDLER) + -- Function: uint32_t * u32_strconv_from_encoding (const char *STRING, + const char *FROMCODE, enum iconv_ilseq_handler HANDLER) + Converts a NUL terminated string from a given encoding. + + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `EILSEQ', `ENOMEM'. + + -- Function: char * u8_strconv_to_encoding (const uint8_t *STRING, + const char *TOCODE, enum iconv_ilseq_handler HANDLER) + -- Function: char * u16_strconv_to_encoding (const uint16_t *STRING, + const char *TOCODE, enum iconv_ilseq_handler HANDLER) + -- Function: char * u32_strconv_to_encoding (const uint32_t *STRING, + const char *TOCODE, enum iconv_ilseq_handler HANDLER) + Converts a NUL terminated string to a given encoding. + + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `EILSEQ', `ENOMEM'. + + The following functions are shorthands that convert between NUL +terminated strings in locale encoding and NUL terminated Unicode +strings. + + -- Function: uint8_t * u8_strconv_from_locale (const char *STRING) + -- Function: uint16_t * u16_strconv_from_locale (const char *STRING) + -- Function: uint32_t * u32_strconv_from_locale (const char *STRING) + Converts a NUL terminated string from the locale encoding. + + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `ENOMEM'. + + -- Function: char * u8_strconv_to_locale (const uint8_t *STRING) + -- Function: char * u16_strconv_to_locale (const uint16_t *STRING) + -- Function: char * u32_strconv_to_locale (const uint32_t *STRING) + Converts a NUL terminated string to the locale encoding.
+ + The result is `malloc' allocated, or NULL (with `errno' set) in + case of error. + + Particular `errno' values: `ENOMEM'. + + +File: libunistring.info, Node: unistdio.h, Next: uniname.h, Prev: uniconv.h, Up: Top + +6 Output with Unicode strings `<unistdio.h>' +******************************************** + + This include file declares functions for doing formatted output with +Unicode strings. It defines a set of functions similar to `fprintf' and +`sprintf', which are declared in `<stdio.h>'. + + These functions work like the `printf' function family. In the +format string: + * The format directive `U' takes an UTF-8 string (`const uint8_t *'). + + * The format directive `lU' takes an UTF-16 string (`const uint16_t + *'). + + * The format directive `llU' takes an UTF-32 string (`const uint32_t + *'). + + A function name with an infix `v' indicates that a `va_list' is +passed instead of multiple arguments. + + The functions `*sprintf' have a BUF argument that is assumed to be +large enough. (_DANGEROUS! Overflowing the buffer will crash the +program._) + + The functions `*snprintf' have a BUF argument that is assumed to be +SIZE units large. (_DANGEROUS! The resulting string might be +truncated in the middle of a multibyte character._) + + The functions `*asprintf' have a RESULTP argument. The result will +be freshly allocated and stored in `*resultp'. + + The functions `*asnprintf' have a (RESULTBUF, LENGTHP) argument +pair. If RESULTBUF is not NULL and the result fits into `*LENGTHP' +units, it is put in RESULTBUF, and RESULTBUF is returned. Otherwise, a +freshly allocated string is returned. In both cases, `*LENGTHP' is set +to the length (number of units) of the returned string. In case of +error, NULL is returned and `errno' is set. + + The following functions take an ASCII format string and return a +result that is a `char *' string in locale encoding. + + -- Function: int ulc_sprintf (char *BUF, const char *FORMAT, ...)
+ + -- Function: int ulc_snprintf (char *BUF, size_t size, const char + *FORMAT, ...) + + -- Function: int ulc_asprintf (char **RESULTP, const char *FORMAT, ...) + + -- Function: char * ulc_asnprintf (char *RESULTBUF, size_t *LENGTHP, + const char *FORMAT, ...) + + -- Function: int ulc_vsprintf (char *BUF, const char *FORMAT, va_list + AP) + + -- Function: int ulc_vsnprintf (char *BUF, size_t size, const char + *FORMAT, va_list AP) + + -- Function: int ulc_vasprintf (char **RESULTP, const char *FORMAT, + va_list AP) + + -- Function: char * ulc_vasnprintf (char *RESULTBUF, size_t *LENGTHP, + const char *FORMAT, va_list AP) + + The following functions take an ASCII format string and return a +result in UTF-8 format. + + -- Function: int u8_sprintf (uint8_t *BUF, const char *FORMAT, ...) + + -- Function: int u8_snprintf (uint8_t *BUF, size_t SIZE, const char + *FORMAT, ...) + + -- Function: int u8_asprintf (uint8_t **RESULTP, const char *FORMAT, + ...) + + -- Function: uint8_t * u8_asnprintf (uint8_t *RESULTBUF, size_t + *LENGTHP, const char *FORMAT, ...) + + -- Function: int u8_vsprintf (uint8_t *BUF, const char *FORMAT, + va_list ap) + + -- Function: int u8_vsnprintf (uint8_t *BUF, size_t SIZE, const char + *FORMAT, va_list AP) + + -- Function: int u8_vasprintf (uint8_t **RESULTP, const char *FORMAT, + va_list AP) + + -- Function: uint8_t * u8_vasnprintf (uint8_t *resultbuf, size_t + *LENGTHP, const char *FORMAT, va_list AP) + + The following functions take an UTF-8 format string and return a +result in UTF-8 format. + + -- Function: int u8_u8_sprintf (uint8_t *BUF, const uint8_t *FORMAT, + ...) + + -- Function: int u8_u8_snprintf (uint8_t *BUF, size_t SIZE, const + uint8_t *FORMAT, ...) + + -- Function: int u8_u8_asprintf (uint8_t **RESULTP, const uint8_t + *FORMAT, ...) + + -- Function: uint8_t * u8_u8_asnprintf (uint8_t *resultbuf, size_t + *LENGTHP, const uint8_t *FORMAT, ...) 
+ + -- Function: int u8_u8_vsprintf (uint8_t *BUF, const uint8_t *FORMAT, + va_list AP) + + -- Function: int u8_u8_vsnprintf (uint8_t *BUF, size_t SIZE, const + uint8_t *FORMAT, va_list AP) + + -- Function: int u8_u8_vasprintf (uint8_t **RESULTP, const uint8_t + *FORMAT, va_list AP) + + -- Function: uint8_t * u8_u8_vasnprintf (uint8_t *resultbuf, size_t + *LENGTHP, const uint8_t *FORMAT, va_list AP) + + The following functions take an ASCII format string and return a +result in UTF-16 format. + + -- Function: int u16_sprintf (uint16_t *BUF, const char *FORMAT, ...) + + -- Function: int u16_snprintf (uint16_t *BUF, size_t SIZE, const char + *FORMAT, ...) + + -- Function: int u16_asprintf (uint16_t **RESULTP, const char *FORMAT, + ...) + + -- Function: uint16_t * u16_asnprintf (uint16_t *RESULTBUF, size_t + *LENGTHP, const char *FORMAT, ...) + + -- Function: int u16_vsprintf (uint16_t *BUF, const char *FORMAT, + va_list ap) + + -- Function: int u16_vsnprintf (uint16_t *BUF, size_t SIZE, const char + *FORMAT, va_list AP) + + -- Function: int u16_vasprintf (uint16_t **RESULTP, const char + *FORMAT, va_list AP) + + -- Function: uint16_t * u16_vasnprintf (uint16_t *resultbuf, size_t + *LENGTHP, const char *FORMAT, va_list AP) + + The following functions take an UTF-16 format string and return a +result in UTF-16 format. + + -- Function: int u16_u16_sprintf (uint16_t *BUF, const uint16_t + *FORMAT, ...) + + -- Function: int u16_u16_snprintf (uint16_t *BUF, size_t SIZE, const + uint16_t *FORMAT, ...) + + -- Function: int u16_u16_asprintf (uint16_t **RESULTP, const uint16_t + *FORMAT, ...) + + -- Function: uint16_t * u16_u16_asnprintf (uint16_t *resultbuf, size_t + *LENGTHP, const uint16_t *FORMAT, ...) 
+ + -- Function: int u16_u16_vsprintf (uint16_t *BUF, const uint16_t + *FORMAT, va_list AP) + + -- Function: int u16_u16_vsnprintf (uint16_t *BUF, size_t SIZE, const + uint16_t *FORMAT, va_list AP) + + -- Function: int u16_u16_vasprintf (uint16_t **RESULTP, const uint16_t + *FORMAT, va_list AP) + + -- Function: uint16_t * u16_u16_vasnprintf (uint16_t *resultbuf, + size_t *LENGTHP, const uint16_t *FORMAT, va_list AP) + + The following functions take an ASCII format string and return a +result in UTF-32 format. + + -- Function: int u32_sprintf (uint32_t *BUF, const char *FORMAT, ...) + + -- Function: int u32_snprintf (uint32_t *BUF, size_t SIZE, const char + *FORMAT, ...) + + -- Function: int u32_asprintf (uint32_t **RESULTP, const char *FORMAT, + ...) + + -- Function: uint32_t * u32_asnprintf (uint32_t *RESULTBUF, size_t + *LENGTHP, const char *FORMAT, ...) + + -- Function: int u32_vsprintf (uint32_t *BUF, const char *FORMAT, + va_list ap) + + -- Function: int u32_vsnprintf (uint32_t *BUF, size_t SIZE, const char + *FORMAT, va_list AP) + + -- Function: int u32_vasprintf (uint32_t **RESULTP, const char + *FORMAT, va_list AP) + + -- Function: uint32_t * u32_vasnprintf (uint32_t *resultbuf, size_t + *LENGTHP, const char *FORMAT, va_list AP) + + The following functions take an UTF-32 format string and return a +result in UTF-32 format. + + -- Function: int u32_u32_sprintf (uint32_t *BUF, const uint32_t + *FORMAT, ...) + + -- Function: int u32_u32_snprintf (uint32_t *BUF, size_t SIZE, const + uint32_t *FORMAT, ...) + + -- Function: int u32_u32_asprintf (uint32_t **RESULTP, const uint32_t + *FORMAT, ...) + + -- Function: uint32_t * u32_u32_asnprintf (uint32_t *resultbuf, size_t + *LENGTHP, const uint32_t *FORMAT, ...) 
+ + -- Function: int u32_u32_vsprintf (uint32_t *BUF, const uint32_t + *FORMAT, va_list AP) + + -- Function: int u32_u32_vsnprintf (uint32_t *BUF, size_t SIZE, const + uint32_t *FORMAT, va_list AP) + + -- Function: int u32_u32_vasprintf (uint32_t **RESULTP, const uint32_t + *FORMAT, va_list AP) + + -- Function: uint32_t * u32_u32_vasnprintf (uint32_t *resultbuf, + size_t *LENGTHP, const uint32_t *FORMAT, va_list AP) + + The following functions take an ASCII format string and produce +output in locale encoding to a `FILE' stream. + + -- Function: int ulc_fprintf (FILE *STREAM, const char *FORMAT, ...) + + -- Function: int ulc_vfprintf (FILE *STREAM, const char *FORMAT, + va_list AP) + + +File: libunistring.info, Node: uniname.h, Next: unictype.h, Prev: unistdio.h, Up: Top + +7 Names of Unicode characters `<uniname.h>' +******************************************* + + This include file implements the association between a Unicode +character and its name. + + The name of a Unicode character allows distinguishing it from other, +similar looking characters. For example, the character `x' has the name +`"LATIN SMALL LETTER X"' and is therefore different from the character +named `"MULTIPLICATION SIGN"'. + + -- Macro: unsigned int UNINAME_MAX + This macro expands to a constant that is the required size of + buffer for a Unicode character name. + + -- Function: char * unicode_character_name (ucs4_t UC, char *BUF) + Looks up the name of a Unicode character, in uppercase ASCII. BUF + must point to a buffer, at least `UNINAME_MAX' bytes in size. + Returns the filled BUF, or NULL if the character does not have a + name. + + -- Function: ucs4_t unicode_name_character (const char *NAME) + Looks up the Unicode character with a given name, in upper- or + lowercase ASCII. Returns the character if found, or + `UNINAME_INVALID' if not found. + + -- Macro: ucs4_t UNINAME_INVALID + This macro expands to a constant that is a special return value of + the `unicode_name_character' function.
+ + +File: libunistring.info, Node: unictype.h, Next: uniwidth.h, Prev: uniname.h, Up: Top + +8 Unicode character classification and properties `<unictype.h>' +**************************************************************** + + This include file declares functions that classify Unicode characters +and that test whether Unicode characters have specific properties. + + The classification assigns a "general category" to every Unicode +character. This is similar to the classification provided by ISO C in +`<wctype.h>'. + + Properties are the data that guides various text processing +algorithms in the presence of specific Unicode characters. + +* Menu: + +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: + + +File: libunistring.info, Node: General category, Next: Canonical combining class, Up: unictype.h + +8.1 General category +==================== + + Every Unicode character or code point has a _general category_ +assigned to it. This classification is important for most algorithms +that work on Unicode text. + + The GNU libunistring library provides two kinds of API for working +with general categories. The object oriented API uses a variable to +denote every predefined general category value or combinations thereof. +The low-level API uses a bit mask instead. The advantage of the object +oriented API is that if only a few predefined general category values +are used, the data tables are relatively small. When you combine +general category values (using `uc_general_category_or', +`uc_general_category_and', or `uc_general_category_and_not'), or when +you use the low level bit masks, a big table is used that holds the +complete general category information for all Unicode characters.
+ +* Menu: + +* Object oriented API:: +* Bit mask API:: + + +File: libunistring.info, Node: Object oriented API, Next: Bit mask API, Up: General category + +8.1.1 The object oriented API for general category +-------------------------------------------------- + + -- Type: uc_general_category_t + This data type denotes a general category value. It is an + immediate type that can be copied by simple assignment, without + involving memory allocation. It is not an array type. + + The following are the predefined general category value. Additional +general categories may be added in the future. + + -- Constant: uc_general_category_t UC_CATEGORY_L + -- Constant: uc_general_category_t UC_CATEGORY_Lu + -- Constant: uc_general_category_t UC_CATEGORY_Ll + -- Constant: uc_general_category_t UC_CATEGORY_Lt + -- Constant: uc_general_category_t UC_CATEGORY_Lm + -- Constant: uc_general_category_t UC_CATEGORY_Lo + -- Constant: uc_general_category_t UC_CATEGORY_M + -- Constant: uc_general_category_t UC_CATEGORY_Mn + -- Constant: uc_general_category_t UC_CATEGORY_Mc + -- Constant: uc_general_category_t UC_CATEGORY_Me + -- Constant: uc_general_category_t UC_CATEGORY_N + -- Constant: uc_general_category_t UC_CATEGORY_Nd + -- Constant: uc_general_category_t UC_CATEGORY_Nl + -- Constant: uc_general_category_t UC_CATEGORY_No + -- Constant: uc_general_category_t UC_CATEGORY_P + -- Constant: uc_general_category_t UC_CATEGORY_Pc + -- Constant: uc_general_category_t UC_CATEGORY_Pd + -- Constant: uc_general_category_t UC_CATEGORY_Ps + -- Constant: uc_general_category_t UC_CATEGORY_Pe + -- Constant: uc_general_category_t UC_CATEGORY_Pi + -- Constant: uc_general_category_t UC_CATEGORY_Pf + -- Constant: uc_general_category_t UC_CATEGORY_Po + -- Constant: uc_general_category_t UC_CATEGORY_S + -- Constant: uc_general_category_t UC_CATEGORY_Sm + -- Constant: uc_general_category_t UC_CATEGORY_Sc + -- Constant: uc_general_category_t UC_CATEGORY_Sk + -- Constant: uc_general_category_t UC_CATEGORY_So + 
-- Constant: uc_general_category_t UC_CATEGORY_Z + -- Constant: uc_general_category_t UC_CATEGORY_Zs + -- Constant: uc_general_category_t UC_CATEGORY_Zl + -- Constant: uc_general_category_t UC_CATEGORY_Zp + -- Constant: uc_general_category_t UC_CATEGORY_C + -- Constant: uc_general_category_t UC_CATEGORY_Cc + -- Constant: uc_general_category_t UC_CATEGORY_Cf + -- Constant: uc_general_category_t UC_CATEGORY_Cs + -- Constant: uc_general_category_t UC_CATEGORY_Co + -- Constant: uc_general_category_t UC_CATEGORY_Cn + + The following are alias names for predefined General category values. + + -- Macro: uc_general_category_t UC_LETTER + This is another name for `UC_CATEGORY_L'. + + -- Macro: uc_general_category_t UC_UPPERCASE_LETTER + This is another name for `UC_CATEGORY_Lu'. + + -- Macro: uc_general_category_t UC_LOWERCASE_LETTER + This is another name for `UC_CATEGORY_Ll'. + + -- Macro: uc_general_category_t UC_TITLECASE_LETTER + This is another name for `UC_CATEGORY_Lt'. + + -- Macro: uc_general_category_t UC_MODIFIER_LETTER + This is another name for `UC_CATEGORY_Lm'. + + -- Macro: uc_general_category_t UC_OTHER_LETTER + This is another name for `UC_CATEGORY_Lo'. + + -- Macro: uc_general_category_t UC_MARK + This is another name for `UC_CATEGORY_M'. + + -- Macro: uc_general_category_t UC_NON_SPACING_MARK + This is another name for `UC_CATEGORY_Mn'. + + -- Macro: uc_general_category_t UC_COMBINING_SPACING_MARK + This is another name for `UC_CATEGORY_Mc'. + + -- Macro: uc_general_category_t UC_ENCLOSING_MARK + This is another name for `UC_CATEGORY_Me'. + + -- Macro: uc_general_category_t UC_NUMBER + This is another name for `UC_CATEGORY_N'. + + -- Macro: uc_general_category_t UC_DECIMAL_DIGIT_NUMBER + This is another name for `UC_CATEGORY_Nd'. + + -- Macro: uc_general_category_t UC_LETTER_NUMBER + This is another name for `UC_CATEGORY_Nl'. + + -- Macro: uc_general_category_t UC_OTHER_NUMBER + This is another name for `UC_CATEGORY_No'. 
+ + -- Macro: uc_general_category_t UC_PUNCTUATION + This is another name for `UC_CATEGORY_P'. + + -- Macro: uc_general_category_t UC_CONNECTOR_PUNCTUATION + This is another name for `UC_CATEGORY_Pc'. + + -- Macro: uc_general_category_t UC_DASH_PUNCTUATION + This is another name for `UC_CATEGORY_Pd'. + + -- Macro: uc_general_category_t UC_OPEN_PUNCTUATION + This is another name for `UC_CATEGORY_Ps' ("start punctuation"). + + -- Macro: uc_general_category_t UC_CLOSE_PUNCTUATION + This is another name for `UC_CATEGORY_Pe' ("end punctuation"). + + -- Macro: uc_general_category_t UC_INITIAL_QUOTE_PUNCTUATION + This is another name for `UC_CATEGORY_Pi'. + + -- Macro: uc_general_category_t UC_FINAL_QUOTE_PUNCTUATION + This is another name for `UC_CATEGORY_Pf'. + + -- Macro: uc_general_category_t UC_OTHER_PUNCTUATION + This is another name for `UC_CATEGORY_Po'. + + -- Macro: uc_general_category_t UC_SYMBOL + This is another name for `UC_CATEGORY_S'. + + -- Macro: uc_general_category_t UC_MATH_SYMBOL + This is another name for `UC_CATEGORY_Sm'. + + -- Macro: uc_general_category_t UC_CURRENCY_SYMBOL + This is another name for `UC_CATEGORY_Sc'. + + -- Macro: uc_general_category_t UC_MODIFIER_SYMBOL + This is another name for `UC_CATEGORY_Sk'. + + -- Macro: uc_general_category_t UC_OTHER_SYMBOL + This is another name for `UC_CATEGORY_So'. + + -- Macro: uc_general_category_t UC_SEPARATOR + This is another name for `UC_CATEGORY_Z'. + + -- Macro: uc_general_category_t UC_SPACE_SEPARATOR + This is another name for `UC_CATEGORY_Zs'. + + -- Macro: uc_general_category_t UC_LINE_SEPARATOR + This is another name for `UC_CATEGORY_Zl'. + + -- Macro: uc_general_category_t UC_PARAGRAPH_SEPARATOR + This is another name for `UC_CATEGORY_Zp'. + + -- Macro: uc_general_category_t UC_OTHER + This is another name for `UC_CATEGORY_C'. + + -- Macro: uc_general_category_t UC_CONTROL + This is another name for `UC_CATEGORY_Cc'. 
+ + -- Macro: uc_general_category_t UC_FORMAT + This is another name for `UC_CATEGORY_Cf'. + + -- Macro: uc_general_category_t UC_SURROGATE + This is another name for `UC_CATEGORY_Cs'. All code points in this + category are invalid characters. + + -- Macro: uc_general_category_t UC_PRIVATE_USE + This is another name for `UC_CATEGORY_Co'. + + -- Macro: uc_general_category_t UC_UNASSIGNED + This is another name for `UC_CATEGORY_Cn'. Some code points in + this category are invalid characters. + + The following functions combine general categories, like in a +boolean algebra, except that there is no `not' operation. + + -- Function: uc_general_category_t uc_general_category_or + (uc_general_category_t CATEGORY1, uc_general_category_t + CATEGORY2) + Returns the union of two general categories. This corresponds to + the unions of the two sets of characters. + + -- Function: uc_general_category_t uc_general_category_and + (uc_general_category_t CATEGORY1, uc_general_category_t + CATEGORY2) + Returns the intersection of two general categories as bit masks. + This _does not_ correspond to the intersection of the two sets of + characters. + + -- Function: uc_general_category_t uc_general_category_and_not + (uc_general_category_t CATEGORY1, uc_general_category_t + CATEGORY2) + Returns the intersection of a general category with the complement + of a second general category, as bit masks. This _does not_ + correspond to the intersection with complement, when viewing the + categories as sets of characters. + + The following functions associate general categories with their name. + + -- Function: const char * uc_general_category_name + (uc_general_category_t CATEGORY) + Returns the name of a general category. Returns NULL if the + general category corresponds to a bit mask that does not have a + name. + + -- Function: uc_general_category_t uc_general_category_byname (const + char *CATEGORY_NAME) + Returns the general category given by name, e.g. `"Lu"'. 
+ + The following functions view general categories as sets of Unicode +characters. + + -- Function: uc_general_category_t uc_general_category (ucs4_t UC) + Returns the general category of a Unicode character. + + This function uses a big table. + + -- Function: bool uc_is_general_category (ucs4_t UC, + uc_general_category_t CATEGORY) + Tests whether a Unicode character belongs to a given category. + The CATEGORY argument can be a predefined general category or the + combination of several predefined general categories. + + +File: libunistring.info, Node: Bit mask API, Prev: Object oriented API, Up: General category + +8.1.2 The bit mask API for general category +------------------------------------------- + + The following are the predefined general category values as bit masks. +Additional general categories may be added in the future. + + -- Macro: uint32_t UC_CATEGORY_MASK_L + -- Macro: uint32_t UC_CATEGORY_MASK_Lu + -- Macro: uint32_t UC_CATEGORY_MASK_Ll + -- Macro: uint32_t UC_CATEGORY_MASK_Lt + -- Macro: uint32_t UC_CATEGORY_MASK_Lm + -- Macro: uint32_t UC_CATEGORY_MASK_Lo + -- Macro: uint32_t UC_CATEGORY_MASK_M + -- Macro: uint32_t UC_CATEGORY_MASK_Mn + -- Macro: uint32_t UC_CATEGORY_MASK_Mc + -- Macro: uint32_t UC_CATEGORY_MASK_Me + -- Macro: uint32_t UC_CATEGORY_MASK_N + -- Macro: uint32_t UC_CATEGORY_MASK_Nd + -- Macro: uint32_t UC_CATEGORY_MASK_Nl + -- Macro: uint32_t UC_CATEGORY_MASK_No + -- Macro: uint32_t UC_CATEGORY_MASK_P + -- Macro: uint32_t UC_CATEGORY_MASK_Pc + -- Macro: uint32_t UC_CATEGORY_MASK_Pd + -- Macro: uint32_t UC_CATEGORY_MASK_Ps + -- Macro: uint32_t UC_CATEGORY_MASK_Pe + -- Macro: uint32_t UC_CATEGORY_MASK_Pi + -- Macro: uint32_t UC_CATEGORY_MASK_Pf + -- Macro: uint32_t UC_CATEGORY_MASK_Po + -- Macro: uint32_t UC_CATEGORY_MASK_S + -- Macro: uint32_t UC_CATEGORY_MASK_Sm + -- Macro: uint32_t UC_CATEGORY_MASK_Sc + -- Macro: uint32_t UC_CATEGORY_MASK_Sk + -- Macro: uint32_t UC_CATEGORY_MASK_So + -- Macro: uint32_t UC_CATEGORY_MASK_Z + -- 
Macro: uint32_t UC_CATEGORY_MASK_Zs + -- Macro: uint32_t UC_CATEGORY_MASK_Zl + -- Macro: uint32_t UC_CATEGORY_MASK_Zp + -- Macro: uint32_t UC_CATEGORY_MASK_C + -- Macro: uint32_t UC_CATEGORY_MASK_Cc + -- Macro: uint32_t UC_CATEGORY_MASK_Cf + -- Macro: uint32_t UC_CATEGORY_MASK_Cs + -- Macro: uint32_t UC_CATEGORY_MASK_Co + -- Macro: uint32_t UC_CATEGORY_MASK_Cn + + The following function views general categories as sets of Unicode +characters. + + -- Function: bool uc_is_general_category_withtable (ucs4_t UC, + uint32_t BITMASK) + Tests whether a Unicode character belongs to a given category. + The BITMASK argument can be a predefined general category bitmask + or the combination of several predefined general category bitmasks. + + This function uses a big table comprising all general categories. + + +File: libunistring.info, Node: Canonical combining class, Next: Bidirectional category, Prev: General category, Up: unictype.h + +8.2 Canonical combining class +============================= + + Every Unicode character or code point has a _canonical combining +class_ assigned to it. + + What is the meaning of the canonical combining class? Essentially, +it indicates the priority with which a combining character is attached +to its base character. The characters for which the canonical +combining class is 0 are the base characters, and the characters for +which it is greater than 0 are the combining characters. Combining +characters are rendered near/attached/around their base character, and +combining characters with small combining classes are attached "first" +or "closer" to the base character. + + The canonical combining class of a character is a number in the range +0..255. The possible values are described in the Unicode Character +Database `http://www.unicode.org/Public/UNIDATA/UCD.html'. The list +here is not definitive; more values can be added in future versions. 
+ + -- Constant: int UC_CCC_NR + The canonical combining class value for "Not Reordered" characters. + The value is 0. + + -- Constant: int UC_CCC_OV + The canonical combining class value for "Overlay" characters. + + -- Constant: int UC_CCC_NK + The canonical combining class value for "Nukta" characters. + + -- Constant: int UC_CCC_KV + The canonical combining class value for "Kana Voicing" characters. + + -- Constant: int UC_CCC_VR + The canonical combining class value for "Virama" characters. + + -- Constant: int UC_CCC_ATBL + The canonical combining class value for "Attached Below Left" + characters. + + -- Constant: int UC_CCC_ATB + The canonical combining class value for "Attached Below" + characters. + + -- Constant: int UC_CCC_ATAR + The canonical combining class value for "Attached Above Right" + characters. + + -- Constant: int UC_CCC_BL + The canonical combining class value for "Below Left" characters. + + -- Constant: int UC_CCC_B + The canonical combining class value for "Below" characters. + + -- Constant: int UC_CCC_BR + The canonical combining class value for "Below Right" characters. + + -- Constant: int UC_CCC_L + The canonical combining class value for "Left" characters. + + -- Constant: int UC_CCC_R + The canonical combining class value for "Right" characters. + + -- Constant: int UC_CCC_AL + The canonical combining class value for "Above Left" characters. + + -- Constant: int UC_CCC_A + The canonical combining class value for "Above" characters. + + -- Constant: int UC_CCC_AR + The canonical combining class value for "Above Right" characters. + + -- Constant: int UC_CCC_DB + The canonical combining class value for "Double Below" characters. + + -- Constant: int UC_CCC_DA + The canonical combining class value for "Double Above" characters. + + -- Constant: int UC_CCC_IS + The canonical combining class value for "Iota Subscript" + characters. + + The following function looks up the canonical combining class of a +character. 
+ + -- Function: int uc_combining_class (ucs4_t UC) + Returns the canonical combining class of a Unicode character. + + +File: libunistring.info, Node: Bidirectional category, Next: Decimal digit value, Prev: Canonical combining class, Up: unictype.h + +8.3 Bidirectional category +========================== + + Every Unicode character or code point has a _bidirectional category_ +assigned to it. + + The bidirectional category guides the bidirectional algorithm +(`http://www.unicode.org/reports/tr9/'). The possible values are the +following. + + -- Constant: int UC_BIDI_L + The bidirectional category for "Left-to-Right" characters. + + -- Constant: int UC_BIDI_LRE + The bidirectional category for "Left-to-Right Embedding" + characters. + + -- Constant: int UC_BIDI_LRO + The bidirectional category for "Left-to-Right Override" characters. + + -- Constant: int UC_BIDI_R + The bidirectional category for "Right-to-Left" characters. + + -- Constant: int UC_BIDI_AL + The bidirectional category for "Right-to-Left Arabic" characters. + + -- Constant: int UC_BIDI_RLE + The bidirectional category for "Right-to-Left Embedding" + characters. + + -- Constant: int UC_BIDI_RLO + The bidirectional category for "Right-to-Left Override" characters. + + -- Constant: int UC_BIDI_PDF + The bidirectional category for "Pop Directional Format" characters. + + -- Constant: int UC_BIDI_EN + The bidirectional category for "European Number" characters. + + -- Constant: int UC_BIDI_ES + The bidirectional category for "European Number Separator" + characters. + + -- Constant: int UC_BIDI_ET + The bidirectional category for "European Number Terminator" + characters. + + -- Constant: int UC_BIDI_AN + The bidirectional category for "Arabic Number" characters. + + -- Constant: int UC_BIDI_CS + The bidirectional category for "Common Number Separator" + characters. + + -- Constant: int UC_BIDI_NSM + The bidirectional category for "Non-Spacing Mark" characters. 
+ + -- Constant: int UC_BIDI_BN + The bidirectional category for "Boundary Neutral" characters. + + -- Constant: int UC_BIDI_B + The bidirectional category for "Paragraph Separator" characters. + + -- Constant: int UC_BIDI_S + The bidirectional category for "Segment Separator" characters. + + -- Constant: int UC_BIDI_WS + The bidirectional category for "Whitespace" characters. + + -- Constant: int UC_BIDI_ON + The bidirectional category for "Other Neutral" characters. + + The following functions implement the association between a +bidirectional category and its name. + + -- Function: const char * uc_bidi_category_name (int CATEGORY) + Returns the name of a bidirectional category. + + -- Function: int uc_bidi_category_byname (const char *CATEGORY_NAME) + Returns the bidirectional category given by name, e.g. `"LRE"'. + + The following functions view bidirectional categories as sets of +Unicode characters. + + -- Function: int uc_bidi_category (ucs4_t UC) + Returns the bidirectional category of a Unicode character. + + -- Function: bool uc_is_bidi_category (ucs4_t UC, int CATEGORY) + Tests whether a Unicode character belongs to a given bidirectional + category. + + +File: libunistring.info, Node: Decimal digit value, Next: Digit value, Prev: Bidirectional category, Up: unictype.h + +8.4 Decimal digit value +======================= + + Decimal digits (like the digits from `0' to `9') exist in many +scripts. The following function converts a decimal digit character to +its numerical value. + + -- Function: int uc_decimal_value (ucs4_t UC) + Returns the decimal digit value of a Unicode character. The + return value is an integer in the range 0..9, or -1 for characters + that do not represent a decimal digit. 
+ + +File: libunistring.info, Node: Digit value, Next: Numeric value, Prev: Decimal digit value, Up: unictype.h + +8.5 Digit value +=============== + + Digit characters are like decimal digit characters, possibly in +special forms, such as superscript, subscript, or circled. The +following function converts a digit character to its numerical value. + + -- Function: int uc_digit_value (ucs4_t UC) + Returns the digit value of a Unicode character. The return value + is an integer in the range 0..9, or -1 for characters that do not + represent a digit. + + +File: libunistring.info, Node: Numeric value, Next: Mirrored character, Prev: Digit value, Up: unictype.h + +8.6 Numeric value +================= + + There are also characters that represent numbers without a digit +system, like the Roman numerals, and fractional numbers, like 1/4 or +3/4. + + The following type represents the numeric value of a Unicode +character. + + -- Type: uc_fraction_t + This is a structure type with the following fields: + int numerator; + int denominator; + An integer N is represented by `numerator = N', `denominator = 1'. + + The following function converts a number character to its numerical +value. + + -- Function: uc_fraction_t uc_numeric_value (ucs4_t UC) + Returns the numeric value of a Unicode character. The return + value is a fraction, or the pseudo-fraction `{ 0, 0 }' for + characters that do not represent a number. + + +File: libunistring.info, Node: Mirrored character, Next: Properties, Prev: Numeric value, Up: unictype.h + +8.7 Mirrored character +====================== + + Character mirroring is used to associate the closing parenthesis +character to the opening parenthesis character, the closing brace +character with the opening brace character, and so on. + + The following function looks up the mirrored character of a Unicode +character. 
+ + -- Function: bool uc_mirror_char (ucs4_t UC, ucs4_t *PUC) + Stores the mirrored character of a Unicode character UC in `*PUC' + and returns `true', if it exists. Otherwise it stores UC + unmodified in `*PUC' and returns `false'. + + +File: libunistring.info, Node: Properties, Next: Scripts, Prev: Mirrored character, Up: unictype.h + +8.8 Properties +============== + + This section defines boolean properties of Unicode characters. This +means, a character either has the given property or does not have it. +In other words, the property can be viewed as a subset of the set of +Unicode characters. + + The GNU libunistring library provides two kinds of API for working +with properties. The object oriented API uses a type `uc_property_t' +to designate a property. In the function-based API, which is a bit more +low level, a property is merely a function. + +* Menu: + +* Properties as objects:: +* Properties as functions:: + + +File: libunistring.info, Node: Properties as objects, Next: Properties as functions, Up: Properties + +8.8.1 Properties as objects - the object oriented API +----------------------------------------------------- + + The following type designates a property on Unicode characters. + + -- Type: uc_property_t + This data type denotes a boolean property on Unicode characters. + It is an immediate type that can be copied by simple assignment, + without involving memory allocation. It is not an array type. + + Many Unicode properties are predefined. + + The following are general properties. 
+ + -- Constant: uc_property_t UC_PROPERTY_WHITE_SPACE + -- Constant: uc_property_t UC_PROPERTY_ALPHABETIC + -- Constant: uc_property_t UC_PROPERTY_OTHER_ALPHABETIC + -- Constant: uc_property_t UC_PROPERTY_NOT_A_CHARACTER + -- Constant: uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT + -- Constant: uc_property_t +UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT + -- Constant: uc_property_t UC_PROPERTY_DEPRECATED + -- Constant: uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION + -- Constant: uc_property_t UC_PROPERTY_VARIATION_SELECTOR + -- Constant: uc_property_t UC_PROPERTY_PRIVATE_USE + -- Constant: uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE + + The following properties are related to case folding. + + -- Constant: uc_property_t UC_PROPERTY_UPPERCASE + -- Constant: uc_property_t UC_PROPERTY_OTHER_UPPERCASE + -- Constant: uc_property_t UC_PROPERTY_LOWERCASE + -- Constant: uc_property_t UC_PROPERTY_OTHER_LOWERCASE + -- Constant: uc_property_t UC_PROPERTY_TITLECASE + -- Constant: uc_property_t UC_PROPERTY_SOFT_DOTTED + + The following properties are related to identifiers. + + -- Constant: uc_property_t UC_PROPERTY_ID_START + -- Constant: uc_property_t UC_PROPERTY_OTHER_ID_START + -- Constant: uc_property_t UC_PROPERTY_ID_CONTINUE + -- Constant: uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE + -- Constant: uc_property_t UC_PROPERTY_XID_START + -- Constant: uc_property_t UC_PROPERTY_XID_CONTINUE + -- Constant: uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE + -- Constant: uc_property_t UC_PROPERTY_PATTERN_SYNTAX + + The following properties have an influence on shaping and rendering. + + -- Constant: uc_property_t UC_PROPERTY_JOIN_CONTROL + -- Constant: uc_property_t UC_PROPERTY_GRAPHEME_BASE + -- Constant: uc_property_t UC_PROPERTY_GRAPHEME_EXTEND + -- Constant: uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND + -- Constant: uc_property_t UC_PROPERTY_GRAPHEME_LINK + + The following properties relate to bidirectional reordering. 
+ + -- Constant: uc_property_t UC_PROPERTY_BIDI_CONTROL + -- Constant: uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT + -- Constant: uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT + -- Constant: uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT + -- Constant: uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT + -- Constant: uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT + -- Constant: uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_BIDI_WHITESPACE + -- Constant: uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK + -- Constant: uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL + -- Constant: uc_property_t UC_PROPERTY_BIDI_PDF + -- Constant: uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE + -- Constant: uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL + + The following properties deal with number representations. + + -- Constant: uc_property_t UC_PROPERTY_HEX_DIGIT + -- Constant: uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT + + The following properties deal with CJK. 
+ + -- Constant: uc_property_t UC_PROPERTY_IDEOGRAPHIC + -- Constant: uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH + -- Constant: uc_property_t UC_PROPERTY_RADICAL + -- Constant: uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR + -- Constant: uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR + + Other miscellaneous properties are: + + -- Constant: uc_property_t UC_PROPERTY_ZERO_WIDTH + -- Constant: uc_property_t UC_PROPERTY_SPACE + -- Constant: uc_property_t UC_PROPERTY_NON_BREAK + -- Constant: uc_property_t UC_PROPERTY_ISO_CONTROL + -- Constant: uc_property_t UC_PROPERTY_FORMAT_CONTROL + -- Constant: uc_property_t UC_PROPERTY_DASH + -- Constant: uc_property_t UC_PROPERTY_HYPHEN + -- Constant: uc_property_t UC_PROPERTY_PUNCTUATION + -- Constant: uc_property_t UC_PROPERTY_LINE_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR + -- Constant: uc_property_t UC_PROPERTY_QUOTATION_MARK + -- Constant: uc_property_t UC_PROPERTY_SENTENCE_TERMINAL + -- Constant: uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION + -- Constant: uc_property_t UC_PROPERTY_CURRENCY_SYMBOL + -- Constant: uc_property_t UC_PROPERTY_MATH + -- Constant: uc_property_t UC_PROPERTY_OTHER_MATH + -- Constant: uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION + -- Constant: uc_property_t UC_PROPERTY_LEFT_OF_PAIR + -- Constant: uc_property_t UC_PROPERTY_COMBINING + -- Constant: uc_property_t UC_PROPERTY_COMPOSITE + -- Constant: uc_property_t UC_PROPERTY_DECIMAL_DIGIT + -- Constant: uc_property_t UC_PROPERTY_NUMERIC + -- Constant: uc_property_t UC_PROPERTY_DIACRITIC + -- Constant: uc_property_t UC_PROPERTY_EXTENDER + -- Constant: uc_property_t UC_PROPERTY_IGNORABLE_CONTROL + + The following function looks up a property by its name. + + -- Function: uc_property_t uc_property_byname (const char + *PROPERTY_NAME) + Returns the property given by name, e.g. `"White space"'. If a + property with the given name exists, the result will satisfy the + `uc_property_is_valid' predicate. 
Otherwise the result will not + satisfy this predicate and must not be passed to functions that + expect an `uc_property_t' argument. + + This function references a big table of all predefined properties. + Its use can significantly increase the size of your application. + + -- Function: bool uc_property_is_valid (uc_property_t property) + Returns `true' when the given property is valid, or `false' + otherwise. + + The following function views a property as a set of Unicode +characters. + + -- Function: bool uc_is_property (ucs4_t UC, uc_property_t PROPERTY) + Tests whether the Unicode character UC has the given property. + + +File: libunistring.info, Node: Properties as functions, Prev: Properties as objects, Up: Properties + +8.8.2 Properties as functions - the functional API +-------------------------------------------------- + + The following are general properties. + + -- Function: bool uc_is_property_white_space (ucs4_t UC) + -- Function: bool uc_is_property_alphabetic (ucs4_t UC) + -- Function: bool uc_is_property_other_alphabetic (ucs4_t UC) + -- Function: bool uc_is_property_not_a_character (ucs4_t UC) + -- Function: bool uc_is_property_default_ignorable_code_point (ucs4_t + UC) + -- Function: bool uc_is_property_other_default_ignorable_code_point + (ucs4_t UC) + -- Function: bool uc_is_property_deprecated (ucs4_t UC) + -- Function: bool uc_is_property_logical_order_exception (ucs4_t UC) + -- Function: bool uc_is_property_variation_selector (ucs4_t UC) + -- Function: bool uc_is_property_private_use (ucs4_t UC) + -- Function: bool uc_is_property_unassigned_code_value (ucs4_t UC) + + The following properties are related to case folding. 
+ + -- Function: bool uc_is_property_uppercase (ucs4_t UC) + -- Function: bool uc_is_property_other_uppercase (ucs4_t UC) + -- Function: bool uc_is_property_lowercase (ucs4_t UC) + -- Function: bool uc_is_property_other_lowercase (ucs4_t UC) + -- Function: bool uc_is_property_titlecase (ucs4_t UC) + -- Function: bool uc_is_property_soft_dotted (ucs4_t UC) + + The following properties are related to identifiers. + + -- Function: bool uc_is_property_id_start (ucs4_t UC) + -- Function: bool uc_is_property_other_id_start (ucs4_t UC) + -- Function: bool uc_is_property_id_continue (ucs4_t UC) + -- Function: bool uc_is_property_other_id_continue (ucs4_t UC) + -- Function: bool uc_is_property_xid_start (ucs4_t UC) + -- Function: bool uc_is_property_xid_continue (ucs4_t UC) + -- Function: bool uc_is_property_pattern_white_space (ucs4_t UC) + -- Function: bool uc_is_property_pattern_syntax (ucs4_t UC) + + The following properties have an influence on shaping and rendering. + + -- Function: bool uc_is_property_join_control (ucs4_t UC) + -- Function: bool uc_is_property_grapheme_base (ucs4_t UC) + -- Function: bool uc_is_property_grapheme_extend (ucs4_t UC) + -- Function: bool uc_is_property_other_grapheme_extend (ucs4_t UC) + -- Function: bool uc_is_property_grapheme_link (ucs4_t UC) + + The following properties relate to bidirectional reordering. 
+ + -- Function: bool uc_is_property_bidi_control (ucs4_t UC) + -- Function: bool uc_is_property_bidi_left_to_right (ucs4_t UC) + -- Function: bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t UC) + -- Function: bool uc_is_property_bidi_arabic_right_to_left (ucs4_t UC) + -- Function: bool uc_is_property_bidi_european_digit (ucs4_t UC) + -- Function: bool uc_is_property_bidi_eur_num_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_eur_num_terminator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_arabic_digit (ucs4_t UC) + -- Function: bool uc_is_property_bidi_common_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_block_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_segment_separator (ucs4_t UC) + -- Function: bool uc_is_property_bidi_whitespace (ucs4_t UC) + -- Function: bool uc_is_property_bidi_non_spacing_mark (ucs4_t UC) + -- Function: bool uc_is_property_bidi_boundary_neutral (ucs4_t UC) + -- Function: bool uc_is_property_bidi_pdf (ucs4_t UC) + -- Function: bool uc_is_property_bidi_embedding_or_override (ucs4_t UC) + -- Function: bool uc_is_property_bidi_other_neutral (ucs4_t UC) + + The following properties deal with number representations. + + -- Function: bool uc_is_property_hex_digit (ucs4_t UC) + -- Function: bool uc_is_property_ascii_hex_digit (ucs4_t UC) + + The following properties deal with CJK. 
+ + -- Function: bool uc_is_property_ideographic (ucs4_t UC) + -- Function: bool uc_is_property_unified_ideograph (ucs4_t UC) + -- Function: bool uc_is_property_radical (ucs4_t UC) + -- Function: bool uc_is_property_ids_binary_operator (ucs4_t UC) + -- Function: bool uc_is_property_ids_trinary_operator (ucs4_t UC) + + Other miscellaneous properties are: + + -- Function: bool uc_is_property_zero_width (ucs4_t UC) + -- Function: bool uc_is_property_space (ucs4_t UC) + -- Function: bool uc_is_property_non_break (ucs4_t UC) + -- Function: bool uc_is_property_iso_control (ucs4_t UC) + -- Function: bool uc_is_property_format_control (ucs4_t UC) + -- Function: bool uc_is_property_dash (ucs4_t UC) + -- Function: bool uc_is_property_hyphen (ucs4_t UC) + -- Function: bool uc_is_property_punctuation (ucs4_t UC) + -- Function: bool uc_is_property_line_separator (ucs4_t UC) + -- Function: bool uc_is_property_paragraph_separator (ucs4_t UC) + -- Function: bool uc_is_property_quotation_mark (ucs4_t UC) + -- Function: bool uc_is_property_sentence_terminal (ucs4_t UC) + -- Function: bool uc_is_property_terminal_punctuation (ucs4_t UC) + -- Function: bool uc_is_property_currency_symbol (ucs4_t UC) + -- Function: bool uc_is_property_math (ucs4_t UC) + -- Function: bool uc_is_property_other_math (ucs4_t UC) + -- Function: bool uc_is_property_paired_punctuation (ucs4_t UC) + -- Function: bool uc_is_property_left_of_pair (ucs4_t UC) + -- Function: bool uc_is_property_combining (ucs4_t UC) + -- Function: bool uc_is_property_composite (ucs4_t UC) + -- Function: bool uc_is_property_decimal_digit (ucs4_t UC) + -- Function: bool uc_is_property_numeric (ucs4_t UC) + -- Function: bool uc_is_property_diacritic (ucs4_t UC) + -- Function: bool uc_is_property_extender (ucs4_t UC) + -- Function: bool uc_is_property_ignorable_control (ucs4_t UC) + + +File: libunistring.info, Node: Scripts, Next: Blocks, Prev: Properties, Up: unictype.h + +8.9 Scripts +=========== + + The Unicode characters are 
subdivided into scripts. + + The following type is used to represent a script: + + -- Type: uc_script_t + This data type is a structure type that refers to statically + allocated read-only data. It contains the following fields: + const char *name; + + The `name' field contains the name of the script. + + The following functions look up a script. + + -- Function: const uc_script_t * uc_script (ucs4_t UC) + Returns the script of a Unicode character. Returns NULL if UC + does not belong to any script. + + -- Function: const uc_script_t * uc_script_byname (const char + *SCRIPT_NAME) + Returns the script given by its name, e.g. `"HAN"'. Returns NULL + if a script with the given name does not exist. + + The following function views a script as a set of Unicode characters. + + -- Function: bool uc_is_script (ucs4_t UC, const uc_script_t *SCRIPT) + Tests whether a Unicode character belongs to a given script. + + The following gives a global picture of all scripts. + + -- Function: void uc_all_scripts (const uc_script_t **SCRIPTS, size_t + *COUNT) + Get the list of all scripts. Stores a pointer to an array of all + scripts in `*SCRIPTS' and the length of this array in `*COUNT'. + + +File: libunistring.info, Node: Blocks, Next: ISO C and Java syntax, Prev: Scripts, Up: unictype.h + +8.10 Blocks +=========== + + The Unicode characters are subdivided into blocks. A block is an +interval of Unicode code points. + + The following type is used to represent a block. + + -- Type: uc_block_t + This data type is a structure type that refers to statically + allocated data. It contains the following fields: + ucs4_t start; + ucs4_t end; + const char *name; + + The `start' field is the first Unicode code point in the block. + + The `end' field is the last Unicode code point in the block. + + The `name' field is the name of the block. + + The following function looks up a block. + + -- Function: const uc_block_t * uc_block (ucs4_t UC) + Returns the block a character belongs to. 
+ + The following function views a block as a set of Unicode characters. + + -- Function: bool uc_is_block (ucs4_t UC, const uc_block_t *BLOCK) + Tests whether a Unicode character belongs to a given block. + + The following gives a global picture of all blocks. + + -- Function: void uc_all_blocks (const uc_block_t **BLOCKS, size_t + *COUNT) + Get the list of all blocks. Stores a pointer to an array of all + blocks in `*BLOCKS' and the length of this array in `*COUNT'. + + +File: libunistring.info, Node: ISO C and Java syntax, Next: Classifications like in ISO C, Prev: Blocks, Up: unictype.h + +8.11 ISO C and Java syntax +========================== + + The following properties are taken from language standards. The +supported language standards are ISO C 99 and Java. + + -- Function: bool uc_is_c_whitespace (ucs4_t UC) + Tests whether a Unicode character is considered whitespace in ISO + C 99. + + -- Function: bool uc_is_java_whitespace (ucs4_t UC) + Tests whether a Unicode character is considered whitespace in Java. + + The following enumerated values are the possible return values of +the functions `uc_c_ident_category' and `uc_java_ident_category'. + + -- Constant: int UC_IDENTIFIER_START + This return value means that the given character is valid as first + or subsequent character in an identifier. + + -- Constant: int UC_IDENTIFIER_VALID + This return value means that the given character is valid as + subsequent character only. + + -- Constant: int UC_IDENTIFIER_INVALID + This return value means that the given character is not valid in + an identifier. + + -- Constant: int UC_IDENTIFIER_IGNORABLE + This return value (only for Java) means that the given character + is ignorable. + + The following functions determine whether a given character can be a +constituent of an identifier in the given programming language. 
+ + -- Function: int uc_c_ident_category (ucs4_t UC) + Returns the categorization of a Unicode character with respect to + the ISO C 99 identifier syntax. + + -- Function: int uc_java_ident_category (ucs4_t UC) + Returns the categorization of a Unicode character with respect to + the Java identifier syntax. + + +File: libunistring.info, Node: Classifications like in ISO C, Prev: ISO C and Java syntax, Up: unictype.h + +8.12 Classifications like in ISO C +================================== + + The following character classifications mimic those declared in the +ISO C header files `<ctype.h>' and `<wctype.h>'. These functions are +deprecated, because this set of functions was designed with ASCII in +mind and cannot reflect the more diverse reality of the Unicode +character set. But they can be a quick-and-dirty porting aid when +migrating from `wchar_t' APIs to Unicode strings. + + -- Function: bool uc_is_alnum (ucs4_t UC) + Tests for any character for which `uc_is_alpha' or `uc_is_digit' is + true. + + -- Function: bool uc_is_alpha (ucs4_t UC) + Tests for any character for which `uc_is_upper' or `uc_is_lower' is + true, or any character that is one of a locale-specific set of + characters for which none of `uc_is_cntrl', `uc_is_digit', + `uc_is_punct', or `uc_is_space' is true. + + -- Function: bool uc_is_cntrl (ucs4_t UC) + Tests for any control character. + + -- Function: bool uc_is_digit (ucs4_t UC) + Tests for any character that corresponds to a decimal-digit + character. + + -- Function: bool uc_is_graph (ucs4_t UC) + Tests for any character for which `uc_is_print' is true and + `uc_is_space' is false. + + -- Function: bool uc_is_lower (ucs4_t UC) + Tests for any character that corresponds to a lowercase letter or + is one of a locale-specific set of characters for which none of + `uc_is_cntrl', `uc_is_digit', `uc_is_punct', or `uc_is_space' is + true. + + -- Function: bool uc_is_print (ucs4_t UC) + Tests for any printing character. 
+ + -- Function: bool uc_is_punct (ucs4_t UC) + Tests for any printing character that is one of a locale-specific + set of characters for which neither `uc_is_space' nor + `uc_is_alnum' is true. + + -- Function: bool uc_is_space (ucs4_t UC) + Tests for any character that corresponds to a locale-specific set + of characters for which none of `uc_is_alnum', `uc_is_graph', or + `uc_is_punct' is true. + + -- Function: bool uc_is_upper (ucs4_t UC) + Tests for any character that corresponds to an uppercase letter or + is one of a locale-specific set of characters for which none of + `uc_is_cntrl', `uc_is_digit', `uc_is_punct', or `uc_is_space' is + true. + + -- Function: bool uc_is_xdigit (ucs4_t UC) + Tests for any character that corresponds to a hexadecimal-digit + character. + + -- Function: bool uc_is_blank (ucs4_t UC) + Tests for any character that corresponds to a standard blank + character or a locale-specific set of characters for which + `uc_is_alnum' is false. + + +File: libunistring.info, Node: uniwidth.h, Next: uniwbrk.h, Prev: unictype.h, Up: Top + +9 Display width `<uniwidth.h>' +****************************** + + This include file declares functions that return the display width, +measured in columns, of characters or strings, when output to a device +that uses non-proportional fonts. + + Note that for some rarely used characters the actual fonts or +terminal emulators can use a different width. There is no mechanism +for communicating the display width of characters across a Unix +pseudo-terminal (tty). Also, there are scripts with complex rendering, +like the Indic scripts. For these scripts, there is no such concept as +non-proportional fonts. Therefore the results of these functions +usually work fine on most scripts and on most characters but can fail +to represent the actual display width. + + These functions are locale dependent. The ENCODING argument +identifies the encoding (e.g. `"ISO-8859-2"' for Polish). 
+ + -- Function: int uc_width (ucs4_t UC, const char *ENCODING) + Determines and returns the number of column positions required for + UC. Returns -1 if UC is a control character that has an influence + on the column position when output. + + -- Function: int u8_width (const uint8_t *S, size_t N, const char + *ENCODING) + -- Function: int u16_width (const uint16_t *S, size_t N, const char + *ENCODING) + -- Function: int u32_width (const uint32_t *S, size_t N, const char + *ENCODING) + Determines and returns the number of column positions required for + first N units (or fewer if S ends before this) in S. This + function ignores control characters in the string. + + -- Function: int u8_strwidth (const uint8_t *S, const char *ENCODING) + -- Function: int u16_strwidth (const uint16_t *S, const char *ENCODING) + -- Function: int u32_strwidth (const uint32_t *S, const char *ENCODING) + Determines and returns the number of column positions required for + S. This function ignores control characters in the string. + + +File: libunistring.info, Node: uniwbrk.h, Next: unilbrk.h, Prev: uniwidth.h, Up: Top + +10 Word breaks in strings `<uniwbrk.h>' +*************************************** + + This include file declares functions for determining where in a +string "words" start and end. Here "words" are not necessarily the +same as entities that can be looked up in dictionaries, but rather +groups of consecutive characters that should not be split by text +processing operations. + +* Menu: + +* Word breaks in a string:: +* Word break property:: + + +File: libunistring.info, Node: Word breaks in a string, Next: Word break property, Up: uniwbrk.h + +10.1 Word breaks in a string +============================ + + The following functions determine the word breaks in a string.
+ + -- Function: void u8_wordbreaks (const uint8_t *S, size_t N, char *P) + -- Function: void u16_wordbreaks (const uint16_t *S, size_t N, char *P) + -- Function: void u32_wordbreaks (const uint32_t *S, size_t N, char *P) + -- Function: void ulc_wordbreaks (const char *S, size_t N, char *P) + Determines the word break points in S, an array of N units, and + stores the result at `P[0..N-1]'. + `P[i] = 1' + means that there is a word boundary between `S[i-1]' and + `S[i]'. + + `P[i] = 0' + means that `S[i-1]' and `S[i]' must not be separated. + `P[0]' is always set to 0. If an application wants to consider a + word break to be present at the beginning of the string (before + `S[0]') or at the end of the string (after `S[0..N-1]'), it has to + treat these cases explicitly. + + +File: libunistring.info, Node: Word break property, Prev: Word breaks in a string, Up: uniwbrk.h + +10.2 Word break property +======================== + + This is a more low-level API. The word break property is a property +defined in Unicode Standard Annex #29, section "Word Boundaries", see +`http://www.unicode.org/reports/tr29/#Word_Boundaries'. It is used for +determining the word breaks in a string. + + The following are the possible values of the word break property. +More values may be added in the future. + + -- Constant: int WBP_OTHER + -- Constant: int WBP_CR + -- Constant: int WBP_LF + -- Constant: int WBP_NEWLINE + -- Constant: int WBP_EXTEND + -- Constant: int WBP_FORMAT + -- Constant: int WBP_KATAKANA + -- Constant: int WBP_ALETTER + -- Constant: int WBP_MIDNUMLET + -- Constant: int WBP_MIDLETTER + -- Constant: int WBP_MIDNUM + -- Constant: int WBP_NUMERIC + -- Constant: int WBP_EXTENDNUMLET + + The following function looks up the word break property of a +character. + + -- Function: int uc_wordbreak_property (ucs4_t UC) + Returns the Word_Break property of a Unicode character. 
+ + +File: libunistring.info, Node: unilbrk.h, Next: uninorm.h, Prev: uniwbrk.h, Up: Top + +11 Line breaking `<unilbrk.h>' +****************************** + + This include file declares functions for determining where in a +string line breaks could or should be introduced, in order to make the +displayed string fit into a column of given width. + + These functions are locale dependent. The ENCODING argument +identifies the encoding (e.g. `"ISO-8859-2"' for Polish). + + The following enumerated values indicate whether, at a given +position, a line break is possible or not. Given a string S as an +array `S[0..N-1]' and a position I, the values have the following +meanings: + + -- Constant: int UC_BREAK_MANDATORY + This value indicates that `S[I]' is a line break character. + + -- Constant: int UC_BREAK_POSSIBLE + This value indicates that a line break may be inserted between + `S[I-1]' and `S[I]'. + + -- Constant: int UC_BREAK_HYPHENATION + This value indicates that a hyphen and a line break may be + inserted between `S[I-1]' and `S[I]'. But beware of language + dependent hyphenation rules. + + -- Constant: int UC_BREAK_PROHIBITED + This value indicates that `S[I-1]' and `S[I]' must not be + separated. + + -- Constant: int UC_BREAK_UNDEFINED + This value is not used as a return value; rather, in the + overriding argument of the `u*_width_linebreaks' functions, it + indicates the absence of an override. + + The following functions determine the positions at which line breaks +are possible.
+ + -- Function: void u8_possible_linebreaks (const uint8_t *S, size_t N, + const char *ENCODING, char *P) + -- Function: void u16_possible_linebreaks (const uint16_t *S, size_t + N, const char *ENCODING, char *P) + -- Function: void u32_possible_linebreaks (const uint32_t *S, size_t + N, const char *ENCODING, char *P) + -- Function: void ulc_possible_linebreaks (const char *S, size_t N, + const char *ENCODING, char *P) + Determines the line break points in S, and stores the result at + `P[0..N-1]'. Every `P[I]' is assigned one of the values + `UC_BREAK_MANDATORY', `UC_BREAK_POSSIBLE', `UC_BREAK_HYPHENATION', + `UC_BREAK_PROHIBITED'. + + The following functions determine where line breaks should be +inserted so that each line fits in a given width, when output to a +device that uses non-proportional fonts. + + -- Function: int u8_width_linebreaks (const uint8_t *S, size_t N, int + WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + -- Function: int u16_width_linebreaks (const uint16_t *S, size_t N, + int WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + -- Function: int u32_width_linebreaks (const uint32_t *S, size_t N, + int WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + -- Function: int ulc_width_linebreaks (const char *S, size_t N, int + WIDTH, int START_COLUMN, int AT_END_COLUMNS, const char + *OVERRIDE, const char *ENCODING, char *P) + Chooses the best line breaks, assuming that every character + occupies a width given by the `uc_width' function (see *note + uniwidth.h::). + + The string is `S[0..N-1]'. + + The maximum number of columns per line is given as WIDTH. The + starting column of the string is given as START_COLUMN. If the + algorithm shall keep room after the last piece, this amount of + room can be given as AT_END_COLUMNS. 
+ + OVERRIDE is an optional override; if `OVERRIDE[I] != + UC_BREAK_UNDEFINED', `OVERRIDE[I]' takes precedence over `P[I]' as + returned by the `u*_possible_linebreaks' function. + + The given ENCODING is used for disambiguating widths in `uc_width'. + + Returns the column after the end of the string, and stores the + result at `P[0..N-1]'. Every `P[I]' is assigned one of the values + `UC_BREAK_MANDATORY', `UC_BREAK_POSSIBLE', `UC_BREAK_HYPHENATION', + `UC_BREAK_PROHIBITED'. Here the value `UC_BREAK_POSSIBLE' + indicates that a line break _should_ be inserted. + + +File: libunistring.info, Node: uninorm.h, Next: unicase.h, Prev: unilbrk.h, Up: Top + +12 Normalization forms (composition and decomposition) `<uninorm.h>' +******************************************************************** + + This include file defines functions for transforming Unicode strings +to one of the four normal forms, known as NFC, NFD, NFKC, NFKD. These +transformations involve decomposition and -- for NFC and NFKC -- +composition of Unicode characters. + +* Menu: + +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: + + +File: libunistring.info, Node: Decomposition of characters, Next: Composition of characters, Up: uninorm.h + +12.1 Decomposition of Unicode characters +======================================== + + The following enumerated values are the possible types of +decomposition of a Unicode character. + + -- Constant: int UC_DECOMP_CANONICAL + Denotes canonical decomposition. + + -- Constant: int UC_DECOMP_FONT + UCD marker: `<font>'. Denotes a font variant (e.g. a blackletter + form). + + -- Constant: int UC_DECOMP_NOBREAK + UCD marker: `<noBreak>'. Denotes a no-break version of a space or + hyphen. + + -- Constant: int UC_DECOMP_INITIAL + UCD marker: `<initial>'. Denotes an initial presentation form + (Arabic). + + -- Constant: int UC_DECOMP_MEDIAL + UCD marker: `<medial>'. Denotes a medial presentation form + (Arabic).
+ + -- Constant: int UC_DECOMP_FINAL + UCD marker: `<final>'. Denotes a final presentation form (Arabic). + + -- Constant: int UC_DECOMP_ISOLATED + UCD marker: `<isolated>'. Denotes an isolated presentation form + (Arabic). + + -- Constant: int UC_DECOMP_CIRCLE + UCD marker: `<circle>'. Denotes an encircled form. + + -- Constant: int UC_DECOMP_SUPER + UCD marker: `<super>'. Denotes a superscript form. + + -- Constant: int UC_DECOMP_SUB + UCD marker: `<sub>'. Denotes a subscript form. + + -- Constant: int UC_DECOMP_VERTICAL + UCD marker: `<vertical>'. Denotes a vertical layout presentation + form. + + -- Constant: int UC_DECOMP_WIDE + UCD marker: `<wide>'. Denotes a wide (or zenkaku) compatibility + character. + + -- Constant: int UC_DECOMP_NARROW + UCD marker: `<narrow>'. Denotes a narrow (or hankaku) + compatibility character. + + -- Constant: int UC_DECOMP_SMALL + UCD marker: `<small>'. Denotes a small variant form (CNS + compatibility). + + -- Constant: int UC_DECOMP_SQUARE + UCD marker: `<square>'. Denotes a CJK squared font variant. + + -- Constant: int UC_DECOMP_FRACTION + UCD marker: `<fraction>'. Denotes a vulgar fraction form. + + -- Constant: int UC_DECOMP_COMPAT + UCD marker: `<compat>'. Denotes an otherwise unspecified + compatibility character. + + The following constant denotes the maximum size of decomposition of +a single Unicode character. + + -- Macro: unsigned int UC_DECOMPOSITION_MAX_LENGTH + This macro expands to a constant that is the required size of + buffer passed to the `uc_decomposition' and + `uc_canonical_decomposition' functions. + + The following functions decompose a Unicode character. + + -- Function: int uc_decomposition (ucs4_t UC, int *DECOMP_TAG, ucs4_t + *DECOMPOSITION) + Returns the character decomposition mapping of the Unicode + character UC. DECOMPOSITION must point to an array of at least + `UC_DECOMPOSITION_MAX_LENGTH' `ucs4_t' elements. + + When a decomposition exists, `DECOMPOSITION[0..N-1]' and + `*DECOMP_TAG' are filled and N is returned. Otherwise -1 is + returned.
+ + -- Function: int uc_canonical_decomposition (ucs4_t UC, ucs4_t + *DECOMPOSITION) + Returns the canonical character decomposition mapping of the + Unicode character UC. DECOMPOSITION must point to an array of at + least `UC_DECOMPOSITION_MAX_LENGTH' `ucs4_t' elements. + + When a decomposition exists, `DECOMPOSITION[0..N-1]' is filled and + N is returned. Otherwise -1 is returned. + + +File: libunistring.info, Node: Composition of characters, Next: Normalization of strings, Prev: Decomposition of characters, Up: uninorm.h + +12.2 Composition of Unicode characters +====================================== + + The following function composes a Unicode character from two Unicode +characters. + + -- Function: ucs4_t uc_composition (ucs4_t UC1, ucs4_t UC2) + Attempts to combine the Unicode characters UC1, UC2. UC1 is known + to have canonical combining class 0. + + Returns the combination of UC1 and UC2, if it exists. Returns 0 + otherwise. + + Not all decompositions can be recombined using this function. See + the Unicode file `CompositionExclusions.txt' for details. + + +File: libunistring.info, Node: Normalization of strings, Next: Normalizing comparisons, Prev: Composition of characters, Up: uninorm.h + +12.3 Normalization of strings +============================= + + The Unicode standard defines four normalization forms for Unicode +strings. The following type is used to denote a normalization form. + + -- Type: uninorm_t + An object of type `uninorm_t' denotes a Unicode normalization form. + This is a scalar type; its values can be compared with `=='. + + The following constants denote the four normalization forms. + + -- Macro: uninorm_t UNINORM_NFD + Denotes Normalization form D: canonical decomposition. + + -- Macro: uninorm_t UNINORM_NFC + Normalization form C: canonical decomposition, then canonical + composition. + + -- Macro: uninorm_t UNINORM_NFKD + Normalization form KD: compatibility decomposition.
+ + -- Macro: uninorm_t UNINORM_NFKC + Normalization form KC: compatibility decomposition, then canonical + composition. + + The following functions operate on `uninorm_t' objects. + + -- Function: bool uninorm_is_compat_decomposing (uninorm_t NF) + Tests whether the normalization form NF does compatibility + decomposition. + + -- Function: bool uninorm_is_composing (uninorm_t NF) + Tests whether the normalization form NF includes canonical + composition. + + -- Function: uninorm_t uninorm_decomposing_form (uninorm_t NF) + Returns the decomposing variant of the normalization form NF. + This maps NFC,NFD -> NFD and NFKC,NFKD -> NFKD. + + The following functions apply a Unicode normalization form to a +Unicode string. + + -- Function: uint8_t * u8_normalize (uninorm_t NF, const uint8_t *S, + size_t N, uint8_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint16_t * u16_normalize (uninorm_t NF, const uint16_t + *S, size_t N, uint16_t *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_normalize (uninorm_t NF, const uint32_t + *S, size_t N, uint32_t *RESULTBUF, size_t *LENGTHP) + Returns the specified normalization form of a string. + + +File: libunistring.info, Node: Normalizing comparisons, Next: Normalization of streams, Prev: Normalization of strings, Up: uninorm.h + +12.4 Normalizing comparisons +============================ + + The following functions compare Unicode strings, ignoring differences +in normalization. + + -- Function: int u8_normcmp (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u16_normcmp (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u32_normcmp (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + Compares S1 and S2, ignoring differences in normalization. + + NF must be either `UNINORM_NFD' or `UNINORM_NFKD'.
+ + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + -- Function: char * u8_normxfrm (const uint8_t *S, size_t N, uninorm_t + NF, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u16_normxfrm (const uint16_t *S, size_t N, + uninorm_t NF, char *RESULTBUF, size_t *LENGTHP) + -- Function: char * u32_normxfrm (const uint32_t *S, size_t N, + uninorm_t NF, char *RESULTBUF, size_t *LENGTHP) + Converts the string S of length N to a NUL-terminated byte + sequence, in such a way that comparing `u8_normxfrm (S1)' and + `u8_normxfrm (S2)' with the `u8_cmp2' function is equivalent to + comparing S1 and S2 with the `u8_normcoll' function. + + NF must be either `UNINORM_NFC' or `UNINORM_NFKC'. + + -- Function: int u8_normcoll (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u16_normcoll (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + -- Function: int u32_normcoll (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, uninorm_t NF, int *RESULTP) + Compares S1 and S2, ignoring differences in normalization, using + the collation rules of the current locale. + + NF must be either `UNINORM_NFC' or `UNINORM_NFKC'. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + +File: libunistring.info, Node: Normalization of streams, Prev: Normalizing comparisons, Up: uninorm.h + +12.5 Normalization of streams of Unicode characters +=================================================== + + A "stream of Unicode characters" is essentially a function that +accepts an `ucs4_t' argument repeatedly, optionally combined with a +function that "flushes" the stream. 
+ + -- Type: struct uninorm_filter + This is the data type of a stream of Unicode characters that + normalizes its input according to a given normalization form and + passes the normalized character sequence to the encapsulated + stream of Unicode characters. + + -- Function: struct uninorm_filter * uninorm_filter_create (uninorm_t + NF, int (*STREAM_FUNC) (void *STREAM_DATA, ucs4_t UC), void + *STREAM_DATA) + Creates and returns a normalization filter for Unicode characters. + + The pair (STREAM_FUNC, STREAM_DATA) is the encapsulated stream. + `STREAM_FUNC (STREAM_DATA, UC)' receives the Unicode character UC + and returns 0 if successful, or -1 with `errno' set upon failure. + + Returns the new filter, or NULL with `errno' set upon failure. + + -- Function: int uninorm_filter_write (struct uninorm_filter *FILTER, + ucs4_t UC) + Stuffs a Unicode character into a normalizing filter. Returns 0 + if successful, or -1 with `errno' set upon failure. + + -- Function: int uninorm_filter_flush (struct uninorm_filter *FILTER) + Brings data buffered in the filter to its destination, the + encapsulated stream. + + Returns 0 if successful, or -1 with `errno' set upon failure. + + Note! If after calling this function, additional characters are + written into the filter, the resulting character sequence in the + encapsulated stream will not necessarily be normalized. + + -- Function: int uninorm_filter_free (struct uninorm_filter *FILTER) + Brings data buffered in the filter to its destination, the + encapsulated stream, then closes and frees the filter. + + Returns 0 if successful, or -1 with `errno' set upon failure. + + +File: libunistring.info, Node: unicase.h, Next: uniregex.h, Prev: uninorm.h, Up: Top + +13 Case mappings `<unicase.h>' +****************************** + + This include file defines functions for case mapping for Unicode +strings and case insensitive comparison of Unicode strings and C +strings.
+ + These string functions fix the problems that were mentioned in *note +char * strings::, namely, they handle the Croatian LETTER DZ WITH +CARON, the German LATIN SMALL LETTER SHARP S, the Greek sigma and the +Lithuanian i correctly. + +* Menu: + +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: + + +File: libunistring.info, Node: Case mappings of characters, Next: Case mappings of strings, Up: unicase.h + +13.1 Case mappings of characters +================================ + + The following functions implement case mappings on Unicode +characters -- for those cases only where the result of the mapping is +again a single Unicode character. + + These mappings are locale and context independent. + + *WARNING!* These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that +treat an entire string at once and are language aware. + + -- Function: ucs4_t uc_toupper (ucs4_t UC) + Returns the uppercase mapping of the Unicode character UC. + + -- Function: ucs4_t uc_tolower (ucs4_t UC) + Returns the lowercase mapping of the Unicode character UC. + + -- Function: ucs4_t uc_totitle (ucs4_t UC) + Returns the titlecase mapping of the Unicode character UC. + + The titlecase mapping of a character is to be used when the + character should look like upper case and the following characters + are lower cased. + + For most characters, this is the same as the uppercase mapping. + There are only few characters where the title case variant and the + upper case variant are different. These characters occur in the + Latin writing of the Croatian, Bosnian, and Serbian languages.
+ + Lower case Title case Upper case + ------------------------------------------------------------------ + LATIN SMALL LETTER LJ LATIN CAPITAL LETTER LATIN CAPITAL LETTER + L WITH SMALL LETTER J LJ + LATIN SMALL LETTER NJ LATIN CAPITAL LETTER LATIN CAPITAL LETTER + N WITH SMALL LETTER J NJ + LATIN SMALL LETTER DZ LATIN CAPITAL LETTER LATIN CAPITAL LETTER + D WITH SMALL LETTER Z DZ + LATIN SMALL LETTER LATIN CAPITAL LETTER LATIN CAPITAL LETTER + DZ WITH CARON D WITH SMALL LETTER DZ WITH CARON + Z WITH CARON + + +File: libunistring.info, Node: Case mappings of strings, Next: Case mappings of substrings, Prev: Case mappings of characters, Up: unicase.h + +13.2 Case mappings of strings +============================= + + Case mapping should always be performed on entire strings, not on +individual characters. The functions in this section do so. + + These functions allow to apply a normalization after the case +mapping. The reason is that if you want to treat `ä' and `Ä' the +same, you most often also want to treat the composed and decomposed +forms of such a character, U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +and U+0041 LATIN CAPITAL LETTER A U+0308 COMBINING DIAERESIS the same. +The NF argument designates the normalization. + + These functions are locale dependent. The ISO639_LANGUAGE argument +identifies the language (e.g. `"tr"' for Turkish). NULL means to use +locale independent case mappings. + + -- Function: const char * uc_locale_language () + Returns the ISO 639 language code of the current locale. Returns + `""' if it is unknown, or in the "C" locale.
+ + -- Function: uint8_t * u8_toupper (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_toupper (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_toupper (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the uppercase mapping of a string. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + -- Function: uint8_t * u8_tolower (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_tolower (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_tolower (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the lowercase mapping of a string. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + -- Function: uint8_t * u8_totitle (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_totitle (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_totitle (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the titlecase mapping of a string. + + Mapping to title case means that, in each word, the first cased + character is being mapped to title case and the remaining + characters of the word are being mapped to lower case. 
+ + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + +File: libunistring.info, Node: Case mappings of substrings, Next: Case insensitive comparison, Prev: Case mappings of strings, Up: unicase.h + +13.3 Case mappings of substrings +================================ + + Case mapping of a substring cannot simply be performed by extracting +the substring and then applying the case mapping function to it. This +does not work because case mapping requires some information about the +surrounding characters. The following functions allow to apply case +mappings to substrings of a given string, while taking into account the +characters that precede it (the "prefix") and the characters that +follow it (the "suffix"). + + -- Type: casing_prefix_context_t + This data type denotes the case-mapping context that is given by a + prefix string. It is an immediate type that can be copied by + simple assignment, without involving memory allocation. It is not + an array type. + + -- Constant: casing_prefix_context_t unicase_empty_prefix_context + This constant is the case-mapping context that corresponds to an + empty prefix string. + + The following functions return `casing_prefix_context_t' objects: + + -- Function: casing_prefix_context_t u8_casing_prefix_context (const + uint8_t *S, size_t N) + -- Function: casing_prefix_context_t u16_casing_prefix_context (const + uint16_t *S, size_t N) + -- Function: casing_prefix_context_t u32_casing_prefix_context (const + uint32_t *S, size_t N) + Returns the case-mapping context of a given prefix string. 
+ + -- Function: casing_prefix_context_t u8_casing_prefixes_context (const + uint8_t *S, size_t N, casing_prefix_context_t A_CONTEXT) + -- Function: casing_prefix_context_t u16_casing_prefixes_context + (const uint16_t *S, size_t N, casing_prefix_context_t + A_CONTEXT) + -- Function: casing_prefix_context_t u32_casing_prefixes_context + (const uint32_t *S, size_t N, casing_prefix_context_t + A_CONTEXT) + Returns the case-mapping context of the prefix concat(A, S), given + the case-mapping context of the prefix A. + + -- Type: casing_suffix_context_t + This data type denotes the case-mapping context that is given by a + suffix string. It is an immediate type that can be copied by + simple assignment, without involving memory allocation. It is not + an array type. + + -- Constant: casing_suffix_context_t unicase_empty_suffix_context + This constant is the case-mapping context that corresponds to an + empty suffix string. + + The following functions return `casing_suffix_context_t' objects: + + -- Function: casing_suffix_context_t u8_casing_suffix_context (const + uint8_t *S, size_t N) + -- Function: casing_suffix_context_t u16_casing_suffix_context (const + uint16_t *S, size_t N) + -- Function: casing_suffix_context_t u32_casing_suffix_context (const + uint32_t *S, size_t N) + Returns the case-mapping context of a given suffix string. + + -- Function: casing_suffix_context_t u8_casing_suffixes_context (const + uint8_t *S, size_t N, casing_suffix_context_t A_CONTEXT) + -- Function: casing_suffix_context_t u16_casing_suffixes_context + (const uint16_t *S, size_t N, casing_suffix_context_t + A_CONTEXT) + -- Function: casing_suffix_context_t u32_casing_suffixes_context + (const uint32_t *S, size_t N, casing_suffix_context_t + A_CONTEXT) + Returns the case-mapping context of the suffix concat(S, A), given + the case-mapping context of the suffix A. + + The following functions perform a case mapping, considering the +prefix context and the suffix context. 
+ + -- Function: uint8_t * u8_ct_toupper (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_toupper (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_toupper (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the uppercase mapping of a string that is surrounded by a + prefix and a suffix. + + -- Function: uint8_t * u8_ct_tolower (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_tolower (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_tolower (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the lowercase mapping of a string that is surrounded by a + prefix and a suffix. 
+ + -- Function: uint8_t * u8_ct_totitle (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_totitle (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_totitle (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the titlecase mapping of a string that is surrounded by a + prefix and a suffix. + + For example, to uppercase the UTF-8 substring between `s + +start_index' and `s + end_index' of a string that extends from `s' to +`s + u8_strlen (s)', you can use the statements + + size_t result_length; + uint8_t *result = + u8_ct_toupper (s + start_index, end_index - start_index, + u8_casing_prefix_context (s, start_index), + u8_casing_suffix_context (s + end_index, + u8_strlen (s) - end_index), + iso639_language, NULL, NULL, &result_length); + + +File: libunistring.info, Node: Case insensitive comparison, Next: Case detection, Prev: Case mappings of substrings, Up: unicase.h + +13.4 Case insensitive comparison +================================ + + The following functions implement comparison that ignores +differences in case and normalization.
+ + -- Function: uint8_t * u8_casefold (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, + size_t *LENGTHP) + -- Function: uint16_t * u16_casefold (const uint16_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint16_t + *RESULTBUF, size_t *LENGTHP) + -- Function: uint32_t * u32_casefold (const uint32_t *S, size_t N, + const char *ISO639_LANGUAGE, uninorm_t NF, uint32_t + *RESULTBUF, size_t *LENGTHP) + Returns the case folded string. + + Comparing `u8_casefold (S1)' and `u8_casefold (S2)' with the + `u8_cmp2' function is equivalent to comparing S1 and S2 with + `u8_casecmp'. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + -- Function: uint8_t * u8_ct_casefold (const uint8_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint8_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint16_t * u16_ct_casefold (const uint16_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint16_t *RESULTBUF, size_t + *LENGTHP) + -- Function: uint32_t * u32_ct_casefold (const uint32_t *S, size_t N, + casing_prefix_context_t PREFIX_CONTEXT, + casing_suffix_context_t SUFFIX_CONTEXT, const char + *ISO639_LANGUAGE, uninorm_t NF, uint32_t *RESULTBUF, size_t + *LENGTHP) + Returns the case folded string. The case folding takes into + account the case mapping contexts of the prefix and suffix strings. 
+ + -- Function: int u8_casecmp (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u16_casecmp (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u32_casecmp (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int ulc_casecmp (const char *S1, size_t N1, const char + *S2, size_t N2, const char *ISO639_LANGUAGE, uninorm_t NF, + int *RESULTP) + Compares S1 and S2, ignoring differences in case and normalization. + + The NF argument identifies the normalization form to apply after + the case-mapping. It can also be NULL, for no normalization. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + The following functions additionally take into account the sorting +rules of the current locale. + + -- Function: char * u8_casexfrm (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + -- Function: char * u16_casexfrm (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + -- Function: char * u32_casexfrm (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + -- Function: char * ulc_casexfrm (const char *S, size_t N, const char + *ISO639_LANGUAGE, uninorm_t NF, char *RESULTBUF, size_t + *LENGTHP) + Converts the string S of length N to a NUL-terminated byte + sequence, in such a way that comparing `u8_casexfrm (S1)' and + `u8_casexfrm (S2)' with the gnulib function `memcmp2' is + equivalent to comparing S1 and S2 with `u8_casecoll'. + + NF must be either `UNINORM_NFC', `UNINORM_NFKC', or NULL for no + normalization. 
+ + -- Function: int u8_casecoll (const uint8_t *S1, size_t N1, const + uint8_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u16_casecoll (const uint16_t *S1, size_t N1, const + uint16_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int u32_casecoll (const uint32_t *S1, size_t N1, const + uint32_t *S2, size_t N2, const char *ISO639_LANGUAGE, + uninorm_t NF, int *RESULTP) + -- Function: int ulc_casecoll (const char *S1, size_t N1, const char + *S2, size_t N2, const char *ISO639_LANGUAGE, uninorm_t NF, + int *RESULTP) + Compares S1 and S2, ignoring differences in case and normalization, + using the collation rules of the current locale. + + The NF argument identifies the normalization form to apply after + the case-mapping. It must be either `UNINORM_NFC', + `UNINORM_NFKC', or NULL for no normalization. + + If successful, sets `*RESULTP' to -1 if S1 < S2, 0 if S1 = S2, 1 + if S1 > S2, and returns 0. Upon failure, returns -1 with `errno' + set. + + +File: libunistring.info, Node: Case detection, Prev: Case insensitive comparison, Up: unicase.h + +13.5 Case detection +=================== + + The following functions determine whether a Unicode string is +entirely in upper case, or entirely in lower case, or entirely in title +case, or already case-folded. + + -- Function: int u8_is_uppercase (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_uppercase (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_uppercase (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if mapping NFD(S) to upper case is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set.
+ + -- Function: int u8_is_lowercase (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_lowercase (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_lowercase (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if mapping NFD(S) to lower case is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set. + + -- Function: int u8_is_titlecase (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_titlecase (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_titlecase (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if mapping NFD(S) to title case is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set. + + -- Function: int u8_is_casefolded (const uint8_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_casefolded (const uint16_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_casefolded (const uint32_t *S, size_t N, const + char *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if applying case folding to NFD(S) is a + no-op, or to false otherwise, and returns 0. Upon failure, + returns -1 with `errno' set. + + The following functions determine whether case mappings have any +effect on a Unicode string. 
+ + -- Function: int u8_is_cased (const uint8_t *S, size_t N, const char + *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u16_is_cased (const uint16_t *S, size_t N, const char + *ISO639_LANGUAGE, bool *RESULTP) + -- Function: int u32_is_cased (const uint32_t *S, size_t N, const char + *ISO639_LANGUAGE, bool *RESULTP) + Sets `*RESULTP' to true if case matters for S, that is, if mapping + NFD(S) to either upper case or lower case or title case is not a + no-op. Sets `*RESULTP' to false if NFD(S) maps to itself under the + upper case mapping, under the lower case mapping, and under the + title case mapping; in other words, when NFD(S) consists entirely + of caseless characters. Upon failure, returns -1 with `errno' set. + + +File: libunistring.info, Node: uniregex.h, Next: Using the library, Prev: unicase.h, Up: Top + +14 Regular expressions `<uniregex.h>' +************************************* + + This include file is not yet implemented. + + +File: libunistring.info, Node: Using the library, Next: More functionality, Prev: uniregex.h, Up: Top + +15 Using the library +******************** + + This chapter explains some practical considerations, regarding the +installation and compiler options that are needed in order to use this +library. + +* Menu: + +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: + + +File: libunistring.info, Node: Installation, Next: Compiler options, Up: Using the library + +15.1 Installation +================= + + Before you can use the library, it must be installed. First, you +have to make sure all dependencies are installed. They are listed in +the file `DEPENDENCIES'. + + Then you can proceed to build and install the library, as described +in the file `INSTALL'. For installation on Windows systems, please +refer to the file `README.woe32'.
+ + +File: libunistring.info, Node: Compiler options, Next: Include files, Prev: Installation, Up: Using the library + +15.2 Compiler options +===================== + + Let's denote as `LIBUNISTRING_PREFIX' the value of the `--prefix' +option that you passed to `configure' while installing this package. +If you didn't pass any `--prefix' option, then the package is installed +in `/usr/local'. + + Let's denote as `LIBUNISTRING_INCLUDEDIR' the directory where the +include files were installed. This is usually the same as +`${LIBUNISTRING_PREFIX}/include'. Except that if you passed an +`--includedir' option to `configure', it is the value of that option. + + Let's further denote as `LIBUNISTRING_LIBDIR' the directory where +the library itself was installed. This is the value that you passed +with the `--libdir' option to `configure', or otherwise the same as +`${LIBUNISTRING_PREFIX}/lib'. Recall that when building in 64-bit mode +on a 64-bit GNU/Linux system that supports executables in either 64-bit +mode or 32-bit mode, you should have used the option +`--libdir=${LIBUNISTRING_PREFIX}/lib64'. + + So that the compiler finds the include files, you have to pass it the +option `-I${LIBUNISTRING_INCLUDEDIR}'. + + So that the compiler finds the library during its linking pass, you +have to pass it the options `-L${LIBUNISTRING_LIBDIR} -lunistring'. On +some systems, in some configurations, you also have to pass options +needed for linking with `libiconv'. The autoconf macro +`gl_LIBUNISTRING' (see *note Autoconf macro::) deals with this +particularity. + + +File: libunistring.info, Node: Include files, Next: Autoconf macro, Prev: Compiler options, Up: Using the library + +15.3 Include files +================== + + Most of the include files have been presented in the introduction, +see *note Introduction::, and subsequent detailed chapters. + + Another include file is `<unistring/version.h>'. It contains the +version number of the libunistring library.
+ + -- Macro: int _LIBUNISTRING_VERSION + This constant contains the version of libunistring that is being + used at compile time. It encodes the major and minor parts of the + version number only. These parts are encoded in the form + `(major<<8) + minor'. + + -- Constant: int _libunistring_version + This constant contains the version of libunistring that is being + used at run time. It encodes the major and minor parts of the + version number only. These parts are encoded in the form + `(major<<8) + minor'. + + It is possible that `_libunistring_version' is greater than +`_LIBUNISTRING_VERSION'. This can happen when you use `libunistring' +as a shared library, and a newer, binary backward-compatible version +has been installed after your program that uses `libunistring' was +installed. + + +File: libunistring.info, Node: Autoconf macro, Next: Reporting problems, Prev: Include files, Up: Using the library + +15.4 Autoconf macro +=================== + + GNU Gnulib provides an autoconf macro that tests for the availability +of `libunistring'. It is contained in the Gnulib module +`libunistring', see +`http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring'. + + The macro is called `gl_LIBUNISTRING'. It searches for an installed +libunistring. If found, it sets and AC_SUBSTs `HAVE_LIBUNISTRING=yes' +and the `LIBUNISTRING' and `LTLIBUNISTRING' variables and augments the +`CPPFLAGS' variable, and defines the C macro `HAVE_LIBUNISTRING' to 1. +Otherwise, it sets and AC_SUBSTs `HAVE_LIBUNISTRING=no' and +`LIBUNISTRING' and `LTLIBUNISTRING' to empty. + + The complexities that `gl_LIBUNISTRING' deals with are the following: + + * On some operating systems, in some configurations, libunistring + depends on `libiconv', and the options for linking with libiconv + must be mentioned explicitly on the link command line. 
+ + * GNU `libunistring', if installed, is not necessarily already in the + search path (`CPPFLAGS' for the include file search path, + `LDFLAGS' for the library search path). + + * GNU `libunistring', if installed, is not necessarily already in the + run time library search path. To avoid the need for setting an + environment variable like `LD_LIBRARY_PATH', the macro adds the + appropriate run time search path options to the `LIBUNISTRING' + variable. This works on most systems. + + +File: libunistring.info, Node: Reporting problems, Prev: Autoconf macro, Up: Using the library + +15.5 Reporting problems +======================= + + If you encounter any problem, please don't hesitate to send a +detailed bug report to the `bug-libunistring@gnu.org' mailing list. +You can alternatively also use the bug tracker at the project page +`https://savannah.gnu.org/projects/libunistring'. + + Please always include the version number of this library, and a short +description of your operating system and compilation environment with +corresponding version numbers. + + For problems that appear while building and installing +`libunistring', for which you don't find the remedy in the `INSTALL' +file, please include a description of the options that you passed to +the `configure' script. + + +File: libunistring.info, Node: More functionality, Next: Licenses, Prev: Using the library, Up: Top + +16 More advanced functionality +****************************** + + For bidirectional reordering of strings, we recommend the GNU +FriBidi library: `http://www.fribidi.org/'. + + For the rendering of Unicode strings outside of the context of a +given toolkit (KDE/Qt or GNOME/Gtk), we recommend the Pango library: +`http://www.pango.org/'. + + +File: libunistring.info, Node: Licenses, Next: Index, Prev: More functionality, Up: Top + +Appendix A Licenses +******************* + + The files of this package are covered by the licenses indicated in +each particular file or directory. 
Here is a summary: + + * The `libunistring' library is covered by the GNU Lesser General + Public License (LGPL). A copy of the license is included in *note + GNU LGPL::. + + * This manual is free documentation. It is dually licensed under the + GNU FDL and the GNU GPL. This means that you can redistribute this + manual under either of these two licenses, at your choice. + This manual is covered by the GNU FDL. Permission is granted to + copy, distribute and/or modify this document under the terms of the + GNU Free Documentation License (FDL), either version 1.2 of the + License, or (at your option) any later version published by the + Free Software Foundation (FSF); with no Invariant Sections, with no + Front-Cover Text, and with no Back-Cover Texts. A copy of the + license is included in *note GNU FDL::. + This manual is covered by the GNU GPL. You can redistribute it + and/or modify it under the terms of the GNU General Public License + (GPL), either version 3 of the License, or (at your option) any + later version published by the Free Software Foundation (FSF). A + copy of the license is included in *note GNU GPL::. + +* Menu: + +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License + + +File: libunistring.info, Node: GNU GPL, Next: GNU LGPL, Up: Licenses + +A.1 GNU GENERAL PUBLIC LICENSE +============================== + + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. `http://fsf.org/' + + Everyone is permitted to copy and distribute verbatim copies of this + license document, but changing it is not allowed. + +Preamble +======== + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains +free software for all its users. We, the Free Software Foundation, use +the GNU General Public License for most of our software; it applies +also to any other work released this way by its authors. You can apply +it to your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the software, +or if you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. +Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those domains +in future versions of the GPL, as needed to protect the freedom of +users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + +TERMS AND CONDITIONS +==================== + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public + License. + + "Copyright" also means copyright-like laws that apply to other + kinds of works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this + License. Each licensee is addressed as "you". "Licensees" and + "recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the + work in a fashion requiring copyright permission, other than the + making of an exact copy. The resulting work is called a "modified + version" of the earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work + based on the Program. 
+ + To "propagate" a work means to do anything with it that, without + permission, would make you directly or secondarily liable for + infringement under applicable copyright law, except executing it + on a computer or modifying a private copy. Propagation includes + copying, distribution (with or without modification), making + available to the public, and in some countries other activities as + well. + + To "convey" a work means any kind of propagation that enables other + parties to make or receive copies. Mere interaction with a user + through a computer network, with no transfer of a copy, is not + conveying. + + An interactive user interface displays "Appropriate Legal Notices" + to the extent that it includes a convenient and prominently visible + feature that (1) displays an appropriate copyright notice, and (2) + tells the user that there is no warranty for the work (except to + the extent that warranties are provided), that licensees may + convey the work under this License, and how to view a copy of this + License. If the interface presents a list of user commands or + options, such as a menu, a prominent item in the list meets this + criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work + for making modifications to it. "Object code" means any + non-source form of a work. + + A "Standard Interface" means an interface that either is an + official standard defined by a recognized standards body, or, in + the case of interfaces specified for a particular programming + language, one that is widely used among developers working in that + language. 
+ + The "System Libraries" of an executable work include anything, + other than the work as a whole, that (a) is included in the normal + form of packaging a Major Component, but which is not part of that + Major Component, and (b) serves only to enable use of the work + with that Major Component, or to implement a Standard Interface + for which an implementation is available to the public in source + code form. A "Major Component", in this context, means a major + essential component (kernel, window system, and so on) of the + specific operating system (if any) on which the executable work + runs, or a compiler used to produce the work, or an object code + interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all + the source code needed to generate, install, and (for an executable + work) run the object code and to modify the work, including + scripts to control those activities. However, it does not include + the work's System Libraries, or general-purpose tools or generally + available free programs which are used unmodified in performing + those activities but which are not part of the work. For example, + Corresponding Source includes interface definition files + associated with source files for the work, and the source code for + shared libraries and dynamically linked subprograms that the work + is specifically designed to require, such as by intimate data + communication or control flow between those subprograms and other + parts of the work. + + The Corresponding Source need not include anything that users can + regenerate automatically from other parts of the Corresponding + Source. + + The Corresponding Source for a work in source code form is that + same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of + copyright on the Program, and are irrevocable provided the stated + conditions are met. 
This License explicitly affirms your unlimited + permission to run the unmodified Program. The output from running + a covered work is covered by this License only if the output, + given its content, constitutes a covered work. This License + acknowledges your rights of fair use or other equivalent, as + provided by copyright law. + + You may make, run and propagate covered works that you do not + convey, without conditions so long as your license otherwise + remains in force. You may convey covered works to others for the + sole purpose of having them make modifications exclusively for + you, or provide you with facilities for running those works, + provided that you comply with the terms of this License in + conveying all material for which you do not control copyright. + Those thus making or running the covered works for you must do so + exclusively on your behalf, under your direction and control, on + terms that prohibit them from making any copies of your + copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under + the conditions stated below. Sublicensing is not allowed; section + 10 makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological + measure under any applicable law fulfilling obligations under + article 11 of the WIPO copyright treaty adopted on 20 December + 1996, or similar laws prohibiting or restricting circumvention of + such measures. 
+ + When you convey a covered work, you waive any legal power to forbid + circumvention of technological measures to the extent such + circumvention is effected by exercising rights under this License + with respect to the covered work, and you disclaim any intention + to limit operation or modification of the work as a means of + enforcing, against the work's users, your or third parties' legal + rights to forbid circumvention of technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you + receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice; + keep intact all notices stating that this License and any + non-permissive terms added in accord with section 7 apply to the + code; keep intact all notices of the absence of any warranty; and + give all recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, + and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to + produce it from the Program, in the form of source code under the + terms of section 4, provided that you also meet all of these + conditions: + + a. The work must carry prominent notices stating that you + modified it, and giving a relevant date. + + b. The work must carry prominent notices stating that it is + released under this License and any conditions added under + section 7. This requirement modifies the requirement in + section 4 to "keep intact all notices". + + c. You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable + section 7 additional terms, to the whole of the work, and all + its parts, regardless of how they are packaged. 
This License + gives no permission to license the work in any other way, but + it does not invalidate such permission if you have separately + received it. + + d. If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has + interactive interfaces that do not display Appropriate Legal + Notices, your work need not make them do so. + + A compilation of a covered work with other separate and independent + works, which are not by their nature extensions of the covered + work, and which are not combined with it such as to form a larger + program, in or on a volume of a storage or distribution medium, is + called an "aggregate" if the compilation and its resulting + copyright are not used to limit the access or legal rights of the + compilation's users beyond what the individual works permit. + Inclusion of a covered work in an aggregate does not cause this + License to apply to the other parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms + of sections 4 and 5, provided that you also convey the + machine-readable Corresponding Source under the terms of this + License, in one of these ways: + + a. Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b. 
Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for + as long as you offer spare parts or customer support for that + product model, to give anyone who possesses the object code + either (1) a copy of the Corresponding Source for all the + software in the product that is covered by this License, on a + durable physical medium customarily used for software + interchange, for a price no more than your reasonable cost of + physically performing this conveying of source, or (2) access + to copy the Corresponding Source from a network server at no + charge. + + c. Convey individual copies of the object code with a copy of + the written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, + and only if you received the object code with such an offer, + in accord with subsection 6b. + + d. Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access + to the Corresponding Source in the same way through the same + place at no further charge. You need not require recipients + to copy the Corresponding Source along with the object code. + If the place to copy the object code is a network server, the + Corresponding Source may be on a different server (operated + by you or a third party) that supports equivalent copying + facilities, provided you maintain clear directions next to + the object code saying where to find the Corresponding Source. + Regardless of what server hosts the Corresponding Source, you + remain obligated to ensure that it is available for as long + as needed to satisfy these requirements. + + e. 
Convey the object code using peer-to-peer transmission, + provided you inform other peers where the object code and + Corresponding Source of the work are being offered to the + general public at no charge under subsection 6d. + + + A separable portion of the object code, whose source code is + excluded from the Corresponding Source as a System Library, need + not be included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means + any tangible personal property which is normally used for personal, + family, or household purposes, or (2) anything designed or sold for + incorporation into a dwelling. In determining whether a product + is a consumer product, doubtful cases shall be resolved in favor of + coverage. For a particular product received by a particular user, + "normally used" refers to a typical or common use of that class of + product, regardless of the status of the particular user or of the + way in which the particular user actually uses, or expects or is + expected to use, the product. A product is a consumer product + regardless of whether the product has substantial commercial, + industrial or non-consumer uses, unless such uses represent the + only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, + procedures, authorization keys, or other information required to + install and execute modified versions of a covered work in that + User Product from a modified version of its Corresponding Source. + The information must suffice to ensure that the continued + functioning of the modified object code is in no case prevented or + interfered with solely because modification has been made. 
+ + If you convey an object code work under this section in, or with, + or specifically for use in, a User Product, and the conveying + occurs as part of a transaction in which the right of possession + and use of the User Product is transferred to the recipient in + perpetuity or for a fixed term (regardless of how the transaction + is characterized), the Corresponding Source conveyed under this + section must be accompanied by the Installation Information. But + this requirement does not apply if neither you nor any third party + retains the ability to install modified object code on the User + Product (for example, the work has been installed in ROM). + + The requirement to provide Installation Information does not + include a requirement to continue to provide support service, + warranty, or updates for a work that has been modified or + installed by the recipient, or for the User Product in which it + has been modified or installed. Access to a network may be denied + when the modification itself materially and adversely affects the + operation of the network or violates the rules and protocols for + communication across the network. + + Corresponding Source conveyed, and Installation Information + provided, in accord with this section must be in a format that is + publicly documented (and with an implementation available to the + public in source code form), and must require no special password + or key for unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of + this License by making exceptions from one or more of its + conditions. Additional permissions that are applicable to the + entire Program shall be treated as though they were included in + this License, to the extent that they are valid under applicable + law. 
If additional permissions apply only to part of the Program, + that part may be used separately under those permissions, but the + entire Program remains governed by this License without regard to + the additional permissions. + + When you convey a copy of a covered work, you may at your option + remove any additional permissions from that copy, or from any part + of it. (Additional permissions may be written to require their own + removal in certain cases when you modify the work.) You may place + additional permissions on material, added by you to a covered work, + for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material + you add to a covered work, you may (if authorized by the copyright + holders of that material) supplement the terms of this License + with terms: + + a. Disclaiming warranty or limiting liability differently from + the terms of sections 15 and 16 of this License; or + + b. Requiring preservation of specified reasonable legal notices + or author attributions in that material or in the Appropriate + Legal Notices displayed by works containing it; or + + c. Prohibiting misrepresentation of the origin of that material, + or requiring that modified versions of such material be + marked in reasonable ways as different from the original + version; or + + d. Limiting the use for publicity purposes of names of licensors + or authors of the material; or + + e. Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f. Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified + versions of it) with contractual assumptions of liability to + the recipient, for any liability that these contractual + assumptions directly impose on those licensors and authors. 
+ + All other non-permissive additional terms are considered "further + restrictions" within the meaning of section 10. If the Program as + you received it, or any part of it, contains a notice stating that + it is governed by this License along with a term that is a further + restriction, you may remove that term. If a license document + contains a further restriction but permits relicensing or + conveying under this License, you may add to a covered work + material governed by the terms of that license document, provided + that the further restriction does not survive such relicensing or + conveying. + + If you add terms to a covered work in accord with this section, you + must place, in the relevant source files, a statement of the + additional terms that apply to those files, or a notice indicating + where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in + the form of a separately written license, or stated as exceptions; + the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly + provided under this License. Any attempt otherwise to propagate or + modify it is void, and will automatically terminate your rights + under this License (including any patent licenses granted under + the third paragraph of section 11). + + However, if you cease all violation of this License, then your + license from a particular copyright holder is reinstated (a) + provisionally, unless and until the copyright holder explicitly + and finally terminates your license, and (b) permanently, if the + copyright holder fails to notify you of the violation by some + reasonable means prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from + that copyright holder, and you cure the violation prior to 30 days + after your receipt of the notice. + + Termination of your rights under this section does not terminate + the licenses of parties who have received copies or rights from + you under this License. If your rights have been terminated and + not permanently reinstated, you do not qualify to receive new + licenses for the same material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or + run a copy of the Program. Ancillary propagation of a covered work + occurring solely as a consequence of using peer-to-peer + transmission to receive a copy likewise does not require + acceptance. However, nothing other than this License grants you + permission to propagate or modify any covered work. These actions + infringe copyright if you do not accept this License. Therefore, + by modifying or propagating a covered work, you indicate your + acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically + receives a license from the original licensors, to run, modify and + propagate that work, subject to this License. You are not + responsible for enforcing compliance by third parties with this + License. + + An "entity transaction" is a transaction transferring control of an + organization, or substantially all assets of one, or subdividing an + organization, or merging organizations. 
If propagation of a + covered work results from an entity transaction, each party to that + transaction who receives a copy of the work also receives whatever + licenses to the work the party's predecessor in interest had or + could give under the previous paragraph, plus a right to + possession of the Corresponding Source of the work from the + predecessor in interest, if the predecessor has it or can get it + with reasonable efforts. + + You may not impose any further restrictions on the exercise of the + rights granted or affirmed under this License. For example, you + may not impose a license fee, royalty, or other charge for + exercise of rights granted under this License, and you may not + initiate litigation (including a cross-claim or counterclaim in a + lawsuit) alleging that any patent claim is infringed by making, + using, selling, offering for sale, or importing the Program or any + portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this + License of the Program or a work on which the Program is based. + The work thus licensed is called the contributor's "contributor + version". + + A contributor's "essential patent claims" are all patent claims + owned or controlled by the contributor, whether already acquired or + hereafter acquired, that would be infringed by some manner, + permitted by this License, of making, using, or selling its + contributor version, but do not include claims that would be + infringed only as a consequence of further modification of the + contributor version. For purposes of this definition, "control" + includes the right to grant patent sublicenses in a manner + consistent with the requirements of this License. + + Each contributor grants you a non-exclusive, worldwide, + royalty-free patent license under the contributor's essential + patent claims, to make, use, sell, offer for sale, import and + otherwise run, modify and propagate the contents of its + contributor version. 
+ + In the following three paragraphs, a "patent license" is any + express agreement or commitment, however denominated, not to + enforce a patent (such as an express permission to practice a + patent or covenant not to sue for patent infringement). To + "grant" such a patent license to a party means to make such an + agreement or commitment not to enforce a patent against the party. + + If you convey a covered work, knowingly relying on a patent + license, and the Corresponding Source of the work is not available + for anyone to copy, free of charge and under the terms of this + License, through a publicly available network server or other + readily accessible means, then you must either (1) cause the + Corresponding Source to be so available, or (2) arrange to deprive + yourself of the benefit of the patent license for this particular + work, or (3) arrange, in a manner consistent with the requirements + of this License, to extend the patent license to downstream + recipients. "Knowingly relying" means you have actual knowledge + that, but for the patent license, your conveying the covered work + in a country, or your recipient's use of the covered work in a + country, would infringe one or more identifiable patents in that + country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or + arrangement, you convey, or propagate by procuring conveyance of, a + covered work, and grant a patent license to some of the parties + receiving the covered work authorizing them to use, propagate, + modify or convey a specific copy of the covered work, then the + patent license you grant is automatically extended to all + recipients of the covered work and works based on it. + + A patent license is "discriminatory" if it does not include within + the scope of its coverage, prohibits the exercise of, or is + conditioned on the non-exercise of one or more of the rights that + are specifically granted under this License. 
You may not convey a + covered work if you are a party to an arrangement with a third + party that is in the business of distributing software, under + which you make payment to the third party based on the extent of + your activity of conveying the work, and under which the third + party grants, to any of the parties who would receive the covered + work from you, a discriminatory patent license (a) in connection + with copies of the covered work conveyed by you (or copies made + from those copies), or (b) primarily for and in connection with + specific products or compilations that contain the covered work, + unless you entered into that arrangement, or that patent license + was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting + any implied license or other defenses to infringement that may + otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, + agreement or otherwise) that contradict the conditions of this + License, they do not excuse you from the conditions of this + License. If you cannot convey a covered work so as to satisfy + simultaneously your obligations under this License and any other + pertinent obligations, then as a consequence you may not convey it + at all. For example, if you agree to terms that obligate you to + collect a royalty for further conveying from those to whom you + convey the Program, the only way you could satisfy both those + terms and this License would be to refrain entirely from conveying + the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have + permission to link or combine any covered work with a work licensed + under version 3 of the GNU Affero General Public License into a + single combined work, and to convey the resulting work. 
The terms + of this License will continue to apply to the part which is the + covered work, but the special requirements of the GNU Affero + General Public License, section 13, concerning interaction through + a network will apply to the combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new + versions of the GNU General Public License from time to time. + Such new versions will be similar in spirit to the present + version, but may differ in detail to address new problems or + concerns. + + Each version is given a distinguishing version number. If the + Program specifies that a certain numbered version of the GNU + General Public License "or any later version" applies to it, you + have the option of following the terms and conditions either of + that numbered version or of any later version published by the + Free Software Foundation. If the Program does not specify a + version number of the GNU General Public License, you may choose + any version ever published by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future + versions of the GNU General Public License can be used, that + proxy's public statement of acceptance of a version permanently + authorizes you to choose that version for the Program. + + Later license versions may give you additional or different + permissions. However, no additional obligations are imposed on any + author or copyright holder as a result of your choosing to follow a + later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY + APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE + COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE + RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. + SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN + WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES + AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU + FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR + CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE + THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA + BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD + PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER + PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF + THE POSSIBILITY OF SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided + above cannot be given local legal effect according to their terms, + reviewing courts shall apply local law that most closely + approximates an absolute waiver of all civil liability in + connection with the Program, unless a warranty or assumption of + liability accompanies a copy of the Program in return for a fee. + + +END OF TERMS AND CONDITIONS +=========================== + +How to Apply These Terms to Your New Programs +============================================= + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. 
+ + ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. + Copyright (C) YEAR NAME OF AUTHOR + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see `http://www.gnu.org/licenses/'. + + Also add information on how to contact you by electronic and paper +mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + PROGRAM Copyright (C) YEAR NAME OF AUTHOR + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + + The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an "about box". + + You should also get your employer (if you work as a programmer) or +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. For more information on this, and how to apply and follow +the GNU GPL, see `http://www.gnu.org/licenses/'. + + The GNU General Public License does not permit incorporating your +program into proprietary programs. If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. 
If this is what you want to do, use the +GNU Lesser General Public License instead of this License. But first, +please read `http://www.gnu.org/philosophy/why-not-lgpl.html'. + + +File: libunistring.info, Node: GNU LGPL, Next: GNU FDL, Prev: GNU GPL, Up: Licenses + +A.2 GNU LESSER GENERAL PUBLIC LICENSE +===================================== + + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. `http://fsf.org/' + + Everyone is permitted to copy and distribute verbatim copies of this + license document, but changing it is not allowed. + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU + Lesser General Public License, and the "GNU GPL" refers to version + 3 of the GNU General Public License. + + "The Library" refers to a covered work governed by this License, + other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface + provided by the Library, but which is not otherwise based on the + Library. Defining a subclass of a class defined by the Library is + deemed a mode of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an + Application with the Library. The particular version of the + Library with which the Combined Work was made is also called the + "Linked Version". + + The "Minimal Corresponding Source" for a Combined Work means the + Corresponding Source for the Combined Work, excluding any source + code for portions of the Combined Work that, considered in + isolation, are based on the Application, and not on the Linked + Version. 
+ + The "Corresponding Application Code" for a Combined Work means the + object code and/or source code for the Application, including any + data and utility programs needed for reproducing the Combined Work + from the Application, but excluding the System Libraries of the + Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this + License without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a + facility refers to a function or data to be supplied by an + Application that uses the facility (other than as an argument + passed when the facility is invoked), then you may convey a copy + of the modified version: + + a. under this License, provided that you make a good faith + effort to ensure that, in the event an Application does not + supply the function or data, the facility still operates, and + performs whatever part of its purpose remains meaningful, or + + b. under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material + from a header file that is part of the Library. You may convey + such object code under terms of your choice, provided that, if the + incorporated material is not limited to numerical parameters, data + structure layouts and accessors, or small macros, inline functions + and templates (ten or fewer lines in length), you do both of the + following: + + a. Give prominent notice with each copy of the object code that + the Library is used in it and that the Library and its use are + covered by this License. + + b. Accompany the object code with a copy of the GNU GPL and this + license document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, + taken together, effectively do not restrict modification of the + portions of the Library contained in the Combined Work and reverse + engineering for debugging such modifications, if you also do each + of the following: + + a. Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b. Accompany the Combined Work with a copy of the GNU GPL and + this license document. + + c. For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to + the copies of the GNU GPL and this license document. + + d. Do one of the following: + + 0. Convey the Minimal Corresponding Source under the terms + of this License, and the Corresponding Application Code + in a form suitable for, and under terms that permit, the + user to recombine or relink the Application with a + modified version of the Linked Version to produce a + modified Combined Work, in the manner specified by + section 6 of the GNU GPL for conveying Corresponding + Source. + + 1. Use a suitable shared library mechanism for linking with + the Library. A suitable mechanism is one that (a) uses + at run time a copy of the Library already present on the + user's computer system, and (b) will operate properly + with a modified version of the Library that is + interface-compatible with the Linked Version. + + e. Provide Installation Information, but only if you would + otherwise be required to provide such information under + section 6 of the GNU GPL, and only to the extent that such + information is necessary to install and execute a modified + version of the Combined Work produced by recombining or + relinking the Application with a modified version of the + Linked Version. 
(If you use option 4d0, the Installation + Information must accompany the Minimal Corresponding Source + and Corresponding Application Code. If you use option 4d1, + you must provide the Installation Information in the manner + specified by section 6 of the GNU GPL for conveying + Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the + Library side by side in a single library together with other + library facilities that are not Applications and are not covered + by this License, and convey such a combined library under terms of + your choice, if you do both of the following: + + a. Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities, conveyed under the terms of this License. + + b. Give prominent notice with the combined library that part of + it is a work based on the Library, and explaining where to + find the accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new + versions of the GNU Lesser General Public License from time to + time. Such new versions will be similar in spirit to the present + version, but may differ in detail to address new problems or + concerns. + + Each version is given a distinguishing version number. If the + Library as you received it specifies that a certain numbered + version of the GNU Lesser General Public License "or any later + version" applies to it, you have the option of following the terms + and conditions either of that published version or of any later + version published by the Free Software Foundation. If the Library + as you received it does not specify a version number of the GNU + Lesser General Public License, you may choose any version of the + GNU Lesser General Public License ever published by the Free + Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide + whether future versions of the GNU Lesser General Public License + shall apply, that proxy's public statement of acceptance of any + version is permanent authorization for you to choose that version + for the Library. + + + +File: libunistring.info, Node: GNU FDL, Prev: GNU LGPL, Up: Licenses + +A.3 GNU Free Documentation License +================================== + + Version 1.3, 3 November 2008 + + Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. + `http://fsf.org/' + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + 0. PREAMBLE + + The purpose of this License is to make a manual, textbook, or other + functional and useful document "free" in the sense of freedom: to + assure everyone the effective freedom to copy and redistribute it, + with or without modifying it, either commercially or + noncommercially. Secondarily, this License preserves for the + author and publisher a way to get credit for their work, while not + being considered responsible for modifications made by others. + + This License is a kind of "copyleft", which means that derivative + works of the document must themselves be free in the same sense. + It complements the GNU General Public License, which is a copyleft + license designed for free software. + + We have designed this License in order to use it for manuals for + free software, because free software needs free documentation: a + free program should come with manuals providing the same freedoms + that the software does. But this License is not limited to + software manuals; it can be used for any textual work, regardless + of subject matter or whether it is published as a printed book. + We recommend this License principally for works whose purpose is + instruction or reference. + + 1. 
APPLICABILITY AND DEFINITIONS + + This License applies to any manual or other work, in any medium, + that contains a notice placed by the copyright holder saying it + can be distributed under the terms of this License. Such a notice + grants a world-wide, royalty-free license, unlimited in duration, + to use that work under the conditions stated herein. The + "Document", below, refers to any such manual or work. Any member + of the public is a licensee, and is addressed as "you". You + accept the license if you copy, modify or distribute the work in a + way requiring permission under copyright law. + + A "Modified Version" of the Document means any work containing the + Document or a portion of it, either copied verbatim, or with + modifications and/or translated into another language. + + A "Secondary Section" is a named appendix or a front-matter section + of the Document that deals exclusively with the relationship of the + publishers or authors of the Document to the Document's overall + subject (or to related matters) and contains nothing that could + fall directly within that overall subject. (Thus, if the Document + is in part a textbook of mathematics, a Secondary Section may not + explain any mathematics.) The relationship could be a matter of + historical connection with the subject or with related matters, or + of legal, commercial, philosophical, ethical or political position + regarding them. + + The "Invariant Sections" are certain Secondary Sections whose + titles are designated, as being those of Invariant Sections, in + the notice that says that the Document is released under this + License. If a section does not fit the above definition of + Secondary then it is not allowed to be designated as Invariant. + The Document may contain zero Invariant Sections. If the Document + does not identify any Invariant Sections then there are none. 
+ + The "Cover Texts" are certain short passages of text that are + listed, as Front-Cover Texts or Back-Cover Texts, in the notice + that says that the Document is released under this License. A + Front-Cover Text may be at most 5 words, and a Back-Cover Text may + be at most 25 words. + + A "Transparent" copy of the Document means a machine-readable copy, + represented in a format whose specification is available to the + general public, that is suitable for revising the document + straightforwardly with generic text editors or (for images + composed of pixels) generic paint programs or (for drawings) some + widely available drawing editor, and that is suitable for input to + text formatters or for automatic translation to a variety of + formats suitable for input to text formatters. A copy made in an + otherwise Transparent file format whose markup, or absence of + markup, has been arranged to thwart or discourage subsequent + modification by readers is not Transparent. An image format is + not Transparent if used for any substantial amount of text. A + copy that is not "Transparent" is called "Opaque". + + Examples of suitable formats for Transparent copies include plain + ASCII without markup, Texinfo input format, LaTeX input format, + SGML or XML using a publicly available DTD, and + standard-conforming simple HTML, PostScript or PDF designed for + human modification. Examples of transparent image formats include + PNG, XCF and JPG. Opaque formats include proprietary formats that + can be read and edited only by proprietary word processors, SGML or + XML for which the DTD and/or processing tools are not generally + available, and the machine-generated HTML, PostScript or PDF + produced by some word processors for output purposes only. + + The "Title Page" means, for a printed book, the title page itself, + plus such following pages as are needed to hold, legibly, the + material this License requires to appear in the title page. 
For + works in formats which do not have any title page as such, "Title + Page" means the text near the most prominent appearance of the + work's title, preceding the beginning of the body of the text. + + The "publisher" means any person or entity that distributes copies + of the Document to the public. + + A section "Entitled XYZ" means a named subunit of the Document + whose title either is precisely XYZ or contains XYZ in parentheses + following text that translates XYZ in another language. (Here XYZ + stands for a specific section name mentioned below, such as + "Acknowledgements", "Dedications", "Endorsements", or "History".) + To "Preserve the Title" of such a section when you modify the + Document means that it remains a section "Entitled XYZ" according + to this definition. + + The Document may include Warranty Disclaimers next to the notice + which states that this License applies to the Document. These + Warranty Disclaimers are considered to be included by reference in + this License, but only as regards disclaiming warranties: any other + implication that these Warranty Disclaimers may have is void and + has no effect on the meaning of this License. + + 2. VERBATIM COPYING + + You may copy and distribute the Document in any medium, either + commercially or noncommercially, provided that this License, the + copyright notices, and the license notice saying this License + applies to the Document are reproduced in all copies, and that you + add no other conditions whatsoever to those of this License. You + may not use technical measures to obstruct or control the reading + or further copying of the copies you make or distribute. However, + you may accept compensation in exchange for copies. If you + distribute a large enough number of copies you must also follow + the conditions in section 3. + + You may also lend copies, under the same conditions stated above, + and you may publicly display copies. + + 3. 
COPYING IN QUANTITY + + If you publish printed copies (or copies in media that commonly + have printed covers) of the Document, numbering more than 100, and + the Document's license notice requires Cover Texts, you must + enclose the copies in covers that carry, clearly and legibly, all + these Cover Texts: Front-Cover Texts on the front cover, and + Back-Cover Texts on the back cover. Both covers must also clearly + and legibly identify you as the publisher of these copies. The + front cover must present the full title with all words of the + title equally prominent and visible. You may add other material + on the covers in addition. Copying with changes limited to the + covers, as long as they preserve the title of the Document and + satisfy these conditions, can be treated as verbatim copying in + other respects. + + If the required texts for either cover are too voluminous to fit + legibly, you should put the first ones listed (as many as fit + reasonably) on the actual cover, and continue the rest onto + adjacent pages. + + If you publish or distribute Opaque copies of the Document + numbering more than 100, you must either include a + machine-readable Transparent copy along with each Opaque copy, or + state in or with each Opaque copy a computer-network location from + which the general network-using public has access to download + using public-standard network protocols a complete Transparent + copy of the Document, free of added material. If you use the + latter option, you must take reasonably prudent steps, when you + begin distribution of Opaque copies in quantity, to ensure that + this Transparent copy will remain thus accessible at the stated + location until at least one year after the last time you + distribute an Opaque copy (directly or through your agents or + retailers) of that edition to the public. 
+ + It is requested, but not required, that you contact the authors of + the Document well before redistributing any large number of + copies, to give them a chance to provide you with an updated + version of the Document. + + 4. MODIFICATIONS + + You may copy and distribute a Modified Version of the Document + under the conditions of sections 2 and 3 above, provided that you + release the Modified Version under precisely this License, with + the Modified Version filling the role of the Document, thus + licensing distribution and modification of the Modified Version to + whoever possesses a copy of it. In addition, you must do these + things in the Modified Version: + + A. Use in the Title Page (and on the covers, if any) a title + distinct from that of the Document, and from those of + previous versions (which should, if there were any, be listed + in the History section of the Document). You may use the + same title as a previous version if the original publisher of + that version gives permission. + + B. List on the Title Page, as authors, one or more persons or + entities responsible for authorship of the modifications in + the Modified Version, together with at least five of the + principal authors of the Document (all of its principal + authors, if it has fewer than five), unless they release you + from this requirement. + + C. State on the Title page the name of the publisher of the + Modified Version, as the publisher. + + D. Preserve all the copyright notices of the Document. + + E. Add an appropriate copyright notice for your modifications + adjacent to the other copyright notices. + + F. Include, immediately after the copyright notices, a license + notice giving the public permission to use the Modified + Version under the terms of this License, in the form shown in + the Addendum below. + + G. Preserve in that license notice the full lists of Invariant + Sections and required Cover Texts given in the Document's + license notice. + + H. 
Include an unaltered copy of this License. + + I. Preserve the section Entitled "History", Preserve its Title, + and add to it an item stating at least the title, year, new + authors, and publisher of the Modified Version as given on + the Title Page. If there is no section Entitled "History" in + the Document, create one stating the title, year, authors, + and publisher of the Document as given on its Title Page, + then add an item describing the Modified Version as stated in + the previous sentence. + + J. Preserve the network location, if any, given in the Document + for public access to a Transparent copy of the Document, and + likewise the network locations given in the Document for + previous versions it was based on. These may be placed in + the "History" section. You may omit a network location for a + work that was published at least four years before the + Document itself, or if the original publisher of the version + it refers to gives permission. + + K. For any section Entitled "Acknowledgements" or "Dedications", + Preserve the Title of the section, and preserve in the + section all the substance and tone of each of the contributor + acknowledgements and/or dedications given therein. + + L. Preserve all the Invariant Sections of the Document, + unaltered in their text and in their titles. Section numbers + or the equivalent are not considered part of the section + titles. + + M. Delete any section Entitled "Endorsements". Such a section + may not be included in the Modified Version. + + N. Do not retitle any existing section to be Entitled + "Endorsements" or to conflict in title with any Invariant + Section. + + O. Preserve any Warranty Disclaimers. + + If the Modified Version includes new front-matter sections or + appendices that qualify as Secondary Sections and contain no + material copied from the Document, you may at your option + designate some or all of these sections as invariant. 
To do this, + add their titles to the list of Invariant Sections in the Modified + Version's license notice. These titles must be distinct from any + other section titles. + + You may add a section Entitled "Endorsements", provided it contains + nothing but endorsements of your Modified Version by various + parties--for example, statements of peer review or that the text + has been approved by an organization as the authoritative + definition of a standard. + + You may add a passage of up to five words as a Front-Cover Text, + and a passage of up to 25 words as a Back-Cover Text, to the end + of the list of Cover Texts in the Modified Version. Only one + passage of Front-Cover Text and one of Back-Cover Text may be + added by (or through arrangements made by) any one entity. If the + Document already includes a cover text for the same cover, + previously added by you or by arrangement made by the same entity + you are acting on behalf of, you may not add another; but you may + replace the old one, on explicit permission from the previous + publisher that added the old one. + + The author(s) and publisher(s) of the Document do not by this + License give permission to use their names for publicity for or to + assert or imply endorsement of any Modified Version. + + 5. COMBINING DOCUMENTS + + You may combine the Document with other documents released under + this License, under the terms defined in section 4 above for + modified versions, provided that you include in the combination + all of the Invariant Sections of all of the original documents, + unmodified, and list them all as Invariant Sections of your + combined work in its license notice, and that you preserve all + their Warranty Disclaimers. + + The combined work need only contain one copy of this License, and + multiple identical Invariant Sections may be replaced with a single + copy. 
If there are multiple Invariant Sections with the same name + but different contents, make the title of each such section unique + by adding at the end of it, in parentheses, the name of the + original author or publisher of that section if known, or else a + unique number. Make the same adjustment to the section titles in + the list of Invariant Sections in the license notice of the + combined work. + + In the combination, you must combine any sections Entitled + "History" in the various original documents, forming one section + Entitled "History"; likewise combine any sections Entitled + "Acknowledgements", and any sections Entitled "Dedications". You + must delete all sections Entitled "Endorsements." + + 6. COLLECTIONS OF DOCUMENTS + + You may make a collection consisting of the Document and other + documents released under this License, and replace the individual + copies of this License in the various documents with a single copy + that is included in the collection, provided that you follow the + rules of this License for verbatim copying of each of the + documents in all other respects. + + You may extract a single document from such a collection, and + distribute it individually under this License, provided you insert + a copy of this License into the extracted document, and follow + this License in all other respects regarding verbatim copying of + that document. + + 7. AGGREGATION WITH INDEPENDENT WORKS + + A compilation of the Document or its derivatives with other + separate and independent documents or works, in or on a volume of + a storage or distribution medium, is called an "aggregate" if the + copyright resulting from the compilation is not used to limit the + legal rights of the compilation's users beyond what the individual + works permit. When the Document is included in an aggregate, this + License does not apply to the other works in the aggregate which + are not themselves derivative works of the Document. 
+ + If the Cover Text requirement of section 3 is applicable to these + copies of the Document, then if the Document is less than one half + of the entire aggregate, the Document's Cover Texts may be placed + on covers that bracket the Document within the aggregate, or the + electronic equivalent of covers if the Document is in electronic + form. Otherwise they must appear on printed covers that bracket + the whole aggregate. + + 8. TRANSLATION + + Translation is considered a kind of modification, so you may + distribute translations of the Document under the terms of section + 4. Replacing Invariant Sections with translations requires special + permission from their copyright holders, but you may include + translations of some or all Invariant Sections in addition to the + original versions of these Invariant Sections. You may include a + translation of this License, and all the license notices in the + Document, and any Warranty Disclaimers, provided that you also + include the original English version of this License and the + original versions of those notices and disclaimers. In case of a + disagreement between the translation and the original version of + this License or a notice or disclaimer, the original version will + prevail. + + If a section in the Document is Entitled "Acknowledgements", + "Dedications", or "History", the requirement (section 4) to + Preserve its Title (section 1) will typically require changing the + actual title. + + 9. TERMINATION + + You may not copy, modify, sublicense, or distribute the Document + except as expressly provided under this License. Any attempt + otherwise to copy, modify, sublicense, or distribute it is void, + and will automatically terminate your rights under this License. 
+ + However, if you cease all violation of this License, then your + license from a particular copyright holder is reinstated (a) + provisionally, unless and until the copyright holder explicitly + and finally terminates your license, and (b) permanently, if the + copyright holder fails to notify you of the violation by some + reasonable means prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from + that copyright holder, and you cure the violation prior to 30 days + after your receipt of the notice. + + Termination of your rights under this section does not terminate + the licenses of parties who have received copies or rights from + you under this License. If your rights have been terminated and + not permanently reinstated, receipt of a copy of some or all of + the same material does not give you any rights to use it. + + 10. FUTURE REVISIONS OF THIS LICENSE + + The Free Software Foundation may publish new, revised versions of + the GNU Free Documentation License from time to time. Such new + versions will be similar in spirit to the present version, but may + differ in detail to address new problems or concerns. See + `http://www.gnu.org/copyleft/'. + + Each version of the License is given a distinguishing version + number. If the Document specifies that a particular numbered + version of this License "or any later version" applies to it, you + have the option of following the terms and conditions either of + that specified version or of any later version that has been + published (not as a draft) by the Free Software Foundation. If + the Document does not specify a version number of this License, + you may choose any version ever published (not as a draft) by the + Free Software Foundation. 
If the Document specifies that a proxy + can decide which future versions of this License can be used, that + proxy's public statement of acceptance of a version permanently + authorizes you to choose that version for the Document. + + 11. RELICENSING + + "Massive Multiauthor Collaboration Site" (or "MMC Site") means any + World Wide Web server that publishes copyrightable works and also + provides prominent facilities for anybody to edit those works. A + public wiki that anybody can edit is an example of such a server. + A "Massive Multiauthor Collaboration" (or "MMC") contained in the + site means any set of copyrightable works thus published on the MMC + site. + + "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 + license published by Creative Commons Corporation, a not-for-profit + corporation with a principal place of business in San Francisco, + California, as well as future copyleft versions of that license + published by that same organization. + + "Incorporate" means to publish or republish a Document, in whole or + in part, as part of another Document. + + An MMC is "eligible for relicensing" if it is licensed under this + License, and if all works that were first published under this + License somewhere other than this MMC, and subsequently + incorporated in whole or in part into the MMC, (1) had no cover + texts or invariant sections, and (2) were thus incorporated prior + to November 1, 2008. + + The operator of an MMC Site may republish an MMC contained in the + site under CC-BY-SA on the same site at any time before August 1, + 2009, provided the MMC is eligible for relicensing. + + +ADDENDUM: How to use this License for your documents +==================================================== + + To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and license +notices just after the title page: + + Copyright (C) YEAR YOUR NAME. 
+ Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. + + If you have Invariant Sections, Front-Cover Texts and Back-Cover +Texts, replace the "with...Texts." line with this: + + with the Invariant Sections being LIST THEIR TITLES, with + the Front-Cover Texts being LIST, and with the Back-Cover Texts + being LIST. + + If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + + If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, to +permit their use in free software. + + +File: libunistring.info, Node: Index, Prev: Licenses, Up: Top + +Index +***** + +[index] +* Menu: + +* ambiguous width: uniwidth.h. (line 10) +* argument conventions: Conventions. (line 9) +* autoconf macro: Autoconf macro. (line 6) +* bidirectional category: Bidirectional category. + (line 6) +* bidirectional reordering: More functionality. (line 6) +* block: Blocks. (line 6) +* breaks, line: unilbrk.h. (line 6) +* breaks, word: uniwbrk.h. (line 6) +* bug reports: Reporting problems. (line 6) +* bug tracker: Reporting problems. (line 6) +* C string functions: char * strings. (line 6) +* C, programming language: ISO C and Java syntax. + (line 6) +* C-like API: Classifications like in ISO C. + (line 6) +* canonical combining class: Canonical combining class. + (line 6) +* case detection: Case detection. (line 6) +* case mappings: Case mappings of strings. + (line 6) +* casing_prefix_context_t: Case mappings of substrings. 
+ (line 15) +* casing_suffix_context_t: Case mappings of substrings. + (line 46) +* char, type: char * strings. (line 23) +* combining, Unicode characters: Composition of characters. + (line 6) +* comparing <1>: Elementary string functions on NUL terminated strings. + (line 128) +* comparing: Elementary string functions. + (line 108) +* comparing, ignoring case: Case insensitive comparison. + (line 6) +* comparing, ignoring case, with collation rules: Case insensitive comparison. + (line 66) +* comparing, ignoring normalization: Normalizing comparisons. + (line 6) +* comparing, ignoring normalization and case: Case insensitive comparison. + (line 6) +* comparing, ignoring normalization and case, with collation rules: Case insensitive comparison. + (line 66) +* comparing, ignoring normalization, with collation rules: Normalizing comparisons. + (line 23) +* comparing, with collation rules: Elementary string functions on NUL terminated strings. + (line 140) +* comparing, with collation rules, ignoring case: Case insensitive comparison. + (line 66) +* comparing, with collation rules, ignoring normalization: Normalizing comparisons. + (line 23) +* comparing, with collation rules, ignoring normalization and case: Case insensitive comparison. + (line 66) +* compiler options: Compiler options. (line 24) +* composing, Unicode characters: Composition of characters. + (line 6) +* converting <1>: uniconv.h. (line 45) +* converting: Elementary string conversions. + (line 6) +* copying <1>: Elementary string functions on NUL terminated strings. + (line 61) +* copying: Elementary string functions. + (line 72) +* counting: Elementary string functions. + (line 153) +* decomposing: Decomposition of characters. + (line 6) +* dependencies: Installation. (line 6) +* detecting case: Case detection. (line 6) +* duplicating <1>: Elementary string functions on NUL terminated strings. + (line 166) +* duplicating: Elementary string functions with memory allocation. 
+ (line 6) +* enum iconv_ilseq_handler: uniconv.h. (line 30) +* FDL, GNU Free Documentation License: GNU FDL. (line 6) +* formatted output: unistdio.h. (line 6) +* fullwidth: uniwidth.h. (line 22) +* general category: General category. (line 6) +* gl_LIBUNISTRING: Autoconf macro. (line 11) +* GPL, GNU General Public License: GNU GPL. (line 6) +* halfwidth: uniwidth.h. (line 22) +* identifiers: ISO C and Java syntax. + (line 6) +* installation: Installation. (line 10) +* internationalization: Unicode and i18n. (line 6) +* iterating <1>: Elementary string functions on NUL terminated strings. + (line 15) +* iterating: Elementary string functions. + (line 6) +* Java, programming language: ISO C and Java syntax. + (line 6) +* LGPL, GNU Lesser General Public License: GNU LGPL. (line 6) +* License, GNU FDL: GNU FDL. (line 6) +* License, GNU GPL: GNU GPL. (line 6) +* License, GNU LGPL: GNU LGPL. (line 6) +* Licenses: Licenses. (line 6) +* line breaks: unilbrk.h. (line 6) +* locale: Locale encodings. (line 6) +* locale categories: Locale encodings. (line 10) +* locale encoding <1>: uniconv.h. (line 10) +* locale encoding: Locale encodings. (line 28) +* locale language: Case mappings of strings. + (line 16) +* locale, multibyte: char * strings. (line 13) +* locale_charset: uniconv.h. (line 13) +* lowercasing: Case mappings of strings. + (line 6) +* mailing list: Reporting problems. (line 6) +* mirroring, of Unicode character: Mirrored character. (line 6) +* normal forms: uninorm.h. (line 6) +* normalizing: uninorm.h. (line 6) +* output, formatted: unistdio.h. (line 6) +* properties, of Unicode character: Properties. (line 6) +* regular expression: uniregex.h. (line 6) +* rendering: More functionality. (line 9) +* return value conventions: Conventions. (line 47) +* scripts: Scripts. (line 6) +* searching, for a character <1>: Elementary string functions on NUL terminated strings. + (line 176) +* searching, for a character: Elementary string functions. 
+ (line 140) +* searching, for a substring: Elementary string functions on NUL terminated strings. + (line 232) +* stream, normalizing a: Normalization of streams. + (line 6) +* struct uninorm_filter: Normalization of streams. + (line 11) +* titlecasing: Case mappings of strings. + (line 6) +* u16_asnprintf: unistdio.h. (line 132) +* u16_asprintf: unistdio.h. (line 129) +* u16_casecmp: Case insensitive comparison. + (line 51) +* u16_casecoll: Case insensitive comparison. + (line 95) +* u16_casefold: Case insensitive comparison. + (line 15) +* u16_casexfrm: Case insensitive comparison. + (line 75) +* u16_casing_prefix_context: Case mappings of substrings. + (line 30) +* u16_casing_prefixes_context: Case mappings of substrings. + (line 39) +* u16_casing_suffix_context: Case mappings of substrings. + (line 61) +* u16_casing_suffixes_context: Case mappings of substrings. + (line 70) +* u16_check: Elementary string checks. + (line 11) +* u16_chr: Elementary string functions. + (line 145) +* u16_cmp: Elementary string functions. + (line 115) +* u16_cmp2: Elementary string functions. + (line 131) +* u16_conv_from_encoding: uniconv.h. (line 54) +* u16_conv_to_encoding: uniconv.h. (line 91) +* u16_cpy: Elementary string functions. + (line 78) +* u16_cpy_alloc: Elementary string functions with memory allocation. + (line 10) +* u16_ct_casefold: Case insensitive comparison. + (line 37) +* u16_ct_tolower: Case mappings of substrings. + (line 107) +* u16_ct_totitle: Case mappings of substrings. + (line 125) +* u16_ct_toupper: Case mappings of substrings. + (line 89) +* u16_endswith: Elementary string functions on NUL terminated strings. + (line 258) +* u16_is_cased: Case detection. (line 57) +* u16_is_casefolded: Case detection. (line 44) +* u16_is_lowercase: Case detection. (line 24) +* u16_is_titlecase: Case detection. (line 34) +* u16_is_uppercase: Case detection. (line 14) +* u16_mblen: Elementary string functions. + (line 11) +* u16_mbsnlen: Elementary string functions. 
+ (line 157) +* u16_mbtouc: Elementary string functions. + (line 38) +* u16_mbtouc_unsafe: Elementary string functions. + (line 23) +* u16_mbtoucr: Elementary string functions. + (line 45) +* u16_move: Elementary string functions. + (line 89) +* u16_next: Elementary string functions on NUL terminated strings. + (line 24) +* u16_normalize: Normalization of strings. + (line 50) +* u16_normcmp: Normalizing comparisons. + (line 13) +* u16_normcoll: Normalizing comparisons. + (line 40) +* u16_normxfrm: Normalizing comparisons. + (line 27) +* u16_possible_linebreaks: unilbrk.h. (line 46) +* u16_prev: Elementary string functions on NUL terminated strings. + (line 36) +* u16_set: Elementary string functions. + (line 101) +* u16_snprintf: unistdio.h. (line 126) +* u16_sprintf: unistdio.h. (line 123) +* u16_startswith: Elementary string functions on NUL terminated strings. + (line 250) +* u16_stpcpy: Elementary string functions on NUL terminated strings. + (line 76) +* u16_stpncpy: Elementary string functions on NUL terminated strings. + (line 99) +* u16_strcat: Elementary string functions on NUL terminated strings. + (line 110) +* u16_strchr: Elementary string functions on NUL terminated strings. + (line 180) +* u16_strcmp: Elementary string functions on NUL terminated strings. + (line 132) +* u16_strcoll: Elementary string functions on NUL terminated strings. + (line 142) +* u16_strconv_from_encoding: uniconv.h. (line 129) +* u16_strconv_from_locale: uniconv.h. (line 157) +* u16_strconv_to_encoding: uniconv.h. (line 142) +* u16_strconv_to_locale: uniconv.h. (line 167) +* u16_strcpy: Elementary string functions on NUL terminated strings. + (line 66) +* u16_strcspn: Elementary string functions on NUL terminated strings. + (line 201) +* u16_strdup: Elementary string functions on NUL terminated strings. + (line 170) +* u16_strlen: Elementary string functions on NUL terminated strings. + (line 47) +* u16_strmblen: Elementary string functions on NUL terminated strings. 
+ (line 11) +* u16_strmbtouc: Elementary string functions on NUL terminated strings. + (line 17) +* u16_strncat: Elementary string functions on NUL terminated strings. + (line 121) +* u16_strncmp: Elementary string functions on NUL terminated strings. + (line 159) +* u16_strncpy: Elementary string functions on NUL terminated strings. + (line 88) +* u16_strnlen: Elementary string functions on NUL terminated strings. + (line 55) +* u16_strpbrk: Elementary string functions on NUL terminated strings. + (line 225) +* u16_strrchr: Elementary string functions on NUL terminated strings. + (line 188) +* u16_strspn: Elementary string functions on NUL terminated strings. + (line 213) +* u16_strstr: Elementary string functions on NUL terminated strings. + (line 239) +* u16_strtok: Elementary string functions on NUL terminated strings. + (line 268) +* u16_strwidth: uniwidth.h. (line 39) +* u16_to_u32: Elementary string conversions. + (line 23) +* u16_to_u8: Elementary string conversions. + (line 19) +* u16_tolower: Case mappings of strings. + (line 44) +* u16_totitle: Case mappings of strings. + (line 58) +* u16_toupper: Case mappings of strings. + (line 30) +* u16_u16_asnprintf: unistdio.h. (line 159) +* u16_u16_asprintf: unistdio.h. (line 156) +* u16_u16_snprintf: unistdio.h. (line 153) +* u16_u16_sprintf: unistdio.h. (line 150) +* u16_u16_vasnprintf: unistdio.h. (line 171) +* u16_u16_vasprintf: unistdio.h. (line 168) +* u16_u16_vsnprintf: unistdio.h. (line 165) +* u16_u16_vsprintf: unistdio.h. (line 162) +* u16_uctomb: Elementary string functions. + (line 62) +* u16_vasnprintf: unistdio.h. (line 144) +* u16_vasprintf: unistdio.h. (line 141) +* u16_vsnprintf: unistdio.h. (line 138) +* u16_vsprintf: unistdio.h. (line 135) +* u16_width: uniwidth.h. (line 31) +* u16_width_linebreaks: unilbrk.h. (line 65) +* u16_wordbreaks: Word breaks in a string. + (line 10) +* u32_asnprintf: unistdio.h. (line 185) +* u32_asprintf: unistdio.h. 
(line 182) +* u32_casecmp: Case insensitive comparison. + (line 54) +* u32_casecoll: Case insensitive comparison. + (line 98) +* u32_casefold: Case insensitive comparison. + (line 18) +* u32_casexfrm: Case insensitive comparison. + (line 78) +* u32_casing_prefix_context: Case mappings of substrings. + (line 32) +* u32_casing_prefixes_context: Case mappings of substrings. + (line 42) +* u32_casing_suffix_context: Case mappings of substrings. + (line 63) +* u32_casing_suffixes_context: Case mappings of substrings. + (line 73) +* u32_check: Elementary string checks. + (line 12) +* u32_chr: Elementary string functions. + (line 147) +* u32_cmp: Elementary string functions. + (line 117) +* u32_cmp2: Elementary string functions. + (line 133) +* u32_conv_from_encoding: uniconv.h. (line 57) +* u32_conv_to_encoding: uniconv.h. (line 94) +* u32_cpy: Elementary string functions. + (line 80) +* u32_cpy_alloc: Elementary string functions with memory allocation. + (line 11) +* u32_ct_casefold: Case insensitive comparison. + (line 42) +* u32_ct_tolower: Case mappings of substrings. + (line 112) +* u32_ct_totitle: Case mappings of substrings. + (line 130) +* u32_ct_toupper: Case mappings of substrings. + (line 94) +* u32_endswith: Elementary string functions on NUL terminated strings. + (line 260) +* u32_is_cased: Case detection. (line 59) +* u32_is_casefolded: Case detection. (line 46) +* u32_is_lowercase: Case detection. (line 26) +* u32_is_titlecase: Case detection. (line 36) +* u32_is_uppercase: Case detection. (line 16) +* u32_mblen: Elementary string functions. + (line 12) +* u32_mbsnlen: Elementary string functions. + (line 158) +* u32_mbtouc: Elementary string functions. + (line 39) +* u32_mbtouc_unsafe: Elementary string functions. + (line 25) +* u32_mbtoucr: Elementary string functions. + (line 46) +* u32_move: Elementary string functions. + (line 91) +* u32_next: Elementary string functions on NUL terminated strings. 
+ (line 25) +* u32_normalize: Normalization of strings. + (line 52) +* u32_normcmp: Normalizing comparisons. + (line 15) +* u32_normcoll: Normalizing comparisons. + (line 42) +* u32_normxfrm: Normalizing comparisons. + (line 29) +* u32_possible_linebreaks: unilbrk.h. (line 48) +* u32_prev: Elementary string functions on NUL terminated strings. + (line 38) +* u32_set: Elementary string functions. + (line 102) +* u32_snprintf: unistdio.h. (line 179) +* u32_sprintf: unistdio.h. (line 176) +* u32_startswith: Elementary string functions on NUL terminated strings. + (line 252) +* u32_stpcpy: Elementary string functions on NUL terminated strings. + (line 78) +* u32_stpncpy: Elementary string functions on NUL terminated strings. + (line 101) +* u32_strcat: Elementary string functions on NUL terminated strings. + (line 112) +* u32_strchr: Elementary string functions on NUL terminated strings. + (line 181) +* u32_strcmp: Elementary string functions on NUL terminated strings. + (line 133) +* u32_strcoll: Elementary string functions on NUL terminated strings. + (line 143) +* u32_strconv_from_encoding: uniconv.h. (line 131) +* u32_strconv_from_locale: uniconv.h. (line 158) +* u32_strconv_to_encoding: uniconv.h. (line 144) +* u32_strconv_to_locale: uniconv.h. (line 168) +* u32_strcpy: Elementary string functions on NUL terminated strings. + (line 68) +* u32_strcspn: Elementary string functions on NUL terminated strings. + (line 203) +* u32_strdup: Elementary string functions on NUL terminated strings. + (line 171) +* u32_strlen: Elementary string functions on NUL terminated strings. + (line 48) +* u32_strmblen: Elementary string functions on NUL terminated strings. + (line 12) +* u32_strmbtouc: Elementary string functions on NUL terminated strings. + (line 18) +* u32_strncat: Elementary string functions on NUL terminated strings. + (line 123) +* u32_strncmp: Elementary string functions on NUL terminated strings. 
+ (line 161) +* u32_strncpy: Elementary string functions on NUL terminated strings. + (line 90) +* u32_strnlen: Elementary string functions on NUL terminated strings. + (line 56) +* u32_strpbrk: Elementary string functions on NUL terminated strings. + (line 227) +* u32_strrchr: Elementary string functions on NUL terminated strings. + (line 189) +* u32_strspn: Elementary string functions on NUL terminated strings. + (line 215) +* u32_strstr: Elementary string functions on NUL terminated strings. + (line 241) +* u32_strtok: Elementary string functions on NUL terminated strings. + (line 270) +* u32_strwidth: uniwidth.h. (line 40) +* u32_to_u16: Elementary string conversions. + (line 31) +* u32_to_u8: Elementary string conversions. + (line 27) +* u32_tolower: Case mappings of strings. + (line 47) +* u32_totitle: Case mappings of strings. + (line 61) +* u32_toupper: Case mappings of strings. + (line 33) +* u32_u32_asnprintf: unistdio.h. (line 212) +* u32_u32_asprintf: unistdio.h. (line 209) +* u32_u32_snprintf: unistdio.h. (line 206) +* u32_u32_sprintf: unistdio.h. (line 203) +* u32_u32_vasnprintf: unistdio.h. (line 224) +* u32_u32_vasprintf: unistdio.h. (line 221) +* u32_u32_vsnprintf: unistdio.h. (line 218) +* u32_u32_vsprintf: unistdio.h. (line 215) +* u32_uctomb: Elementary string functions. + (line 63) +* u32_vasnprintf: unistdio.h. (line 197) +* u32_vasprintf: unistdio.h. (line 194) +* u32_vsnprintf: unistdio.h. (line 191) +* u32_vsprintf: unistdio.h. (line 188) +* u32_width: uniwidth.h. (line 33) +* u32_width_linebreaks: unilbrk.h. (line 68) +* u32_wordbreaks: Word breaks in a string. + (line 11) +* u8_asnprintf: unistdio.h. (line 79) +* u8_asprintf: unistdio.h. (line 76) +* u8_casecmp: Case insensitive comparison. + (line 48) +* u8_casecoll: Case insensitive comparison. + (line 92) +* u8_casefold: Case insensitive comparison. + (line 12) +* u8_casexfrm: Case insensitive comparison. + (line 72) +* u8_casing_prefix_context: Case mappings of substrings. 
+ (line 28) +* u8_casing_prefixes_context: Case mappings of substrings. + (line 36) +* u8_casing_suffix_context: Case mappings of substrings. + (line 59) +* u8_casing_suffixes_context: Case mappings of substrings. + (line 67) +* u8_check: Elementary string checks. + (line 10) +* u8_chr: Elementary string functions. + (line 143) +* u8_cmp: Elementary string functions. + (line 113) +* u8_cmp2: Elementary string functions. + (line 129) +* u8_conv_from_encoding: uniconv.h. (line 51) +* u8_conv_to_encoding: uniconv.h. (line 88) +* u8_cpy: Elementary string functions. + (line 76) +* u8_cpy_alloc: Elementary string functions with memory allocation. + (line 9) +* u8_ct_casefold: Case insensitive comparison. + (line 32) +* u8_ct_tolower: Case mappings of substrings. + (line 102) +* u8_ct_totitle: Case mappings of substrings. + (line 120) +* u8_ct_toupper: Case mappings of substrings. + (line 84) +* u8_endswith: Elementary string functions on NUL terminated strings. + (line 256) +* u8_is_cased: Case detection. (line 55) +* u8_is_casefolded: Case detection. (line 42) +* u8_is_lowercase: Case detection. (line 22) +* u8_is_titlecase: Case detection. (line 32) +* u8_is_uppercase: Case detection. (line 12) +* u8_mblen: Elementary string functions. + (line 10) +* u8_mbsnlen: Elementary string functions. + (line 156) +* u8_mbtouc: Elementary string functions. + (line 37) +* u8_mbtouc_unsafe: Elementary string functions. + (line 21) +* u8_mbtoucr: Elementary string functions. + (line 44) +* u8_move: Elementary string functions. + (line 87) +* u8_next: Elementary string functions on NUL terminated strings. + (line 23) +* u8_normalize: Normalization of strings. + (line 48) +* u8_normcmp: Normalizing comparisons. + (line 11) +* u8_normcoll: Normalizing comparisons. + (line 38) +* u8_normxfrm: Normalizing comparisons. + (line 25) +* u8_possible_linebreaks: unilbrk.h. (line 44) +* u8_prev: Elementary string functions on NUL terminated strings. 
+ (line 34) +* u8_set: Elementary string functions. + (line 100) +* u8_snprintf: unistdio.h. (line 73) +* u8_sprintf: unistdio.h. (line 70) +* u8_startswith: Elementary string functions on NUL terminated strings. + (line 248) +* u8_stpcpy: Elementary string functions on NUL terminated strings. + (line 74) +* u8_stpncpy: Elementary string functions on NUL terminated strings. + (line 97) +* u8_strcat: Elementary string functions on NUL terminated strings. + (line 108) +* u8_strchr: Elementary string functions on NUL terminated strings. + (line 179) +* u8_strcmp: Elementary string functions on NUL terminated strings. + (line 131) +* u8_strcoll: Elementary string functions on NUL terminated strings. + (line 141) +* u8_strconv_from_encoding: uniconv.h. (line 127) +* u8_strconv_from_locale: uniconv.h. (line 156) +* u8_strconv_to_encoding: uniconv.h. (line 140) +* u8_strconv_to_locale: uniconv.h. (line 166) +* u8_strcpy: Elementary string functions on NUL terminated strings. + (line 64) +* u8_strcspn: Elementary string functions on NUL terminated strings. + (line 199) +* u8_strdup: Elementary string functions on NUL terminated strings. + (line 169) +* u8_strlen: Elementary string functions on NUL terminated strings. + (line 46) +* u8_strmblen: Elementary string functions on NUL terminated strings. + (line 10) +* u8_strmbtouc: Elementary string functions on NUL terminated strings. + (line 16) +* u8_strncat: Elementary string functions on NUL terminated strings. + (line 119) +* u8_strncmp: Elementary string functions on NUL terminated strings. + (line 157) +* u8_strncpy: Elementary string functions on NUL terminated strings. + (line 86) +* u8_strnlen: Elementary string functions on NUL terminated strings. + (line 54) +* u8_strpbrk: Elementary string functions on NUL terminated strings. + (line 223) +* u8_strrchr: Elementary string functions on NUL terminated strings. + (line 187) +* u8_strspn: Elementary string functions on NUL terminated strings. 
+ (line 211) +* u8_strstr: Elementary string functions on NUL terminated strings. + (line 237) +* u8_strtok: Elementary string functions on NUL terminated strings. + (line 266) +* u8_strwidth: uniwidth.h. (line 38) +* u8_to_u16: Elementary string conversions. + (line 11) +* u8_to_u32: Elementary string conversions. + (line 15) +* u8_tolower: Case mappings of strings. + (line 41) +* u8_totitle: Case mappings of strings. + (line 55) +* u8_toupper: Case mappings of strings. + (line 27) +* u8_u8_asnprintf: unistdio.h. (line 106) +* u8_u8_asprintf: unistdio.h. (line 103) +* u8_u8_snprintf: unistdio.h. (line 100) +* u8_u8_sprintf: unistdio.h. (line 97) +* u8_u8_vasnprintf: unistdio.h. (line 118) +* u8_u8_vasprintf: unistdio.h. (line 115) +* u8_u8_vsnprintf: unistdio.h. (line 112) +* u8_u8_vsprintf: unistdio.h. (line 109) +* u8_uctomb: Elementary string functions. + (line 61) +* u8_vasnprintf: unistdio.h. (line 91) +* u8_vasprintf: unistdio.h. (line 88) +* u8_vsnprintf: unistdio.h. (line 85) +* u8_vsprintf: unistdio.h. (line 82) +* u8_width: uniwidth.h. (line 29) +* u8_width_linebreaks: unilbrk.h. (line 62) +* u8_wordbreaks: Word breaks in a string. + (line 9) +* uc_all_blocks: Blocks. (line 38) +* uc_all_scripts: Scripts. (line 37) +* uc_bidi_category: Bidirectional category. + (line 88) +* uc_bidi_category_byname: Bidirectional category. + (line 82) +* uc_bidi_category_name: Bidirectional category. + (line 79) +* uc_block: Blocks. (line 27) +* uc_block_t: Blocks. (line 12) +* uc_c_ident_category: ISO C and Java syntax. + (line 39) +* uc_canonical_decomposition: Decomposition of characters. + (line 92) +* uc_combining_class: Canonical combining class. + (line 89) +* uc_composition: Composition of characters. + (line 10) +* uc_decimal_value: Decimal digit value. (line 11) +* uc_decomposition: Decomposition of characters. + (line 82) +* uc_digit_value: Digit value. (line 11) +* uc_fraction_t: Numeric value. (line 14) +* uc_general_category: Object oriented API. 
(line 207) +* uc_general_category_and: Object oriented API. (line 179) +* uc_general_category_and_not: Object oriented API. (line 186) +* uc_general_category_byname: Object oriented API. (line 201) +* uc_general_category_name: Object oriented API. (line 195) +* uc_general_category_or: Object oriented API. (line 173) +* uc_general_category_t: Object oriented API. (line 7) +* uc_is_alnum: Classifications like in ISO C. + (line 14) +* uc_is_alpha: Classifications like in ISO C. + (line 18) +* uc_is_bidi_category: Bidirectional category. + (line 91) +* uc_is_blank: Classifications like in ISO C. + (line 64) +* uc_is_block: Blocks. (line 32) +* uc_is_c_whitespace: ISO C and Java syntax. + (line 10) +* uc_is_cntrl: Classifications like in ISO C. + (line 24) +* uc_is_digit: Classifications like in ISO C. + (line 27) +* uc_is_general_category: Object oriented API. (line 213) +* uc_is_general_category_withtable: Bit mask API. (line 52) +* uc_is_graph: Classifications like in ISO C. + (line 31) +* uc_is_java_whitespace: ISO C and Java syntax. + (line 14) +* uc_is_lower: Classifications like in ISO C. + (line 35) +* uc_is_print: Classifications like in ISO C. + (line 41) +* uc_is_property: Properties as objects. + (line 140) +* uc_is_property_alphabetic: Properties as functions. + (line 10) +* uc_is_property_ascii_hex_digit: Properties as functions. + (line 74) +* uc_is_property_bidi_arabic_digit: Properties as functions. + (line 60) +* uc_is_property_bidi_arabic_right_to_left: Properties as functions. + (line 56) +* uc_is_property_bidi_block_separator: Properties as functions. + (line 62) +* uc_is_property_bidi_boundary_neutral: Properties as functions. + (line 66) +* uc_is_property_bidi_common_separator: Properties as functions. + (line 61) +* uc_is_property_bidi_control: Properties as functions. + (line 53) +* uc_is_property_bidi_embedding_or_override: Properties as functions. + (line 68) +* uc_is_property_bidi_eur_num_separator: Properties as functions. 
+ (line 58) +* uc_is_property_bidi_eur_num_terminator: Properties as functions. + (line 59) +* uc_is_property_bidi_european_digit: Properties as functions. + (line 57) +* uc_is_property_bidi_hebrew_right_to_left: Properties as functions. + (line 55) +* uc_is_property_bidi_left_to_right: Properties as functions. + (line 54) +* uc_is_property_bidi_non_spacing_mark: Properties as functions. + (line 65) +* uc_is_property_bidi_other_neutral: Properties as functions. + (line 69) +* uc_is_property_bidi_pdf: Properties as functions. + (line 67) +* uc_is_property_bidi_segment_separator: Properties as functions. + (line 63) +* uc_is_property_bidi_whitespace: Properties as functions. + (line 64) +* uc_is_property_combining: Properties as functions. + (line 104) +* uc_is_property_composite: Properties as functions. + (line 105) +* uc_is_property_currency_symbol: Properties as functions. + (line 99) +* uc_is_property_dash: Properties as functions. + (line 91) +* uc_is_property_decimal_digit: Properties as functions. + (line 106) +* uc_is_property_default_ignorable_code_point: Properties as functions. + (line 14) +* uc_is_property_deprecated: Properties as functions. + (line 17) +* uc_is_property_diacritic: Properties as functions. + (line 108) +* uc_is_property_extender: Properties as functions. + (line 109) +* uc_is_property_format_control: Properties as functions. + (line 90) +* uc_is_property_grapheme_base: Properties as functions. + (line 46) +* uc_is_property_grapheme_extend: Properties as functions. + (line 47) +* uc_is_property_grapheme_link: Properties as functions. + (line 49) +* uc_is_property_hex_digit: Properties as functions. + (line 73) +* uc_is_property_hyphen: Properties as functions. + (line 92) +* uc_is_property_id_continue: Properties as functions. + (line 36) +* uc_is_property_id_start: Properties as functions. + (line 34) +* uc_is_property_ideographic: Properties as functions. + (line 78) +* uc_is_property_ids_binary_operator: Properties as functions. 
+ (line 81) +* uc_is_property_ids_trinary_operator: Properties as functions. + (line 82) +* uc_is_property_ignorable_control: Properties as functions. + (line 110) +* uc_is_property_iso_control: Properties as functions. + (line 89) +* uc_is_property_join_control: Properties as functions. + (line 45) +* uc_is_property_left_of_pair: Properties as functions. + (line 103) +* uc_is_property_line_separator: Properties as functions. + (line 94) +* uc_is_property_logical_order_exception: Properties as functions. + (line 18) +* uc_is_property_lowercase: Properties as functions. + (line 27) +* uc_is_property_math: Properties as functions. + (line 100) +* uc_is_property_non_break: Properties as functions. + (line 88) +* uc_is_property_not_a_character: Properties as functions. + (line 12) +* uc_is_property_numeric: Properties as functions. + (line 107) +* uc_is_property_other_alphabetic: Properties as functions. + (line 11) +* uc_is_property_other_default_ignorable_code_point: Properties as functions. + (line 16) +* uc_is_property_other_grapheme_extend: Properties as functions. + (line 48) +* uc_is_property_other_id_continue: Properties as functions. + (line 37) +* uc_is_property_other_id_start: Properties as functions. + (line 35) +* uc_is_property_other_lowercase: Properties as functions. + (line 28) +* uc_is_property_other_math: Properties as functions. + (line 101) +* uc_is_property_other_uppercase: Properties as functions. + (line 26) +* uc_is_property_paired_punctuation: Properties as functions. + (line 102) +* uc_is_property_paragraph_separator: Properties as functions. + (line 95) +* uc_is_property_pattern_syntax: Properties as functions. + (line 41) +* uc_is_property_pattern_white_space: Properties as functions. + (line 40) +* uc_is_property_private_use: Properties as functions. + (line 20) +* uc_is_property_punctuation: Properties as functions. + (line 93) +* uc_is_property_quotation_mark: Properties as functions. 
+ (line 96) +* uc_is_property_radical: Properties as functions. + (line 80) +* uc_is_property_sentence_terminal: Properties as functions. + (line 97) +* uc_is_property_soft_dotted: Properties as functions. + (line 30) +* uc_is_property_space: Properties as functions. + (line 87) +* uc_is_property_terminal_punctuation: Properties as functions. + (line 98) +* uc_is_property_titlecase: Properties as functions. + (line 29) +* uc_is_property_unassigned_code_value: Properties as functions. + (line 21) +* uc_is_property_unified_ideograph: Properties as functions. + (line 79) +* uc_is_property_uppercase: Properties as functions. + (line 25) +* uc_is_property_variation_selector: Properties as functions. + (line 19) +* uc_is_property_white_space: Properties as functions. + (line 9) +* uc_is_property_xid_continue: Properties as functions. + (line 39) +* uc_is_property_xid_start: Properties as functions. + (line 38) +* uc_is_property_zero_width: Properties as functions. + (line 86) +* uc_is_punct: Classifications like in ISO C. + (line 44) +* uc_is_script: Scripts. (line 31) +* uc_is_space: Classifications like in ISO C. + (line 49) +* uc_is_upper: Classifications like in ISO C. + (line 54) +* uc_is_xdigit: Classifications like in ISO C. + (line 60) +* uc_java_ident_category: ISO C and Java syntax. + (line 43) +* uc_locale_language: Case mappings of strings. + (line 21) +* uc_mirror_char: Mirrored character. (line 14) +* uc_numeric_value: Numeric value. (line 23) +* uc_property_byname: Properties as objects. + (line 123) +* uc_property_is_valid: Properties as objects. + (line 133) +* uc_property_t: Properties as objects. + (line 9) +* uc_script: Scripts. (line 20) +* uc_script_byname: Scripts. (line 25) +* uc_script_t: Scripts. (line 11) +* uc_tolower: Case mappings of characters. + (line 20) +* uc_totitle: Case mappings of characters. + (line 23) +* uc_toupper: Case mappings of characters. + (line 17) +* uc_width: uniwidth.h. 
(line 23) +* uc_wordbreak_property: Word break property. (line 32) +* UCS-4: Unicode. (line 14) +* ucs4_t: unitypes.h. (line 16) +* uint16_t: unitypes.h. (line 10) +* uint32_t: unitypes.h. (line 11) +* uint8_t: unitypes.h. (line 9) +* ulc_asnprintf: unistdio.h. (line 53) +* ulc_asprintf: unistdio.h. (line 50) +* ulc_casecmp: Case insensitive comparison. + (line 57) +* ulc_casecoll: Case insensitive comparison. + (line 101) +* ulc_casexfrm: Case insensitive comparison. + (line 81) +* ulc_fprintf: unistdio.h. (line 229) +* ulc_possible_linebreaks: unilbrk.h. (line 50) +* ulc_snprintf: unistdio.h. (line 48) +* ulc_sprintf: unistdio.h. (line 45) +* ulc_vasnprintf: unistdio.h. (line 65) +* ulc_vasprintf: unistdio.h. (line 62) +* ulc_vfprintf: unistdio.h. (line 232) +* ulc_vsnprintf: unistdio.h. (line 59) +* ulc_vsprintf: unistdio.h. (line 56) +* ulc_width_linebreaks: unilbrk.h. (line 71) +* ulc_wordbreaks: Word breaks in a string. + (line 12) +* Unicode: Unicode. (line 6) +* Unicode character, bidirectional category: Bidirectional category. + (line 6) +* Unicode character, block: Blocks. (line 24) +* Unicode character, canonical combining class: Canonical combining class. + (line 6) +* Unicode character, case mappings: Case mappings of characters. + (line 6) +* Unicode character, classification: General category. (line 6) +* Unicode character, classification like in C: Classifications like in ISO C. + (line 6) +* Unicode character, general category: General category. (line 6) +* Unicode character, mirroring: Mirrored character. (line 6) +* Unicode character, name: uniname.h. (line 6) +* Unicode character, properties: Properties. (line 6) +* Unicode character, script: Scripts. (line 17) +* Unicode character, validity in C identifiers: ISO C and Java syntax. + (line 38) +* Unicode character, validity in Java identifiers: ISO C and Java syntax. + (line 42) +* Unicode character, value <1>: Numeric value. (line 6) +* Unicode character, value <2>: Digit value. 
(line 6) +* Unicode character, value: Decimal digit value. (line 6) +* Unicode character, width: uniwidth.h. (line 22) +* unicode_character_name: uniname.h. (line 19) +* unicode_name_character: uniname.h. (line 25) +* uninorm_decomposing_form: Normalization of strings. + (line 40) +* uninorm_filter_create: Normalization of streams. + (line 19) +* uninorm_filter_flush: Normalization of streams. + (line 33) +* uninorm_filter_free: Normalization of streams. + (line 43) +* uninorm_filter_write: Normalization of streams. + (line 29) +* uninorm_is_compat_decomposing: Normalization of strings. + (line 32) +* uninorm_is_composing: Normalization of strings. + (line 36) +* uninorm_t: Normalization of strings. + (line 10) +* uppercasing: Case mappings of strings. + (line 6) +* use cases: Introduction. (line 44) +* UTF-16: Unicode. (line 14) +* UTF-16, strings: Unicode strings. (line 6) +* UTF-32: Unicode. (line 14) +* UTF-32, strings: Unicode strings. (line 6) +* UTF-8: Unicode. (line 14) +* UTF-8, strings: Unicode strings. (line 6) +* validity: Elementary string checks. + (line 6) +* value, of libunistring: Introduction. (line 44) +* value, of Unicode character <1>: Numeric value. (line 6) +* value, of Unicode character <2>: Digit value. (line 6) +* value, of Unicode character: Decimal digit value. (line 6) +* verification: Elementary string checks. + (line 6) +* wchar_t, type: The wchar_t mess. (line 6) +* width: uniwidth.h. (line 6) +* word breaks: uniwbrk.h. (line 6) +* wrapping: unilbrk.h. 
(line 6) + + + +Tag Table: +Node: Top270 +Node: Introduction3239 +Node: Unicode5236 +Node: Unicode and i18n7116 +Node: Locale encodings8579 +Node: In-memory representation10787 +Node: char * strings11896 +Node: The wchar_t mess17153 +Node: Unicode strings19357 +Node: Conventions20508 +Node: unitypes.h22708 +Node: unistr.h23280 +Node: Elementary string checks23837 +Node: Elementary string conversions24459 +Node: Elementary string functions25761 +Node: Elementary string functions with memory allocation32732 +Node: Elementary string functions on NUL terminated strings33354 +Node: uniconv.h45090 +Node: unistdio.h52801 +Node: uniname.h61004 +Node: unictype.h62337 +Node: General category63246 +Node: Object oriented API64289 +Node: Bit mask API72751 +Node: Canonical combining class75005 +Node: Bidirectional category78219 +Node: Decimal digit value81276 +Node: Digit value81837 +Node: Numeric value82398 +Node: Mirrored character83289 +Node: Properties83962 +Node: Properties as objects84653 +Node: Properties as functions91031 +Node: Scripts96582 +Node: Blocks97968 +Node: ISO C and Java syntax99291 +Node: Classifications like in ISO C101001 +Node: uniwidth.h103705 +Node: uniwbrk.h105742 +Node: Word breaks in a string106269 +Node: Word break property107320 +Node: unilbrk.h108416 +Node: uninorm.h112587 +Node: Decomposition of characters113219 +Node: Composition of characters116595 +Node: Normalization of strings117304 +Node: Normalizing comparisons119366 +Node: Normalization of streams121722 +Node: unicase.h123810 +Node: Case mappings of characters124495 +Node: Case mappings of strings126542 +Node: Case mappings of substrings129875 +Node: Case insensitive comparison136805 +Node: Case detection142156 +Node: uniregex.h145424 +Node: Using the library145647 +Node: Installation146058 +Node: Compiler options146531 +Node: Include files148090 +Node: Autoconf macro149314 +Node: Reporting problems150872 +Node: More functionality151669 +Node: Licenses152112 +Node: GNU GPL153747 +Node: GNU 
LGPL191292 +Node: GNU FDL199738 +Node: Index224863 + +End Tag Table + + +Local Variables: +coding: utf-8 +End: diff --git a/doc/libunistring.texi b/doc/libunistring.texi new file mode 100644 index 00000000..8eb8061f --- /dev/null +++ b/doc/libunistring.texi @@ -0,0 +1,989 @@ +\input texinfo @c -*-texinfo-*- +@comment %**start of header +@setfilename libunistring.info +@documentencoding UTF-8 +@settitle GNU libunistring +@finalout +@c Indices: +@c am = autoconf macro @amindex +@c cp = concept @cindex +@c fn = function @findex +@c tp = type @tindex +@c Unused predefined indices: +@c ky = keystroke @kindex +@c pg = program @pindex +@c vr = variable @vindex +@defcodeindex am +@syncodeindex am cp +@syncodeindex fn cp +@syncodeindex tp cp +@ifclear texi2html +@firstparagraphindent insert +@end ifclear +@c texi2html-1.76 does not support @arrow{}. +@ifset texi2html +@macro arrow{} +→ +@end macro +@end ifset +@comment %**end of header + +@include version.texi + +@c Location of the POSIX specification on the web. +@set POSIXURL http://www.opengroup.org/onlinepubs/9699919799 + +@c Macro for referencing a POSIX function. +@c We don't write it as func(), see section "GNU Manuals" of the +@c GNU coding standards. +@ifinfo +@macro posixfunc{func} +@code{\func\} +@end macro +@end ifinfo +@ifnotinfo +@macro posixfunc{func} +@uref{@value{POSIXURL}/functions/\func\.html,,@code{\func\}} +@end macro +@end ifnotinfo + +@c Macro for referencing a normal function. +@c We don't write it as func(), see section "GNU Manuals" of the +@c GNU coding standards. +@macro func{func} +@code{\func\} +@end macro + +@c Macro for an advisory ragged line break in TeX mode. +@c Needed because there are long unbreakable pieces of text (such as URLs or +@c formulas), TeX is too shy to move them to a new line. TeX considers only +@c two choices: a line break in aligned mode (which it rejects due to aesthetic +@c reasons) and writing into the margin. 
What we want in many cases is a line +@c break without filling the first line. Like what @* delivers. But we want it +@c only when needed, so that it disappears when unrelated changes in the same +@c paragraph cause a line break in a nearby position. And we need it only in +@c TeX mode. info and HTML modes are fine. +@c This trick is from Karl Berry. +@iftex +@macro texnl +@hfil@penalty9000@hfilneg +@end macro +@end iftex +@ifnottex +@macro texnl +@end macro +@end ifnottex + +@ifinfo +@dircategory Software development +@direntry +* GNU libunistring: (libunistring). Unicode string library. +@end direntry +@end ifinfo + +@ifinfo +This manual is for GNU libunistring. + +@ignore +@c This was: @copying but it triggers a makeinfo 4.13 bug +Copyright (C) 2001-2009 Free Software Foundation, Inc. + +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. + +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in @ref{GNU FDL}. + +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in @ref{GNU GPL}. 
+@end ignore +@end ifinfo + +@titlepage +@title GNU libunistring, version @value{VERSION} +@subtitle updated @value{UPDATED} +@subtitle Edition @value{EDITION}, @value{UPDATED} +@author Bruno Haible + +@ifnothtml +@page +@vskip 0pt plus 1filll +@c @insertcopying +Copyright (C) 2001-2009 Free Software Foundation, Inc. + +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. + +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in @ref{GNU FDL}. + +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in @ref{GNU GPL}. +@end ifnothtml +@end titlepage + +@c Table of Contents +@contents + +@ifnottex +@node Top +@top GNU libunistring +@end ifnottex + +@menu +* Introduction:: Who may need Unicode strings? 
+* Conventions:: Conventions used in this manual +* unitypes.h:: Elementary types +* unistr.h:: Elementary Unicode string functions +* uniconv.h:: Conversions between Unicode and encodings +* unistdio.h:: Output with Unicode strings +* uniname.h:: Names of Unicode characters +* unictype.h:: Unicode character classification and properties +* uniwidth.h:: Display width +* uniwbrk.h:: Word breaks in strings +* unilbrk.h:: Line breaking +* uninorm.h:: Normalization forms +* unicase.h:: Case mappings +* uniregex.h:: Regular expressions +* Using the library:: How to link with the library and use it? +* More functionality:: More advanced functionality +* Licenses:: Licenses + +* Index:: General Index + +@detailmenu + --- The Detailed Node Listing --- + +Introduction + +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with @code{char *} strings +* The wchar_t mess:: Why @code{wchar_t *} strings are useless +* Unicode strings:: How are Unicode strings represented? 
+ +unistr.h + +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: + +unictype.h + +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: + +General category + +* Object oriented API:: +* Bit mask API:: + +Properties + +* Properties as objects:: +* Properties as functions:: + +uniwbrk.h + +* Word breaks in a string:: +* Word break property:: + +uninorm.h + +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: + +unicase.h + +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: + +Using the library + +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: + +Licenses + +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License + +@end detailmenu +@end menu + +@node Introduction +@chapter Introduction + +This library provides functions for manipulating Unicode strings and +for manipulating C strings according to the Unicode standard.
+ +It consists of the following parts: + +@table @code +@item <unistr.h> +elementary string functions +@item <uniconv.h> +conversion from/to legacy encodings +@item <unistdio.h> +formatted output to strings +@item <uniname.h> +character names +@item <unictype.h> +character classification and properties +@item <uniwidth.h> +string width when using nonproportional fonts +@item <uniwbrk.h> +word breaks +@item <unilbrk.h> +line breaking algorithm +@item <uninorm.h> +normalization (composition and decomposition) +@item <unicase.h> +case folding +@item <uniregex.h> +regular expressions (not yet implemented) +@end table + +@cindex use cases +@cindex value, of libunistring +libunistring is for you if your application involves non-trivial text +processing, such as upper/lower case conversions, line breaking, operations +on words, or more advanced analysis of text. Text provided by the user can, +in general, contain characters of all kinds of scripts. The text processing +functions provided by this library handle all scripts and all languages. + +libunistring is for you if your application already uses the ISO C / POSIX +@code{<ctype.h>}, @code{<wctype.h>} functions and the text it operates on is +provided by the user and can be in any language. + +libunistring is also for you if your application uses Unicode strings as +internal in-memory representation. + +@menu +* Unicode:: What is Unicode? +* Unicode and i18n:: Unicode and internationalization +* Locale encodings:: What is a locale encoding? +* In-memory representation:: How to represent strings in memory? +* char * strings:: What to keep in mind with @code{char *} strings +* The wchar_t mess:: Why @code{wchar_t *} strings are useless +* Unicode strings:: How are Unicode strings represented? +@end menu + +@node Unicode +@section Unicode + +@cindex Unicode +Unicode is a standardized repertoire of characters that contains characters +from all scripts of the world, from Latin letters to Chinese ideographs +and Babylonian cuneiform glyphs.
It also specifies how these characters +are to be rendered on a screen or on paper, and how common text processing +(word selection, line breaking, uppercasing of page titles etc.) is supposed +to behave on Unicode text. + +Unicode also specifies three ways of storing sequences of Unicode +characters in a computer whose basic unit of data is an 8-bit byte: +@cindex UTF-8 +@cindex UTF-16 +@cindex UTF-32 +@cindex UCS-4 +@table @asis +@item UTF-8 +Every character is represented as 1 to 4 bytes. +@item UTF-16 +Every character is represented as 1 to 2 units of 16 bits. +@item UTF-32, a.k.a@. UCS-4 +Every character is represented as 1 unit of 32 bits. +@end table + +For encoding Unicode text in a file, UTF-8 is usually used. For encoding +Unicode strings in memory for a program, any of the three encoding forms +can reasonably be used. + +Unicode is widely used on the web. Prior to the use of Unicode, web pages +were in many different encodings (ISO-8859-1 for English, French, Spanish, +ISO-8859-2 for Polish, ISO-8859-7 for Greek, KOI8-R for Russian, GB2312 or +BIG5 for Chinese, ISO-2022-JP-2 or EUC-JP or Shift_JIS for Japanese, and many +many others). It was next to impossible to create a document that contained +Chinese and Polish text in the same document. Due to the many encodings for +Japanese, even the processing of pure Japanese text was error prone.
+ +References: +@itemize @bullet +@item +The Unicode standard:@texnl{} @url{http://www.unicode.org/} +@item +Definition of UTF-8:@texnl{} @url{http://www.rfc-editor.org/rfc/rfc3629.txt} +@item +Definition of UTF-16:@texnl{} @url{http://www.rfc-editor.org/rfc/rfc2781.txt} +@item +Markus Kuhn's UTF-8 and Unicode FAQ:@texnl{} +@url{http://www.cl.cam.ac.uk/~mgk25/unicode.html} +@end itemize + +@node Unicode and i18n +@section Unicode and Internationalization + +@cindex internationalization +Internationalization is the process of changing the source code of a program +so that it can meet the expectations of users in any culture, if culture +specific data (translations, images etc.) are provided. + +Use of Unicode is not strictly required for internationalization, but it +makes internationalization much easier, because operations that need to +look at specific characters (like hyphenation, spell checking, or the +automatic conversion of double-quotes to opening and closing double-quote +characters) don't need to consider multiple possible encodings of the text. + +Use of Unicode also enables multilingualization: the ability to have text +in multiple languages present in the same document or even in the same line +of text. + +But use of Unicode is not everything. Internationalization usually consists +of three features: +@itemize @bullet +@item +Use of Unicode where needed for text processing. This is what this library +is for. +@item +Use of message catalogs for messages shown to the user. This is what +GNU gettext is about. +@item +Use of locale specific conventions for date and time formats, for numeric +formatting, or for sorting of text. This can be done adequately with the +POSIX APIs and the implementation of locales in the GNU C library. +@end itemize + +@node Locale encodings +@section Locale encodings + +@cindex locale +A locale is a set of cultural conventions.
According to POSIX, for a program, +at any moment, there is one locale being designated as the ``current locale''. +(Actually, POSIX supports also one locale per thread, but this feature is not +yet universally implemented and not widely used.) +@cindex locale categories +The locale is partitioned into several aspects, called the ``categories'' +of the locale. The main various aspects are: +@itemize +@item +The character encoding and the character properties. This is the +@code{LC_CTYPE} category. +@item +The sorting rules for text. This is the @code{LC_COLLATE} category. +@item +The language specific translations of messages. This is the +@code{LC_MESSAGES} category. +@item +The formatting rules for numbers, such as the decimal separator. This is +the @code{LC_NUMERIC} category. +@item +The formatting rules for amounts of money. This is the @code{LC_MONETARY} +category. +@item +The formatting of date and time. This is the @code{LC_TIME} category. +@end itemize + +@cindex locale encoding +In particular, the @code{LC_CTYPE} category of the current locale determines +the character encoding. This is the encoding of @samp{char *} strings. +We also call it the ``locale encoding''. GNU libunistring has a function, +@func{locale_charset}, that returns a standardized (platform independent) +name for this encoding. + +All locale encodings used on glibc systems are essentially ASCII compatible: +Most graphic ASCII characters have the same representation, as a single byte, +in that encoding as in ASCII. + +Among the possible locale encodings are UTF-8 and GB18030. Both allow +to represent any Unicode character as a sequence of bytes. UTF-8 is used in +most of the world, whereas GB18030 is used in the People's Republic of China, +because it is backward compatible with the GB2312 encoding that was used in +this country earlier. 
+ +The legacy locale encodings, ISO-8859-15 (which supplanted ISO-8859-1 in +most of Europe), ISO-8859-2, KOI8-R, EUC-JP, etc., are still in use in +many places, though. + +UTF-16 and UTF-32 are not used as locale encodings, because they are not +ASCII compatible. + +@node In-memory representation +@section Choice of in-memory representation of strings + +There are three ways of representing strings in memory of a running +program. +@itemize +@item +As @samp{char *} strings. Such strings are represented in locale encoding. +This approach is employed when not much text processing is done by the +program. When some Unicode aware processing is to be done, a string is +converted to Unicode on the fly and back to locale encoding afterwards. +@item +As UTF-8 or UTF-16 or UTF-32 strings. This implies that conversion from +locale encoding to Unicode is performed on input, and in the opposite +direction on output. This approach is employed when the program does +a significant amount of text processing, or when the program has multiple +threads operating on the same data but in different locales. +@item +As @samp{wchar_t *}, a.k.a@. ``wide strings''. This approach is misguided, +see @ref{The wchar_t mess}. +@end itemize + +@node char * strings +@section @samp{char *} strings + +@cindex C string functions +The classical C strings, with its C library support standardized by +ISO C and POSIX, can be used in internationalized programs with some +precautions. The problem with this API is that many of the C library +functions for strings don't work correctly on strings in locale +encodings, leading to bugs that only people in some cultures of the +world will experience. + +@cindex locale, multibyte +The first problem with the C library API is the support of multibyte +locales. According to the locale encoding, in general, every character +is represented by one or more bytes (up to 4 bytes in practice --- but +use @code{MB_LEN_MAX} instead of the number 4 in the code). 
+When every character is represented by only 1 byte, we speak of an +``unibyte locale'', otherwise of a ``multibyte locale''. It is important +to realize that the majority of Unix installations nowadays use UTF-8 +or GB18030 as locale encoding; therefore, the majority of users are +using multibyte locales. + +@cindex char, type +The important fact to remember is: +@cartouche +@emph{A @samp{char} is a byte, not a character.} +@end cartouche + +As a consequence: +@itemize +@item +The @code{<ctype.h>} API is useless in this context; it does not work in +multibyte locales. +@item +The @posixfunc{strlen} function does not return the number of characters +in a string. Nor does it return the number of screen columns occupied +by a string after it is output. It merely returns the number of +@emph{bytes} occupied by a string. +@item +Truncating a string, for example, with @posixfunc{strncpy}, can have the +effect of truncating it in the middle of a multibyte character. Such +a string will, when output, have a garbled character at its end, often +represented by a hollow box. +@item +@posixfunc{strchr} and @posixfunc{strrchr} do not work with multibyte strings +if the locale encoding is GB18030 and the character to be searched is +a digit. +@item +@posixfunc{strstr} does not work with multibyte strings if the locale encoding +is different from UTF-8. +@item +@posixfunc{strcspn}, @posixfunc{strpbrk}, @posixfunc{strspn} cannot work +correctly in multibyte locales: they assume the second argument is a list of +single-byte characters. Even in this simple case, they do not work with +multibyte strings if the locale encoding is GB18030 and one of the +characters to be searched is a digit. +@item +@posixfunc{strsep} and @posixfunc{strtok_r} do not work with multibyte strings +unless all of the delimiter characters are ASCII characters < 0x30. +@item +The @posixfunc{strcasecmp}, @posixfunc{strncasecmp}, and @posixfunc{strcasestr} +functions do not work with multibyte strings. 
+@end itemize + +The workarounds can be found in GNU gnulib +@url{http://www.gnu.org/software/gnulib/}. +@itemize +@item +gnulib has modules @samp{mbchar}, @samp{mbiter}, @samp{mbuiter} that +represent multibyte characters and allow to iterate across a multibyte +string with the same ease as through a unibyte string. +@item +gnulib has functions @func{mbslen} and @func{mbswidth} that can be +used instead of @posixfunc{strlen} when the number of characters or the +number of screen columns of a string is requested. +@item +gnulib has functions @func{mbschr} and @func{mbsrrchr} that are +like @posixfunc{strchr} and @posixfunc{strrchr}, but work in multibyte locales. +@item +gnulib has a function @func{mbsstr}, like @posixfunc{strstr}, but works +in multibyte locales. +@item +gnulib has functions @func{mbscspn}, @func{mbspbrk}, @func{mbsspn} +that are like @posixfunc{strcspn}, @posixfunc{strpbrk}, @posixfunc{strspn}, but +work in multibyte locales. +@item +gnulib has functions @func{mbssep} and @func{mbstok_r} that are +like @posixfunc{strsep} and @posixfunc{strtok_r} but work in multibyte locales. +@item +gnulib has functions @func{mbscasecmp}, @func{mbsncasecmp}, +@func{mbspcasecmp}, and @func{mbscasestr} that are like @posixfunc{strcasecmp}, +@posixfunc{strncasecmp}, and @posixfunc{strcasestr}, but +work in multibyte locales. Still, the function @code{ulc_casecmp} is +preferable to these functions; see below. +@end itemize + +The second problem with the C library API is that it has some assumptions built-in that are not valid in some languages: +@itemize +@item +It assumes that there are only two forms of every character: uppercase +and lowercase. This is not true for Croatian, where the character +@sc{LETTER DZ WITH CARON} comes in three forms: +@sc{LATIN CAPITAL LETTER DZ WITH CARON} (DZ), +@sc{LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} (Dz), +@sc{LATIN SMALL LETTER DZ WITH CARON} (dz). 
+@item +It assumes that uppercasing of 1 character leads to 1 character. This +is not true for German, where the @sc{LATIN SMALL LETTER SHARP S}, when +uppercased, becomes @samp{SS}. +@item +It assumes that there is a 1:1 mapping between uppercase and lowercase forms. +This is not true for the Greek sigma: @sc{GREEK CAPITAL LETTER SIGMA} is +the uppercase of both @sc{GREEK SMALL LETTER SIGMA} and +@sc{GREEK SMALL LETTER FINAL SIGMA}. +@item +It assumes that the upper/lowercase mappings are position independent. +This is not true for the Greek sigma and the Lithuanian i. +@end itemize + +The correct way to deal with this problem is +@enumerate +@item +to provide functions for titlecasing, as well as for upper- and +lowercasing, +@item +to view case transformations as functions that operate on strings, +rather than on characters. +@end enumerate + +This is implemented in this library, through the functions declared in @code{<unicase.h>}, see @ref{unicase.h}. + +@node The wchar_t mess +@section The @code{wchar_t} mess + +@cindex wchar_t, type +The ISO C and POSIX standard creators made an attempt to fix the first +problem mentioned in the previous section. They introduced +@itemize +@item +a type @samp{wchar_t}, designed to encapsulate an entire character, +@item +a ``wide string'' type @samp{wchar_t *}, and +@item +functions declared in @code{<wchar.h>} that were meant to supplant the +ones in @code{<string.h>}. +@end itemize + +Unfortunately, this API and its implementation have numerous problems: + +@itemize +@item +On AIX and Windows platforms, @code{wchar_t} is a 16-bit type. This +means that it can never accommodate an entire Unicode character. Either +the @code{wchar_t *} strings are limited to characters in UCS-2 (the +``Basic Multilingual Plane'' of Unicode), or --- if @code{wchar_t *} +strings are encoded in UTF-16 --- a @code{wchar_t} represents only half +of a character in the worst case, making the @code{<wctype.h>} functions +pointless. 
+ +@item +On Solaris and FreeBSD, the @code{wchar_t} encoding is locale dependent +and undocumented. This means, if you want to know any property of a +@code{wchar_t} character, other than the properties defined by +@code{<wctype.h>} --- such as whether it's a dash, currency symbol, +paragraph separator, or similar ---, you have to convert it to +@code{char *} encoding first, by use of the function @posixfunc{wctomb}. + +@item +When you read a stream of wide characters, through the functions +@posixfunc{fgetwc} and @posixfunc{fgetws}, and when the input stream/file is +not in the expected encoding, you have no way to determine the invalid +byte sequence and do some corrective action. If you use these +functions, your program becomes ``garbage in - more garbage out'' or +``garbage in - abort''. +@end itemize + +As a consequence, it is better to use multibyte strings, as explained in +the previous section. Such multibyte strings can bypass limitations +of the @code{wchar_t} type, if you use functions defined in gnulib and +libunistring for text processing. They can also faithfully transport +malformed characters that were present in the input, without requiring +the program to produce garbage or abort. + +@node Unicode strings +@section Unicode strings + +libunistring supports Unicode strings in three representations: +@cindex UTF-8, strings +@cindex UTF-16, strings +@cindex UTF-32, strings +@itemize +@item +UTF-8 strings, through the type @samp{uint8_t *}. The units are bytes +(@code{uint8_t}). +@item +UTF-16 strings, through the type @samp{uint16_t *}. The units are 16-bit +memory words (@code{uint16_t}). +@item +UTF-32 strings, through the type @samp{uint32_t *}. The units are 32-bit +memory words (@code{uint32_t}). +@end itemize + +As with C strings, there are two variants: +@itemize +@item +Unicode strings with a terminating NUL character are represented as +a pointer to the first unit of the string. There is a unit containing +a 0 value at the end. 
It is considered part of the string for all +memory allocation purposes, but is not considered part of the string +for all other logical purposes. +@item +Unicode strings where embedded NUL characters are allowed. These +are represented by a pointer to the first unit and the number of units +(not bytes!) of the string. In this setting, there is no trailing +zero-valued unit used as ``end marker''. +@end itemize + +@node Conventions +@chapter Conventions + +This chapter explains conventions valid throughout the libunistring library. + +@cindex argument conventions +Variables of type @code{char *} denote C strings in locale encoding. +See @ref{Locale encodings}. + +Variables of type @code{uint8_t *} denote UTF-8 strings. Their units +are bytes. + +Variables of type @code{uint16_t *} denote UTF-16 strings, without byte +order mark. Their units are 2-byte words. + +Variables of type @code{uint32_t *} denote UTF-32 strings, without byte +order mark. Their units are 4-byte words. + +Argument pairs @code{(@var{s}, @var{n})} denote a string +@code{@var{s}[0..@var{n}-1]} with exactly @var{n} units. + +All functions with prefix @samp{ulc_} operate on C strings in locale +encoding. + +All functions with prefix @samp{u8_} operate on UTF-8 strings. + +All functions with prefix @samp{u16_} operate on UTF-16 strings. + +All functions with prefix @samp{u32_} operate on UTF-32 strings. + +For every function with prefix @samp{u8_}, operating on UTF-8 strings, +there is also a corresponding function with prefix @samp{u16_}, +operating on UTF-16 strings, and a corresponding function with prefix +@samp{u32_}, operating on UTF-32 strings. Their description is +analogous; in this documentation we describe only the function that +operates on UTF-8 strings, for brevity. + +A declaration with a variable @var{n} denotes the three concrete +declarations with @var{n} = 8, @var{n} = 16, @var{n} = 32. 
+ +All parameters starting with @samp{str} and the parameters of +functions starting with @code{u8_str}/@code{u16_str}/@code{u32_str} +denote a NUL terminated string. + +@cindex return value conventions +Error values are always returned through the @code{errno} variable, +usually with a return value that indicates the presence of an error +(NULL for functions that return a pointer, or -1 for functions that +return an @code{int}). + +Functions returning a string result take a +@code{(@var{resultbuf}, @var{lengthp})} +argument pair. If @var{resultbuf} is not NULL and the result fits +into @code{*@var{lengthp}} units, it is put in @var{resultbuf}, and +@var{resultbuf} is returned. Otherwise, a freshly allocated string +is returned. In both cases, @code{*@var{lengthp}} is set to the +length (number of units) of the returned string. In case of error, +NULL is returned and @code{errno} is set. + +@include unitypes.texi +@include unistr.texi +@include uniconv.texi +@include unistdio.texi +@include uniname.texi +@include unictype.texi +@include uniwidth.texi +@include uniwbrk.texi +@include unilbrk.texi +@include uninorm.texi +@include unicase.texi +@include uniregex.texi + +@node Using the library +@chapter Using the library + +This chapter explains some practical considerations, regarding the +installation and compiler options that are needed in order to use this +library. + +@menu +* Installation:: +* Compiler options:: +* Include files:: +* Autoconf macro:: +* Reporting problems:: +@end menu + +@node Installation +@section Installation + +@cindex dependencies +Before you can use the library, it must be installed. First, you have to +make sure all dependencies are installed. They are listed in the file +@file{DEPENDENCIES}. + +@cindex installation +Then you can proceed to build and install the library, as described in the +file @file{INSTALL}. For installation on Windows systems, please refer to +the file @file{README.woe32}. 
+ +@node Compiler options +@section Compiler options + +Let's denote as @code{LIBUNISTRING_PREFIX} the value of the @samp{--prefix} +option that you passed to @code{configure} while installing this package. +If you didn't pass any @samp{--prefix} option, then the package is installed +in @file{/usr/local}. + +Let's denote as @code{LIBUNISTRING_INCLUDEDIR} the directory where the +include files were installed. This is usually the same as +@code{$@{LIBUNISTRING_PREFIX@}/include}. Except that if you passed an +@samp{--includedir} option to @code{configure}, it is the value of that +option. + +Let's further denote as @code{LIBUNISTRING_LIBDIR} the directory where +the library itself was installed. This is the value that you passed +with the @samp{--libdir} option to @code{configure}, or otherwise the +same as @code{$@{LIBUNISTRING_PREFIX@}/lib}. Recall that when building +in 64-bit mode on a 64-bit GNU/Linux system that supports executables +in either 64-bit mode or 32-bit mode, you should have used the option +@code{--libdir=$@{LIBUNISTRING_PREFIX@}/lib64}. + +@cindex compiler options +So that the compiler finds the include files, you have to pass it the +option @code{-I$@{LIBUNISTRING_INCLUDEDIR@}}. + +So that the compiler finds the library during its linking pass, you have +to pass it the options @code{-L$@{LIBUNISTRING_LIBDIR@} -lunistring}. +On some systems, in some configurations, you also have to pass options +needed for linking with @code{libiconv}. The autoconf macro +@code{gl_LIBUNISTRING} (see @ref{Autoconf macro}) deals with this +particularity. + +@node Include files +@section Include files + +Most of the include files have been presented in the introduction, see +@ref{Introduction}, and subsequent detailed chapters. + +Another include file is @code{<unistring/version.h>}. It contains the +version number of the libunistring library. + +@deftypevr Macro int _LIBUNISTRING_VERSION +This constant contains the version of libunistring that is being used +at compile time. 
It encodes the major and minor parts of the version +number only. These parts are encoded in the form @code{(major<<8) + minor}. +@end deftypevr + +@deftypevr Constant int _libunistring_version +This constant contains the version of libunistring that is being used +at run time. It encodes the major and minor parts of the version +number only. These parts are encoded in the form @code{(major<<8) + minor}. +@end deftypevr + +It is possible that @code{_libunistring_version} is greater than +@code{_LIBUNISTRING_VERSION}. This can happen when you use +@code{libunistring} as a shared library, and a newer, binary +backward-compatible version has been installed after your program +that uses @code{libunistring} was installed. + +@node Autoconf macro +@section Autoconf macro + +@cindex autoconf macro +GNU Gnulib provides an autoconf macro that tests for the availability +of @code{libunistring}. It is contained in the Gnulib module +@samp{libunistring}, see@texnl{} +@url{http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring}. + +@amindex gl_LIBUNISTRING +The macro is called @code{gl_LIBUNISTRING}. It searches for an installed +libunistring. If found, it sets and AC_SUBSTs @code{HAVE_LIBUNISTRING=yes} +and the @code{LIBUNISTRING} and @code{LTLIBUNISTRING} variables and augments +the @code{CPPFLAGS} variable, and defines the C macro +@code{HAVE_LIBUNISTRING} to 1. Otherwise, it sets and AC_SUBSTs +@code{HAVE_LIBUNISTRING=no} and @code{LIBUNISTRING} and @code{LTLIBUNISTRING} +to empty. + +The complexities that @code{gl_LIBUNISTRING} deals with are the following: + +@itemize @bullet +@item +On some operating systems, in some configurations, libunistring depends +on @code{libiconv}, and the options for linking with libiconv must be +mentioned explicitly on the link command line. 
+ +@item +GNU @code{libunistring}, if installed, is not necessarily already in the +search path (@code{CPPFLAGS} for the include file search path, +@code{LDFLAGS} for the library search path). + +@item +GNU @code{libunistring}, if installed, is not necessarily already in the +run time library search path. To avoid the need for setting an environment +variable like @code{LD_LIBRARY_PATH}, the macro adds the appropriate +run time search path options to the @code{LIBUNISTRING} variable. This works +on most systems. +@end itemize + +@node Reporting problems +@section Reporting problems + +@cindex bug reports +@cindex bug tracker +@cindex mailing list +If you encounter any problem, please don't hesitate to send a detailed +bug report to the @code{bug-libunistring@@gnu.org} mailing list. You can +alternatively also use the bug tracker at the project page +@url{https://savannah.gnu.org/projects/libunistring}. + +Please always include the version number of this library, and a short +description of your operating system and compilation environment with +corresponding version numbers. + +For problems that appear while building and installing @code{libunistring}, +for which you don't find the remedy in the @file{INSTALL} file, please include +a description of the options that you passed to the @samp{configure} script. + +@node More functionality +@chapter More advanced functionality + +@cindex bidirectional reordering +For bidirectional reordering of strings, we recommend the GNU FriBidi library: +@url{http://www.fribidi.org/}. + +@cindex rendering +For the rendering of Unicode strings outside of the context of a given toolkit +(KDE/Qt or GNOME/Gtk), we recommend the Pango library: +@url{http://www.pango.org/}. + +@node Licenses +@appendix Licenses +@cindex Licenses + +The files of this package are covered by the licenses indicated in each +particular file or directory. 
Here is a summary: + +@itemize @bullet +@item +The @code{libunistring} library is covered by the +GNU Lesser General Public License (LGPL). +A copy of the license is included in @ref{GNU LGPL}. + +@item +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. +@* +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in @ref{GNU FDL}. +@* +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in @ref{GNU GPL}. 
+@end itemize + +@menu +* GNU GPL:: GNU General Public License +* GNU LGPL:: GNU Lesser General Public License +* GNU FDL:: GNU Free Documentation License +@end menu + +@page +@node GNU GPL +@appendixsec GNU GENERAL PUBLIC LICENSE +@cindex GPL, GNU General Public License +@cindex License, GNU GPL +@include gpl.texi +@page +@node GNU LGPL +@appendixsec GNU LESSER GENERAL PUBLIC LICENSE +@cindex LGPL, GNU Lesser General Public License +@cindex License, GNU LGPL +@include lgpl.texi +@page +@node GNU FDL +@appendixsec GNU Free Documentation License +@cindex FDL, GNU Free Documentation License +@cindex License, GNU FDL +@include fdl.texi + +@node Index +@unnumbered Index + +@printindex cp + +@bye + +@c Local Variables: +@c indent-tabs-mode: nil +@c whitespace-check-buffer-indent: nil +@c End: diff --git a/doc/libunistring_1.html b/doc/libunistring_1.html new file mode 100644 index 00000000..646fdc65 --- /dev/null +++ b/doc/libunistring_1.html @@ -0,0 +1,531 @@ + + + + + +GNU libunistring: 1. Introduction + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

1. Introduction

+ +

This library provides functions for manipulating Unicode strings and +for manipulating C strings according to the Unicode standard. +

+

It consists of the following parts: +

+
+
<unistr.h>
+

elementary string functions +

+
<uniconv.h>
+

conversion from/to legacy encodings +

+
<unistdio.h>
+

formatted output to strings +

+
<uniname.h>
+

character names +

+
<unictype.h>
+

character classification and properties +

+
<uniwidth.h>
+

string width when using nonproportional fonts +

+
<uniwbrk.h>
+

word breaks +

+
<unilbrk.h>
+

line breaking algorithm +

+
<uninorm.h>
+

normalization (composition and decomposition) +

+
<unicase.h>
+

case folding +

+
<uniregex.h>
+

regular expressions (not yet implemented) +

+
+ + + +

libunistring is for you if your application involves non-trivial text +processing, such as upper/lower case conversions, line breaking, operations +on words, or more advanced analysis of text. Text provided by the user can, +in general, contain characters of all kinds of scripts. The text processing +functions provided by this library handle all scripts and all languages. +

+

libunistring is for you if your application already uses the ISO C / POSIX +<ctype.h>, <wctype.h> functions and the text it operates on is +provided by the user and can be in any language. +

+

libunistring is also for you if your application uses Unicode strings as +internal in-memory representation. +

+ +
+ + +

1.1 Unicode

+ +

Unicode is a standardized repertoire of characters that contains characters +from all scripts of the world, from Latin letters to Chinese ideographs +and Babylonian cuneiform glyphs. It also specifies how these characters +are to be rendered on a screen or on paper, and how common text processing +(word selection, line breaking, uppercasing of page titles etc.) is supposed +to behave on Unicode text. +

+

Unicode also specifies three ways of storing sequences of Unicode +characters in a computer whose basic unit of data is an 8-bit byte: + + + + +

+
UTF-8
+

Every character is represented as 1 to 4 bytes. +

+
UTF-16
+

Every character is represented as 1 to 2 units of 16 bits. +

+
UTF-32, a.k.a. UCS-4
+

Every character is represented as 1 unit of 32 bits. +

+
+ +

For encoding Unicode text in a file, UTF-8 is usually used. For encoding +Unicode strings in memory for a program, either of the three encoding forms +can be reasonably used. +

+

Unicode is widely used on the web. Prior to the use of Unicode, web pages +were in many different encodings (ISO-8859-1 for English, French, Spanish, +ISO-8859-2 for Polish, ISO-8859-7 for Greek, KOI8-R for Russian, GB2312 or +BIG5 for Chinese, ISO-2022-JP-2 or EUC-JP or Shift_JIS for Japanese, and many +many others). It was next to impossible to create a document that contained +Chinese and Polish text in the same document. Due to the many encodings for +Japanese, even the processing of pure Japanese text was error prone. +

+

References: +

+ +
+ + +

1.2 Unicode and Internationalization

+ +

Internationalization is the process of changing the source code of a program +so that it can meet the expectations of users in any culture, if culture +specific data (translations, images etc.) are provided. +

+

Use of Unicode is not strictly required for internationalization, but it +makes internationalization much easier, because operations that need to +look at specific characters (like hyphenation, spell checking, or the +automatic conversion of double-quotes to opening and closing double-quote +characters) don't need to consider multiple possible encodings of the text. +

+

Use of Unicode also enables multilingualization: the ability of having text +in multiple languages present in the same document or even in the same line +of text. +

+

But use of Unicode is not everything. Internationalization usually consists +of three features: +

    +
  • +Use of Unicode where needed for text processing. This is what this library +is for. +
  • +Use of message catalogs for messages shown to the user. This is what +GNU gettext is about. +
  • +Use of locale specific conventions for date and time formats, for numeric +formatting, or for sorting of text. This can be done adequately with the +POSIX APIs and the implementation of locales in the GNU C library. +
+ +
+ + +

1.3 Locale encodings

+ +

A locale is a set of cultural conventions. According to POSIX, for a program, +at any moment, there is one locale being designated as the “current locale”. +(Actually, POSIX also supports one locale per thread, but this feature is not +yet universally implemented and not widely used.) + +The locale is partitioned into several aspects, called the “categories” +of the locale. The main aspects are: +

    +
  • +The character encoding and the character properties. This is the +LC_CTYPE category. +
  • +The sorting rules for text. This is the LC_COLLATE category. +
  • +The language specific translations of messages. This is the +LC_MESSAGES category. +
  • +The formatting rules for numbers, such as the decimal separator. This is +the LC_NUMERIC category. +
  • +The formatting rules for amounts of money. This is the LC_MONETARY +category. +
  • +The formatting of date and time. This is the LC_TIME category. +
+ + +

In particular, the LC_CTYPE category of the current locale determines +the character encoding. This is the encoding of ‘char *’ strings. +We also call it the “locale encoding”. GNU libunistring has a function, +locale_charset, that returns a standardized (platform independent) +name for this encoding. +

+

All locale encodings used on glibc systems are essentially ASCII compatible: +Most graphic ASCII characters have the same representation, as a single byte, +in that encoding as in ASCII. +

+

Among the possible locale encodings are UTF-8 and GB18030. Both can +represent any Unicode character as a sequence of bytes. UTF-8 is used in +most of the world, whereas GB18030 is used in the People's Republic of China, +because it is backward compatible with the GB2312 encoding that was used in +this country earlier. +

+

The legacy locale encodings, ISO-8859-15 (which supplanted ISO-8859-1 in +most of Europe), ISO-8859-2, KOI8-R, EUC-JP, etc., are still in use in +many places, though. +

+

UTF-16 and UTF-32 are not used as locale encodings, because they are not +ASCII compatible. +

+
+ + +

1.4 Choice of in-memory representation of strings

+ +

There are three ways of representing strings in memory of a running +program. +

    +
  • +As ‘char *’ strings. Such strings are represented in locale encoding. +This approach is employed when not much text processing is done by the +program. When some Unicode aware processing is to be done, a string is +converted to Unicode on the fly and back to locale encoding afterwards. +
  • +As UTF-8 or UTF-16 or UTF-32 strings. This implies that conversion from +locale encoding to Unicode is performed on input, and in the opposite +direction on output. This approach is employed when the program does +a significant amount of text processing, or when the program has multiple +threads operating on the same data but in different locales. +
  • +As ‘wchar_t *’, a.k.a. “wide strings”. This approach is misguided, +see The wchar_t mess. +
+ +
+ + +

1.5 ‘char *’ strings

+ +

The classical C strings, with their C library support standardized by +ISO C and POSIX, can be used in internationalized programs with some +precautions. The problem with this API is that many of the C library +functions for strings don't work correctly on strings in locale +encodings, leading to bugs that only people in some cultures of the +world will experience. +

+ +

The first problem with the C library API is the support of multibyte +locales. According to the locale encoding, in general, every character +is represented by one or more bytes (up to 4 bytes in practice — but +use MB_LEN_MAX instead of the number 4 in the code). +When every character is represented by only 1 byte, we speak of an +“unibyte locale”, otherwise of a “multibyte locale”. It is important +to realize that the majority of Unix installations nowadays use UTF-8 +or GB18030 as locale encoding; therefore, the majority of users are +using multibyte locales. +

+ +

The important fact to remember is: +

+

A ‘char’ is a byte, not a character. +

+ +

As a consequence: +

    +
  • +The <ctype.h> API is useless in this context; it does not work in +multibyte locales. +
  • +The strlen function does not return the number of characters +in a string. Nor does it return the number of screen columns occupied +by a string after it is output. It merely returns the number of +bytes occupied by a string. +
  • +Truncating a string, for example, with strncpy, can have the +effect of truncating it in the middle of a multibyte character. Such +a string will, when output, have a garbled character at its end, often +represented by a hollow box. +
  • +strchr and strrchr do not work with multibyte strings +if the locale encoding is GB18030 and the character to be searched is +a digit. +
  • +strstr does not work with multibyte strings if the locale encoding +is different from UTF-8. +
  • +strcspn, strpbrk, strspn cannot work +correctly in multibyte locales: they assume the second argument is a list of +single-byte characters. Even in this simple case, they do not work with +multibyte strings if the locale encoding is GB18030 and one of the +characters to be searched is a digit. +
  • +strsep and strtok_r do not work with multibyte strings +unless all of the delimiter characters are ASCII characters < 0x30. +
  • +The strcasecmp, strncasecmp, and strcasestr +functions do not work with multibyte strings. +
+ +

The workarounds can be found in GNU gnulib +http://www.gnu.org/software/gnulib/. +

    +
  • +gnulib has modules ‘mbchar’, ‘mbiter’, ‘mbuiter’ that +represent multibyte characters and allow iterating across a multibyte +string with the same ease as through a unibyte string. +
  • +gnulib has functions mbslen and mbswidth that can be +used instead of strlen when the number of characters or the +number of screen columns of a string is requested. +
  • +gnulib has functions mbschr and mbsrrchr that are +like strchr and strrchr, but work in multibyte locales. +
  • +gnulib has a function mbsstr, like strstr, but works +in multibyte locales. +
  • +gnulib has functions mbscspn, mbspbrk, mbsspn +that are like strcspn, strpbrk, strspn, but +work in multibyte locales. +
  • +gnulib has functions mbssep and mbstok_r that are +like strsep and strtok_r but work in multibyte locales. +
  • +gnulib has functions mbscasecmp, mbsncasecmp, +mbspcasecmp, and mbscasestr that are like strcasecmp, +strncasecmp, and strcasestr, but +work in multibyte locales. Still, the function ulc_casecmp is +preferable to these functions; see below. +
+ +

The second problem with the C library API is that it has some assumptions built-in that are not valid in some languages: +

    +
  • +It assumes that there are only two forms of every character: uppercase +and lowercase. This is not true for Croatian, where the character +LETTER DZ WITH CARON comes in three forms: +LATIN CAPITAL LETTER DZ WITH CARON (DZ), +LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON (Dz), +LATIN SMALL LETTER DZ WITH CARON (dz). +
  • +It assumes that uppercasing of 1 character leads to 1 character. This +is not true for German, where the LATIN SMALL LETTER SHARP S, when +uppercased, becomes ‘SS’. +
  • +It assumes that there is 1:1 mapping between uppercase and lowercase forms. +This is not true for the Greek sigma: GREEK CAPITAL LETTER SIGMA is +the uppercase of both GREEK SMALL LETTER SIGMA and +GREEK SMALL LETTER FINAL SIGMA. +
  • +It assumes that the upper/lowercase mappings are position independent. +This is not true for the Greek sigma and the Lithuanian i. +
+ +

The correct way to deal with this problem is +

    +
  1. +to provide functions for titlecasing, as well as for upper- and +lowercasing, +
  2. +to view case transformations as functions that operate on strings, +rather than on characters. +
+ +

This is implemented in this library, through the functions declared in <unicase.h>, see Case mappings <unicase.h>. +

+
+ + +

1.6 The wchar_t mess

+ +

The ISO C and POSIX standard creators made an attempt to fix the first +problem mentioned in the previous section. They introduced +

    +
  • +a type ‘wchar_t’, designed to encapsulate an entire character, +
  • +a “wide string” type ‘wchar_t *’, and +
  • +functions declared in <wctype.h> that were meant to supplant the +ones in <ctype.h>. +
+ +

Unfortunately, this API and its implementation have numerous problems: +

+
    +
  • +On AIX and Windows platforms, wchar_t is a 16-bit type. This +means that it can never accommodate an entire Unicode character. Either +the wchar_t * strings are limited to characters in UCS-2 (the +“Basic Multilingual Plane” of Unicode), or — if wchar_t * +strings are encoded in UTF-16 — a wchar_t represents only half +of a character in the worst case, making the <wctype.h> functions +pointless. + +
  • +On Solaris and FreeBSD, the wchar_t encoding is locale dependent +and undocumented. This means, if you want to know any property of a +wchar_t character, other than the properties defined by +<wctype.h> — such as whether it's a dash, currency symbol, +paragraph separator, or similar —, you have to convert it to +char * encoding first, by use of the function wctomb. + +
  • +When you read a stream of wide characters, through the functions +fgetwc and fgetws, and when the input stream/file is +not in the expected encoding, you have no way to determine the invalid +byte sequence and do some corrective action. If you use these +functions, your program becomes “garbage in - more garbage out” or +“garbage in - abort”. +
+ +

As a consequence, it is better to use multibyte strings, as explained in +the previous section. Such multibyte strings can bypass limitations +of the wchar_t type, if you use functions defined in gnulib and +libunistring for text processing. They can also faithfully transport +malformed characters that were present in the input, without requiring +the program to produce garbage or abort. +

+
+ + +

1.7 Unicode strings

+ +

libunistring supports Unicode strings in three representations: + + + +

    +
  • +UTF-8 strings, through the type ‘uint8_t *’. The units are bytes +(uint8_t). +
  • +UTF-16 strings, through the type ‘uint16_t *’. The units are 16-bit +memory words (uint16_t). +
  • +UTF-32 strings, through the type ‘uint32_t *’. The units are 32-bit +memory words (uint32_t). +
+ +

As with C strings, there are two variants: +

    +
  • +Unicode strings with a terminating NUL character are represented as +a pointer to the first unit of the string. There is a unit containing +a 0 value at the end. It is considered part of the string for all +memory allocation purposes, but is not considered part of the string +for all other logical purposes. +
  • +Unicode strings where embedded NUL characters are allowed. These +are represented by a pointer to the first unit and the number of units +(not bytes!) of the string. In this setting, there is no trailing +zero-valued unit used as “end marker”. +
+ +
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_10.html b/doc/libunistring_10.html new file mode 100644 index 00000000..bf22ca1b --- /dev/null +++ b/doc/libunistring_10.html @@ -0,0 +1,192 @@ + + + + + +GNU libunistring: 10. Word breaks in strings <uniwbrk.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

10. Word breaks in strings <uniwbrk.h>

+ +

This include file declares functions for determining where in a string +“words” start and end. Here “words” are not necessarily the same as +entities that can be looked up in dictionaries, but rather groups of +consecutive characters that should not be split by text processing +operations. +

+ +
+ + +

10.1 Word breaks in a string

+ +

The following functions determine the word breaks in a string. +

+
+
Function: void u8_wordbreaks (const uint8_t *s, size_t n, char *p) + +
+
Function: void u16_wordbreaks (const uint16_t *s, size_t n, char *p) + +
+
Function: void u32_wordbreaks (const uint32_t *s, size_t n, char *p) + +
+
Function: void ulc_wordbreaks (const char *s, size_t n, char *p) + +
+

Determines the word break points in s, an array of n units, and +stores the result at p[0..n-1]. +

+
p[i] = 1
+

means that there is a word boundary between s[i-1] and +s[i]. +

+
p[i] = 0
+

means that s[i-1] and s[i] must not be separated. +

+
+

p[0] is always set to 0. If an application wants to consider a +word break to be present at the beginning of the string (before +s[0]) or at the end of the string (after +s[0..n-1]), it has to treat these cases explicitly. +

+ +
+ + +

10.2 Word break property

+ +

This is a more low-level API. The word break property is a property defined +in Unicode Standard Annex #29, section “Word Boundaries”, see +http://www.unicode.org/reports/tr29/#Word_Boundaries. It is +used for determining the word breaks in a string. +

+

The following are the possible values of the word break property. More values +may be added in the future. +

+
+
Constant: int WBP_OTHER + +
+
Constant: int WBP_CR + +
+
Constant: int WBP_LF + +
+
Constant: int WBP_NEWLINE + +
+
Constant: int WBP_EXTEND + +
+
Constant: int WBP_FORMAT + +
+
Constant: int WBP_KATAKANA + +
+
Constant: int WBP_ALETTER + +
+
Constant: int WBP_MIDNUMLET + +
+
Constant: int WBP_MIDLETTER + +
+
Constant: int WBP_MIDNUM + +
+
Constant: int WBP_NUMERIC + +
+
Constant: int WBP_EXTENDNUMLET + +
+
+ +

The following function looks up the word break property of a character. +

+
+
Function: int uc_wordbreak_property (ucs4_t uc) + +
+

Returns the Word_Break property of a Unicode character. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_11.html b/doc/libunistring_11.html new file mode 100644 index 00000000..b0016788 --- /dev/null +++ b/doc/libunistring_11.html @@ -0,0 +1,200 @@ + + + + + +GNU libunistring: 11. Line breaking <unilbrk.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

11. Line breaking <unilbrk.h>

+ +

This include file declares functions for determining where in a string +line breaks could or should be introduced, in order to make the displayed +string fit into a column of given width. +

+

These functions are locale dependent. The encoding argument identifies +the encoding (e.g. "ISO-8859-2" for Polish). +

+

The following enumerated values indicate whether, at a given position, a line +break is possible or not. Given a string s as an array +s[0..n-1] and a position i, the values have the +following meanings: +

+
+
Constant: int UC_BREAK_MANDATORY + +
+

This value indicates that s[i] is a line break character. +

+ +
+
Constant: int UC_BREAK_POSSIBLE + +
+

This value indicates that a line break may be inserted between +s[i-1] and s[i]. +

+ +
+
Constant: int UC_BREAK_HYPHENATION + +
+

This value indicates that a hyphen and a line break may be inserted between +s[i-1] and s[i]. But beware of language +dependent hyphenation rules. +

+ +
+
Constant: int UC_BREAK_PROHIBITED + +
+

This value indicates that s[i-1] and s[i] +must not be separated. +

+ +
+
Constant: int UC_BREAK_UNDEFINED + +
+

This value is not used as a return value; rather, in the overriding argument of +the u*_width_linebreaks functions, it indicates the absence of an +override. +

+ +

The following functions determine the positions at which line breaks are +possible. +

+
+
Function: void u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *p) + +
+
Function: void u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding, char *p) + +
+
Function: void u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p) + +
+
Function: void ulc_possible_linebreaks (const char *s, size_t n, const char *encoding, char *p) + +
+

Determines the line break points in s, and stores the result at +p[0..n-1]. Every p[i] is assigned one of +the values UC_BREAK_MANDATORY, UC_BREAK_POSSIBLE, +UC_BREAK_HYPHENATION, UC_BREAK_PROHIBITED. +

+ +

The following functions determine where line breaks should be inserted so that +each line fits in a given width, when output to a device that uses +non-proportional fonts. +

+
+
Function: int u8_width_linebreaks (const uint8_t *s, size_t n, int width, int start_column, int at_end_columns, const char *override, const char *encoding, char *p) + +
+
Function: int u16_width_linebreaks (const uint16_t *s, size_t n, int width, int start_column, int at_end_columns, const char *override, const char *encoding, char *p) + +
+
Function: int u32_width_linebreaks (const uint32_t *s, size_t n, int width, int start_column, int at_end_columns, const char *override, const char *encoding, char *p) + +
+
Function: int ulc_width_linebreaks (const char *s, size_t n, int width, int start_column, int at_end_columns, const char *override, const char *encoding, char *p) + +
+

Chooses the best line breaks, assuming that every character occupies a width +given by the uc_width function (see Display width <uniwidth.h>). +

+

The string is s[0..n-1]. +

+

The maximum number of columns per line is given as width. +The starting column of the string is given as start_column. +If the algorithm shall keep room after the last piece, this amount of room can +be given as at_end_columns. +

+

override is an optional override; if +override[i] != UC_BREAK_UNDEFINED, +override[i] takes precedence over p[i] +as returned by the u*_possible_linebreaks function. +

+

The given encoding is used for disambiguating widths in uc_width. +

+

Returns the column after the end of the string, and stores the result at +p[0..n-1]. Every p[i] is assigned one of +the values UC_BREAK_MANDATORY, UC_BREAK_POSSIBLE, +UC_BREAK_HYPHENATION, UC_BREAK_PROHIBITED. Here the value +UC_BREAK_POSSIBLE indicates that a line break should be inserted. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_12.html b/doc/libunistring_12.html new file mode 100644 index 00000000..0bf1e933 --- /dev/null +++ b/doc/libunistring_12.html @@ -0,0 +1,507 @@ + + + + + +GNU libunistring: 12. Normalization forms (composition and decomposition) <uninorm.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

12. Normalization forms (composition and decomposition) <uninorm.h>

+ +

This include file defines functions for transforming Unicode strings to one +of the four normal forms, known as NFC, NFD, NFKC, NFKD. These +transformations involve decomposition and — for NFC and NFKC — composition +of Unicode characters. +

+ +
+ + +

12.1 Decomposition of Unicode characters

+ +

The following enumerated values are the possible types of decomposition of a +Unicode character. +

+
+
Constant: int UC_DECOMP_CANONICAL + +
+

Denotes canonical decomposition. +

+ +
+
Constant: int UC_DECOMP_FONT + +
+

UCD marker: <font>. Denotes a font variant (e.g. a blackletter form). +

+ +
+
Constant: int UC_DECOMP_NOBREAK + +
+

UCD marker: <noBreak>. +Denotes a no-break version of a space or hyphen. +

+ +
+
Constant: int UC_DECOMP_INITIAL + +
+

UCD marker: <initial>. +Denotes an initial presentation form (Arabic). +

+ +
+
Constant: int UC_DECOMP_MEDIAL + +
+

UCD marker: <medial>. +Denotes a medial presentation form (Arabic). +

+ +
+
Constant: int UC_DECOMP_FINAL + +
+

UCD marker: <final>. +Denotes a final presentation form (Arabic). +

+ +
+
Constant: int UC_DECOMP_ISOLATED + +
+

UCD marker: <isolated>. +Denotes an isolated presentation form (Arabic). +

+ +
+
Constant: int UC_DECOMP_CIRCLE + +
+

UCD marker: <circle>. +Denotes an encircled form. +

+ +
+
Constant: int UC_DECOMP_SUPER + +
+

UCD marker: <super>. +Denotes a superscript form. +

+ +
+
Constant: int UC_DECOMP_SUB + +
+

UCD marker: <sub>. +Denotes a subscript form. +

+ +
+
Constant: int UC_DECOMP_VERTICAL + +
+

UCD marker: <vertical>. +Denotes a vertical layout presentation form. +

+ +
+
Constant: int UC_DECOMP_WIDE + +
+

UCD marker: <wide>. +Denotes a wide (or zenkaku) compatibility character. +

+ +
+
Constant: int UC_DECOMP_NARROW + +
+

UCD marker: <narrow>. +Denotes a narrow (or hankaku) compatibility character. +

+ +
+
Constant: int UC_DECOMP_SMALL + +
+

UCD marker: <small>. +Denotes a small variant form (CNS compatibility). +

+ +
+
Constant: int UC_DECOMP_SQUARE + +
+

UCD marker: <square>. +Denotes a CJK squared font variant. +

+ +
+
Constant: int UC_DECOMP_FRACTION + +
+

UCD marker: <fraction>. +Denotes a vulgar fraction form. +

+ +
+
Constant: int UC_DECOMP_COMPAT + +
+

UCD marker: <compat>. +Denotes an otherwise unspecified compatibility character. +

+ +

The following constant denotes the maximum size of decomposition of a single +Unicode character. +

+
+
Macro: unsigned int UC_DECOMPOSITION_MAX_LENGTH + +
+

This macro expands to a constant that is the required size of buffer passed to +the uc_decomposition and uc_canonical_decomposition functions. +

+ +

The following functions decompose a Unicode character. +

+
+
Function: int uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition) + +
+

Returns the character decomposition mapping of the Unicode character uc. +decomposition must point to an array of at least +UC_DECOMPOSITION_MAX_LENGTH ucs4_t elements. +

+

When a decomposition exists, decomposition[0..n-1] and +*decomp_tag are filled and n is returned. Otherwise -1 is +returned. +

+ +
+
Function: int uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition) + +
+

Returns the canonical character decomposition mapping of the Unicode character +uc. decomposition must point to an array of at least +UC_DECOMPOSITION_MAX_LENGTH ucs4_t elements. +

+

When a decomposition exists, decomposition[0..n-1] is filled +and n is returned. Otherwise -1 is returned. +

+ +
+ + +

12.2 Composition of Unicode characters

+ +

The following function composes a Unicode character from two Unicode +characters. +

+
+
Function: ucs4_t uc_composition (ucs4_t uc1, ucs4_t uc2) + +
+

Attempts to combine the Unicode characters uc1, uc2. +uc1 is known to have canonical combining class 0. +

+

Returns the combination of uc1 and uc2, if it exists. +Returns 0 otherwise. +

+

Not all decompositions can be recombined using this function. See the Unicode +file ‘CompositionExclusions.txt’ for details. +

+ +
+ + +

12.3 Normalization of strings

+ +

The Unicode standard defines four normalization forms for Unicode strings. +The following type is used to denote a normalization form. +

+
+
Type: uninorm_t + +
+

An object of type uninorm_t denotes a Unicode normalization form. +This is a scalar type; its values can be compared with ==. +

+ +

The following constants denote the four normalization forms. +

+
+
Macro: uninorm_t UNINORM_NFD + +
+

Denotes Normalization form D: canonical decomposition. +

+ +
+
Macro: uninorm_t UNINORM_NFC + +
+

Normalization form C: canonical decomposition, then canonical composition. +

+ +
+
Macro: uninorm_t UNINORM_NFKD + +
+

Normalization form KD: compatibility decomposition. +

+ +
+
Macro: uninorm_t UNINORM_NFKC + +
+

Normalization form KC: compatibility decomposition, then canonical composition. +

+ +

The following functions operate on uninorm_t objects. +

+
+
Function: bool uninorm_is_compat_decomposing (uninorm_t nf) + +
+

Tests whether the normalization form nf does compatibility decomposition. +

+ +
+
Function: bool uninorm_is_composing (uninorm_t nf) + +
+

Tests whether the normalization form nf includes canonical composition. +

+ +
+
Function: uninorm_t uninorm_decomposing_form (uninorm_t nf) + +
+

Returns the decomposing variant of the normalization form nf. +This maps NFC,NFD → NFD and NFKC,NFKD → NFKD. +

+ +

The following functions apply a Unicode normalization form to a Unicode string. +

+
+
Function: uint8_t * u8_normalize (uninorm_t nf, const uint8_t *s, size_t n, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_normalize (uninorm_t nf, const uint16_t *s, size_t n, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_normalize (uninorm_t nf, const uint32_t *s, size_t n, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the specified normalization form of a string. +

+ +
+ + +

12.4 Normalizing comparisons

+ +

The following functions compare Unicode strings, ignoring differences in +normalization. +

+
+
Function: int u8_normcmp (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, uninorm_t nf, int *resultp) + +
+
Function: int u16_normcmp (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, uninorm_t nf, int *resultp) + +
+
Function: int u32_normcmp (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, uninorm_t nf, int *resultp) + +
+

Compares s1 and s2, ignoring differences in normalization. +

+

nf must be either UNINORM_NFD or UNINORM_NFKD. +

+

If successful, sets *resultp to -1 if s1 < s2, +0 if s1 = s2, 1 if s1 > s2, and returns 0. +Upon failure, returns -1 with errno set. +

+ + + +
+
Function: char * u8_normxfrm (const uint8_t *s, size_t n, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+
Function: char * u16_normxfrm (const uint16_t *s, size_t n, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+
Function: char * u32_normxfrm (const uint32_t *s, size_t n, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+

Converts the string s of length n to a NUL-terminated byte +sequence, in such a way that comparing u8_normxfrm (s1) and +u8_normxfrm (s2) with the u8_cmp2 function is equivalent to +comparing s1 and s2 with the u8_normcoll function. +

+

nf must be either UNINORM_NFC or UNINORM_NFKC. +

+ +
+
Function: int u8_normcoll (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, uninorm_t nf, int *resultp) + +
+
Function: int u16_normcoll (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, uninorm_t nf, int *resultp) + +
+
Function: int u32_normcoll (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, uninorm_t nf, int *resultp) + +
+

Compares s1 and s2, ignoring differences in normalization, using +the collation rules of the current locale. +

+

nf must be either UNINORM_NFC or UNINORM_NFKC. +

+

If successful, sets *resultp to -1 if s1 < s2, +0 if s1 = s2, 1 if s1 > s2, and returns 0. +Upon failure, returns -1 with errno set. +

+ +
+ + +

12.5 Normalization of streams of Unicode characters

+ +

A “stream of Unicode characters” is essentially a function that accepts an +ucs4_t argument repeatedly, optionally combined with a function that +“flushes” the stream. +

+
+
Type: struct uninorm_filter + +
+

This is the data type of a stream of Unicode characters that normalizes its +input according to a given normalization form and passes the normalized +character sequence to the encapsulated stream of Unicode characters. +

+ +
+
Function: struct uninorm_filter * uninorm_filter_create (uninorm_t nf, int (*stream_func) (void *stream_data, ucs4_t uc), void *stream_data) + +
+

Creates and returns a normalization filter for Unicode characters. +

+

The pair (stream_func, stream_data) is the encapsulated stream. +stream_func (stream_data, uc) receives the Unicode +character uc and returns 0 if successful, or -1 with errno set +upon failure. +

+

Returns the new filter, or NULL with errno set upon failure. +

+ +
+
Function: int uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc) + +
+

Stuffs a Unicode character into a normalizing filter. +Returns 0 if successful, or -1 with errno set upon failure. +

+ +
+
Function: int uninorm_filter_flush (struct uninorm_filter *filter) + +
+

Brings data buffered in the filter to its destination, the encapsulated stream. +

+

Returns 0 if successful, or -1 with errno set upon failure. +

+

Note! If after calling this function, additional characters are written +into the filter, the resulting character sequence in the encapsulated stream +will not necessarily be normalized. +

+ +
+
Function: int uninorm_filter_free (struct uninorm_filter *filter) + +
+

Brings data buffered in the filter to its destination, the encapsulated stream, +then closes and frees the filter. +

+

Returns 0 if successful, or -1 with errno set upon failure. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_13.html b/doc/libunistring_13.html new file mode 100644 index 00000000..1597ec11 --- /dev/null +++ b/doc/libunistring_13.html @@ -0,0 +1,611 @@ + + + + + +GNU libunistring: 13. Case mappings <unicase.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

13. Case mappings <unicase.h>

+ +

This include file defines functions for case mapping for Unicode strings and +case insensitive comparison of Unicode strings and C strings. +

+

These string functions fix the problems that were mentioned in +‘char *’ strings, namely, they handle the Croatian +LETTER DZ WITH CARON, the German LATIN SMALL LETTER SHARP S, the +Greek sigma and the Lithuanian i correctly. +

+ +
+ + +

13.1 Case mappings of characters

+ +

The following functions implement case mappings on Unicode characters — +for those cases only where the result of the mapping is again a single +Unicode character. +

+

These mappings are locale and context independent. +

+
+

WARNING! These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that treat an +entire string at once and are language aware. +

+ +
+
Function: ucs4_t uc_toupper (ucs4_t uc) + +
+

Returns the uppercase mapping of the Unicode character uc. +

+ +
+
Function: ucs4_t uc_tolower (ucs4_t uc) + +
+

Returns the lowercase mapping of the Unicode character uc. +

+ +
+
Function: ucs4_t uc_totitle (ucs4_t uc) + +
+

Returns the titlecase mapping of the Unicode character uc. +

+

The titlecase mapping of a character is to be used when the character should +look like upper case and the following characters are lower cased. +

+

For most characters, this is the same as the uppercase mapping. There are +only a few characters where the title case variant and the upper case variant +are different. These characters occur in the Latin writing of the Croatian, +Bosnian, and Serbian languages. +

+ + + + + + +

Lower case

Title case

Upper case +

LATIN SMALL LETTER LJ +

LATIN CAPITAL LETTER L WITH SMALL LETTER J +

LATIN CAPITAL LETTER LJ +

LATIN SMALL LETTER NJ +

LATIN CAPITAL LETTER N WITH SMALL LETTER J +

LATIN CAPITAL LETTER NJ +

LATIN SMALL LETTER DZ +

LATIN CAPITAL LETTER D WITH SMALL LETTER Z +

LATIN CAPITAL LETTER DZ +

LATIN SMALL LETTER DZ WITH CARON +

LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +

LATIN CAPITAL LETTER DZ WITH CARON +

+
+ +
+ + +

13.2 Case mappings of strings

+ +

Case mapping should always be performed on entire strings, not on individual +characters. The functions in this section do so. +

+

These functions allow applying a normalization after the case mapping. The +reason is that if you want to treat ‘ä’ and ‘Ä’ the same, +you most often also want to treat the composed and decomposed forms of such +a character, U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS and +U+0041 LATIN CAPITAL LETTER A U+0308 COMBINING DIAERESIS the same. +The nf argument designates the normalization. +

+ +

These functions are locale dependent. The iso639_language argument +identifies the language (e.g. "tr" for Turkish). NULL means to use +locale independent case mappings. +

+
+
Function: const char * uc_locale_language () + +
+

Returns the ISO 639 language code of the current locale. +Returns "" if it is unknown, or in the "C" locale. +

+ +
+
Function: uint8_t * u8_toupper (const uint8_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_toupper (const uint16_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_toupper (const uint32_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the uppercase mapping of a string. +

+

The nf argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +

+ +
+
Function: uint8_t * u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_tolower (const uint16_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_tolower (const uint32_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the lowercase mapping of a string. +

+

The nf argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +

+ +
+
Function: uint8_t * u8_totitle (const uint8_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_totitle (const uint16_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_totitle (const uint32_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the titlecase mapping of a string. +

+

Mapping to title case means that, in each word, the first cased character +is being mapped to title case and the remaining characters of the word +are being mapped to lower case. +

+

The nf argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +

+ +
+ + +

13.3 Case mappings of substrings

+ +

Case mapping of a substring cannot simply be performed by extracting the +substring and then applying the case mapping function to it. This does not +work because case mapping requires some information about the surrounding +characters. The following functions make it possible to apply case mappings to +substrings of a given string, while taking into account the characters that +precede it (the “prefix”) and the characters that follow it (the “suffix”). +

+
+
Type: casing_prefix_context_t + +
+

This data type denotes the case-mapping context that is given by a prefix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +

+ +
+
Constant: casing_prefix_context_t unicase_empty_prefix_context + +
+

This constant is the case-mapping context that corresponds to an empty prefix +string. +

+ +

The following functions return casing_prefix_context_t objects: +

+
+
Function: casing_prefix_context_t u8_casing_prefix_context (const uint8_t *s, size_t n) + +
+
Function: casing_prefix_context_t u16_casing_prefix_context (const uint16_t *s, size_t n) + +
+
Function: casing_prefix_context_t u32_casing_prefix_context (const uint32_t *s, size_t n) + +
+

Returns the case-mapping context of a given prefix string. +

+ +
+
Function: casing_prefix_context_t u8_casing_prefixes_context (const uint8_t *s, size_t n, casing_prefix_context_t a_context) + +
+
Function: casing_prefix_context_t u16_casing_prefixes_context (const uint16_t *s, size_t n, casing_prefix_context_t a_context) + +
+
Function: casing_prefix_context_t u32_casing_prefixes_context (const uint32_t *s, size_t n, casing_prefix_context_t a_context) + +
+

Returns the case-mapping context of the prefix concat(a, s), +given the case-mapping context of the prefix a. +

+ +
+
Type: casing_suffix_context_t + +
+

This data type denotes the case-mapping context that is given by a suffix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +

+ +
+
Constant: casing_suffix_context_t unicase_empty_suffix_context + +
+

This constant is the case-mapping context that corresponds to an empty suffix +string. +

+ +

The following functions return casing_suffix_context_t objects: +

+
+
Function: casing_suffix_context_t u8_casing_suffix_context (const uint8_t *s, size_t n) + +
+
Function: casing_suffix_context_t u16_casing_suffix_context (const uint16_t *s, size_t n) + +
+
Function: casing_suffix_context_t u32_casing_suffix_context (const uint32_t *s, size_t n) + +
+

Returns the case-mapping context of a given suffix string. +

+ +
+
Function: casing_suffix_context_t u8_casing_suffixes_context (const uint8_t *s, size_t n, casing_suffix_context_t a_context) + +
+
Function: casing_suffix_context_t u16_casing_suffixes_context (const uint16_t *s, size_t n, casing_suffix_context_t a_context) + +
+
Function: casing_suffix_context_t u32_casing_suffixes_context (const uint32_t *s, size_t n, casing_suffix_context_t a_context) + +
+

Returns the case-mapping context of the suffix concat(s, a), +given the case-mapping context of the suffix a. +

+ +

The following functions perform a case mapping, considering the +prefix context and the suffix context. +

+
+
Function: uint8_t * u8_ct_toupper (const uint8_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_ct_toupper (const uint16_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_ct_toupper (const uint32_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the uppercase mapping of a string that is surrounded by a prefix +and a suffix. +

+ +
+
Function: uint8_t * u8_ct_tolower (const uint8_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_ct_tolower (const uint16_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_ct_tolower (const uint32_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the lowercase mapping of a string that is surrounded by a prefix +and a suffix. +

+ +
+
Function: uint8_t * u8_ct_totitle (const uint8_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_ct_totitle (const uint16_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_ct_totitle (const uint32_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the titlecase mapping of a string that is surrounded by a prefix +and a suffix. +

+ +

For example, to uppercase the UTF-8 substring between s + start_index +and s + end_index of a string that extends from s to +s + u8_strlen (s), you can use the statements +

+
 
size_t result_length;
+uint8_t *result =
+  u8_ct_toupper (s + start_index, end_index - start_index,
+                 u8_casing_prefix_context (s, start_index),
+                 u8_casing_suffix_context (s + end_index,
+                                           u8_strlen (s) - end_index),
+                 iso639_language, NULL, NULL, &result_length);
+
+ +
+ + +

13.4 Case insensitive comparison

+ +

The following functions implement comparison that ignores differences in case +and normalization. +

+
+
Function: uint8_t * u8_casefold (const uint8_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_casefold (const uint16_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_casefold (const uint32_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the case folded string. +

+

Comparing u8_casefold (s1) and u8_casefold (s2) +with the u8_cmp2 function is equivalent to comparing s1 and +s2 with u8_casecmp. +

+

The nf argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +

+ +
+
Function: uint8_t * u8_ct_casefold (const uint8_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_ct_casefold (const uint16_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_ct_casefold (const uint32_t *s, size_t n, casing_prefix_context_t prefix_context, casing_suffix_context_t suffix_context, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp) + +
+

Returns the case folded string. The case folding takes into account the +case mapping contexts of the prefix and suffix strings. +

+ +
+
Function: int u8_casecmp (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+
Function: int u16_casecmp (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+
Function: int u32_casecmp (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+
Function: int ulc_casecmp (const char *s1, size_t n1, const char *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+

Compares s1 and s2, ignoring differences in case and normalization. +

+

The nf argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +

+

If successful, sets *resultp to -1 if s1 < s2, +0 if s1 = s2, 1 if s1 > s2, and returns 0. +Upon failure, returns -1 with errno set. +

+ + + + + +

The following functions additionally take into account the sorting rules of the +current locale. +

+
+
Function: char * u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+
Function: char * u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+
Function: char * u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+
Function: char * ulc_casexfrm (const char *s, size_t n, const char *iso639_language, uninorm_t nf, char *resultbuf, size_t *lengthp) + +
+

Converts the string s of length n to a NUL-terminated byte +sequence, in such a way that comparing u8_casexfrm (s1) and +u8_casexfrm (s2) with the gnulib function memcmp2 is +equivalent to comparing s1 and s2 with u8_casecoll. +

+

nf must be either UNINORM_NFC, UNINORM_NFKC, or NULL for +no normalization. +

+ +
+
Function: int u8_casecoll (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+
Function: int u16_casecoll (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+
Function: int u32_casecoll (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+
Function: int ulc_casecoll (const char *s1, size_t n1, const char *s2, size_t n2, const char *iso639_language, uninorm_t nf, int *resultp) + +
+

Compares s1 and s2, ignoring differences in case and normalization, +using the collation rules of the current locale. +

+

The nf argument identifies the normalization form to apply after the +case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. +It can also be NULL, for no normalization. +

+

If successful, sets *resultp to -1 if s1 < s2, +0 if s1 = s2, 1 if s1 > s2, and returns 0. +Upon failure, returns -1 with errno set. +

+ +
+ + +

13.5 Case detection

+ +

The following functions determine whether a Unicode string is entirely in +upper case, or entirely in lower case, or entirely in title case, or already +case-folded. +

+
+
Function: int u8_is_uppercase (const uint8_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u16_is_uppercase (const uint16_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u32_is_uppercase (const uint32_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+

Sets *resultp to true if mapping NFD(s) to upper case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +errno set. +

+ +
+
Function: int u8_is_lowercase (const uint8_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u16_is_lowercase (const uint16_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u32_is_lowercase (const uint32_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+

Sets *resultp to true if mapping NFD(s) to lower case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +errno set. +

+ +
+
Function: int u8_is_titlecase (const uint8_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u16_is_titlecase (const uint16_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u32_is_titlecase (const uint32_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+

Sets *resultp to true if mapping NFD(s) to title case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +errno set. +

+ +
+
Function: int u8_is_casefolded (const uint8_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u16_is_casefolded (const uint16_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u32_is_casefolded (const uint32_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+

Sets *resultp to true if applying case folding to NFD(s) is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +errno set. +

+ +

The following functions determine whether case mappings have any effect on a +Unicode string. +

+
+
Function: int u8_is_cased (const uint8_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u16_is_cased (const uint16_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+
Function: int u32_is_cased (const uint32_t *s, size_t n, const char *iso639_language, bool *resultp) + +
+

Sets *resultp to true if case matters for s, that is, if +mapping NFD(s) to either upper case or lower case or title case is not +a no-op. Sets *resultp to false if NFD(s) maps to itself +under the upper case mapping, under the lower case mapping, and under the title +case mapping; in other words, when NFD(s) consists entirely of caseless +characters. Upon failure, returns -1 with errno set. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_14.html b/doc/libunistring_14.html new file mode 100644 index 00000000..f0b21257 --- /dev/null +++ b/doc/libunistring_14.html @@ -0,0 +1,87 @@ + + + + + +GNU libunistring: 14. Regular expressions <uniregex.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

14. Regular expressions <uniregex.h>

+ +

This include file is not yet implemented. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_15.html b/doc/libunistring_15.html new file mode 100644 index 00000000..7c7ac329 --- /dev/null +++ b/doc/libunistring_15.html @@ -0,0 +1,232 @@ + + + + + +GNU libunistring: 15. Using the library + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

15. Using the library

+ +

This chapter explains some practical considerations, regarding the +installation and compiler options that are needed in order to use this +library. +

+ +
+ + +

15.1 Installation

+ +

Before you can use the library, it must be installed. First, you have to +make sure all dependencies are installed. They are listed in the file +‘DEPENDENCIES’. +

+ +

Then you can proceed to build and install the library, as described in the +file ‘INSTALL’. For installation on Windows systems, please refer to +the file ‘README.woe32’. +

+
+ + +

15.2 Compiler options

+ +

Let's denote as LIBUNISTRING_PREFIX the value of the ‘--prefix’ +option that you passed to configure while installing this package. +If you didn't pass any ‘--prefix’ option, then the package is installed +in ‘/usr/local’. +

+

Let's denote as LIBUNISTRING_INCLUDEDIR the directory where the +include files were installed. This is usually the same as +${LIBUNISTRING_PREFIX}/include, except that if you passed an +‘--includedir’ option to configure, it is the value of that +option. +

+

Let's further denote as LIBUNISTRING_LIBDIR the directory where +the library itself was installed. This is the value that you passed +with the ‘--libdir’ option to configure, or otherwise the +same as ${LIBUNISTRING_PREFIX}/lib. Recall that when building +in 64-bit mode on a 64-bit GNU/Linux system that supports executables +in either 64-bit mode or 32-bit mode, you should have used the option +--libdir=${LIBUNISTRING_PREFIX}/lib64. +

+ +

So that the compiler finds the include files, you have to pass it the +option -I${LIBUNISTRING_INCLUDEDIR}. +

+

So that the compiler finds the library during its linking pass, you have +to pass it the options -L${LIBUNISTRING_LIBDIR} -lunistring. +On some systems, in some configurations, you also have to pass options +needed for linking with libiconv. The autoconf macro +gl_LIBUNISTRING (see Autoconf macro) deals with this +particularity. +

+
+ + +

15.3 Include files

+ +

Most of the include files have been presented in the introduction, see +Introduction, and subsequent detailed chapters. +

+

Another include file is <unistring/version.h>. It contains the +version number of the libunistring library. +

+
+
Macro: int _LIBUNISTRING_VERSION + +
+

This constant contains the version of libunistring that is being used +at compile time. It encodes the major and minor parts of the version +number only. These parts are encoded in the form (major<<8) + minor. +

+ +
+
Constant: int _libunistring_version + +
+

This constant contains the version of libunistring that is being used +at run time. It encodes the major and minor parts of the version +number only. These parts are encoded in the form (major<<8) + minor. +

+ +

It is possible that _libunistring_version is greater than +_LIBUNISTRING_VERSION. This can happen when you use +libunistring as a shared library, and a newer, binary +backward-compatible version has been installed after your program +that uses libunistring was installed. +

+
+ + +

15.4 Autoconf macro

+ +

GNU Gnulib provides an autoconf macro that tests for the availability +of libunistring. It is contained in the Gnulib module +‘libunistring’, see +http://www.gnu.org/software/gnulib/MODULES.html#module=libunistring. +

+ +

The macro is called gl_LIBUNISTRING. It searches for an installed +libunistring. If found, it sets and AC_SUBSTs HAVE_LIBUNISTRING=yes +and the LIBUNISTRING and LTLIBUNISTRING variables and augments +the CPPFLAGS variable, and defines the C macro +HAVE_LIBUNISTRING to 1. Otherwise, it sets and AC_SUBSTs +HAVE_LIBUNISTRING=no and LIBUNISTRING and LTLIBUNISTRING +to empty. +

+

The complexities that gl_LIBUNISTRING deals with are the following: +

+
    +
  • +On some operating systems, in some configurations, libunistring depends +on libiconv, and the options for linking with libiconv must be +mentioned explicitly on the link command line. + +
  • +GNU libunistring, if installed, is not necessarily already in the +search path (CPPFLAGS for the include file search path, +LDFLAGS for the library search path). + +
  • +GNU libunistring, if installed, is not necessarily already in the +run time library search path. To avoid the need for setting an environment +variable like LD_LIBRARY_PATH, the macro adds the appropriate +run time search path options to the LIBUNISTRING variable. This works +on most systems. +
+ +
+ + +

15.5 Reporting problems

+ +

If you encounter any problem, please don't hesitate to send a detailed +bug report to the bug-libunistring@gnu.org mailing list. You can +alternatively also use the bug tracker at the project page +https://savannah.gnu.org/projects/libunistring. +

+

Please always include the version number of this library, and a short +description of your operating system and compilation environment with +corresponding version numbers. +

+

For problems that appear while building and installing libunistring, +for which you don't find the remedy in the ‘INSTALL’ file, please include +a description of the options that you passed to the ‘configure’ script. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_16.html b/doc/libunistring_16.html new file mode 100644 index 00000000..bfe61265 --- /dev/null +++ b/doc/libunistring_16.html @@ -0,0 +1,93 @@ + + + + + +GNU libunistring: 16. More advanced functionality + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

16. More advanced functionality

+ +

For bidirectional reordering of strings, we recommend the GNU FriBidi library: +http://www.fribidi.org/. +

+ +

For the rendering of Unicode strings outside of the context of a given toolkit +(KDE/Qt or GNOME/Gtk), we recommend the Pango library: +http://www.pango.org/. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_17.html b/doc/libunistring_17.html new file mode 100644 index 00000000..73ade2f2 --- /dev/null +++ b/doc/libunistring_17.html @@ -0,0 +1,1526 @@ + + + + + +GNU libunistring: A. Licenses + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

A. Licenses

+ +

The files of this package are covered by the licenses indicated in each +particular file or directory. Here is a summary: +

+
    +
  • +The libunistring library is covered by the +GNU Lesser General Public License (LGPL). +A copy of the license is included in GNU LESSER GENERAL PUBLIC LICENSE. + +
  • +This manual is free documentation. It is dually licensed under the +GNU FDL and the GNU GPL. This means that you can redistribute this +manual under either of these two licenses, at your choice. +
    +This manual is covered by the GNU FDL. Permission is granted to copy, +distribute and/or modify this document under the terms of the +GNU Free Documentation License (FDL), either version 1.2 of the +License, or (at your option) any later version published by the +Free Software Foundation (FSF); with no Invariant Sections, with no +Front-Cover Text, and with no Back-Cover Texts. +A copy of the license is included in GNU Free Documentation License. +
    +This manual is covered by the GNU GPL. You can redistribute it and/or +modify it under the terms of the GNU General Public License (GPL), either +version 3 of the License, or (at your option) any later version published +by the Free Software Foundation (FSF). +A copy of the license is included in GNU GENERAL PUBLIC LICENSE. +
+ + + +
+ + +

A.1 GNU GENERAL PUBLIC LICENSE

+

Version 3, 29 June 2007 +

+ +
 
Copyright © 2007 Free Software Foundation, Inc. http://fsf.org/
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+ + +

Preamble

+ +

The GNU General Public License is a free, copyleft license for +software and other kinds of works. +

+

The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom +to share and change all versions of a program—to make sure it remains +free software for all its users. We, the Free Software Foundation, +use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. You +can apply it to your programs, too. +

+

When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. +

+

To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the +software, or if you modify it: responsibilities to respect the freedom +of others. +

+

For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, +receive or can get the source code. And you must show them these +terms so they know their rights. +

+

Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. +

+

For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. +

+

Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. +Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those +domains in future versions of the GPL, as needed to protect the +freedom of users. +

+

Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish +to avoid the special danger that patents applied to a free program +could make it effectively proprietary. To prevent this, the GPL +assures that patents cannot be used to render the program non-free. +

+

The precise terms and conditions for copying, distribution and +modification follow. +

+ +

TERMS AND CONDITIONS

+ +
    +
  1. Definitions. + +

    “This License” refers to version 3 of the GNU General Public License. +

    +

    “Copyright” also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. +

    +

    “The Program” refers to any copyrightable work licensed under this +License. Each licensee is addressed as “you”. “Licensees” and +“recipients” may be individuals or organizations. +

    +

    To “modify” a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. The resulting work is called a “modified version” of +the earlier work or a work “based on” the earlier work. +

    +

    A “covered work” means either the unmodified Program or a work based +on the Program. +

    +

    To “propagate” a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. +

    +

    To “convey” a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. +

    +

    An interactive user interface displays “Appropriate Legal Notices” to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. +

    +
  2. Source Code. + +

    The “source code” for a work means the preferred form of the work for +making modifications to it. “Object code” means any non-source form +of a work. +

    +

    A “Standard Interface” means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. +

    +

    The “System Libraries” of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +“Major Component”, in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. +

    +

    The “Corresponding Source” for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. +

    +

    The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. +

    +

    The Corresponding Source for a work in source code form is that same +work. +

    +
  3. Basic Permissions. + +

    All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. +

    +

    You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. +You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. +

    +

    Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. +

    +
  4. Protecting Users' Legal Rights From Anti-Circumvention Law. + +

    No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. +

    +

    When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. +

    +
  5. Conveying Verbatim Copies. + +

    You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. +

    +

    You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. +

    +
  6. Conveying Modified Source Versions. + +

    You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: +

    +
      +
    a. +The work must carry prominent notices stating that you modified it, +and giving a relevant date. + +
    b. +The work must carry prominent notices stating that it is released +under this License and any conditions added under section 7. This +requirement modifies the requirement in section 4 to “keep intact all +notices”. + +
    c. +You must license the entire work, as a whole, under this License to +anyone who comes into possession of a copy. This License will +therefore apply, along with any applicable section 7 additional terms, +to the whole of the work, and all its parts, regardless of how they +are packaged. This License gives no permission to license the work in +any other way, but it does not invalidate such permission if you have +separately received it. + +
    d. +If the work has interactive user interfaces, each must display +Appropriate Legal Notices; however, if the Program has interactive +interfaces that do not display Appropriate Legal Notices, your work +need not make them do so. +
    + +

    A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +“aggregate” if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. +

    +
  6. Conveying Non-Source Forms. + +

    You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: +

    +
      +
    a. +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by the +Corresponding Source fixed on a durable physical medium customarily +used for software interchange. + +
    b. +Convey the object code in, or embodied in, a physical product +(including a physical distribution medium), accompanied by a written +offer, valid for at least three years and valid for as long as you +offer spare parts or customer support for that product model, to give +anyone who possesses the object code either (1) a copy of the +Corresponding Source for all the software in the product that is +covered by this License, on a durable physical medium customarily used +for software interchange, for a price no more than your reasonable +cost of physically performing this conveying of source, or (2) access +to copy the Corresponding Source from a network server at no charge. + +
    c. +Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. This alternative is +allowed only occasionally and noncommercially, and only if you +received the object code with such an offer, in accord with subsection +6b. + +
    d. +Convey the object code by offering access from a designated place +(gratis or for a charge), and offer equivalent access to the +Corresponding Source in the same way through the same place at no +further charge. You need not require recipients to copy the +Corresponding Source along with the object code. If the place to copy +the object code is a network server, the Corresponding Source may be +on a different server (operated by you or a third party) that supports +equivalent copying facilities, provided you maintain clear directions +next to the object code saying where to find the Corresponding Source. +Regardless of what server hosts the Corresponding Source, you remain +obligated to ensure that it is available for as long as needed to +satisfy these requirements. + +
    e. +Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of +the work are being offered to the general public at no charge under +subsection 6d. + +
    + +

    A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. +

    +

    A “User Product” is either (1) a “consumer product”, which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. For a particular product received by a particular user, +“normally used” refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. +

    +

    “Installation Information” for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. +

    +

    If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). +

    +

    The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. +

    +

    Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. +

    +
  7. Additional Terms. + +

    “Additional permissions” are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. +

    +

    When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. +

    +

    Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: +

    +
      +
    a. +Disclaiming warranty or limiting liability differently from the terms +of sections 15 and 16 of this License; or + +
    b. +Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices +displayed by works containing it; or + +
    c. +Prohibiting misrepresentation of the origin of that material, or +requiring that modified versions of such material be marked in +reasonable ways as different from the original version; or + +
    d. +Limiting the use for publicity purposes of names of licensors or +authors of the material; or + +
    e. +Declining to grant rights under trademark law for use of some trade +names, trademarks, or service marks; or + +
    f. +Requiring indemnification of licensors and authors of that material by +anyone who conveys the material (or modified versions of it) with +contractual assumptions of liability to the recipient, for any +liability that these contractual assumptions directly impose on those +licensors and authors. +
    + +

    All other non-permissive additional terms are considered “further +restrictions” within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. +

    +

    If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. +

    +

    Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. +

    +
  8. Termination. + +

    You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). +

    +

    However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. +

    +

    Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. +

    +

    Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. +

    +
  9. Acceptance Not Required for Having Copies. + +

    You are not required to accept this License in order to receive or run +a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. +

    +
  10. Automatic Licensing of Downstream Recipients. + +

    Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. +

    +

    An “entity transaction” is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. +

    +

    You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. +

    +
  11. Patents. + +

    A “contributor” is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's “contributor version”. +

    +

    A contributor's “essential patent claims” are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, “control” includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. +

    +

    Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. +

    +

    In the following three paragraphs, a “patent license” is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To “grant” such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. +

    +

    If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. “Knowingly relying” means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. +

    +

    If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. +

    +

    A patent license is “discriminatory” if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. +

    +

    Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. +

    +
  12. No Surrender of Others' Freedom. + +

    If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey +a covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree +to terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. +

    +
  13. Use with the GNU Affero General Public License. + +

    Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. +

    +
  14. Revised Versions of this License. + +

    The Free Software Foundation may publish revised and/or new versions +of the GNU General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. +

    +

    Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU General Public +License “or any later version” applies to it, you have the option of +following the terms and conditions either of that numbered version or +of any later version published by the Free Software Foundation. If +the Program does not specify a version number of the GNU General +Public License, you may choose any version ever published by the Free +Software Foundation. +

    +

    If the Program specifies that a proxy can decide which future versions +of the GNU General Public License can be used, that proxy's public +statement of acceptance of a version permanently authorizes you to +choose that version for the Program. +

    +

    Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. +

    +
  15. Disclaimer of Warranty. + +

    THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. +

    +
  16. Limitation of Liability. + +

    IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +

    +
  17. Interpretation of Sections 15 and 16. + +

    If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. +

    +
+ + +

END OF TERMS AND CONDITIONS

+ + +

How to Apply These Terms to Your New Programs

+ +

If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. +

+

To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the “copyright” line and a pointer to where the full notice is found. +

+
 
one line to give the program's name and a brief idea of what it does.
+Copyright (C) year name of author
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see http://www.gnu.org/licenses/.
+
+ +

Also add information on how to contact you by electronic and paper mail. +

+

If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: +

+
 
program Copyright (C) year name of author
+This program comes with ABSOLUTELY NO WARRANTY; for details type ‘show w’.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type ‘show c’ for details.
+
+ +

The hypothetical commands ‘show w’ and ‘show c’ should show +the appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an “about box”. +

+

You should also get your employer (if you work as a programmer) or school, +if any, to sign a “copyright disclaimer” for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +http://www.gnu.org/licenses/. +

+

The GNU General Public License does not permit incorporating your +program into proprietary programs. If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use +the GNU Lesser General Public License instead of this License. But +first, please read http://www.gnu.org/philosophy/why-not-lgpl.html. + +


+ + +

A.2 GNU LESSER GENERAL PUBLIC LICENSE

+

Version 3, 29 June 2007 +

+ +
 
Copyright © 2007 Free Software Foundation, Inc. http://fsf.org/
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+ +

This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. +

+
    +
  0. Additional Definitions. + +

    As used herein, “this License” refers to version 3 of the GNU Lesser +General Public License, and the “GNU GPL” refers to version 3 of the GNU +General Public License. +

    +

    “The Library” refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. +

    +

    An “Application” is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. +

    +

    A “Combined Work” is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the “Linked +Version”. +

    +

    The “Minimal Corresponding Source” for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. +

    +

    The “Corresponding Application Code” for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. +

    +
  1. Exception to Section 3 of the GNU GPL. + +

    You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. +

    +
  2. Conveying Modified Versions. + +

    If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: +

    +
      +
    a. +under this License, provided that you make a good faith effort to +ensure that, in the event an Application does not supply the +function or data, the facility still operates, and performs +whatever part of its purpose remains meaningful, or + +
    b. +under the GNU GPL, with none of the additional permissions of +this License applicable to that copy. +
    + +
  3. Object Code Incorporating Material from Library Header Files. + +

    The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: +

    +
      +
    a. +Give prominent notice with each copy of the object code that the +Library is used in it and that the Library and its use are +covered by this License. +
    b. +Accompany the object code with a copy of the GNU GPL and this license +document. +
    + +
  4. Combined Works. + +

    You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: +

    +
      +
    a. +Give prominent notice with each copy of the Combined Work that +the Library is used in it and that the Library and its use are +covered by this License. +
    b. +Accompany the Combined Work with a copy of the GNU GPL and this license +document. +
    c. +For a Combined Work that displays copyright notices during +execution, include the copyright notice for the Library among +these notices, as well as a reference directing the user to the +copies of the GNU GPL and this license document. +
    d. +Do one of the following: + +
        +
      0. +Convey the Minimal Corresponding Source under the terms of this +License, and the Corresponding Application Code in a form +suitable for, and under terms that permit, the user to +recombine or relink the Application with a modified version of +the Linked Version to produce a modified Combined Work, in the +manner specified by section 6 of the GNU GPL for conveying +Corresponding Source. +
      1. +Use a suitable shared library mechanism for linking with the +Library. A suitable mechanism is one that (a) uses at run time +a copy of the Library already present on the user's computer +system, and (b) will operate properly with a modified version +of the Library that is interface-compatible with the Linked +Version. +
      + +
    e. +Provide Installation Information, but only if you would otherwise +be required to provide such information under section 6 of the +GNU GPL, and only to the extent that such information is +necessary to install and execute a modified version of the +Combined Work produced by recombining or relinking the +Application with a modified version of the Linked Version. (If +you use option 4d0, the Installation Information must accompany +the Minimal Corresponding Source and Corresponding Application +Code. If you use option 4d1, you must provide the Installation +Information in the manner specified by section 6 of the GNU GPL +for conveying Corresponding Source.) +
    + +
  5. Combined Libraries. + +

    You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: +

    +
      +
    a. +Accompany the combined library with a copy of the same work based +on the Library, uncombined with any other library facilities, +conveyed under the terms of this License. +
    b. +Give prominent notice with the combined library that part of it +is a work based on the Library, and explaining where to find the +accompanying uncombined form of the same work. +
    + +
  6. Revised Versions of the GNU Lesser General Public License. + +

    The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. +

    +

    Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License “or any later version” +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. +

    +

    If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. +

    +
+ +
+ + +

A.3 GNU Free Documentation License

+

Version 1.3, 3 November 2008 +

+ +
 
Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+http://fsf.org/
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+
+ +
    +
  1. +PREAMBLE + +

    The purpose of this License is to make a manual, textbook, or other +functional and useful document free in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. +

    +

    This License is a kind of “copyleft”, which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. +

    +

    We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. +

    +
  2. +APPLICABILITY AND DEFINITIONS + +

    This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The “Document”, below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as “you”. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. +

    +

    A “Modified Version” of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. +

    +

    A “Secondary Section” is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. +

    +

    The “Invariant Sections” are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. +

    +

    The “Cover Texts” are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. +

    +

    A “Transparent” copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not “Transparent” is called “Opaque”. +

    +

    Examples of suitable formats for Transparent copies include plain +ASCII without markup, Texinfo input format, LaTeX input +format, SGML or XML using a publicly available +DTD, and standard-conforming simple HTML, +PostScript or PDF designed for human modification. Examples +of transparent image formats include PNG, XCF and +JPG. Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, SGML or +XML for which the DTD and/or processing tools are +not generally available, and the machine-generated HTML, +PostScript or PDF produced by some word processors for +output purposes only. +

    +

    The “Title Page” means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, “Title Page” means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. +

    +

    The “publisher” means any person or entity that distributes copies +of the Document to the public. +

    +

    A section “Entitled XYZ” means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as “Acknowledgements”, +“Dedications”, “Endorsements”, or “History”.) To “Preserve the Title” +of such a section when you modify the Document means that it remains a +section “Entitled XYZ” according to this definition. +

    +

    The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. +

    +
  3. +VERBATIM COPYING + +

    You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. +

    +

    You may also lend copies, under the same conditions stated above, and +you may publicly display copies. +

    +
  4. +COPYING IN QUANTITY + +

    If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. +

    +

    If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. +

    +

    If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. +

    +

    It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. +

    +
  5. +MODIFICATIONS + +

    You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: +

    +
      +
    1. +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. + +
    2. +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +
    3. +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +
    4. +Preserve all the copyright notices of the Document. + +
    5. +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +
    6. +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +
    7. +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +
    8. +Include an unaltered copy of this License. + +
    9. +Preserve the section Entitled “History”, Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled “History” in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +
    10. +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the “History” section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. + +
    11. +For any section Entitled “Acknowledgements” or “Dedications”, Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +
    12. +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +
    13. +Delete any section Entitled “Endorsements”. Such a section +may not be included in the Modified Version. + +
    14. +Do not retitle any existing section to be Entitled “Endorsements” or +to conflict in title with any Invariant Section. + +
    15. +Preserve any Warranty Disclaimers. +
    + +

    If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. +

    +

    You may add a section Entitled “Endorsements”, provided it contains +nothing but endorsements of your Modified Version by various +parties—for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. +

    +

    You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. +

    +

    The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. +

    +
  6. +COMBINING DOCUMENTS + +

    You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. +

    +

    The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. +

    +

    In the combination, you must combine any sections Entitled “History” +in the various original documents, forming one section Entitled +“History”; likewise combine any sections Entitled “Acknowledgements”, +and any sections Entitled “Dedications”. You must delete all +sections Entitled “Endorsements.” +

    +
  7. +COLLECTIONS OF DOCUMENTS + +

    You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. +

    +

    You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. +

    +
  8. +AGGREGATION WITH INDEPENDENT WORKS + +

    A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an “aggregate” if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. +

    +

    If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. +

    +
  9. +TRANSLATION + +

    Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. +Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. +

    +

    If a section in the Document is Entitled “Acknowledgements”, +“Dedications”, or “History”, the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. +

    +
  10. +TERMINATION + +

    You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. +

    +

    However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. +

    +

    Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. +

    +

    Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. +

    +
  11. +FUTURE REVISIONS OF THIS LICENSE + +

    The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +http://www.gnu.org/copyleft/. +

    +

    Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License “or any later version” applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. +

    +
  12. +RELICENSING + +

    “Massive Multiauthor Collaboration Site” (or “MMC Site”) means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +“Massive Multiauthor Collaboration” (or “MMC”) contained in the +site means any set of copyrightable works thus published on the MMC +site. +

    +

    “CC-BY-SA” means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. +

    +

    “Incorporate” means to publish or republish a Document, in whole or +in part, as part of another Document. +

    +

    An MMC is “eligible for relicensing” if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. +

    +

    The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. +

    +
+ + + +

ADDENDUM: How to use this License for your documents

+ +

To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: +

+
 
  Copyright (C)  year  your name.
+  Permission is granted to copy, distribute and/or modify this document
+  under the terms of the GNU Free Documentation License, Version 1.3
+  or any later version published by the Free Software Foundation;
+  with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
+  Texts.  A copy of the license is included in the section entitled ``GNU
+  Free Documentation License''.
+
+ +

If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the “with…Texts.” line with this: +

+
 
    with the Invariant Sections being list their titles, with
+    the Front-Cover Texts being list, and with the Back-Cover Texts
+    being list.
+
+ +

If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. +

+

If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. +

+ + +
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July 1, 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_18.html b/doc/libunistring_18.html new file mode 100644 index 00000000..8237bf6a --- /dev/null +++ b/doc/libunistring_18.html @@ -0,0 +1,770 @@ + + + + + +GNU libunistring: Index + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

Index

+ +
Jump to:   A +   +B +   +C +   +D +   +E +   +F +   +G +   +H +   +I +   +J +   +L +   +M +   +N +   +O +   +P +   +R +   +S +   +T +   +U +   +V +   +W +   +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Index Entry Section

A
ambiguous width9. Display width <uniwidth.h>
argument conventions2. Conventions
autoconf macro15.4 Autoconf macro

B
bidirectional category8.3 Bidirectional category
bidirectional reordering16. More advanced functionality
block8.10 Blocks
breaks, line11. Line breaking <unilbrk.h>
breaks, word10. Word breaks in strings <uniwbrk.h>
bug reports15.5 Reporting problems
bug tracker15.5 Reporting problems

C
C string functions1.5 ‘char *’ strings
C, programming language8.11 ISO C and Java syntax
C-like API8.12 Classifications like in ISO C
canonical combining class8.2 Canonical combining class
case detection13.5 Case detection
case mappings13.2 Case mappings of strings
casing_prefix_context_t13.3 Case mappings of substrings
casing_suffix_context_t13.3 Case mappings of substrings
char, type1.5 ‘char *’ strings
combining, Unicode characters12.2 Composition of Unicode characters
comparing4.3 Elementary string functions
comparing4.5 Elementary string functions on NUL terminated strings
comparing, ignoring case13.4 Case insensitive comparison
comparing, ignoring case, with collation rules13.4 Case insensitive comparison
comparing, ignoring normalization12.4 Normalizing comparisons
comparing, ignoring normalization and case13.4 Case insensitive comparison
comparing, ignoring normalization and case, with collation rules13.4 Case insensitive comparison
comparing, ignoring normalization, with collation rules12.4 Normalizing comparisons
comparing, with collation rules4.5 Elementary string functions on NUL terminated strings
comparing, with collation rules, ignoring case13.4 Case insensitive comparison
comparing, with collation rules, ignoring normalization12.4 Normalizing comparisons
comparing, with collation rules, ignoring normalization and case13.4 Case insensitive comparison
compiler options15.2 Compiler options
composing, Unicode characters12.2 Composition of Unicode characters
converting4.2 Elementary string conversions
converting5. Conversions between Unicode and encodings <uniconv.h>
copying4.3 Elementary string functions
copying4.5 Elementary string functions on NUL terminated strings
counting4.3 Elementary string functions

D
decomposing12.1 Decomposition of Unicode characters
dependencies15.1 Installation
detecting case13.5 Case detection
duplicating4.4 Elementary string functions with memory allocation
duplicating4.5 Elementary string functions on NUL terminated strings

E
enum iconv_ilseq_handler5. Conversions between Unicode and encodings <uniconv.h>

F
FDL, GNU Free Documentation LicenseA.3 GNU Free Documentation License
formatted output6. Output with Unicode strings <unistdio.h>
fullwidth9. Display width <uniwidth.h>

G
general category8.1 General category
gl_LIBUNISTRING15.4 Autoconf macro
GPL, GNU General Public LicenseA.1 GNU GENERAL PUBLIC LICENSE

H
halfwidth9. Display width <uniwidth.h>

I
identifiers8.11 ISO C and Java syntax
installation15.1 Installation
internationalization1.2 Unicode and Internationalization
iterating4.3 Elementary string functions
iterating4.5 Elementary string functions on NUL terminated strings

J
Java, programming language8.11 ISO C and Java syntax

L
LGPL, GNU Lesser General Public LicenseA.2 GNU LESSER GENERAL PUBLIC LICENSE
License, GNU FDLA.3 GNU Free Documentation License
License, GNU GPLA.1 GNU GENERAL PUBLIC LICENSE
License, GNU LGPLA.2 GNU LESSER GENERAL PUBLIC LICENSE
LicensesA. Licenses
line breaks11. Line breaking <unilbrk.h>
locale1.3 Locale encodings
locale categories1.3 Locale encodings
locale encoding1.3 Locale encodings
locale encoding5. Conversions between Unicode and encodings <uniconv.h>
locale language13.2 Case mappings of strings
locale, multibyte1.5 ‘char *’ strings
locale_charset5. Conversions between Unicode and encodings <uniconv.h>
lowercasing13.2 Case mappings of strings

M
mailing list15.5 Reporting problems
mirroring, of Unicode character8.7 Mirrored character

N
normal forms12. Normalization forms (composition and decomposition) <uninorm.h>
normalizing12. Normalization forms (composition and decomposition) <uninorm.h>

O
output, formatted6. Output with Unicode strings <unistdio.h>

P
properties, of Unicode character8.8 Properties

R
regular expression14. Regular expressions <uniregex.h>
rendering16. More advanced functionality
return value conventions2. Conventions

S
scripts8.9 Scripts
searching, for a character4.3 Elementary string functions
searching, for a character4.5 Elementary string functions on NUL terminated strings
searching, for a substring4.5 Elementary string functions on NUL terminated strings
stream, normalizing a12.5 Normalization of streams of Unicode characters
struct uninorm_filter12.5 Normalization of streams of Unicode characters

T
titlecasing13.2 Case mappings of strings

U
u16_asnprintf6. Output with Unicode strings <unistdio.h>
u16_asprintf6. Output with Unicode strings <unistdio.h>
u16_casecmp13.4 Case insensitive comparison
u16_casecoll13.4 Case insensitive comparison
u16_casefold13.4 Case insensitive comparison
u16_casexfrm13.4 Case insensitive comparison
u16_casing_prefix_context13.3 Case mappings of substrings
u16_casing_prefixes_context13.3 Case mappings of substrings
u16_casing_suffix_context13.3 Case mappings of substrings
u16_casing_suffixes_context13.3 Case mappings of substrings
u16_check4.1 Elementary string checks
u16_chr4.3 Elementary string functions
u16_cmp4.3 Elementary string functions
u16_cmp24.3 Elementary string functions
u16_conv_from_encoding5. Conversions between Unicode and encodings <uniconv.h>
u16_conv_to_encoding5. Conversions between Unicode and encodings <uniconv.h>
u16_cpy4.3 Elementary string functions
u16_cpy_alloc4.4 Elementary string functions with memory allocation
u16_ct_casefold13.4 Case insensitive comparison
u16_ct_tolower13.3 Case mappings of substrings
u16_ct_totitle13.3 Case mappings of substrings
u16_ct_toupper13.3 Case mappings of substrings
u16_endswith4.5 Elementary string functions on NUL terminated strings
u16_is_cased13.5 Case detection
u16_is_casefolded13.5 Case detection
u16_is_lowercase13.5 Case detection
u16_is_titlecase13.5 Case detection
u16_is_uppercase13.5 Case detection
u16_mblen4.3 Elementary string functions
u16_mbsnlen4.3 Elementary string functions
u16_mbtouc4.3 Elementary string functions
u16_mbtouc_unsafe4.3 Elementary string functions
u16_mbtoucr4.3 Elementary string functions
u16_move4.3 Elementary string functions
u16_next4.5 Elementary string functions on NUL terminated strings
u16_normalize12.3 Normalization of strings
u16_normcmp12.4 Normalizing comparisons
u16_normcoll12.4 Normalizing comparisons
u16_normxfrm12.4 Normalizing comparisons
u16_possible_linebreaks11. Line breaking <unilbrk.h>
u16_prev4.5 Elementary string functions on NUL terminated strings
u16_set4.3 Elementary string functions
u16_snprintf6. Output with Unicode strings <unistdio.h>
u16_sprintf6. Output with Unicode strings <unistdio.h>
u16_startswith4.5 Elementary string functions on NUL terminated strings
u16_stpcpy4.5 Elementary string functions on NUL terminated strings
u16_stpncpy4.5 Elementary string functions on NUL terminated strings
u16_strcat4.5 Elementary string functions on NUL terminated strings
u16_strchr4.5 Elementary string functions on NUL terminated strings
u16_strcmp4.5 Elementary string functions on NUL terminated strings
u16_strcoll4.5 Elementary string functions on NUL terminated strings
u16_strconv_from_encoding5. Conversions between Unicode and encodings <uniconv.h>
u16_strconv_from_locale5. Conversions between Unicode and encodings <uniconv.h>
u16_strconv_to_encoding5. Conversions between Unicode and encodings <uniconv.h>
u16_strconv_to_locale5. Conversions between Unicode and encodings <uniconv.h>
u16_strcpy4.5 Elementary string functions on NUL terminated strings
u16_strcspn4.5 Elementary string functions on NUL terminated strings
u16_strdup4.5 Elementary string functions on NUL terminated strings
u16_strlen4.5 Elementary string functions on NUL terminated strings
u16_strmblen4.5 Elementary string functions on NUL terminated strings
u16_strmbtouc4.5 Elementary string functions on NUL terminated strings
u16_strncat4.5 Elementary string functions on NUL terminated strings
u16_strncmp4.5 Elementary string functions on NUL terminated strings
u16_strncpy4.5 Elementary string functions on NUL terminated strings
u16_strnlen4.5 Elementary string functions on NUL terminated strings
u16_strpbrk4.5 Elementary string functions on NUL terminated strings
u16_strrchr4.5 Elementary string functions on NUL terminated strings
u16_strspn4.5 Elementary string functions on NUL terminated strings
u16_strstr4.5 Elementary string functions on NUL terminated strings
u16_strtok4.5 Elementary string functions on NUL terminated strings
u16_strwidth9. Display width <uniwidth.h>
u16_to_u324.2 Elementary string conversions
u16_to_u84.2 Elementary string conversions
u16_tolower13.2 Case mappings of strings
u16_totitle13.2 Case mappings of strings
u16_toupper13.2 Case mappings of strings
u16_u16_asnprintf6. Output with Unicode strings <unistdio.h>
u16_u16_asprintf6. Output with Unicode strings <unistdio.h>
u16_u16_snprintf6. Output with Unicode strings <unistdio.h>
u16_u16_sprintf6. Output with Unicode strings <unistdio.h>
u16_u16_vasnprintf6. Output with Unicode strings <unistdio.h>
u16_u16_vasprintf6. Output with Unicode strings <unistdio.h>
u16_u16_vsnprintf6. Output with Unicode strings <unistdio.h>
u16_u16_vsprintf6. Output with Unicode strings <unistdio.h>
u16_uctomb4.3 Elementary string functions
u16_vasnprintf6. Output with Unicode strings <unistdio.h>
u16_vasprintf6. Output with Unicode strings <unistdio.h>
u16_vsnprintf6. Output with Unicode strings <unistdio.h>
u16_vsprintf6. Output with Unicode strings <unistdio.h>
u16_width9. Display width <uniwidth.h>
u16_width_linebreaks11. Line breaking <unilbrk.h>
u16_wordbreaks10.1 Word breaks in a string
u32_asnprintf6. Output with Unicode strings <unistdio.h>
u32_asprintf6. Output with Unicode strings <unistdio.h>
u32_casecmp13.4 Case insensitive comparison
u32_casecoll13.4 Case insensitive comparison
u32_casefold13.4 Case insensitive comparison
u32_casexfrm13.4 Case insensitive comparison
u32_casing_prefix_context13.3 Case mappings of substrings
u32_casing_prefixes_context13.3 Case mappings of substrings
u32_casing_suffix_context13.3 Case mappings of substrings
u32_casing_suffixes_context13.3 Case mappings of substrings
u32_check4.1 Elementary string checks
u32_chr4.3 Elementary string functions
u32_cmp4.3 Elementary string functions
u32_cmp24.3 Elementary string functions
u32_conv_from_encoding5. Conversions between Unicode and encodings <uniconv.h>
u32_conv_to_encoding5. Conversions between Unicode and encodings <uniconv.h>
u32_cpy4.3 Elementary string functions
u32_cpy_alloc4.4 Elementary string functions with memory allocation
u32_ct_casefold13.4 Case insensitive comparison
u32_ct_tolower13.3 Case mappings of substrings
u32_ct_totitle13.3 Case mappings of substrings
u32_ct_toupper13.3 Case mappings of substrings
u32_endswith4.5 Elementary string functions on NUL terminated strings
u32_is_cased13.5 Case detection
u32_is_casefolded13.5 Case detection
u32_is_lowercase13.5 Case detection
u32_is_titlecase13.5 Case detection
u32_is_uppercase13.5 Case detection
u32_mblen4.3 Elementary string functions
u32_mbsnlen4.3 Elementary string functions
u32_mbtouc4.3 Elementary string functions
u32_mbtouc_unsafe4.3 Elementary string functions
u32_mbtoucr4.3 Elementary string functions
u32_move4.3 Elementary string functions
u32_next4.5 Elementary string functions on NUL terminated strings
u32_normalize12.3 Normalization of strings
u32_normcmp12.4 Normalizing comparisons
u32_normcoll12.4 Normalizing comparisons
u32_normxfrm12.4 Normalizing comparisons
u32_possible_linebreaks11. Line breaking <unilbrk.h>
u32_prev4.5 Elementary string functions on NUL terminated strings
u32_set4.3 Elementary string functions
u32_snprintf6. Output with Unicode strings <unistdio.h>
u32_sprintf6. Output with Unicode strings <unistdio.h>
u32_startswith4.5 Elementary string functions on NUL terminated strings
u32_stpcpy4.5 Elementary string functions on NUL terminated strings
u32_stpncpy4.5 Elementary string functions on NUL terminated strings
u32_strcat4.5 Elementary string functions on NUL terminated strings
u32_strchr4.5 Elementary string functions on NUL terminated strings
u32_strcmp4.5 Elementary string functions on NUL terminated strings
u32_strcoll4.5 Elementary string functions on NUL terminated strings
u32_strconv_from_encoding5. Conversions between Unicode and encodings <uniconv.h>
u32_strconv_from_locale5. Conversions between Unicode and encodings <uniconv.h>
u32_strconv_to_encoding5. Conversions between Unicode and encodings <uniconv.h>
u32_strconv_to_locale5. Conversions between Unicode and encodings <uniconv.h>
u32_strcpy4.5 Elementary string functions on NUL terminated strings
u32_strcspn4.5 Elementary string functions on NUL terminated strings
u32_strdup4.5 Elementary string functions on NUL terminated strings
u32_strlen4.5 Elementary string functions on NUL terminated strings
u32_strmblen4.5 Elementary string functions on NUL terminated strings
u32_strmbtouc4.5 Elementary string functions on NUL terminated strings
u32_strncat4.5 Elementary string functions on NUL terminated strings
u32_strncmp4.5 Elementary string functions on NUL terminated strings
u32_strncpy4.5 Elementary string functions on NUL terminated strings
u32_strnlen4.5 Elementary string functions on NUL terminated strings
u32_strpbrk4.5 Elementary string functions on NUL terminated strings
u32_strrchr4.5 Elementary string functions on NUL terminated strings
u32_strspn4.5 Elementary string functions on NUL terminated strings
u32_strstr4.5 Elementary string functions on NUL terminated strings
u32_strtok4.5 Elementary string functions on NUL terminated strings
u32_strwidth9. Display width <uniwidth.h>
u32_to_u164.2 Elementary string conversions
u32_to_u84.2 Elementary string conversions
u32_tolower13.2 Case mappings of strings
u32_totitle13.2 Case mappings of strings
u32_toupper13.2 Case mappings of strings
u32_u32_asnprintf6. Output with Unicode strings <unistdio.h>
u32_u32_asprintf6. Output with Unicode strings <unistdio.h>
u32_u32_snprintf6. Output with Unicode strings <unistdio.h>
u32_u32_sprintf6. Output with Unicode strings <unistdio.h>
u32_u32_vasnprintf6. Output with Unicode strings <unistdio.h>
u32_u32_vasprintf6. Output with Unicode strings <unistdio.h>
u32_u32_vsnprintf6. Output with Unicode strings <unistdio.h>
u32_u32_vsprintf6. Output with Unicode strings <unistdio.h>
u32_uctomb4.3 Elementary string functions
u32_vasnprintf6. Output with Unicode strings <unistdio.h>
u32_vasprintf6. Output with Unicode strings <unistdio.h>
u32_vsnprintf6. Output with Unicode strings <unistdio.h>
u32_vsprintf6. Output with Unicode strings <unistdio.h>
u32_width9. Display width <uniwidth.h>
u32_width_linebreaks11. Line breaking <unilbrk.h>
u32_wordbreaks10.1 Word breaks in a string
u8_asnprintf6. Output with Unicode strings <unistdio.h>
u8_asprintf6. Output with Unicode strings <unistdio.h>
u8_casecmp13.4 Case insensitive comparison
u8_casecoll13.4 Case insensitive comparison
u8_casefold13.4 Case insensitive comparison
u8_casexfrm13.4 Case insensitive comparison
u8_casing_prefix_context13.3 Case mappings of substrings
u8_casing_prefixes_context13.3 Case mappings of substrings
u8_casing_suffix_context13.3 Case mappings of substrings
u8_casing_suffixes_context13.3 Case mappings of substrings
u8_check4.1 Elementary string checks
u8_chr4.3 Elementary string functions
u8_cmp4.3 Elementary string functions
u8_cmp24.3 Elementary string functions
u8_conv_from_encoding5. Conversions between Unicode and encodings <uniconv.h>
u8_conv_to_encoding5. Conversions between Unicode and encodings <uniconv.h>
u8_cpy4.3 Elementary string functions
u8_cpy_alloc4.4 Elementary string functions with memory allocation
u8_ct_casefold13.4 Case insensitive comparison
u8_ct_tolower13.3 Case mappings of substrings
u8_ct_totitle13.3 Case mappings of substrings
u8_ct_toupper13.3 Case mappings of substrings
u8_endswith4.5 Elementary string functions on NUL terminated strings
u8_is_cased13.5 Case detection
u8_is_casefolded13.5 Case detection
u8_is_lowercase13.5 Case detection
u8_is_titlecase13.5 Case detection
u8_is_uppercase13.5 Case detection
u8_mblen4.3 Elementary string functions
u8_mbsnlen4.3 Elementary string functions
u8_mbtouc4.3 Elementary string functions
u8_mbtouc_unsafe4.3 Elementary string functions
u8_mbtoucr4.3 Elementary string functions
u8_move4.3 Elementary string functions
u8_next4.5 Elementary string functions on NUL terminated strings
u8_normalize12.3 Normalization of strings
u8_normcmp12.4 Normalizing comparisons
u8_normcoll12.4 Normalizing comparisons
u8_normxfrm12.4 Normalizing comparisons
u8_possible_linebreaks11. Line breaking <unilbrk.h>
u8_prev4.5 Elementary string functions on NUL terminated strings
u8_set4.3 Elementary string functions
u8_snprintf6. Output with Unicode strings <unistdio.h>
u8_sprintf6. Output with Unicode strings <unistdio.h>
u8_startswith4.5 Elementary string functions on NUL terminated strings
u8_stpcpy4.5 Elementary string functions on NUL terminated strings
u8_stpncpy4.5 Elementary string functions on NUL terminated strings
u8_strcat4.5 Elementary string functions on NUL terminated strings
u8_strchr4.5 Elementary string functions on NUL terminated strings
u8_strcmp4.5 Elementary string functions on NUL terminated strings
u8_strcoll4.5 Elementary string functions on NUL terminated strings
u8_strconv_from_encoding5. Conversions between Unicode and encodings <uniconv.h>
u8_strconv_from_locale5. Conversions between Unicode and encodings <uniconv.h>
u8_strconv_to_encoding5. Conversions between Unicode and encodings <uniconv.h>
u8_strconv_to_locale5. Conversions between Unicode and encodings <uniconv.h>
u8_strcpy4.5 Elementary string functions on NUL terminated strings
u8_strcspn4.5 Elementary string functions on NUL terminated strings
u8_strdup4.5 Elementary string functions on NUL terminated strings
u8_strlen4.5 Elementary string functions on NUL terminated strings
u8_strmblen4.5 Elementary string functions on NUL terminated strings
u8_strmbtouc4.5 Elementary string functions on NUL terminated strings
u8_strncat4.5 Elementary string functions on NUL terminated strings
u8_strncmp4.5 Elementary string functions on NUL terminated strings
u8_strncpy4.5 Elementary string functions on NUL terminated strings
u8_strnlen4.5 Elementary string functions on NUL terminated strings
u8_strpbrk4.5 Elementary string functions on NUL terminated strings
u8_strrchr4.5 Elementary string functions on NUL terminated strings
u8_strspn4.5 Elementary string functions on NUL terminated strings
u8_strstr4.5 Elementary string functions on NUL terminated strings
u8_strtok4.5 Elementary string functions on NUL terminated strings
u8_strwidth9. Display width <uniwidth.h>
u8_to_u164.2 Elementary string conversions
u8_to_u324.2 Elementary string conversions
u8_tolower13.2 Case mappings of strings
u8_totitle13.2 Case mappings of strings
u8_toupper13.2 Case mappings of strings
u8_u8_asnprintf6. Output with Unicode strings <unistdio.h>
u8_u8_asprintf6. Output with Unicode strings <unistdio.h>
u8_u8_snprintf6. Output with Unicode strings <unistdio.h>
u8_u8_sprintf6. Output with Unicode strings <unistdio.h>
u8_u8_vasnprintf6. Output with Unicode strings <unistdio.h>
u8_u8_vasprintf6. Output with Unicode strings <unistdio.h>
u8_u8_vsnprintf6. Output with Unicode strings <unistdio.h>
u8_u8_vsprintf6. Output with Unicode strings <unistdio.h>
u8_uctomb4.3 Elementary string functions
u8_vasnprintf6. Output with Unicode strings <unistdio.h>
u8_vasprintf6. Output with Unicode strings <unistdio.h>
u8_vsnprintf6. Output with Unicode strings <unistdio.h>
u8_vsprintf6. Output with Unicode strings <unistdio.h>
u8_width9. Display width <uniwidth.h>
u8_width_linebreaks11. Line breaking <unilbrk.h>
u8_wordbreaks10.1 Word breaks in a string
uc_all_blocks8.10 Blocks
uc_all_scripts8.9 Scripts
uc_bidi_category8.3 Bidirectional category
uc_bidi_category_byname8.3 Bidirectional category
uc_bidi_category_name8.3 Bidirectional category
uc_block8.10 Blocks
uc_block_t8.10 Blocks
uc_c_ident_category8.11 ISO C and Java syntax
uc_canonical_decomposition12.1 Decomposition of Unicode characters
uc_combining_class8.2 Canonical combining class
uc_composition12.2 Composition of Unicode characters
uc_decimal_value8.4 Decimal digit value
uc_decomposition12.1 Decomposition of Unicode characters
uc_digit_value8.5 Digit value
uc_fraction_t8.6 Numeric value
uc_general_category8.1.1 The object oriented API for general category
uc_general_category_and8.1.1 The object oriented API for general category
uc_general_category_and_not8.1.1 The object oriented API for general category
uc_general_category_byname8.1.1 The object oriented API for general category
uc_general_category_name8.1.1 The object oriented API for general category
uc_general_category_or8.1.1 The object oriented API for general category
uc_general_category_t8.1.1 The object oriented API for general category
uc_is_alnum8.12 Classifications like in ISO C
uc_is_alpha8.12 Classifications like in ISO C
uc_is_bidi_category8.3 Bidirectional category
uc_is_blank8.12 Classifications like in ISO C
uc_is_block8.10 Blocks
uc_is_c_whitespace8.11 ISO C and Java syntax
uc_is_cntrl8.12 Classifications like in ISO C
uc_is_digit8.12 Classifications like in ISO C
uc_is_general_category8.1.1 The object oriented API for general category
uc_is_general_category_withtable8.1.2 The bit mask API for general category
uc_is_graph8.12 Classifications like in ISO C
uc_is_java_whitespace8.11 ISO C and Java syntax
uc_is_lower8.12 Classifications like in ISO C
uc_is_print8.12 Classifications like in ISO C
uc_is_property8.8.1 Properties as objects – the object oriented API
uc_is_property_alphabetic8.8.2 Properties as functions – the functional API
uc_is_property_ascii_hex_digit8.8.2 Properties as functions – the functional API
uc_is_property_bidi_arabic_digit8.8.2 Properties as functions – the functional API
uc_is_property_bidi_arabic_right_to_left8.8.2 Properties as functions – the functional API
uc_is_property_bidi_block_separator8.8.2 Properties as functions – the functional API
uc_is_property_bidi_boundary_neutral8.8.2 Properties as functions – the functional API
uc_is_property_bidi_common_separator8.8.2 Properties as functions – the functional API
uc_is_property_bidi_control8.8.2 Properties as functions – the functional API
uc_is_property_bidi_embedding_or_override8.8.2 Properties as functions – the functional API
uc_is_property_bidi_eur_num_separator8.8.2 Properties as functions – the functional API
uc_is_property_bidi_eur_num_terminator8.8.2 Properties as functions – the functional API
uc_is_property_bidi_european_digit8.8.2 Properties as functions – the functional API
uc_is_property_bidi_hebrew_right_to_left8.8.2 Properties as functions – the functional API
uc_is_property_bidi_left_to_right8.8.2 Properties as functions – the functional API
uc_is_property_bidi_non_spacing_mark8.8.2 Properties as functions – the functional API
uc_is_property_bidi_other_neutral8.8.2 Properties as functions – the functional API
uc_is_property_bidi_pdf8.8.2 Properties as functions – the functional API
uc_is_property_bidi_segment_separator8.8.2 Properties as functions – the functional API
uc_is_property_bidi_whitespace8.8.2 Properties as functions – the functional API
uc_is_property_combining8.8.2 Properties as functions – the functional API
uc_is_property_composite8.8.2 Properties as functions – the functional API
uc_is_property_currency_symbol8.8.2 Properties as functions – the functional API
uc_is_property_dash8.8.2 Properties as functions – the functional API
uc_is_property_decimal_digit8.8.2 Properties as functions – the functional API
uc_is_property_default_ignorable_code_point8.8.2 Properties as functions – the functional API
uc_is_property_deprecated8.8.2 Properties as functions – the functional API
uc_is_property_diacritic8.8.2 Properties as functions – the functional API
uc_is_property_extender8.8.2 Properties as functions – the functional API
uc_is_property_format_control8.8.2 Properties as functions – the functional API
uc_is_property_grapheme_base8.8.2 Properties as functions – the functional API
uc_is_property_grapheme_extend8.8.2 Properties as functions – the functional API
uc_is_property_grapheme_link8.8.2 Properties as functions – the functional API
uc_is_property_hex_digit8.8.2 Properties as functions – the functional API
uc_is_property_hyphen8.8.2 Properties as functions – the functional API
uc_is_property_id_continue8.8.2 Properties as functions – the functional API
uc_is_property_id_start8.8.2 Properties as functions – the functional API
uc_is_property_ideographic8.8.2 Properties as functions – the functional API
uc_is_property_ids_binary_operator8.8.2 Properties as functions – the functional API
uc_is_property_ids_trinary_operator8.8.2 Properties as functions – the functional API
uc_is_property_ignorable_control8.8.2 Properties as functions – the functional API
uc_is_property_iso_control8.8.2 Properties as functions – the functional API
uc_is_property_join_control8.8.2 Properties as functions – the functional API
uc_is_property_left_of_pair8.8.2 Properties as functions – the functional API
uc_is_property_line_separator8.8.2 Properties as functions – the functional API
uc_is_property_logical_order_exception8.8.2 Properties as functions – the functional API
uc_is_property_lowercase8.8.2 Properties as functions – the functional API
uc_is_property_math8.8.2 Properties as functions – the functional API
uc_is_property_non_break8.8.2 Properties as functions – the functional API
uc_is_property_not_a_character8.8.2 Properties as functions – the functional API
uc_is_property_numeric8.8.2 Properties as functions – the functional API
uc_is_property_other_alphabetic8.8.2 Properties as functions – the functional API
uc_is_property_other_default_ignorable_code_point8.8.2 Properties as functions – the functional API
uc_is_property_other_grapheme_extend8.8.2 Properties as functions – the functional API
uc_is_property_other_id_continue8.8.2 Properties as functions – the functional API
uc_is_property_other_id_start8.8.2 Properties as functions – the functional API
uc_is_property_other_lowercase8.8.2 Properties as functions – the functional API
uc_is_property_other_math8.8.2 Properties as functions – the functional API
uc_is_property_other_uppercase8.8.2 Properties as functions – the functional API
uc_is_property_paired_punctuation8.8.2 Properties as functions – the functional API
uc_is_property_paragraph_separator8.8.2 Properties as functions – the functional API
uc_is_property_pattern_syntax8.8.2 Properties as functions – the functional API
uc_is_property_pattern_white_space8.8.2 Properties as functions – the functional API
uc_is_property_private_use8.8.2 Properties as functions – the functional API
uc_is_property_punctuation8.8.2 Properties as functions – the functional API
uc_is_property_quotation_mark8.8.2 Properties as functions – the functional API
uc_is_property_radical8.8.2 Properties as functions – the functional API
uc_is_property_sentence_terminal8.8.2 Properties as functions – the functional API
uc_is_property_soft_dotted8.8.2 Properties as functions – the functional API
uc_is_property_space8.8.2 Properties as functions – the functional API
uc_is_property_terminal_punctuation8.8.2 Properties as functions – the functional API
uc_is_property_titlecase8.8.2 Properties as functions – the functional API
uc_is_property_unassigned_code_value8.8.2 Properties as functions – the functional API
uc_is_property_unified_ideograph8.8.2 Properties as functions – the functional API
uc_is_property_uppercase8.8.2 Properties as functions – the functional API
uc_is_property_variation_selector8.8.2 Properties as functions – the functional API
uc_is_property_white_space8.8.2 Properties as functions – the functional API
uc_is_property_xid_continue8.8.2 Properties as functions – the functional API
uc_is_property_xid_start8.8.2 Properties as functions – the functional API
uc_is_property_zero_width8.8.2 Properties as functions – the functional API
uc_is_punct8.12 Classifications like in ISO C
uc_is_script8.9 Scripts
uc_is_space8.12 Classifications like in ISO C
uc_is_upper8.12 Classifications like in ISO C
uc_is_xdigit8.12 Classifications like in ISO C
uc_java_ident_category8.11 ISO C and Java syntax
uc_locale_language13.2 Case mappings of strings
uc_mirror_char8.7 Mirrored character
uc_numeric_value8.6 Numeric value
uc_property_byname8.8.1 Properties as objects – the object oriented API
uc_property_is_valid8.8.1 Properties as objects – the object oriented API
uc_property_t8.8.1 Properties as objects – the object oriented API
uc_script8.9 Scripts
uc_script_byname8.9 Scripts
uc_script_t8.9 Scripts
uc_tolower13.1 Case mappings of characters
uc_totitle13.1 Case mappings of characters
uc_toupper13.1 Case mappings of characters
uc_width9. Display width <uniwidth.h>
uc_wordbreak_property10.2 Word break property
UCS-41.1 Unicode
ucs4_t3. Elementary types <unitypes.h>
uint16_t3. Elementary types <unitypes.h>
uint32_t3. Elementary types <unitypes.h>
uint8_t3. Elementary types <unitypes.h>
ulc_asnprintf6. Output with Unicode strings <unistdio.h>
ulc_asprintf6. Output with Unicode strings <unistdio.h>
ulc_casecmp13.4 Case insensitive comparison
ulc_casecoll13.4 Case insensitive comparison
ulc_casexfrm13.4 Case insensitive comparison
ulc_fprintf6. Output with Unicode strings <unistdio.h>
ulc_possible_linebreaks11. Line breaking <unilbrk.h>
ulc_snprintf6. Output with Unicode strings <unistdio.h>
ulc_sprintf6. Output with Unicode strings <unistdio.h>
ulc_vasnprintf6. Output with Unicode strings <unistdio.h>
ulc_vasprintf6. Output with Unicode strings <unistdio.h>
ulc_vfprintf6. Output with Unicode strings <unistdio.h>
ulc_vsnprintf6. Output with Unicode strings <unistdio.h>
ulc_vsprintf6. Output with Unicode strings <unistdio.h>
ulc_width_linebreaks11. Line breaking <unilbrk.h>
ulc_wordbreaks10.1 Word breaks in a string
Unicode1.1 Unicode
Unicode character, bidirectional category8.3 Bidirectional category
Unicode character, block8.10 Blocks
Unicode character, canonical combining class8.2 Canonical combining class
Unicode character, case mappings13.1 Case mappings of characters
Unicode character, classification8.1 General category
Unicode character, classification like in C8.12 Classifications like in ISO C
Unicode character, general category8.1 General category
Unicode character, mirroring8.7 Mirrored character
Unicode character, name7. Names of Unicode characters <uniname.h>
Unicode character, properties8.8 Properties
Unicode character, script8.9 Scripts
Unicode character, validity in C identifiers8.11 ISO C and Java syntax
Unicode character, validity in Java identifiers8.11 ISO C and Java syntax
Unicode character, value8.4 Decimal digit value
Unicode character, value8.5 Digit value
Unicode character, value8.6 Numeric value
Unicode character, width9. Display width <uniwidth.h>
unicode_character_name7. Names of Unicode characters <uniname.h>
unicode_name_character7. Names of Unicode characters <uniname.h>
uninorm_decomposing_form12.3 Normalization of strings
uninorm_filter_create12.5 Normalization of streams of Unicode characters
uninorm_filter_flush12.5 Normalization of streams of Unicode characters
uninorm_filter_free12.5 Normalization of streams of Unicode characters
uninorm_filter_write12.5 Normalization of streams of Unicode characters
uninorm_is_compat_decomposing12.3 Normalization of strings
uninorm_is_composing12.3 Normalization of strings
uninorm_t12.3 Normalization of strings
uppercasing13.2 Case mappings of strings
use cases1. Introduction
UTF-161.1 Unicode
UTF-16, strings1.7 Unicode strings
UTF-321.1 Unicode
UTF-32, strings1.7 Unicode strings
UTF-81.1 Unicode
UTF-8, strings1.7 Unicode strings

+
Jump to:   A +   +B +   +C +   +D +   +E +   +F +   +G +   +H +   +I +   +J +   +L +   +M +   +N +   +O +   +P +   +R +   +S +   +T +   +U +   +V +   +W +   +
+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_19.html b/doc/libunistring_19.html new file mode 100644 index 00000000..5974e2f6 --- /dev/null +++ b/doc/libunistring_19.html @@ -0,0 +1,188 @@ + + + + + +GNU libunistring: Index: V – W + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ +

Index: V – W

+
Jump to:   A +   +B +   +C +   +D +   +E +   +F +   +G +   +H +   +I +   +J +   +L +   +M +   +N +   +O +   +P +   +R +   +S +   +T +   +U +   +V +   +W +   +
+ + + + + + + + + + + + + + + + + +
Index Entry Section

V
validity4.1 Elementary string checks
value, of libunistring1. Introduction
value, of Unicode character8.4 Decimal digit value
value, of Unicode character8.5 Digit value
value, of Unicode character8.6 Numeric value
verification4.1 Elementary string checks

W
wchar_t, type1.6 The wchar_t mess
width9. Display width <uniwidth.h>
word breaks10. Word breaks in strings <uniwbrk.h>
wrapping11. Line breaking <unilbrk.h>

+
Jump to:   A +   +B +   +C +   +D +   +E +   +F +   +G +   +H +   +I +   +J +   +L +   +M +   +N +   +O +   +P +   +R +   +S +   +T +   +U +   +V +   +W +   +
+ +
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_2.html b/doc/libunistring_2.html new file mode 100644 index 00000000..6d63b632 --- /dev/null +++ b/doc/libunistring_2.html @@ -0,0 +1,141 @@ + + + + + +GNU libunistring: 2. Conventions + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

2. Conventions

+ +

This chapter explains conventions valid throughout the libunistring library. +

+ +

Variables of type char * denote C strings in locale encoding. +See Locale encodings. +

+

Variables of type uint8_t * denote UTF-8 strings. Their units +are bytes. +

+

Variables of type uint16_t * denote UTF-16 strings, without byte +order mark. Their units are 2-byte words. +

+

Variables of type uint32_t * denote UTF-32 strings, without byte +order mark. Their units are 4-byte words. +

+

Argument pairs (s, n) denote a string +s[0..n-1] with exactly n units. +

+

All functions with prefix ‘ulc_’ operate on C strings in locale +encoding. +

+

All functions with prefix ‘u8_’ operate on UTF-8 strings. +

+

All functions with prefix ‘u16_’ operate on UTF-16 strings. +

+

All functions with prefix ‘u32_’ operate on UTF-32 strings. +

+

For every function with prefix ‘u8_’, operating on UTF-8 strings, +there is also a corresponding function with prefix ‘u16_’, +operating on UTF-16 strings, and a corresponding function with prefix +‘u32_’, operating on UTF-32 strings. Their description is +analogous; in this documentation we describe only the function that +operates on UTF-8 strings, for brevity. +

+

A declaration with a variable n denotes the three concrete +declarations with n = 8, n = 16, n = 32. +

+

All parameters starting with ‘str’ and the parameters of +functions starting with u8_str/u16_str/u32_str +denote a NUL terminated string. +

+ +

Error values are always returned through the errno variable, +usually with a return value that indicates the presence of an error +(NULL for functions that return a pointer, or -1 for functions that +return an int). +

+

Functions returning a string result take a +(resultbuf, lengthp) +argument pair. If resultbuf is not NULL and the result fits +into *lengthp units, it is put in resultbuf, and +resultbuf is returned. Otherwise, a freshly allocated string +is returned. In both cases, *lengthp is set to the +length (number of units) of the returned string. In case of error, +NULL is returned and errno is set. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_3.html b/doc/libunistring_3.html new file mode 100644 index 00000000..cc446970 --- /dev/null +++ b/doc/libunistring_3.html @@ -0,0 +1,107 @@ + + + + + +GNU libunistring: 3. Elementary types <unitypes.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

3. Elementary types <unitypes.h>

+ +

The include file <unitypes.h> provides the following basic types. +

+
+
Type: uint8_t + +
+
Type: uint16_t + +
+
Type: uint32_t + +
+

These are the storage units of UTF-8/16/32 strings, respectively. The definitions are +taken from <stdint.h>, on platforms where this include file is present. +

+ +
+
Type: ucs4_t + +
+

This type represents a single Unicode character, outside of an UTF-32 string. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_4.html b/doc/libunistring_4.html new file mode 100644 index 00000000..60992cd0 --- /dev/null +++ b/doc/libunistring_4.html @@ -0,0 +1,864 @@ + + + + + +GNU libunistring: 4. Elementary Unicode string functions <unistr.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

4. Elementary Unicode string functions <unistr.h>

+ +

This include file declares elementary functions for Unicode strings. It is +essentially the equivalent of what <string.h> is for C strings. +

+ +
+ + +

4.1 Elementary string checks

+ +

The following function is available to verify the integrity of a Unicode string. +

+
+
Function: const uint8_t * u8_check (const uint8_t *s, size_t n) + +
+
Function: const uint16_t * u16_check (const uint16_t *s, size_t n) + +
+
Function: const uint32_t * u32_check (const uint32_t *s, size_t n) + +
+

This function checks whether a Unicode string is well-formed. +It returns NULL if valid, or a pointer to the first invalid unit otherwise. +

+ +
+ + +

4.2 Elementary string conversions

+ +

The following functions perform conversions between the different forms of Unicode strings. +

+
+
Function: uint16_t * u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, size_t *lengthp) + +
+

Converts an UTF-8 string to an UTF-16 string. +

+ +
+
Function: uint32_t * u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, size_t *lengthp) + +
+

Converts an UTF-8 string to an UTF-32 string. +

+ +
+
Function: uint8_t * u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, size_t *lengthp) + +
+

Converts an UTF-16 string to an UTF-8 string. +

+ +
+
Function: uint32_t * u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, size_t *lengthp) + +
+

Converts an UTF-16 string to an UTF-32 string. +

+ +
+
Function: uint8_t * u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, size_t *lengthp) + +
+

Converts an UTF-32 string to an UTF-8 string. +

+ +
+
Function: uint16_t * u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, size_t *lengthp) + +
+

Converts an UTF-32 string to an UTF-16 string. +

+ +
+ + +

4.3 Elementary string functions

+ +

The following functions inspect and return details about the first character +in a Unicode string. +

+
+
Function: int u8_mblen (const uint8_t *s, size_t n) + +
+
Function: int u16_mblen (const uint16_t *s, size_t n) + +
+
Function: int u32_mblen (const uint32_t *s, size_t n) + +
+

Returns the length (number of units) of the first character in s, which +is no longer than n. Returns 0 if it is the NUL character. Returns -1 +upon failure. +

+

This function is similar to mblen, except that it operates on a +Unicode string and that s must not be NULL. +

+ +
+
Function: int u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) + +
+
Function: int u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n) + +
+
Function: int u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n) + +
+

Returns the length (number of units) of the first character in s, +putting its ucs4_t representation in *puc. Upon failure, +*puc is set to 0xfffd, and an appropriate number of units +is returned. +

+

The number of available units, n, must be > 0. +

+

This function is similar to mbtowc, except that it operates on a +Unicode string, puc and s must not be NULL, n must be > 0, +and the NUL character is not treated specially. +

+ +
+
Function: int u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) + +
+
Function: int u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) + +
+
Function: int u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n) + +
+

This function is like u8_mbtouc_unsafe, except that it will detect an +invalid UTF-8 character, even if the library is compiled without +‘--enable-safety’. +

+ +
+
Function: int u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) + +
+
Function: int u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n) + +
+
Function: int u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n) + +
+

Returns the length (number of units) of the first character in s, +putting its ucs4_t representation in *puc. Upon failure, +*puc is set to 0xfffd, and -1 is returned for an invalid +sequence of units, -2 is returned for an incomplete sequence of units. +

+

The number of available units, n, must be > 0. +

+

This function is similar to u8_mbtouc, except that the return value +gives more details about the failure, similar to mbrtowc. +

+ +

The following function stores a Unicode character as a Unicode string in +memory. +

+
+
Function: int u8_uctomb (uint8_t *s, ucs4_t uc, int n) + +
+
Function: int u16_uctomb (uint16_t *s, ucs4_t uc, int n) + +
+
Function: int u32_uctomb (uint32_t *s, ucs4_t uc, int n) + +
+

Puts the multibyte character represented by uc in s, returning its +length. Returns -1 upon failure, -2 if the number of available units, n, +is too small. The latter case cannot occur if n >= 6/2/1, respectively. +

+

This function is similar to wctomb, except that it operates on +Unicode strings, s must not be NULL, and the argument n must be +specified. +

+ + +

The following functions copy Unicode strings in memory. +

+
+
Function: uint8_t * u8_cpy (uint8_t *dest, const uint8_t *src, size_t n) + +
+
Function: uint16_t * u16_cpy (uint16_t *dest, const uint16_t *src, size_t n) + +
+
Function: uint32_t * u32_cpy (uint32_t *dest, const uint32_t *src, size_t n) + +
+

Copies n units from src to dest. +

+

This function is similar to memcpy, except that it operates on +Unicode strings. +

+ +
+
Function: uint8_t * u8_move (uint8_t *dest, const uint8_t *src, size_t n) + +
+
Function: uint16_t * u16_move (uint16_t *dest, const uint16_t *src, size_t n) + +
+
Function: uint32_t * u32_move (uint32_t *dest, const uint32_t *src, size_t n) + +
+

Copies n units from src to dest, guaranteeing correct +behavior for overlapping memory areas. +

+

This function is similar to memmove, except that it operates on +Unicode strings. +

+ +

The following function fills a Unicode string. +

+
+
Function: uint8_t * u8_set (uint8_t *s, ucs4_t uc, size_t n) + +
+
Function: uint16_t * u16_set (uint16_t *s, ucs4_t uc, size_t n) + +
+
Function: uint32_t * u32_set (uint32_t *s, ucs4_t uc, size_t n) + +
+

Sets the first n characters of s to uc. uc should be +a character that occupies only 1 unit. +

+

This function is similar to memset, except that it operates on +Unicode strings. +

+ + +

The following function compares two Unicode strings of the same length. +

+
+
Function: int u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n) + +
+
Function: int u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n) + +
+
Function: int u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n) + +
+

Compares s1 and s2, each of length n, lexicographically. +Returns a negative value if s1 compares smaller than s2, +a positive value if s1 compares larger than s2, or 0 if +they compare equal. +

+

This function is similar to memcmp, except that it operates on +Unicode strings. +

+ +

The following function compares two Unicode strings of possibly different +lengths. +

+
+
Function: int u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2) + +
+
Function: int u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2) + +
+
Function: int u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2) + +
+

Compares s1 and s2, lexicographically. +Returns a negative value if s1 compares smaller than s2, +a positive value if s1 compares larger than s2, or 0 if +they compare equal. +

+

This function is similar to the gnulib function memcmp2, except that it +operates on Unicode strings. +

+ + +

The following function searches for a given Unicode character. +

+
+
Function: uint8_t * u8_chr (const uint8_t *s, size_t n, ucs4_t uc) + +
+
Function: uint16_t * u16_chr (const uint16_t *s, size_t n, ucs4_t uc) + +
+
Function: uint32_t * u32_chr (const uint32_t *s, size_t n, ucs4_t uc) + +
+

Searches the string at s for uc. Returns a pointer to the first +occurrence of uc in s, or NULL if uc does not occur in +s. +

+

This function is similar to memchr, except that it operates on +Unicode strings. +

+ + +

The following function counts the number of Unicode characters. +

+
+
Function: size_t u8_mbsnlen (const uint8_t *s, size_t n) + +
+
Function: size_t u16_mbsnlen (const uint16_t *s, size_t n) + +
+
Function: size_t u32_mbsnlen (const uint32_t *s, size_t n) + +
+

Counts and returns the number of Unicode characters in the n units +from s. +

+

This function is similar to the gnulib function mbsnlen, except that +it operates on Unicode strings. +

+ +
+ + +

4.4 Elementary string functions with memory allocation

+ +

The following function copies a Unicode string. +

+
+
Function: uint8_t * u8_cpy_alloc (const uint8_t *s, size_t n) + +
+
Function: uint16_t * u16_cpy_alloc (const uint16_t *s, size_t n) + +
+
Function: uint32_t * u32_cpy_alloc (const uint32_t *s, size_t n) + +
+

Makes a freshly allocated copy of s, of length n. +

+ +
+ + +

4.5 Elementary string functions on NUL terminated strings

+ +

The following functions inspect and return details about the first character +in a Unicode string. +

+
+
Function: int u8_strmblen (const uint8_t *s) + +
+
Function: int u16_strmblen (const uint16_t *s) + +
+
Function: int u32_strmblen (const uint32_t *s) + +
+

Returns the length (number of units) of the first character in s. +Returns 0 if it is the NUL character. Returns -1 upon failure. +

+ + +
+
Function: int u8_strmbtouc (ucs4_t *puc, const uint8_t *s) + +
+
Function: int u16_strmbtouc (ucs4_t *puc, const uint16_t *s) + +
+
Function: int u32_strmbtouc (ucs4_t *puc, const uint32_t *s) + +
+

Returns the length (number of units) of the first character in s, +putting its ucs4_t representation in *puc. Returns 0 +if it is the NUL character. Returns -1 upon failure. +

+ +
+
Function: const uint8_t * u8_next (ucs4_t *puc, const uint8_t *s) + +
+
Function: const uint16_t * u16_next (ucs4_t *puc, const uint16_t *s) + +
+
Function: const uint32_t * u32_next (ucs4_t *puc, const uint32_t *s) + +
+

Forward iteration step. Advances the pointer past the next character, +or returns NULL if the end of the string has been reached. Puts the +character's ucs4_t representation in *puc. +

+ +

The following function inspects and returns details about the previous +character in a Unicode string. +

+
+
Function: const uint8_t * u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start) + +
+
Function: const uint16_t * u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start) + +
+
Function: const uint32_t * u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start) + +
+

Backward iteration step. Advances the pointer to point to the previous +character, or returns NULL if the beginning of the string has been reached. +Puts the character's ucs4_t representation in *puc. +

+ +

The following functions determine the length of a Unicode string. +

+
+
Function: size_t u8_strlen (const uint8_t *s) + +
+
Function: size_t u16_strlen (const uint16_t *s) + +
+
Function: size_t u32_strlen (const uint32_t *s) + +
+

Returns the number of units in s. +

+

This function is similar to strlen and wcslen, except +that it operates on Unicode strings. +

+ +
+
Function: size_t u8_strnlen (const uint8_t *s, size_t maxlen) + +
+
Function: size_t u16_strnlen (const uint16_t *s, size_t maxlen) + +
+
Function: size_t u32_strnlen (const uint32_t *s, size_t maxlen) + +
+

Returns the number of units in s, but at most maxlen. +

+

This function is similar to strnlen and wcsnlen, except +that it operates on Unicode strings. +

+ + +

The following functions copy portions of Unicode strings in memory. +

+
+
Function: uint8_t * u8_strcpy (uint8_t *dest, const uint8_t *src) + +
+
Function: uint16_t * u16_strcpy (uint16_t *dest, const uint16_t *src) + +
+
Function: uint32_t * u32_strcpy (uint32_t *dest, const uint32_t *src) + +
+

Copies src to dest. +

+

This function is similar to strcpy and wcscpy, except +that it operates on Unicode strings. +

+ +
+
Function: uint8_t * u8_stpcpy (uint8_t *dest, const uint8_t *src) + +
+
Function: uint16_t * u16_stpcpy (uint16_t *dest, const uint16_t *src) + +
+
Function: uint32_t * u32_stpcpy (uint32_t *dest, const uint32_t *src) + +
+

Copies src to dest, returning the address of the terminating NUL +in dest. +

+

This function is similar to stpcpy, except that it operates on +Unicode strings. +

+ +
+
Function: uint8_t * u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n) + +
+
Function: uint16_t * u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n) + +
+
Function: uint32_t * u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n) + +
+

Copies no more than n units of src to dest. +

+

This function is similar to strncpy and wcsncpy, except +that it operates on Unicode strings. +

+ +
+
Function: uint8_t * u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n) + +
+
Function: uint16_t * u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n) + +
+
Function: uint32_t * u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n) + +
+

Copies no more than n units of src to dest, returning the +address of the last unit written into dest. +

+

This function is similar to stpncpy, except that it operates on +Unicode strings. +

+ +
+
Function: uint8_t * u8_strcat (uint8_t *dest, const uint8_t *src) + +
+
Function: uint16_t * u16_strcat (uint16_t *dest, const uint16_t *src) + +
+
Function: uint32_t * u32_strcat (uint32_t *dest, const uint32_t *src) + +
+

Appends src onto dest. +

+

This function is similar to strcat and wcscat, except +that it operates on Unicode strings. +

+ +
+
Function: uint8_t * u8_strncat (uint8_t *dest, const uint8_t *src, size_t n) + +
+
Function: uint16_t * u16_strncat (uint16_t *dest, const uint16_t *src, size_t n) + +
+
Function: uint32_t * u32_strncat (uint32_t *dest, const uint32_t *src, size_t n) + +
+

Appends no more than n units of src onto dest. +

+

This function is similar to strncat and wcsncat, except +that it operates on Unicode strings. +

+ + +

The following functions compare two Unicode strings. +

+
+
Function: int u8_strcmp (const uint8_t *s1, const uint8_t *s2) + +
+
Function: int u16_strcmp (const uint16_t *s1, const uint16_t *s2) + +
+
Function: int u32_strcmp (const uint32_t *s1, const uint32_t *s2) + +
+

Compares s1 and s2, lexicographically. +Returns a negative value if s1 compares smaller than s2, +a positive value if s1 compares larger than s2, or 0 if +they compare equal. +

+

This function is similar to strcmp and wcscmp, except +that it operates on Unicode strings. +

+ + +
+
Function: int u8_strcoll (const uint8_t *s1, const uint8_t *s2) + +
+
Function: int u16_strcoll (const uint16_t *s1, const uint16_t *s2) + +
+
Function: int u32_strcoll (const uint32_t *s1, const uint32_t *s2) + +
+

Compares s1 and s2 using the collation rules of the current +locale. +Returns -1 if s1 < s2, 0 if s1 = s2, 1 if +s1 > s2. Upon failure, sets errno and returns any value. +

+

This function is similar to strcoll and wcscoll, except +that it operates on Unicode strings. +

+

Note that this function may consider different canonical normalizations +of the same string as having a large distance. It is therefore better to +use the function u8_normcoll instead of this one; see Normalization forms (composition and decomposition) <uninorm.h>. +

+ +
+
Function: int u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n) + +
+
Function: int u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n) + +
+
Function: int u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n) + +
+

Compares no more than n units of s1 and s2. +

+

This function is similar to strncmp and wcsncmp, except +that it operates on Unicode strings. +

+ + +

The following function allocates a duplicate of a Unicode string. +

+
+
Function: uint8_t * u8_strdup (const uint8_t *s) + +
+
Function: uint16_t * u16_strdup (const uint16_t *s) + +
+
Function: uint32_t * u32_strdup (const uint32_t *s) + +
+

Duplicates s, returning an identical malloc'd string. +

+

This function is similar to strdup and wcsdup, except +that it operates on Unicode strings. +

+ + +

The following functions search for a given Unicode character. +

+
+
Function: uint8_t * u8_strchr (const uint8_t *str, ucs4_t uc) + +
+
Function: uint16_t * u16_strchr (const uint16_t *str, ucs4_t uc) + +
+
Function: uint32_t * u32_strchr (const uint32_t *str, ucs4_t uc) + +
+

Finds the first occurrence of uc in str. +

+

This function is similar to strchr and wcschr, except +that it operates on Unicode strings. +

+ +
+
Function: uint8_t * u8_strrchr (const uint8_t *str, ucs4_t uc) + +
+
Function: uint16_t * u16_strrchr (const uint16_t *str, ucs4_t uc) + +
+
Function: uint32_t * u32_strrchr (const uint32_t *str, ucs4_t uc) + +
+

Finds the last occurrence of uc in str. +

+

This function is similar to strrchr and wcsrchr, except +that it operates on Unicode strings. +

+ +

The following functions search for the first occurrence of some Unicode +character in or outside a given set of Unicode characters. +

+
+
Function: size_t u8_strcspn (const uint8_t *str, const uint8_t *reject) + +
+
Function: size_t u16_strcspn (const uint16_t *str, const uint16_t *reject) + +
+
Function: size_t u32_strcspn (const uint32_t *str, const uint32_t *reject) + +
+

Returns the length of the initial segment of str which consists entirely +of Unicode characters not in reject. +

+

This function is similar to strcspn and wcscspn, except +that it operates on Unicode strings. +

+ +
+
Function: size_t u8_strspn (const uint8_t *str, const uint8_t *accept) + +
+
Function: size_t u16_strspn (const uint16_t *str, const uint16_t *accept) + +
+
Function: size_t u32_strspn (const uint32_t *str, const uint32_t *accept) + +
+

Returns the length of the initial segment of str which consists entirely +of Unicode characters in accept. +

+

This function is similar to strspn and wcsspn, except +that it operates on Unicode strings. +

+ +
+
Function: uint8_t * u8_strpbrk (const uint8_t *str, const uint8_t *accept) + +
+
Function: uint16_t * u16_strpbrk (const uint16_t *str, const uint16_t *accept) + +
+
Function: uint32_t * u32_strpbrk (const uint32_t *str, const uint32_t *accept) + +
+

Finds the first occurrence in str of any character in accept. +

+

This function is similar to strpbrk and wcspbrk, except +that it operates on Unicode strings. +

+ + +

The following functions search whether a given Unicode string is a substring +of another Unicode string. +

+
+
Function: uint8_t * u8_strstr (const uint8_t *haystack, const uint8_t *needle) + +
+
Function: uint16_t * u16_strstr (const uint16_t *haystack, const uint16_t *needle) + +
+
Function: uint32_t * u32_strstr (const uint32_t *haystack, const uint32_t *needle) + +
+

Finds the first occurrence of needle in haystack. +

+

This function is similar to strstr and wcsstr, except +that it operates on Unicode strings. +

+ +
+
Function: bool u8_startswith (const uint8_t *str, const uint8_t *prefix) + +
+
Function: bool u16_startswith (const uint16_t *str, const uint16_t *prefix) + +
+
Function: bool u32_startswith (const uint32_t *str, const uint32_t *prefix) + +
+

Tests whether str starts with prefix. +

+ +
+
Function: bool u8_endswith (const uint8_t *str, const uint8_t *suffix) + +
+
Function: bool u16_endswith (const uint16_t *str, const uint16_t *suffix) + +
+
Function: bool u32_endswith (const uint32_t *str, const uint32_t *suffix) + +
+

Tests whether str ends with suffix. +

+ +

The following function does one step in tokenizing a Unicode string. +

+
+
Function: uint8_t * u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr) + +
+
Function: uint16_t * u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr) + +
+
Function: uint32_t * u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr) + +
+

Divides str into tokens separated by characters in delim. +

+

This function is similar to strtok_r and wcstok, except +that it operates on Unicode strings. Its interface is actually more similar to +wcstok than to strtok. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_5.html b/doc/libunistring_5.html new file mode 100644 index 00000000..92e115f9 --- /dev/null +++ b/doc/libunistring_5.html @@ -0,0 +1,296 @@ + + + + + +GNU libunistring: 5. Conversions between Unicode and encodings <uniconv.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

5. Conversions between Unicode and encodings <uniconv.h>

+ +

This include file declares functions for converting between Unicode strings +and char * strings in locale encoding or in other specified encodings. +

+ +

The following function returns the locale encoding. +

+
+
Function: const char * locale_charset () + +
+

Determines the current locale's character encoding, and canonicalizes it +into one of the canonical names listed in ‘config.charset’. +If the canonical name cannot be determined, the result is a non-canonical +name. +

+

The result must not be freed; it is statically allocated. +

+

The result of this function can be used as an argument to the iconv_open +function in GNU libc, in GNU libiconv, or in the gnulib provided wrapper +around the native iconv_open function. It may not work as an argument +to the native iconv_open function directly. +

+ +

The handling of unconvertible characters during the conversions can be +parametrized through the following enumeration type: +

+
+
Type: enum iconv_ilseq_handler + +
+

This type specifies how unconvertible characters in the input are handled. +

+ +
+
Constant: enum iconv_ilseq_handler iconveh_error + +
+

This handler causes the function to return with errno set to +EILSEQ. +

+ +
+
Constant: enum iconv_ilseq_handler iconveh_question_mark + +
+

This handler produces one question mark ‘?’ per unconvertible character. +

+ +
+
Constant: enum iconv_ilseq_handler iconveh_escape_sequence + +
+

This handler produces an escape sequence \uxxxx or +\Uxxxxxxxx for each unconvertible character. +

+ + +

The following functions convert between strings in a specified encoding and +Unicode strings. +

+
+
Function: uint8_t * u8_conv_from_encoding (const char *fromcode, enum iconv_ilseq_handler handler, const char *src, size_t srclen, size_t *offsets, uint8_t *resultbuf, size_t *lengthp) + +
+
Function: uint16_t * u16_conv_from_encoding (const char *fromcode, enum iconv_ilseq_handler handler, const char *src, size_t srclen, size_t *offsets, uint16_t *resultbuf, size_t *lengthp) + +
+
Function: uint32_t * u32_conv_from_encoding (const char *fromcode, enum iconv_ilseq_handler handler, const char *src, size_t srclen, size_t *offsets, uint32_t *resultbuf, size_t *lengthp) + +
+

Converts an entire string, possibly including NUL bytes, from one encoding +to UTF-8 encoding. +

+

Converts a memory region given in encoding fromcode. fromcode is +as for the iconv_open function. +

+

The input is in the memory region between src (inclusive) and +src + srclen (exclusive). +

+

If offsets is not NULL, it should point to an array of srclen +integers; this array is filled with offsets into the result, i.e. the +character starting at src[i] corresponds to the character starting +at result[offsets[i]], and other offsets are set to +(size_t)(-1). +

+

resultbuf and *lengthp should be a scratch +buffer and its size, or resultbuf can be NULL. +

+

May erase the contents of the memory at resultbuf. +

+

If successful: The resulting Unicode string (non-NULL) is returned and +its length stored in *lengthp. The resulting string is +resultbuf if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. +

+

In case of error: NULL is returned and errno is set. +Particular errno values: EINVAL, EILSEQ, ENOMEM. +

+ +
+
Function: char * u8_conv_to_encoding (const char *tocode, enum iconv_ilseq_handler handler, const uint8_t *src, size_t srclen, size_t *offsets, char *resultbuf, size_t *lengthp) + +
+
Function: char * u16_conv_to_encoding (const char *tocode, enum iconv_ilseq_handler handler, const uint16_t *src, size_t srclen, size_t *offsets, char *resultbuf, size_t *lengthp) + +
+
Function: char * u32_conv_to_encoding (const char *tocode, enum iconv_ilseq_handler handler, const uint32_t *src, size_t srclen, size_t *offsets, char *resultbuf, size_t *lengthp) + +
+

Converts an entire Unicode string, possibly including NUL units, from UTF-8 +encoding to a given encoding. +

+

Converts a memory region to encoding tocode. tocode is as for +the iconv_open function. +

+

The input is in the memory region between src (inclusive) and +src + srclen (exclusive). +

+

If offsets is not NULL, it should point to an array of srclen +integers; this array is filled with offsets into the result, i.e. the +character starting at src[i] corresponds to the character starting +at result[offsets[i]], and other offsets are set to +(size_t)(-1). +

+

resultbuf and *lengthp should be a scratch +buffer and its size, or resultbuf can be NULL. +

+

May erase the contents of the memory at resultbuf. +

+

If successful: The resulting string (non-NULL) is returned and +its length stored in *lengthp. The resulting string is +resultbuf if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. +

+

In case of error: NULL is returned and errno is set. +Particular errno values: EINVAL, EILSEQ, ENOMEM. +

+ +

The following functions convert between NUL terminated strings in a specified +encoding and NUL terminated Unicode strings. +

+
+
Function: uint8_t * u8_strconv_from_encoding (const char *string, const char *fromcode, enum iconv_ilseq_handler handler) + +
+
Function: uint16_t * u16_strconv_from_encoding (const char *string, const char *fromcode, enum iconv_ilseq_handler handler) + +
+
Function: uint32_t * u32_strconv_from_encoding (const char *string, const char *fromcode, enum iconv_ilseq_handler handler) + +
+

Converts a NUL terminated string from a given encoding. +

+

The result is malloc allocated, or NULL (with errno set) in case of error. +

+

Particular errno values: EILSEQ, ENOMEM. +

+ +
+
Function: char * u8_strconv_to_encoding (const uint8_t *string, const char *tocode, enum iconv_ilseq_handler handler) + +
+
Function: char * u16_strconv_to_encoding (const uint16_t *string, const char *tocode, enum iconv_ilseq_handler handler) + +
+
Function: char * u32_strconv_to_encoding (const uint32_t *string, const char *tocode, enum iconv_ilseq_handler handler) + +
+

Converts a NUL terminated string to a given encoding. +

+

The result is malloc allocated, or NULL (with errno set) in case of error. +

+

Particular errno values: EILSEQ, ENOMEM. +

+ +

The following functions are shorthands that convert between NUL terminated +strings in locale encoding and NUL terminated Unicode strings. +

+
+
Function: uint8_t * u8_strconv_from_locale (const char *string) + +
+
Function: uint16_t * u16_strconv_from_locale (const char *string) + +
+
Function: uint32_t * u32_strconv_from_locale (const char *string) + +
+

Converts a NUL terminated string from the locale encoding. +

+

The result is malloc allocated, or NULL (with errno set) in case of error. +

+

Particular errno values: ENOMEM. +

+ +
+
Function: char * u8_strconv_to_locale (const uint8_t *string) + +
+
Function: char * u16_strconv_to_locale (const uint16_t *string) + +
+
Function: char * u32_strconv_to_locale (const uint32_t *string) + +
+

Converts a NUL terminated string to the locale encoding. +

+

The result is malloc allocated, or NULL (with errno set) in case of error. +

+

Particular errno values: ENOMEM. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_6.html b/doc/libunistring_6.html new file mode 100644 index 00000000..33cda594 --- /dev/null +++ b/doc/libunistring_6.html @@ -0,0 +1,451 @@ + + + + + +GNU libunistring: 6. Output with Unicode strings <unistdio.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

6. Output with Unicode strings <unistdio.h>

+ +

This include file declares functions for doing formatted output with Unicode +strings. It defines a set of functions similar to fprintf and +sprintf, which are declared in <stdio.h>. +

+

These functions work like the printf function family. +In the format string: +

    +
  • +The format directive ‘U’ takes an UTF-8 string (const uint8_t *). +
  • +The format directive ‘lU’ takes an UTF-16 string +(const uint16_t *). +
  • +The format directive ‘llU’ takes an UTF-32 string +(const uint32_t *). +
+ +

A function name with an infix ‘v’ indicates that a va_list is +passed instead of multiple arguments. +

+

The functions *sprintf have a buf argument that is assumed to be +large enough. +(DANGEROUS! Overflowing the buffer will crash the program.) +

+

The functions *snprintf have a buf argument that is assumed to be +size units large. (DANGEROUS! The resulting string might be +truncated in the middle of a multibyte character.) +

+

The functions *asprintf have a resultp argument. The result will +be freshly allocated and stored in *resultp. +

+

The functions *asnprintf have a (resultbuf, lengthp) +argument pair. If resultbuf is not NULL and the result fits into +*lengthp units, it is put in resultbuf, and resultbuf +is returned. Otherwise, a freshly allocated string is returned. In both +cases, *lengthp is set to the length (number of units) of the +returned string. In case of error, NULL is returned and errno is set. +

+

The following functions take an ASCII format string and return a result that +is a char * string in locale encoding. +

+
+
Function: int ulc_sprintf (char *buf, const char *format, ...) + +
+
+ +
+
Function: int ulc_snprintf (char *buf, size_t size, const char *format, ...) + +
+
+ +
+
Function: int ulc_asprintf (char **resultp, const char *format, ...) + +
+
+ +
+
Function: char * ulc_asnprintf (char *resultbuf, size_t *lengthp, const char *format, ...) + +
+
+ +
+
Function: int ulc_vsprintf (char *buf, const char *format, va_list ap) + +
+
+ +
+
Function: int ulc_vsnprintf (char *buf, size_t size, const char *format, va_list ap) + +
+
+ +
+
Function: int ulc_vasprintf (char **resultp, const char *format, va_list ap) + +
+
+ +
+
Function: char * ulc_vasnprintf (char *resultbuf, size_t *lengthp, const char *format, va_list ap) + +
+
+ +

The following functions take an ASCII format string and return a result in +UTF-8 format. +

+
+
Function: int u8_sprintf (uint8_t *buf, const char *format, ...) + +
+
+
+
Function: int u8_snprintf (uint8_t *buf, size_t size, const char *format, ...) + +
+
+
+
Function: int u8_asprintf (uint8_t **resultp, const char *format, ...) + +
+
+
+
Function: uint8_t * u8_asnprintf (uint8_t *resultbuf, size_t *lengthp, const char *format, ...) + +
+
+
+
Function: int u8_vsprintf (uint8_t *buf, const char *format, va_list ap) + +
+
+
+
Function: int u8_vsnprintf (uint8_t *buf, size_t size, const char *format, va_list ap) + +
+
+
+
Function: int u8_vasprintf (uint8_t **resultp, const char *format, va_list ap) + +
+
+
+
Function: uint8_t * u8_vasnprintf (uint8_t *resultbuf, size_t *lengthp, const char *format, va_list ap) + +
+
+ +

The following functions take a UTF-8 format string and return a result in +UTF-8 format. +

+
+
Function: int u8_u8_sprintf (uint8_t *buf, const uint8_t *format, ...) + +
+
+
+
Function: int u8_u8_snprintf (uint8_t *buf, size_t size, const uint8_t *format, ...) + +
+
+
+
Function: int u8_u8_asprintf (uint8_t **resultp, const uint8_t *format, ...) + +
+
+
+
Function: uint8_t * u8_u8_asnprintf (uint8_t *resultbuf, size_t *lengthp, const uint8_t *format, ...) + +
+
+
+
Function: int u8_u8_vsprintf (uint8_t *buf, const uint8_t *format, va_list ap) + +
+
+
+
Function: int u8_u8_vsnprintf (uint8_t *buf, size_t size, const uint8_t *format, va_list ap) + +
+
+
+
Function: int u8_u8_vasprintf (uint8_t **resultp, const uint8_t *format, va_list ap) + +
+
+
+
Function: uint8_t * u8_u8_vasnprintf (uint8_t *resultbuf, size_t *lengthp, const uint8_t *format, va_list ap) + +
+
+ +

The following functions take an ASCII format string and return a result in +UTF-16 format. +

+
+
Function: int u16_sprintf (uint16_t *buf, const char *format, ...) + +
+
+
+
Function: int u16_snprintf (uint16_t *buf, size_t size, const char *format, ...) + +
+
+
+
Function: int u16_asprintf (uint16_t **resultp, const char *format, ...) + +
+
+
+
Function: uint16_t * u16_asnprintf (uint16_t *resultbuf, size_t *lengthp, const char *format, ...) + +
+
+
+
Function: int u16_vsprintf (uint16_t *buf, const char *format, va_list ap) + +
+
+
+
Function: int u16_vsnprintf (uint16_t *buf, size_t size, const char *format, va_list ap) + +
+
+
+
Function: int u16_vasprintf (uint16_t **resultp, const char *format, va_list ap) + +
+
+
+
Function: uint16_t * u16_vasnprintf (uint16_t *resultbuf, size_t *lengthp, const char *format, va_list ap) + +
+
+ +

The following functions take a UTF-16 format string and return a result in +UTF-16 format. +

+
+
Function: int u16_u16_sprintf (uint16_t *buf, const uint16_t *format, ...) + +
+
+
+
Function: int u16_u16_snprintf (uint16_t *buf, size_t size, const uint16_t *format, ...) + +
+
+
+
Function: int u16_u16_asprintf (uint16_t **resultp, const uint16_t *format, ...) + +
+
+
+
Function: uint16_t * u16_u16_asnprintf (uint16_t *resultbuf, size_t *lengthp, const uint16_t *format, ...) + +
+
+
+
Function: int u16_u16_vsprintf (uint16_t *buf, const uint16_t *format, va_list ap) + +
+
+
+
Function: int u16_u16_vsnprintf (uint16_t *buf, size_t size, const uint16_t *format, va_list ap) + +
+
+
+
Function: int u16_u16_vasprintf (uint16_t **resultp, const uint16_t *format, va_list ap) + +
+
+
+
Function: uint16_t * u16_u16_vasnprintf (uint16_t *resultbuf, size_t *lengthp, const uint16_t *format, va_list ap) + +
+
+ +

The following functions take an ASCII format string and return a result in +UTF-32 format. +

+
+
Function: int u32_sprintf (uint32_t *buf, const char *format, ...) + +
+
+
+
Function: int u32_snprintf (uint32_t *buf, size_t size, const char *format, ...) + +
+
+
+
Function: int u32_asprintf (uint32_t **resultp, const char *format, ...) + +
+
+
+
Function: uint32_t * u32_asnprintf (uint32_t *resultbuf, size_t *lengthp, const char *format, ...) + +
+
+
+
Function: int u32_vsprintf (uint32_t *buf, const char *format, va_list ap) + +
+
+
+
Function: int u32_vsnprintf (uint32_t *buf, size_t size, const char *format, va_list ap) + +
+
+
+
Function: int u32_vasprintf (uint32_t **resultp, const char *format, va_list ap) + +
+
+
+
Function: uint32_t * u32_vasnprintf (uint32_t *resultbuf, size_t *lengthp, const char *format, va_list ap) + +
+
+ +

The following functions take a UTF-32 format string and return a result in +UTF-32 format. +

+
+
Function: int u32_u32_sprintf (uint32_t *buf, const uint32_t *format, ...) + +
+
+
+
Function: int u32_u32_snprintf (uint32_t *buf, size_t size, const uint32_t *format, ...) + +
+
+
+
Function: int u32_u32_asprintf (uint32_t **resultp, const uint32_t *format, ...) + +
+
+
+
Function: uint32_t * u32_u32_asnprintf (uint32_t *resultbuf, size_t *lengthp, const uint32_t *format, ...) + +
+
+
+
Function: int u32_u32_vsprintf (uint32_t *buf, const uint32_t *format, va_list ap) + +
+
+
+
Function: int u32_u32_vsnprintf (uint32_t *buf, size_t size, const uint32_t *format, va_list ap) + +
+
+
+
Function: int u32_u32_vasprintf (uint32_t **resultp, const uint32_t *format, va_list ap) + +
+
+
+
Function: uint32_t * u32_u32_vasnprintf (uint32_t *resultbuf, size_t *lengthp, const uint32_t *format, va_list ap) + +
+
+ +

The following functions take an ASCII format string and produce output in +locale encoding to a FILE stream. +

+
+
Function: int ulc_fprintf (FILE *stream, const char *format, ...) + +
+
+
+
Function: int ulc_vfprintf (FILE *stream, const char *format, va_list ap) + +
+
+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_7.html b/doc/libunistring_7.html new file mode 100644 index 00000000..6fe526d9 --- /dev/null +++ b/doc/libunistring_7.html @@ -0,0 +1,125 @@ + + + + + +GNU libunistring: 7. Names of Unicode characters <uniname.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

7. Names of Unicode characters <uniname.h>

+ +

This include file implements the association between a Unicode character and +its name. +

+

The name of a Unicode character allows one to distinguish it from other, similar +looking characters. For example, the character ‘x’ has the name +"LATIN SMALL LETTER X" and is therefore different from the character +named "MULTIPLICATION SIGN". +

+
+
Macro: unsigned int UNINAME_MAX + +
+

This macro expands to a constant that is the required size of buffer for a +Unicode character name. +

+ +
+
Function: char * unicode_character_name (ucs4_t uc, char *buf) + +
+

Looks up the name of a Unicode character, in uppercase ASCII. +buf must point to a buffer, at least UNINAME_MAX bytes in size. +Returns the filled buf, or NULL if the character does not have a name. +

+ +
+
Function: ucs4_t unicode_name_character (const char *name) + +
+

Looks up the Unicode character with a given name, in upper- or lowercase +ASCII. Returns the character if found, or UNINAME_INVALID if not found. +

+ +
+
Macro: ucs4_t UNINAME_INVALID + +
+

This macro expands to a constant that is a special return value of the +unicode_name_character function. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_8.html b/doc/libunistring_8.html new file mode 100644 index 00000000..def5e04a --- /dev/null +++ b/doc/libunistring_8.html @@ -0,0 +1,2071 @@ + + + + + +GNU libunistring: 8. Unicode character classification and properties <unictype.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

8. Unicode character classification and properties <unictype.h>

+ +

This include file declares functions that classify Unicode characters +and that test whether Unicode characters have specific properties. +

+

The classification assigns a “general category” to every Unicode +character. This is similar to the classification provided by ISO C in +<wctype.h>. +

+

Properties are the data that guides various text processing algorithms +in the presence of specific Unicode characters. +

+ +
+ + +

8.1 General category

+ +

Every Unicode character or code point has a general category assigned +to it. This classification is important for most algorithms that work on +Unicode text. +

+

The GNU libunistring library provides two kinds of API for working with +general categories. The object oriented API uses a variable to denote +every predefined general category value or combinations thereof. The +low-level API uses a bit mask instead. The advantage of the object oriented +API is that if only a few predefined general category values are used, +the data tables are relatively small. When you combine general category +values (using uc_general_category_or, uc_general_category_and, +or uc_general_category_and_not), or when you use the low level +bit masks, a big table is used that holds the complete general category +information for all Unicode characters. +

+ +
+ + +

8.1.1 The object oriented API for general category

+ +
+
Type: uc_general_category_t + +
+

This data type denotes a general category value. It is an immediate type that +can be copied by simple assignment, without involving memory allocation. It is +not an array type. +

+ +

The following are the predefined general category values. Additional general +categories may be added in the future. +

+
+
Constant: uc_general_category_t UC_CATEGORY_L + +
+
Constant: uc_general_category_t UC_CATEGORY_Lu + +
+
Constant: uc_general_category_t UC_CATEGORY_Ll + +
+
Constant: uc_general_category_t UC_CATEGORY_Lt + +
+
Constant: uc_general_category_t UC_CATEGORY_Lm + +
+
Constant: uc_general_category_t UC_CATEGORY_Lo + +
+
Constant: uc_general_category_t UC_CATEGORY_M + +
+
Constant: uc_general_category_t UC_CATEGORY_Mn + +
+
Constant: uc_general_category_t UC_CATEGORY_Mc + +
+
Constant: uc_general_category_t UC_CATEGORY_Me + +
+
Constant: uc_general_category_t UC_CATEGORY_N + +
+
Constant: uc_general_category_t UC_CATEGORY_Nd + +
+
Constant: uc_general_category_t UC_CATEGORY_Nl + +
+
Constant: uc_general_category_t UC_CATEGORY_No + +
+
Constant: uc_general_category_t UC_CATEGORY_P + +
+
Constant: uc_general_category_t UC_CATEGORY_Pc + +
+
Constant: uc_general_category_t UC_CATEGORY_Pd + +
+
Constant: uc_general_category_t UC_CATEGORY_Ps + +
+
Constant: uc_general_category_t UC_CATEGORY_Pe + +
+
Constant: uc_general_category_t UC_CATEGORY_Pi + +
+
Constant: uc_general_category_t UC_CATEGORY_Pf + +
+
Constant: uc_general_category_t UC_CATEGORY_Po + +
+
Constant: uc_general_category_t UC_CATEGORY_S + +
+
Constant: uc_general_category_t UC_CATEGORY_Sm + +
+
Constant: uc_general_category_t UC_CATEGORY_Sc + +
+
Constant: uc_general_category_t UC_CATEGORY_Sk + +
+
Constant: uc_general_category_t UC_CATEGORY_So + +
+
Constant: uc_general_category_t UC_CATEGORY_Z + +
+
Constant: uc_general_category_t UC_CATEGORY_Zs + +
+
Constant: uc_general_category_t UC_CATEGORY_Zl + +
+
Constant: uc_general_category_t UC_CATEGORY_Zp + +
+
Constant: uc_general_category_t UC_CATEGORY_C + +
+
Constant: uc_general_category_t UC_CATEGORY_Cc + +
+
Constant: uc_general_category_t UC_CATEGORY_Cf + +
+
Constant: uc_general_category_t UC_CATEGORY_Cs + +
+
Constant: uc_general_category_t UC_CATEGORY_Co + +
+
Constant: uc_general_category_t UC_CATEGORY_Cn + +
+
+ +

The following are alias names for predefined General category values. +

+
+
Macro: uc_general_category_t UC_LETTER + +
+

This is another name for UC_CATEGORY_L. +

+ +
+
Macro: uc_general_category_t UC_UPPERCASE_LETTER + +
+

This is another name for UC_CATEGORY_Lu. +

+ +
+
Macro: uc_general_category_t UC_LOWERCASE_LETTER + +
+

This is another name for UC_CATEGORY_Ll. +

+ +
+
Macro: uc_general_category_t UC_TITLECASE_LETTER + +
+

This is another name for UC_CATEGORY_Lt. +

+ +
+
Macro: uc_general_category_t UC_MODIFIER_LETTER + +
+

This is another name for UC_CATEGORY_Lm. +

+ +
+
Macro: uc_general_category_t UC_OTHER_LETTER + +
+

This is another name for UC_CATEGORY_Lo. +

+ +
+
Macro: uc_general_category_t UC_MARK + +
+

This is another name for UC_CATEGORY_M. +

+ +
+
Macro: uc_general_category_t UC_NON_SPACING_MARK + +
+

This is another name for UC_CATEGORY_Mn. +

+ +
+
Macro: uc_general_category_t UC_COMBINING_SPACING_MARK + +
+

This is another name for UC_CATEGORY_Mc. +

+ +
+
Macro: uc_general_category_t UC_ENCLOSING_MARK + +
+

This is another name for UC_CATEGORY_Me. +

+ +
+
Macro: uc_general_category_t UC_NUMBER + +
+

This is another name for UC_CATEGORY_N. +

+ +
+
Macro: uc_general_category_t UC_DECIMAL_DIGIT_NUMBER + +
+

This is another name for UC_CATEGORY_Nd. +

+ +
+
Macro: uc_general_category_t UC_LETTER_NUMBER + +
+

This is another name for UC_CATEGORY_Nl. +

+ +
+
Macro: uc_general_category_t UC_OTHER_NUMBER + +
+

This is another name for UC_CATEGORY_No. +

+ +
+
Macro: uc_general_category_t UC_PUNCTUATION + +
+

This is another name for UC_CATEGORY_P. +

+ +
+
Macro: uc_general_category_t UC_CONNECTOR_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Pc. +

+ +
+
Macro: uc_general_category_t UC_DASH_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Pd. +

+ +
+
Macro: uc_general_category_t UC_OPEN_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Ps (“start punctuation”). +

+ +
+
Macro: uc_general_category_t UC_CLOSE_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Pe (“end punctuation”). +

+ +
+
Macro: uc_general_category_t UC_INITIAL_QUOTE_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Pi. +

+ +
+
Macro: uc_general_category_t UC_FINAL_QUOTE_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Pf. +

+ +
+
Macro: uc_general_category_t UC_OTHER_PUNCTUATION + +
+

This is another name for UC_CATEGORY_Po. +

+ +
+
Macro: uc_general_category_t UC_SYMBOL + +
+

This is another name for UC_CATEGORY_S. +

+ +
+
Macro: uc_general_category_t UC_MATH_SYMBOL + +
+

This is another name for UC_CATEGORY_Sm. +

+ +
+
Macro: uc_general_category_t UC_CURRENCY_SYMBOL + +
+

This is another name for UC_CATEGORY_Sc. +

+ +
+
Macro: uc_general_category_t UC_MODIFIER_SYMBOL + +
+

This is another name for UC_CATEGORY_Sk. +

+ +
+
Macro: uc_general_category_t UC_OTHER_SYMBOL + +
+

This is another name for UC_CATEGORY_So. +

+ +
+
Macro: uc_general_category_t UC_SEPARATOR + +
+

This is another name for UC_CATEGORY_Z. +

+ +
+
Macro: uc_general_category_t UC_SPACE_SEPARATOR + +
+

This is another name for UC_CATEGORY_Zs. +

+ +
+
Macro: uc_general_category_t UC_LINE_SEPARATOR + +
+

This is another name for UC_CATEGORY_Zl. +

+ +
+
Macro: uc_general_category_t UC_PARAGRAPH_SEPARATOR + +
+

This is another name for UC_CATEGORY_Zp. +

+ +
+
Macro: uc_general_category_t UC_OTHER + +
+

This is another name for UC_CATEGORY_C. +

+ +
+
Macro: uc_general_category_t UC_CONTROL + +
+

This is another name for UC_CATEGORY_Cc. +

+ +
+
Macro: uc_general_category_t UC_FORMAT + +
+

This is another name for UC_CATEGORY_Cf. +

+ +
+
Macro: uc_general_category_t UC_SURROGATE + +
+

This is another name for UC_CATEGORY_Cs. All code points in this +category are invalid characters. +

+ +
+
Macro: uc_general_category_t UC_PRIVATE_USE + +
+

This is another name for UC_CATEGORY_Co. +

+ +
+
Macro: uc_general_category_t UC_UNASSIGNED + +
+

This is another name for UC_CATEGORY_Cn. Some code points in this +category are invalid characters. +

+ +

The following functions combine general categories, like in a boolean algebra, +except that there is no ‘not’ operation. +

+
+
Function: uc_general_category_t uc_general_category_or (uc_general_category_t category1, uc_general_category_t category2) + +
+

Returns the union of two general categories. +This corresponds to the union of the two sets of characters. +

+ +
+
Function: uc_general_category_t uc_general_category_and (uc_general_category_t category1, uc_general_category_t category2) + +
+

Returns the intersection of two general categories as bit masks. +This does not correspond to the intersection of the two sets of +characters. +

+ +
+
Function: uc_general_category_t uc_general_category_and_not (uc_general_category_t category1, uc_general_category_t category2) + +
+

Returns the intersection of a general category with the complement of a +second general category, as bit masks. +This does not correspond to the intersection with complement, when +viewing the categories as sets of characters. +

+ +

The following functions associate general categories with their name. +

+
+
Function: const char * uc_general_category_name (uc_general_category_t category) + +
+

Returns the name of a general category. +Returns NULL if the general category corresponds to a bit mask that does not +have a name. +

+ +
+
Function: uc_general_category_t uc_general_category_byname (const char *category_name) + +
+

Returns the general category given by name, e.g. "Lu". +

+ +

The following functions view general categories as sets of Unicode characters. +

+
+
Function: uc_general_category_t uc_general_category (ucs4_t uc) + +
+

Returns the general category of a Unicode character. +

+

This function uses a big table. +

+ +
+
Function: bool uc_is_general_category (ucs4_t uc, uc_general_category_t category) + +
+

Tests whether a Unicode character belongs to a given category. +The category argument can be a predefined general category or the +combination of several predefined general categories. +

+ +
+ + +

8.1.2 The bit mask API for general category

+ +

The following are the predefined general category values as bit masks. +Additional general categories may be added in the future. +

+
+
Macro: uint32_t UC_CATEGORY_MASK_L + +
+
Macro: uint32_t UC_CATEGORY_MASK_Lu + +
+
Macro: uint32_t UC_CATEGORY_MASK_Ll + +
+
Macro: uint32_t UC_CATEGORY_MASK_Lt + +
+
Macro: uint32_t UC_CATEGORY_MASK_Lm + +
+
Macro: uint32_t UC_CATEGORY_MASK_Lo + +
+
Macro: uint32_t UC_CATEGORY_MASK_M + +
+
Macro: uint32_t UC_CATEGORY_MASK_Mn + +
+
Macro: uint32_t UC_CATEGORY_MASK_Mc + +
+
Macro: uint32_t UC_CATEGORY_MASK_Me + +
+
Macro: uint32_t UC_CATEGORY_MASK_N + +
+
Macro: uint32_t UC_CATEGORY_MASK_Nd + +
+
Macro: uint32_t UC_CATEGORY_MASK_Nl + +
+
Macro: uint32_t UC_CATEGORY_MASK_No + +
+
Macro: uint32_t UC_CATEGORY_MASK_P + +
+
Macro: uint32_t UC_CATEGORY_MASK_Pc + +
+
Macro: uint32_t UC_CATEGORY_MASK_Pd + +
+
Macro: uint32_t UC_CATEGORY_MASK_Ps + +
+
Macro: uint32_t UC_CATEGORY_MASK_Pe + +
+
Macro: uint32_t UC_CATEGORY_MASK_Pi + +
+
Macro: uint32_t UC_CATEGORY_MASK_Pf + +
+
Macro: uint32_t UC_CATEGORY_MASK_Po + +
+
Macro: uint32_t UC_CATEGORY_MASK_S + +
+
Macro: uint32_t UC_CATEGORY_MASK_Sm + +
+
Macro: uint32_t UC_CATEGORY_MASK_Sc + +
+
Macro: uint32_t UC_CATEGORY_MASK_Sk + +
+
Macro: uint32_t UC_CATEGORY_MASK_So + +
+
Macro: uint32_t UC_CATEGORY_MASK_Z + +
+
Macro: uint32_t UC_CATEGORY_MASK_Zs + +
+
Macro: uint32_t UC_CATEGORY_MASK_Zl + +
+
Macro: uint32_t UC_CATEGORY_MASK_Zp + +
+
Macro: uint32_t UC_CATEGORY_MASK_C + +
+
Macro: uint32_t UC_CATEGORY_MASK_Cc + +
+
Macro: uint32_t UC_CATEGORY_MASK_Cf + +
+
Macro: uint32_t UC_CATEGORY_MASK_Cs + +
+
Macro: uint32_t UC_CATEGORY_MASK_Co + +
+
Macro: uint32_t UC_CATEGORY_MASK_Cn + +
+
+ +

The following function views general categories as sets of Unicode characters. +

+
+
Function: bool uc_is_general_category_withtable (ucs4_t uc, uint32_t bitmask) + +
+

Tests whether a Unicode character belongs to a given category. +The bitmask argument can be a predefined general category bitmask or the +combination of several predefined general category bitmasks. +

+

This function uses a big table comprising all general categories. +

+ +
+ + +

8.2 Canonical combining class

+ +

Every Unicode character or code point has a canonical combining class +assigned to it. +

+

What is the meaning of the canonical combining class? Essentially, it +indicates the priority with which a combining character is attached to its +base character. The characters for which the canonical combining class is 0 +are the base characters, and the characters for which it is greater than 0 are +the combining characters. Combining characters are rendered +near/attached/around their base character, and combining characters with small +combining classes are attached "first" or "closer" to the base character. +

+

The canonical combining class of a character is a number in the range +0..255. The possible values are described in the Unicode Character Database +http://www.unicode.org/Public/UNIDATA/UCD.html. The list here is +not definitive; more values can be added in future versions. +

+
+
Constant: int UC_CCC_NR + +
+

The canonical combining class value for “Not Reordered” characters. +The value is 0. +

+ +
+
Constant: int UC_CCC_OV + +
+

The canonical combining class value for “Overlay” characters. +

+ +
+
Constant: int UC_CCC_NK + +
+

The canonical combining class value for “Nukta” characters. +

+ +
+
Constant: int UC_CCC_KV + +
+

The canonical combining class value for “Kana Voicing” characters. +

+ +
+
Constant: int UC_CCC_VR + +
+

The canonical combining class value for “Virama” characters. +

+ +
+
Constant: int UC_CCC_ATBL + +
+

The canonical combining class value for “Attached Below Left” characters. +

+ +
+
Constant: int UC_CCC_ATB + +
+

The canonical combining class value for “Attached Below” characters. +

+ +
+
Constant: int UC_CCC_ATAR + +
+

The canonical combining class value for “Attached Above Right” characters. +

+ +
+
Constant: int UC_CCC_BL + +
+

The canonical combining class value for “Below Left” characters. +

+ +
+
Constant: int UC_CCC_B + +
+

The canonical combining class value for “Below” characters. +

+ +
+
Constant: int UC_CCC_BR + +
+

The canonical combining class value for “Below Right” characters. +

+ +
+
Constant: int UC_CCC_L + +
+

The canonical combining class value for “Left” characters. +

+ +
+
Constant: int UC_CCC_R + +
+

The canonical combining class value for “Right” characters. +

+ +
+
Constant: int UC_CCC_AL + +
+

The canonical combining class value for “Above Left” characters. +

+ +
+
Constant: int UC_CCC_A + +
+

The canonical combining class value for “Above” characters. +

+ +
+
Constant: int UC_CCC_AR + +
+

The canonical combining class value for “Above Right” characters. +

+ +
+
Constant: int UC_CCC_DB + +
+

The canonical combining class value for “Double Below” characters. +

+ +
+
Constant: int UC_CCC_DA + +
+

The canonical combining class value for “Double Above” characters. +

+ +
+
Constant: int UC_CCC_IS + +
+

The canonical combining class value for “Iota Subscript” characters. +

+ +

The following function looks up the canonical combining class of a character. +

+
+
Function: int uc_combining_class (ucs4_t uc) + +
+

Returns the canonical combining class of a Unicode character. +

+ +
+ + +

8.3 Bidirectional category

+ +

Every Unicode character or code point has a bidirectional category +assigned to it. +

+

The bidirectional category guides the bidirectional algorithm +(http://www.unicode.org/reports/tr9/). The possible values are +the following. +

+
+
Constant: int UC_BIDI_L + +
+

The bidirectional category for “Left-to-Right” characters. +

+ +
+
Constant: int UC_BIDI_LRE + +
+

The bidirectional category for “Left-to-Right Embedding” characters. +

+ +
+
Constant: int UC_BIDI_LRO + +
+

The bidirectional category for “Left-to-Right Override” characters. +

+ +
+
Constant: int UC_BIDI_R + +
+

The bidirectional category for “Right-to-Left” characters. +

+ +
+
Constant: int UC_BIDI_AL + +
+

The bidirectional category for “Right-to-Left Arabic” characters. +

+ +
+
Constant: int UC_BIDI_RLE + +
+

The bidirectional category for “Right-to-Left Embedding” characters. +

+ +
+
Constant: int UC_BIDI_RLO + +
+

The bidirectional category for “Right-to-Left Override” characters. +

+ +
+
Constant: int UC_BIDI_PDF + +
+

The bidirectional category for “Pop Directional Format” characters. +

+ +
+
Constant: int UC_BIDI_EN + +
+

The bidirectional category for “European Number” characters. +

+ +
+
Constant: int UC_BIDI_ES + +
+

The bidirectional category for “European Number Separator” characters. +

+ +
+
Constant: int UC_BIDI_ET + +
+

The bidirectional category for “European Number Terminator” characters. +

+ +
+
Constant: int UC_BIDI_AN + +
+

The bidirectional category for “Arabic Number” characters. +

+ +
+
Constant: int UC_BIDI_CS + +
+

The bidirectional category for “Common Number Separator” characters. +

+ +
+
Constant: int UC_BIDI_NSM + +
+

The bidirectional category for “Non-Spacing Mark” characters. +

+ +
+
Constant: int UC_BIDI_BN + +
+

The bidirectional category for “Boundary Neutral” characters. +

+ +
+
Constant: int UC_BIDI_B + +
+

The bidirectional category for “Paragraph Separator” characters. +

+ +
+
Constant: int UC_BIDI_S + +
+

The bidirectional category for “Segment Separator” characters. +

+ +
+
Constant: int UC_BIDI_WS + +
+

The bidirectional category for “Whitespace” characters. +

+ +
+
Constant: int UC_BIDI_ON + +
+

The bidirectional category for “Other Neutral” characters. +

+ +

The following functions implement the association between a bidirectional +category and its name. +

+
+
Function: const char * uc_bidi_category_name (int category) + +
+

Returns the name of a bidirectional category. +

+ +
+
Function: int uc_bidi_category_byname (const char *category_name) + +
+

Returns the bidirectional category given by name, e.g. "LRE". +

+ +

The following functions view bidirectional categories as sets of Unicode +characters. +

+
+
Function: int uc_bidi_category (ucs4_t uc) + +
+

Returns the bidirectional category of a Unicode character. +

+ +
+
Function: bool uc_is_bidi_category (ucs4_t uc, int category) + +
+

Tests whether a Unicode character belongs to a given bidirectional category. +

+ +
+ + +

8.4 Decimal digit value

+ +

Decimal digits (like the digits from ‘0’ to ‘9’) exist in many +scripts. The following function converts a decimal digit character to its +numerical value. +

+
+
Function: int uc_decimal_value (ucs4_t uc) + +
+

Returns the decimal digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a decimal digit. +

+ +
+ + +

8.5 Digit value

+ +

Digit characters are like decimal digit characters, possibly in special forms, +such as superscript, subscript, or circled. The following function converts a +digit character to its numerical value. +

+
+
Function: int uc_digit_value (ucs4_t uc) + +
+

Returns the digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a digit. +

+ +
+ + +

8.6 Numeric value

+ +

There are also characters that represent numbers without a digit system, like +the Roman numerals, and fractional numbers, like 1/4 or 3/4. +

+

The following type represents the numeric value of a Unicode character. +

+
Type: uc_fraction_t + +
+

This is a structure type with the following fields: +

 
int numerator;
+int denominator;
+
+

An integer n is represented by numerator = n, +denominator = 1. +

+ +

The following function converts a number character to its numerical value. +

+
+
Function: uc_fraction_t uc_numeric_value (ucs4_t uc) + +
+

Returns the numeric value of a Unicode character. +The return value is a fraction, or the pseudo-fraction { 0, 0 } for +characters that do not represent a number. +

+ +
+ + +

8.7 Mirrored character

+ +

Character mirroring is used to associate the closing parenthesis character +with the opening parenthesis character, the closing brace character with the +opening brace character, and so on. +

+

The following function looks up the mirrored character of a Unicode character. +

+
+
Function: bool uc_mirror_char (ucs4_t uc, ucs4_t *puc) + +
+

Stores the mirrored character of a Unicode character uc in +*puc and returns true, if it exists. Otherwise it +stores uc unmodified in *puc and returns false. +

+ +
+ + +

8.8 Properties

+ +

This section defines boolean properties of Unicode characters. This +means, a character either has the given property or does not have it. +In other words, the property can be viewed as a subset of the set of +Unicode characters. +

+

The GNU libunistring library provides two kinds of API for working with +properties. The object oriented API uses a type uc_property_t +to designate a property. In the function-based API, which is a bit more +low level, a property is merely a function. +

+ +
+ + +

8.8.1 Properties as objects – the object oriented API

+ +

The following type designates a property on Unicode characters. +

+
+
Type: uc_property_t + +
+

This data type denotes a boolean property on Unicode characters. It is an +immediate type that can be copied by simple assignment, without involving +memory allocation. It is not an array type. +

+ +

Many Unicode properties are predefined. +

+

The following are general properties. +

+
+
Constant: uc_property_t UC_PROPERTY_WHITE_SPACE + +
+
Constant: uc_property_t UC_PROPERTY_ALPHABETIC + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_ALPHABETIC + +
+
Constant: uc_property_t UC_PROPERTY_NOT_A_CHARACTER + +
+
Constant: uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT + +
+
Constant: uc_property_t UC_PROPERTY_DEPRECATED + +
+
Constant: uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION + +
+
Constant: uc_property_t UC_PROPERTY_VARIATION_SELECTOR + +
+
Constant: uc_property_t UC_PROPERTY_PRIVATE_USE + +
+
Constant: uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE + +
+
+ +

The following properties are related to case folding. +

+
+
Constant: uc_property_t UC_PROPERTY_UPPERCASE + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_UPPERCASE + +
+
Constant: uc_property_t UC_PROPERTY_LOWERCASE + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_LOWERCASE + +
+
Constant: uc_property_t UC_PROPERTY_TITLECASE + +
+
Constant: uc_property_t UC_PROPERTY_SOFT_DOTTED + +
+
+ +

The following properties are related to identifiers. +

+
+
Constant: uc_property_t UC_PROPERTY_ID_START + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_ID_START + +
+
Constant: uc_property_t UC_PROPERTY_ID_CONTINUE + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE + +
+
Constant: uc_property_t UC_PROPERTY_XID_START + +
+
Constant: uc_property_t UC_PROPERTY_XID_CONTINUE + +
+
Constant: uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE + +
+
Constant: uc_property_t UC_PROPERTY_PATTERN_SYNTAX + +
+
+ +

The following properties have an influence on shaping and rendering. +

+
+
Constant: uc_property_t UC_PROPERTY_JOIN_CONTROL + +
+
Constant: uc_property_t UC_PROPERTY_GRAPHEME_BASE + +
+
Constant: uc_property_t UC_PROPERTY_GRAPHEME_EXTEND + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND + +
+
Constant: uc_property_t UC_PROPERTY_GRAPHEME_LINK + +
+
+ +

The following properties relate to bidirectional reordering. +

+
+
Constant: uc_property_t UC_PROPERTY_BIDI_CONTROL + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_WHITESPACE + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_PDF + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE + +
+
Constant: uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL + +
+
+ +

The following properties deal with number representations. +

+
+
Constant: uc_property_t UC_PROPERTY_HEX_DIGIT + +
+
Constant: uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT + +
+
+ +

The following properties deal with CJK. +

+
+
Constant: uc_property_t UC_PROPERTY_IDEOGRAPHIC + +
+
Constant: uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH + +
+
Constant: uc_property_t UC_PROPERTY_RADICAL + +
+
Constant: uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR + +
+
Constant: uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR + +
+
+ +

Other miscellaneous properties are: +

+
+
Constant: uc_property_t UC_PROPERTY_ZERO_WIDTH + +
+
Constant: uc_property_t UC_PROPERTY_SPACE + +
+
Constant: uc_property_t UC_PROPERTY_NON_BREAK + +
+
Constant: uc_property_t UC_PROPERTY_ISO_CONTROL + +
+
Constant: uc_property_t UC_PROPERTY_FORMAT_CONTROL + +
+
Constant: uc_property_t UC_PROPERTY_DASH + +
+
Constant: uc_property_t UC_PROPERTY_HYPHEN + +
+
Constant: uc_property_t UC_PROPERTY_PUNCTUATION + +
+
Constant: uc_property_t UC_PROPERTY_LINE_SEPARATOR + +
+
Constant: uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR + +
+
Constant: uc_property_t UC_PROPERTY_QUOTATION_MARK + +
+
Constant: uc_property_t UC_PROPERTY_SENTENCE_TERMINAL + +
+
Constant: uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION + +
+
Constant: uc_property_t UC_PROPERTY_CURRENCY_SYMBOL + +
+
Constant: uc_property_t UC_PROPERTY_MATH + +
+
Constant: uc_property_t UC_PROPERTY_OTHER_MATH + +
+
Constant: uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION + +
+
Constant: uc_property_t UC_PROPERTY_LEFT_OF_PAIR + +
+
Constant: uc_property_t UC_PROPERTY_COMBINING + +
+
Constant: uc_property_t UC_PROPERTY_COMPOSITE + +
+
Constant: uc_property_t UC_PROPERTY_DECIMAL_DIGIT + +
+
Constant: uc_property_t UC_PROPERTY_NUMERIC + +
+
Constant: uc_property_t UC_PROPERTY_DIACRITIC + +
+
Constant: uc_property_t UC_PROPERTY_EXTENDER + +
+
Constant: uc_property_t UC_PROPERTY_IGNORABLE_CONTROL + +
+
+ +

The following function looks up a property by its name. +

+
+
Function: uc_property_t uc_property_byname (const char *property_name) + +
+

Returns the property given by name, e.g. "White space". If a property +with the given name exists, the result will satisfy the +uc_property_is_valid predicate. Otherwise the result will not satisfy +this predicate and must not be passed to functions that expect an +uc_property_t argument. +

+

This function references a big table of all predefined properties. Its use +can significantly increase the size of your application. +

+ +
+
Function: bool uc_property_is_valid (uc_property_t property) + +
+

Returns true when the given property is valid, or false +otherwise. +

+ +

The following function views a property as a set of Unicode characters. +

+
+
Function: bool uc_is_property (ucs4_t uc, uc_property_t property) + +
+

Tests whether the Unicode character uc has the given property. +

+ +
+ + +

8.8.2 Properties as functions – the functional API

+ +

The following are general properties. +

+
+
Function: bool uc_is_property_white_space (ucs4_t uc) + +
+
Function: bool uc_is_property_alphabetic (ucs4_t uc) + +
+
Function: bool uc_is_property_other_alphabetic (ucs4_t uc) + +
+
Function: bool uc_is_property_not_a_character (ucs4_t uc) + +
+
Function: bool uc_is_property_default_ignorable_code_point (ucs4_t uc) + +
+
Function: bool uc_is_property_other_default_ignorable_code_point (ucs4_t uc) + +
+
Function: bool uc_is_property_deprecated (ucs4_t uc) + +
+
Function: bool uc_is_property_logical_order_exception (ucs4_t uc) + +
+
Function: bool uc_is_property_variation_selector (ucs4_t uc) + +
+
Function: bool uc_is_property_private_use (ucs4_t uc) + +
+
Function: bool uc_is_property_unassigned_code_value (ucs4_t uc) + +
+
+ +

The following properties are related to case folding. +

+
+
Function: bool uc_is_property_uppercase (ucs4_t uc) + +
+
Function: bool uc_is_property_other_uppercase (ucs4_t uc) + +
+
Function: bool uc_is_property_lowercase (ucs4_t uc) + +
+
Function: bool uc_is_property_other_lowercase (ucs4_t uc) + +
+
Function: bool uc_is_property_titlecase (ucs4_t uc) + +
+
Function: bool uc_is_property_soft_dotted (ucs4_t uc) + +
+
+ +

The following properties are related to identifiers. +

+
+
Function: bool uc_is_property_id_start (ucs4_t uc) + +
+
Function: bool uc_is_property_other_id_start (ucs4_t uc) + +
+
Function: bool uc_is_property_id_continue (ucs4_t uc) + +
+
Function: bool uc_is_property_other_id_continue (ucs4_t uc) + +
+
Function: bool uc_is_property_xid_start (ucs4_t uc) + +
+
Function: bool uc_is_property_xid_continue (ucs4_t uc) + +
+
Function: bool uc_is_property_pattern_white_space (ucs4_t uc) + +
+
Function: bool uc_is_property_pattern_syntax (ucs4_t uc) + +
+
+ +

The following properties have an influence on shaping and rendering. +

+
+
Function: bool uc_is_property_join_control (ucs4_t uc) + +
+
Function: bool uc_is_property_grapheme_base (ucs4_t uc) + +
+
Function: bool uc_is_property_grapheme_extend (ucs4_t uc) + +
+
Function: bool uc_is_property_other_grapheme_extend (ucs4_t uc) + +
+
Function: bool uc_is_property_grapheme_link (ucs4_t uc) + +
+
+ +

The following properties relate to bidirectional reordering. +

+
+
Function: bool uc_is_property_bidi_control (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_left_to_right (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_arabic_right_to_left (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_european_digit (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_eur_num_separator (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_eur_num_terminator (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_arabic_digit (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_common_separator (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_block_separator (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_segment_separator (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_whitespace (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_non_spacing_mark (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_boundary_neutral (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_pdf (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_embedding_or_override (ucs4_t uc) + +
+
Function: bool uc_is_property_bidi_other_neutral (ucs4_t uc) + +
+
+ +

The following properties deal with number representations. +

+
+
Function: bool uc_is_property_hex_digit (ucs4_t uc) + +
+
Function: bool uc_is_property_ascii_hex_digit (ucs4_t uc) + +
+
+ +

The following properties deal with CJK. +

+
+
Function: bool uc_is_property_ideographic (ucs4_t uc) + +
+
Function: bool uc_is_property_unified_ideograph (ucs4_t uc) + +
+
Function: bool uc_is_property_radical (ucs4_t uc) + +
+
Function: bool uc_is_property_ids_binary_operator (ucs4_t uc) + +
+
Function: bool uc_is_property_ids_trinary_operator (ucs4_t uc) + +
+
+ +

Other miscellaneous properties are: +

+
+
Function: bool uc_is_property_zero_width (ucs4_t uc) + +
+
Function: bool uc_is_property_space (ucs4_t uc) + +
+
Function: bool uc_is_property_non_break (ucs4_t uc) + +
+
Function: bool uc_is_property_iso_control (ucs4_t uc) + +
+
Function: bool uc_is_property_format_control (ucs4_t uc) + +
+
Function: bool uc_is_property_dash (ucs4_t uc) + +
+
Function: bool uc_is_property_hyphen (ucs4_t uc) + +
+
Function: bool uc_is_property_punctuation (ucs4_t uc) + +
+
Function: bool uc_is_property_line_separator (ucs4_t uc) + +
+
Function: bool uc_is_property_paragraph_separator (ucs4_t uc) + +
+
Function: bool uc_is_property_quotation_mark (ucs4_t uc) + +
+
Function: bool uc_is_property_sentence_terminal (ucs4_t uc) + +
+
Function: bool uc_is_property_terminal_punctuation (ucs4_t uc) + +
+
Function: bool uc_is_property_currency_symbol (ucs4_t uc) + +
+
Function: bool uc_is_property_math (ucs4_t uc) + +
+
Function: bool uc_is_property_other_math (ucs4_t uc) + +
+
Function: bool uc_is_property_paired_punctuation (ucs4_t uc) + +
+
Function: bool uc_is_property_left_of_pair (ucs4_t uc) + +
+
Function: bool uc_is_property_combining (ucs4_t uc) + +
+
Function: bool uc_is_property_composite (ucs4_t uc) + +
+
Function: bool uc_is_property_decimal_digit (ucs4_t uc) + +
+
Function: bool uc_is_property_numeric (ucs4_t uc) + +
+
Function: bool uc_is_property_diacritic (ucs4_t uc) + +
+
Function: bool uc_is_property_extender (ucs4_t uc) + +
+
Function: bool uc_is_property_ignorable_control (ucs4_t uc) + +
+
+ +
+ + +

8.9 Scripts

+ +

The Unicode characters are subdivided into scripts. +

+

The following type is used to represent a script: +

+
+
Type: uc_script_t + +
+

This data type is a structure type that refers to statically allocated +read-only data. It contains the following fields: +

 
const char *name;
+
+ +

The name field contains the name of the script. +

+ + +

The following functions look up a script. +

+
+
Function: const uc_script_t * uc_script (ucs4_t uc) + +
+

Returns the script of a Unicode character. Returns NULL if uc does not +belong to any script. +

+ +
+
Function: const uc_script_t * uc_script_byname (const char *script_name) + +
+

Returns the script given by its name, e.g. "HAN". Returns NULL if a +script with the given name does not exist. +

+ +

The following function views a script as a set of Unicode characters. +

+
+
Function: bool uc_is_script (ucs4_t uc, const uc_script_t *script) + +
+

Tests whether a Unicode character belongs to a given script. +

+ +

The following gives a global picture of all scripts. +

+
+
Function: void uc_all_scripts (const uc_script_t **scripts, size_t *count) + +
+

Get the list of all scripts. Stores a pointer to an array of all scripts in +*scripts and the length of this array in *count. +

+ +
+ + +

8.10 Blocks

+ +

The Unicode characters are subdivided into blocks. A block is an interval of +Unicode code points. +

+

The following type is used to represent a block. +

+
+
Type: uc_block_t + +
+

This data type is a structure type that refers to statically allocated data. +It contains the following fields: +

 
ucs4_t start;
+ucs4_t end;
+const char *name;
+
+ +

The start field is the first Unicode code point in the block. +

+

The end field is the last Unicode code point in the block. +

+

The name field is the name of the block. +

+ + +

The following function looks up a block. +

+
+
Function: const uc_block_t * uc_block (ucs4_t uc) + +
+

Returns the block a character belongs to. +

+ +

The following function views a block as a set of Unicode characters. +

+
+
Function: bool uc_is_block (ucs4_t uc, const uc_block_t *block) + +
+

Tests whether a Unicode character belongs to a given block. +

+ +

The following gives a global picture of all blocks. +

+
+
Function: void uc_all_blocks (const uc_block_t **blocks, size_t *count) + +
+

Get the list of all blocks. Stores a pointer to an array of all blocks in +*blocks and the length of this array in *count. +

+ +
+ + +

8.11 ISO C and Java syntax

+ +

The following properties are taken from language standards. The supported +language standards are ISO C 99 and Java. +

+
+
Function: bool uc_is_c_whitespace (ucs4_t uc) + +
+

Tests whether a Unicode character is considered whitespace in ISO C 99. +

+ +
+
Function: bool uc_is_java_whitespace (ucs4_t uc) + +
+

Tests whether a Unicode character is considered whitespace in Java. +

+ +

The following enumerated values are the possible return values of the functions +uc_c_ident_category and uc_java_ident_category. +

+
+
Constant: int UC_IDENTIFIER_START + +
+

This return value means that the given character is valid as first or +subsequent character in an identifier. +

+ +
+
Constant: int UC_IDENTIFIER_VALID + +
+

This return value means that the given character is valid as subsequent +character only. +

+ +
+
Constant: int UC_IDENTIFIER_INVALID + +
+

This return value means that the given character is not valid in an identifier. +

+ +
+
Constant: int UC_IDENTIFIER_IGNORABLE + +
+

This return value (only for Java) means that the given character is ignorable. +

+ +

The following functions determine whether a given character can be a constituent +of an identifier in the given programming language. +

+ +
+
Function: int uc_c_ident_category (ucs4_t uc) + +
+

Returns the categorization of a Unicode character with respect to the ISO C 99 +identifier syntax. +

+ + +
+
Function: int uc_java_ident_category (ucs4_t uc) + +
+

Returns the categorization of a Unicode character with respect to the Java +identifier syntax. +

+ +
+ + +

8.12 Classifications like in ISO C

+ +

The following character classifications mimic those declared in the ISO C +header files <ctype.h> and <wctype.h>. These functions are +deprecated, because this set of functions was designed with ASCII in mind and +cannot reflect the more diverse reality of the Unicode character set. But +they can be a quick-and-dirty porting aid when migrating from wchar_t +APIs to Unicode strings. +

+
+
Function: bool uc_is_alnum (ucs4_t uc) + +
+

Tests for any character for which uc_is_alpha or uc_is_digit is +true. +

+ +
+
Function: bool uc_is_alpha (ucs4_t uc) + +
+

Tests for any character for which uc_is_upper or uc_is_lower is +true, or any character that is one of a locale-specific set of characters for +which none of uc_is_cntrl, uc_is_digit, uc_is_punct, or +uc_is_space is true. +

+ +
+
Function: bool uc_is_cntrl (ucs4_t uc) + +
+

Tests for any control character. +

+ +
+
Function: bool uc_is_digit (ucs4_t uc) + +
+

Tests for any character that corresponds to a decimal-digit character. +

+ +
+
Function: bool uc_is_graph (ucs4_t uc) + +
+

Tests for any character for which uc_is_print is true and +uc_is_space is false. +

+ +
+
Function: bool uc_is_lower (ucs4_t uc) + +
+

Tests for any character that corresponds to a lowercase letter or is one +of a locale-specific set of characters for which none of uc_is_cntrl, +uc_is_digit, uc_is_punct, or uc_is_space is true. +

+ +
+
Function: bool uc_is_print (ucs4_t uc) + +
+

Tests for any printing character. +

+ +
+
Function: bool uc_is_punct (ucs4_t uc) + +
+

Tests for any printing character that is one of a locale-specific set of +characters for which neither uc_is_space nor uc_is_alnum is true. +

+ +
+
Function: bool uc_is_space (ucs4_t uc) + +
+

Tests for any character that corresponds to a locale-specific set of characters +for which none of uc_is_alnum, uc_is_graph, or uc_is_punct +is true. +

+ +
+
Function: bool uc_is_upper (ucs4_t uc) + +
+

Tests for any character that corresponds to an uppercase letter or is one +of a locale-specific set of characters for which none of uc_is_cntrl, +uc_is_digit, uc_is_punct, or uc_is_space is true. +

+ +
+
Function: bool uc_is_xdigit (ucs4_t uc) + +
+

Tests for any character that corresponds to a hexadecimal-digit character. +

+ +
+
Function: bool uc_is_blank (ucs4_t uc) + +
+

Tests for any character that corresponds to a standard blank character or +a locale-specific set of characters for which uc_is_alnum is false. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_9.html b/doc/libunistring_9.html new file mode 100644 index 00000000..0586e0f2 --- /dev/null +++ b/doc/libunistring_9.html @@ -0,0 +1,141 @@ + + + + + +GNU libunistring: 9. Display width <uniwidth.h> + + + + + + + + + + + + + + + + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+ +
+ + +

9. Display width <uniwidth.h>

+ +

This include file declares functions that return the display width, measured +in columns, of characters or strings, when output to a device that uses +non-proportional fonts. +

+ +

Note that for some rarely used characters the actual fonts or terminal +emulators can use a different width. There is no mechanism for communicating +the display width of characters across a Unix pseudo-terminal (tty). Also, +there are scripts with complex rendering, like the Indic scripts. For these +scripts, there is no such concept as non-proportional fonts. Therefore +the results of these functions usually work fine on most scripts and on +most characters but can fail to represent the actual display width. +

+

These functions are locale dependent. The encoding argument identifies +the encoding (e.g. "ISO-8859-2" for Polish). +

+ + + +
+
Function: int uc_width (ucs4_t uc, const char *encoding) + +
+

Determines and returns the number of column positions required for uc. +Returns -1 if uc is a control character that has an influence on the +column position when output. +

+ +
+
Function: int u8_width (const uint8_t *s, size_t n, const char *encoding) + +
+
Function: int u16_width (const uint16_t *s, size_t n, const char *encoding) + +
+
Function: int u32_width (const uint32_t *s, size_t n, const char *encoding) + +
+

Determines and returns the number of column positions required for first +n units (or fewer if s ends before this) in s. This +function ignores control characters in the string. +

+ +
+
Function: int u8_strwidth (const uint8_t *s, const char *encoding) + +
+
Function: int u16_strwidth (const uint16_t *s, const char *encoding) + +
+
Function: int u32_strwidth (const uint32_t *s, const char *encoding) + +
+

Determines and returns the number of column positions required for s. +This function ignores control characters in the string. +

+
+ + + + + + + + + + + + +
[ << ][ >> ]           [Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_abt.html b/doc/libunistring_abt.html new file mode 100644 index 00000000..47d71bac --- /dev/null +++ b/doc/libunistring_abt.html @@ -0,0 +1,167 @@ + + + + + +GNU libunistring: About This Document + + + + + + + + + + + + + + + + + + + + +
[Top][Contents][Index][ ? ]
+

About This Document

+

+ This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. +

+

+ The buttons in the navigation panels have the following meaning: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Button Name Go to From 1.2.3 go to
[ < ] BackPrevious section in reading order1.2.2
[ > ] ForwardNext section in reading order1.2.4
[ << ] FastBackBeginning of this chapter or previous chapter1
[ Up ] UpUp section1.2
[ >> ] FastForwardNext chapter2
[Top] TopCover (top) of document  
[Contents] ContentsTable of contents  
[Index] IndexIndex  
[ ? ] AboutAbout (help)  
+ +

+ where the Example assumes that the current position is at Subsubsection One-Two-Three of a document of the following structure: +

+ +
    +
  • 1. Section One +
      +
    • 1.1 Subsection One-One +
        +
      • ...
      • +
      +
    • +
    • 1.2 Subsection One-Two +
        +
      • 1.2.1 Subsubsection One-Two-One
      • +
      • 1.2.2 Subsubsection One-Two-Two
      • +
      • 1.2.3 Subsubsection One-Two-Three     + <== Current Position
      • +
      • 1.2.4 Subsubsection One-Two-Four
      • +
      +
    • +
    • 1.3 Subsection One-Three +
        +
      • ...
      • +
      +
    • +
    • 1.4 Subsection One-Four
    • +
    +
  • +
+ +
+ + + + + +
[Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/libunistring_toc.html b/doc/libunistring_toc.html new file mode 100644 index 00000000..e7e4e51e --- /dev/null +++ b/doc/libunistring_toc.html @@ -0,0 +1,164 @@ + + + + + +GNU libunistring: GNU libunistring + + + + + + + + + + + + + + + + + + + +
[Top][Contents][Index][ ? ]
+

GNU libunistring

+ +

Table of Contents

+
+ + +
+ + + + + +
+ + + + + +
[Top][Contents][Index][ ? ]
+

+ + This document was generated by Bruno Haible on July, 1 2009 using texi2html 1.78a. + +
+ +

+ + diff --git a/doc/stamp-vti b/doc/stamp-vti new file mode 100644 index 00000000..ee4ba906 --- /dev/null +++ b/doc/stamp-vti @@ -0,0 +1,4 @@ +@set UPDATED 29 June 2009 +@set UPDATED-MONTH June 2009 +@set EDITION 0.9.1 +@set VERSION 0.9.1 diff --git a/doc/unicase.texi b/doc/unicase.texi new file mode 100644 index 00000000..14b46be0 --- /dev/null +++ b/doc/unicase.texi @@ -0,0 +1,364 @@ +@node unicase.h +@chapter Case mappings @code{} + +This include file defines functions for case mapping for Unicode strings and +case insensitive comparison of Unicode strings and C strings. + +These string functions fix the problems that were mentioned in +@ref{char * strings}, namely, they handle the Croatian +@sc{LETTER DZ WITH CARON}, the German @sc{LATIN SMALL LETTER SHARP S}, the +Greek sigma and the Lithuanian i correctly. + +@menu +* Case mappings of characters:: +* Case mappings of strings:: +* Case mappings of substrings:: +* Case insensitive comparison:: +* Case detection:: +@end menu + +@node Case mappings of characters +@section Case mappings of characters + +@cindex Unicode character, case mappings +The following functions implement case mappings on Unicode characters --- +for those cases only where the result of the mapping is a again a single +Unicode character. + +These mappings are locale and context independent. + +@cartouche +@strong{WARNING!} These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that treat an +entire string at once and are language aware. +@end cartouche + +@deftypefun ucs4_t uc_toupper (ucs4_t @var{uc}) +Returns the uppercase mapping of the Unicode character @var{uc}. +@end deftypefun + +@deftypefun ucs4_t uc_tolower (ucs4_t @var{uc}) +Returns the lowercase mapping of the Unicode character @var{uc}. +@end deftypefun + +@deftypefun ucs4_t uc_totitle (ucs4_t @var{uc}) +Returns the titlecase mapping of the Unicode character @var{uc}. 
+ +The titlecase mapping of a character is to be used when the character should +look like upper case and the following characters are lower cased. + +For most characters, this is the same as the uppercase mapping. There are +only few characters where the title case variant and the upper case variant +are different. These characters occur in the Latin writing of the Croatian, +Bosnian, and Serbian languages. + +@c Normally we would use .33 space for each column, but this is too much in +@c TeX mode, see +@c . +@multitable @columnfractions .31 .31 .31 +@headitem Lower case @tab Title case @tab Upper case +@item LATIN SMALL LETTER LJ + @tab LATIN CAPITAL LETTER L WITH SMALL LETTER J + @tab LATIN CAPITAL LETTER LJ +@item LATIN SMALL LETTER NJ + @tab LATIN CAPITAL LETTER N WITH SMALL LETTER J + @tab LATIN CAPITAL LETTER NJ +@item LATIN SMALL LETTER DZ + @tab LATIN CAPITAL LETTER D WITH SMALL LETTER Z + @tab LATIN CAPITAL LETTER DZ +@item LATIN SMALL LETTER DZ WITH CARON + @tab LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + @tab LATIN CAPITAL LETTER DZ WITH CARON +@end multitable +@end deftypefun + +@node Case mappings of strings +@section Case mappings of strings + +@cindex case mappings +@cindex uppercasing +@cindex lowercasing +@cindex titlecasing +Case mapping should always be performed on entire strings, not on individual +characters. The functions in this section do so. + +These functions allow to apply a normalization after the case mapping. The +reason is that if you want to treat @samp{@"{a}} and @samp{@"{A}} the same, +you most often also want to treat the composed and decomposed forms of such +a character, U+00C4 @sc{LATIN CAPITAL LETTER A WITH DIAERESIS} and +U+0041 @sc{LATIN CAPITAL LETTER A} U+0308 @sc{COMBINING DIAERESIS} the same. +The @var{nf} argument designates the normalization. + +@cindex locale language +These functions are locale dependent. The @var{iso639_language} argument +identifies the language (e.g. @code{"tr"} for Turkish).
NULL means to use +locale independent case mappings. + +@deftypefun {const char *} uc_locale_language () +Returns the ISO 639 language code of the current locale. +Returns @code{""} if it is unknown, or in the "C" locale. +@end deftypefun + +@deftypefun {uint8_t *} u8_toupper (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_toupper (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_toupper (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the uppercase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@deftypefun {uint8_t *} u8_tolower (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_tolower (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_tolower (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the lowercase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_totitle (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_totitle (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_totitle (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the titlecase mapping of a string. + +Mapping to title case means that, in each word, the first cased character +is being mapped to title case and the remaining characters of the word +are being mapped to lower case. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@node Case mappings of substrings +@section Case mappings of substrings + +Case mapping of a substring cannot simply be performed by extracting the +substring and then applying the case mapping function to it. This does not +work because case mapping requires some information about the surrounding +characters. The following functions allow to apply case mappings to +substrings of a given string, while taking into account the characters that +precede it (the ``prefix'') and the characters that follow it (the ``suffix''). + +@deftp Type casing_prefix_context_t +This data type denotes the case-mapping context that is given by a prefix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +@end deftp + +@deftypevr Constant casing_prefix_context_t unicase_empty_prefix_context +This constant is the case-mapping context that corresponds to an empty prefix +string. 
+@end deftypevr + +The following functions return @code{casing_prefix_context_t} objects: + +@deftypefun casing_prefix_context_t u8_casing_prefix_context (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx casing_prefix_context_t u16_casing_prefix_context (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx casing_prefix_context_t u32_casing_prefix_context (const uint32_t *@var{s}, size_t @var{n}) +Returns the case-mapping context of a given prefix string. +@end deftypefun + +@deftypefun casing_prefix_context_t u8_casing_prefixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +@deftypefunx casing_prefix_context_t u16_casing_prefixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +@deftypefunx casing_prefix_context_t u32_casing_prefixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +Returns the case-mapping context of the prefix concat(@var{a}, @var{s}), +given the case-mapping context of the prefix @var{a}. +@end deftypefun + +@deftp Type casing_suffix_context_t +This data type denotes the case-mapping context that is given by a suffix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +@end deftp + +@deftypevr Constant casing_suffix_context_t unicase_empty_suffix_context +This constant is the case-mapping context that corresponds to an empty suffix +string. +@end deftypevr + +The following functions return @code{casing_suffix_context_t} objects: + +@deftypefun casing_suffix_context_t u8_casing_suffix_context (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx casing_suffix_context_t u16_casing_suffix_context (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx casing_suffix_context_t u32_casing_suffix_context (const uint32_t *@var{s}, size_t @var{n}) +Returns the case-mapping context of a given suffix string. 
+@end deftypefun + +@deftypefun casing_suffix_context_t u8_casing_suffixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +@deftypefunx casing_suffix_context_t u16_casing_suffixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +@deftypefunx casing_suffix_context_t u32_casing_suffixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +Returns the case-mapping context of the suffix concat(@var{s}, @var{a}), +given the case-mapping context of the suffix @var{a}. +@end deftypefun + +The following functions perform a case mapping, considering the +prefix context and the suffix context. + +@deftypefun {uint8_t *} u8_ct_toupper (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_toupper (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_toupper (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the uppercase mapping of a string that is surrounded by a prefix +and a suffix. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_ct_tolower (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_tolower (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_tolower (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the lowercase mapping of a string that is surrounded by a prefix +and a suffix. +@end deftypefun + +@deftypefun {uint8_t *} u8_ct_totitle (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_totitle (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_totitle (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the titlecase mapping of a string that is surrounded by a prefix +and a suffix. 
+@end deftypefun + +For example, to uppercase the UTF-8 substring between @code{s + start_index} +and @code{s + end_index} of a string that extends from @code{s} to +@code{s + u8_strlen (s)}, you can use the statements + +@smallexample +size_t result_length; +uint8_t *result = + u8_ct_toupper (s + start_index, end_index - start_index, + u8_casing_prefix_context (s, start_index), + u8_casing_suffix_context (s + end_index, + u8_strlen (s) - end_index), + iso639_language, NULL, NULL, &result_length); +@end smallexample + +@node Case insensitive comparison +@section Case insensitive comparison + +@cindex comparing, ignoring case +@cindex comparing, ignoring normalization and case +The following functions implement comparison that ignores differences in case +and normalization. + +@deftypefun {uint8_t *} u8_casefold (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_casefold (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_casefold (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the case folded string. + +Comparing @code{u8_casefold (@var{s1})} and @code{u8_casefold (@var{s2})} +with the @code{u8_cmp2} function is equivalent to comparing @var{s1} and +@var{s2} with @code{u8_casecmp}. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_ct_casefold (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_casefold (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_casefold (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the case folded string. The case folding takes into account the +case mapping contexts of the prefix and suffix strings. +@end deftypefun + +@deftypefun int u8_casecmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_casecmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_casecmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int ulc_casecmp (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in case and normalization. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+ +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@cindex comparing, ignoring case, with collation rules +@cindex comparing, with collation rules, ignoring case +@cindex comparing, ignoring normalization and case, with collation rules +@cindex comparing, with collation rules, ignoring normalization and case +The following functions additionally take into account the sorting rules of the +current locale. + +@deftypefun {char *} u8_casexfrm (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_casexfrm (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_casexfrm (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} ulc_casexfrm (const char *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts the string @var{s} of length @var{n} to a NUL-terminated byte +sequence, in such a way that comparing @code{u8_casexfrm (@var{s1})} and +@code{u8_casexfrm (@var{s2})} with the gnulib function @code{memcmp2} is +equivalent to comparing @var{s1} and @var{s2} with @code{u8_casecoll}. + +@var{nf} must be either @code{UNINORM_NFC}, @code{UNINORM_NFKC}, or NULL for +no normalization. 
+@end deftypefun + +@deftypefun int u8_casecoll (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_casecoll (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_casecoll (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int ulc_casecoll (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in case and normalization, +using the collation rules of the current locale. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. +It can also be NULL, for no normalization. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@node Case detection +@section Case detection + +@cindex case detection +@cindex detecting case +The following functions determine whether a Unicode string is entirely in +upper case, or entirely in lower case, or entirely in title case, or already +case-folded. 
+ +@deftypefun int u8_is_uppercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_uppercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_uppercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to upper case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +@deftypefun int u8_is_lowercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_lowercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_lowercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to lower case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +@deftypefun int u8_is_titlecase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_titlecase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_titlecase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to title case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. 
+@end deftypefun + +@deftypefun int u8_is_casefolded (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_casefolded (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_casefolded (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if applying case folding to NFD(@var{s}) is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +The following functions determine whether case mappings have any effect on a +Unicode string. + +@deftypefun int u8_is_cased (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_cased (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_cased (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if case matters for @var{s}, that is, if +mapping NFD(@var{s}) to either upper case or lower case or title case is not +a no-op. Sets @code{*@var{resultp}} to false if NFD(@var{s}) maps to itself +under the upper case mapping, under the lower case mapping, and under the title +case mapping; in other words, when NFD(@var{s}) consists entirely of caseless +characters. Upon failure, returns -1 with @code{errno} set. +@end deftypefun diff --git a/doc/uniconv.texi b/doc/uniconv.texi new file mode 100644 index 00000000..07cfa1be --- /dev/null +++ b/doc/uniconv.texi @@ -0,0 +1,157 @@ +@node uniconv.h +@chapter Conversions between Unicode and encodings @code{<uniconv.h>} + +This include file declares functions for converting between Unicode strings +and @code{char *} strings in locale encoding or in other specified encodings. 
+ +@cindex locale encoding +The following function returns the locale encoding. + +@deftypefun {const char *} locale_charset () +Determines the current locale's character encoding, and canonicalizes it +into one of the canonical names listed in @file{config.charset}. +If the canonical name cannot be determined, the result is a non-canonical +name. + +The result must not be freed; it is statically allocated. + +The result of this function can be used as an argument to the @code{iconv_open} +function in GNU libc, in GNU libiconv, or in the gnulib provided wrapper +around the native @code{iconv_open} function. It may not work as an argument +to the native @code{iconv_open} function directly. +@end deftypefun + +The handling of unconvertible characters during the conversions can be +parametrized through the following enumeration type: + +@deftp Type {enum iconv_ilseq_handler} +This type specifies how unconvertible characters in the input are handled. +@end deftp + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_error +This handler causes the function to return with @code{errno} set to +@code{EILSEQ}. +@end deftypevr + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_question_mark +This handler produces one question mark @samp{?} per unconvertible character. +@end deftypevr + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_escape_sequence +This handler produces an escape sequence @code{\u@var{xxxx}} or +@code{\U@var{xxxxxxxx}} for each unconvertible character. +@end deftypevr + +@cindex converting +The following functions convert between strings in a specified encoding and +Unicode strings. 
+ +@deftypefun {uint8_t *} u8_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an entire string, possibly including NUL bytes, from one encoding +to UTF-8 encoding. + +Converts a memory region given in encoding @var{fromcode}. @var{fromcode} is +as for the @code{iconv_open} function. + +The input is in the memory region between @var{src} (inclusive) and +@code{@var{src} + @var{srclen}} (exclusive). + +If @var{offsets} is not NULL, it should point to an array of @var{srclen} +integers; this array is filled with offsets into the result, i.e@. the +character starting at @code{@var{src}[i]} corresponds to the character starting +at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to +@code{(size_t)(-1)}. + +@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch +buffer and its size, or @code{@var{resultbuf}} can be NULL. + +May erase the contents of the memory at @code{@var{resultbuf}}. + +If successful: The resulting Unicode string (non-NULL) is returned and +its length stored in @code{*@var{lengthp}}. The resulting string is +@code{@var{resultbuf}} if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. + +In case of error: NULL is returned and @code{errno} is set. +Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}. 
+@end deftypefun + +@deftypefun {char *} u8_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint8_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint16_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint32_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts an entire Unicode string, possibly including NUL units, from UTF-8 +encoding to a given encoding. + +Converts a memory region to encoding @var{tocode}. @var{tocode} is as for +the @code{iconv_open} function. + +The input is in the memory region between @var{src} (inclusive) and +@code{@var{src} + @var{srclen}} (exclusive). + +If @var{offsets} is not NULL, it should point to an array of @var{srclen} +integers; this array is filled with offsets into the result, i.e@. the +character starting at @code{@var{src}[i]} corresponds to the character starting +at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to +@code{(size_t)(-1)}. + +@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch +buffer and its size, or @code{@var{resultbuf}} can be NULL. + +May erase the contents of the memory at @code{@var{resultbuf}}. + +If successful: The resulting string (non-NULL) is returned and +its length stored in @code{*@var{lengthp}}. The resulting string is +@code{@var{resultbuf}} if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. + +In case of error: NULL is returned and @code{errno} is set. +Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}. 
+@end deftypefun + +The following functions convert between NUL terminated strings in a specified +encoding and NUL terminated Unicode strings. + +@deftypefun {uint8_t *} u8_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {uint16_t *} u16_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {uint32_t *} u32_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +Converts a NUL terminated string from a given encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_strconv_to_encoding (const uint8_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {char *} u16_strconv_to_encoding (const uint16_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {char *} u32_strconv_to_encoding (const uint32_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +Converts a NUL terminated string to a given encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +The following functions are shorthands that convert between NUL terminated +strings in locale encoding and NUL terminated Unicode strings. + +@deftypefun {uint8_t *} u8_strconv_from_locale (const char *@var{string}) +@deftypefunx {uint16_t *} u16_strconv_from_locale (const char *@var{string}) +@deftypefunx {uint32_t *} u32_strconv_from_locale (const char *@var{string}) +Converts a NUL terminated string from the locale encoding. 
+ +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_strconv_to_locale (const uint8_t *@var{string}) +@deftypefunx {char *} u16_strconv_to_locale (const uint16_t *@var{string}) +@deftypefunx {char *} u32_strconv_to_locale (const uint32_t *@var{string}) +Converts a NUL terminated string to the locale encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{ENOMEM}. +@end deftypefun diff --git a/doc/unictype.texi b/doc/unictype.texi new file mode 100644 index 00000000..129159c7 --- /dev/null +++ b/doc/unictype.texi @@ -0,0 +1,1145 @@ +@node unictype.h +@chapter Unicode character classification and properties @code{<unictype.h>} + +This include file declares functions that classify Unicode characters +and that test whether Unicode characters have specific properties. + +The classification assigns a ``general category'' to every Unicode +character. This is similar to the classification provided by ISO C in +@code{<wctype.h>}. + +Properties are the data that guides various text processing algorithms +in the presence of specific Unicode characters. + +@menu +* General category:: +* Canonical combining class:: +* Bidirectional category:: +* Decimal digit value:: +* Digit value:: +* Numeric value:: +* Mirrored character:: +* Properties:: +* Scripts:: +* Blocks:: +* ISO C and Java syntax:: +* Classifications like in ISO C:: +@end menu + +@node General category +@section General category + +@cindex general category +@cindex Unicode character, general category +@cindex Unicode character, classification +Every Unicode character or code point has a @emph{general category} assigned +to it. This classification is important for most algorithms that work on +Unicode text. + +The GNU libunistring library provides two kinds of API for working with +general categories. 
The object oriented API uses a variable to denote +every predefined general category value or combinations thereof. The +low-level API uses a bit mask instead. The advantage of the object oriented +API is that if only a few predefined general category values are used, +the data tables are relatively small. When you combine general category +values (using @code{uc_general_category_or}, @code{uc_general_category_and}, +or @code{uc_general_category_and_not}), or when you use the low level +bit masks, a big table is used that holds the complete general category +information for all Unicode characters. + +@menu +* Object oriented API:: +* Bit mask API:: +@end menu + +@node Object oriented API +@subsection The object oriented API for general category + +@deftp Type uc_general_category_t +This data type denotes a general category value. It is an immediate type that +can be copied by simple assignment, without involving memory allocation. It is +not an array type. +@end deftp + +The following are the predefined general category values. Additional general +categories may be added in the future. 
+ +@deftypevr Constant uc_general_category_t UC_CATEGORY_L +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lu +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Ll +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lt +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lm +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Lo +@deftypevrx Constant uc_general_category_t UC_CATEGORY_M +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Mn +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Mc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Me +@deftypevrx Constant uc_general_category_t UC_CATEGORY_N +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Nd +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Nl +@deftypevrx Constant uc_general_category_t UC_CATEGORY_No +@deftypevrx Constant uc_general_category_t UC_CATEGORY_P +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pd +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Ps +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pe +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pi +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Pf +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Po +@deftypevrx Constant uc_general_category_t UC_CATEGORY_S +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Sm +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Sc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Sk +@deftypevrx Constant uc_general_category_t UC_CATEGORY_So +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Z +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Zs +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Zl +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Zp +@deftypevrx Constant uc_general_category_t UC_CATEGORY_C +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cc +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cf 
+@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cs +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Co +@deftypevrx Constant uc_general_category_t UC_CATEGORY_Cn +@end deftypevr + +The following are alias names for predefined General category values. + +@deftypevr Macro uc_general_category_t UC_LETTER +This is another name for @code{UC_CATEGORY_L}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_UPPERCASE_LETTER +This is another name for @code{UC_CATEGORY_Lu}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_LOWERCASE_LETTER +This is another name for @code{UC_CATEGORY_Ll}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_TITLECASE_LETTER +This is another name for @code{UC_CATEGORY_Lt}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MODIFIER_LETTER +This is another name for @code{UC_CATEGORY_Lm}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_LETTER +This is another name for @code{UC_CATEGORY_Lo}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MARK +This is another name for @code{UC_CATEGORY_M}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_NON_SPACING_MARK +This is another name for @code{UC_CATEGORY_Mn}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_COMBINING_SPACING_MARK +This is another name for @code{UC_CATEGORY_Mc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_ENCLOSING_MARK +This is another name for @code{UC_CATEGORY_Me}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_NUMBER +This is another name for @code{UC_CATEGORY_N}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_DECIMAL_DIGIT_NUMBER +This is another name for @code{UC_CATEGORY_Nd}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_LETTER_NUMBER +This is another name for @code{UC_CATEGORY_Nl}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_NUMBER +This is another name for @code{UC_CATEGORY_No}. 
+@end deftypevr + +@deftypevr Macro uc_general_category_t UC_PUNCTUATION +This is another name for @code{UC_CATEGORY_P}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CONNECTOR_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_DASH_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pd}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OPEN_PUNCTUATION +This is another name for @code{UC_CATEGORY_Ps} (``start punctuation''). +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CLOSE_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pe} (``end punctuation''). +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_INITIAL_QUOTE_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pi}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_FINAL_QUOTE_PUNCTUATION +This is another name for @code{UC_CATEGORY_Pf}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_PUNCTUATION +This is another name for @code{UC_CATEGORY_Po}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SYMBOL +This is another name for @code{UC_CATEGORY_S}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MATH_SYMBOL +This is another name for @code{UC_CATEGORY_Sm}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CURRENCY_SYMBOL +This is another name for @code{UC_CATEGORY_Sc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_MODIFIER_SYMBOL +This is another name for @code{UC_CATEGORY_Sk}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER_SYMBOL +This is another name for @code{UC_CATEGORY_So}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SEPARATOR +This is another name for @code{UC_CATEGORY_Z}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SPACE_SEPARATOR +This is another name for @code{UC_CATEGORY_Zs}. 
+@end deftypevr + +@deftypevr Macro uc_general_category_t UC_LINE_SEPARATOR +This is another name for @code{UC_CATEGORY_Zl}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_PARAGRAPH_SEPARATOR +This is another name for @code{UC_CATEGORY_Zp}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_OTHER +This is another name for @code{UC_CATEGORY_C}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_CONTROL +This is another name for @code{UC_CATEGORY_Cc}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_FORMAT +This is another name for @code{UC_CATEGORY_Cf}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_SURROGATE +This is another name for @code{UC_CATEGORY_Cs}. All code points in this +category are invalid characters. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_PRIVATE_USE +This is another name for @code{UC_CATEGORY_Co}. +@end deftypevr + +@deftypevr Macro uc_general_category_t UC_UNASSIGNED +This is another name for @code{UC_CATEGORY_Cn}. Some code points in this +category are invalid characters. +@end deftypevr + +The following functions combine general categories, like in a boolean algebra, +except that there is no @samp{not} operation. + +@deftypefun uc_general_category_t uc_general_category_or (uc_general_category_t @var{category1}, uc_general_category_t @var{category2}) +Returns the union of two general categories. +This corresponds to the unions of the two sets of characters. +@end deftypefun + +@deftypefun uc_general_category_t uc_general_category_and (uc_general_category_t @var{category1}, uc_general_category_t @var{category2}) +Returns the intersection of two general categories as bit masks. +This @emph{does not} correspond to the intersection of the two sets of +characters. +@c Really?? 
+@end deftypefun + +@deftypefun uc_general_category_t uc_general_category_and_not (uc_general_category_t @var{category1}, uc_general_category_t @var{category2}) +Returns the intersection of a general category with the complement of a +second general category, as bit masks. +This @emph{does not} correspond to the intersection with complement, when +viewing the categories as sets of characters. +@c Really?? +@end deftypefun + +The following functions associate general categories with their name. + +@deftypefun {const char *} uc_general_category_name (uc_general_category_t @var{category}) +Returns the name of a general category. +Returns NULL if the general category corresponds to a bit mask that does not +have a name. +@end deftypefun + +@deftypefun uc_general_category_t uc_general_category_byname (const char *@var{category_name}) +Returns the general category given by name, e.g@. @code{"Lu"}. +@end deftypefun + +The following functions view general categories as sets of Unicode characters. + +@deftypefun uc_general_category_t uc_general_category (ucs4_t @var{uc}) +Returns the general category of a Unicode character. + +This function uses a big table. +@end deftypefun + +@deftypefun bool uc_is_general_category (ucs4_t @var{uc}, uc_general_category_t @var{category}) +Tests whether a Unicode character belongs to a given category. +The @var{category} argument can be a predefined general category or the +combination of several predefined general categories. +@end deftypefun + +@node Bit mask API +@subsection The bit mask API for general category + +The following are the predefined general category values as bit masks. +Additional general categories may be added in the future. 
+ +@deftypevr Macro uint32_t UC_CATEGORY_MASK_L +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lu +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Ll +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lt +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lm +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Lo +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_M +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Mn +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Mc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Me +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_N +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Nd +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Nl +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_No +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_P +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pd +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Ps +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pe +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pi +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Pf +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Po +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_S +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Sm +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Sc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Sk +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_So +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Z +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Zs +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Zl +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Zp +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_C +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cc +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cf +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cs +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Co +@deftypevrx Macro uint32_t UC_CATEGORY_MASK_Cn +@end deftypevr + +The following function views general categories as sets of Unicode characters. 
+ +@deftypefun bool uc_is_general_category_withtable (ucs4_t @var{uc}, uint32_t @var{bitmask}) +Tests whether a Unicode character belongs to a given category. +The @var{bitmask} argument can be a predefined general category bitmask or the +combination of several predefined general category bitmasks. + +This function uses a big table comprising all general categories. +@end deftypefun + +@node Canonical combining class +@section Canonical combining class + +@cindex canonical combining class +@cindex Unicode character, canonical combining class +Every Unicode character or code point has a @emph{canonical combining class} +assigned to it. + +What is the meaning of the canonical combining class? Essentially, it +indicates the priority with which a combining character is attached to its +base character. The characters for which the canonical combining class is 0 +are the base characters, and the characters for which it is greater than 0 are +the combining characters. Combining characters are rendered +near/attached/around their base character, and combining characters with small +combining classes are attached "first" or "closer" to the base character. + +The canonical combining class of a character is a number in the range +0..255. The possible values are described in the Unicode Character Database +@texnl{}@url{http://www.unicode.org/Public/UNIDATA/UCD.html}. The list here is +not definitive; more values can be added in future versions. + +@deftypevr Constant int UC_CCC_NR +The canonical combining class value for ``Not Reordered'' characters. +The value is 0. +@end deftypevr + +@deftypevr Constant int UC_CCC_OV +The canonical combining class value for ``Overlay'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_NK +The canonical combining class value for ``Nukta'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_KV +The canonical combining class value for ``Kana Voicing'' characters. 
+@end deftypevr + +@deftypevr Constant int UC_CCC_VR +The canonical combining class value for ``Virama'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_ATBL +The canonical combining class value for ``Attached Below Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_ATB +The canonical combining class value for ``Attached Below'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_ATAR +The canonical combining class value for ``Attached Above Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_BL +The canonical combining class value for ``Below Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_B +The canonical combining class value for ``Below'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_BR +The canonical combining class value for ``Below Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_L +The canonical combining class value for ``Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_R +The canonical combining class value for ``Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_AL +The canonical combining class value for ``Above Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_A +The canonical combining class value for ``Above'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_AR +The canonical combining class value for ``Above Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_DB +The canonical combining class value for ``Double Below'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_DA +The canonical combining class value for ``Double Above'' characters. +@end deftypevr + +@deftypevr Constant int UC_CCC_IS +The canonical combining class value for ``Iota Subscript'' characters. +@end deftypevr + +The following function looks up the canonical combining class of a character. 
+ +@deftypefun int uc_combining_class (ucs4_t @var{uc}) +Returns the canonical combining class of a Unicode character. +@end deftypefun + +@node Bidirectional category +@section Bidirectional category + +@cindex bidirectional category +@cindex Unicode character, bidirectional category +Every Unicode character or code point has a @emph{bidirectional category} +assigned to it. + +The bidirectional category guides the bidirectional algorithm@texnl{} +(@url{http://www.unicode.org/reports/tr9/}). The possible values are +the following. + +@deftypevr Constant int UC_BIDI_L +The bidirectional category for ``Left-to-Right'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_LRE +The bidirectional category for ``Left-to-Right Embedding'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_LRO +The bidirectional category for ``Left-to-Right Override'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_R +The bidirectional category for ``Right-to-Left'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_AL +The bidirectional category for ``Right-to-Left Arabic'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_RLE +The bidirectional category for ``Right-to-Left Embedding'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_RLO +The bidirectional category for ``Right-to-Left Override'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_PDF +The bidirectional category for ``Pop Directional Format'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_EN +The bidirectional category for ``European Number'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_ES +The bidirectional category for ``European Number Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_ET +The bidirectional category for ``European Number Terminator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_AN +The bidirectional category for ``Arabic Number'' characters.
+@end deftypevr + +@deftypevr Constant int UC_BIDI_CS +The bidirectional category for ``Common Number Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_NSM +The bidirectional category for ``Non-Spacing Mark'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_BN +The bidirectional category for ``Boundary Neutral'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_B +The bidirectional category for ``Paragraph Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_S +The bidirectional category for ``Segment Separator'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_WS +The bidirectional category for ``Whitespace'' characters. +@end deftypevr + +@deftypevr Constant int UC_BIDI_ON +The bidirectional category for ``Other Neutral'' characters. +@end deftypevr + +The following functions implement the association between a bidirectional +category and its name. + +@deftypefun {const char *} uc_bidi_category_name (int @var{category}) +Returns the name of a bidirectional category. +@end deftypefun + +@deftypefun int uc_bidi_category_byname (const char *@var{category_name}) +Returns the bidirectional category given by name, e.g@. @code{"LRE"}. +@end deftypefun + +The following functions view bidirectional categories as sets of Unicode +characters. + +@deftypefun int uc_bidi_category (ucs4_t @var{uc}) +Returns the bidirectional category of a Unicode character. +@end deftypefun + +@deftypefun bool uc_is_bidi_category (ucs4_t @var{uc}, int @var{category}) +Tests whether a Unicode character belongs to a given bidirectional category. +@end deftypefun + +@node Decimal digit value +@section Decimal digit value + +@cindex value, of Unicode character +@cindex Unicode character, value +Decimal digits (like the digits from @samp{0} to @samp{9}) exist in many +scripts. The following function converts a decimal digit character to its +numerical value. 
+ +@deftypefun int uc_decimal_value (ucs4_t @var{uc}) +Returns the decimal digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a decimal digit. +@end deftypefun + +@node Digit value +@section Digit value + +@cindex value, of Unicode character +@cindex Unicode character, value +Digit characters are like decimal digit characters, possibly in special forms, +such as superscript, subscript, or circled. The following function converts a +digit character to its numerical value. + +@deftypefun int uc_digit_value (ucs4_t @var{uc}) +Returns the digit value of a Unicode character. +The return value is an integer in the range 0..9, or -1 for characters that +do not represent a digit. +@end deftypefun + +@node Numeric value +@section Numeric value + +@cindex value, of Unicode character +@cindex Unicode character, value +There are also characters that represent numbers without a digit system, like +the Roman numerals, and fractional numbers, like 1/4 or 3/4. + +The following type represents the numeric value of a Unicode character. +@deftp Type uc_fraction_t +This is a structure type with the following fields: +@smallexample +int numerator; +int denominator; +@end smallexample +An integer @var{n} is represented by @code{numerator = @var{n}}, +@code{denominator = 1}. +@end deftp + +The following function converts a number character to its numerical value. + +@deftypefun uc_fraction_t uc_numeric_value (ucs4_t @var{uc}) +Returns the numeric value of a Unicode character. +The return value is a fraction, or the pseudo-fraction @code{@{ 0, 0 @}} for +characters that do not represent a number.
+@end deftypefun + +@node Mirrored character +@section Mirrored character + +@cindex mirroring, of Unicode character +@cindex Unicode character, mirroring +Character mirroring is used to associate the closing parenthesis character +to the opening parenthesis character, the closing brace character with the +opening brace character, and so on. + +The following function looks up the mirrored character of a Unicode character. + +@deftypefun bool uc_mirror_char (ucs4_t @var{uc}, ucs4_t *@var{puc}) +Stores the mirrored character of a Unicode character @var{uc} in +@code{*@var{puc}} and returns @code{true}, if it exists. Otherwise it +stores @var{uc} unmodified in @code{*@var{puc}} and returns @code{false}. +@end deftypefun + +@node Properties +@section Properties + +@cindex properties, of Unicode character +@cindex Unicode character, properties +This section defines boolean properties of Unicode characters. This +means, a character either has the given property or does not have it. +In other words, the property can be viewed as a subset of the set of +Unicode characters. + +The GNU libunistring library provides two kinds of API for working with +properties. The object oriented API uses a type @code{uc_property_t} +to designate a property. In the function-based API, which is a bit more +low level, a property is merely a function. + +@menu +* Properties as objects:: +* Properties as functions:: +@end menu + +@node Properties as objects +@subsection Properties as objects -- the object oriented API + +The following type designates a property on Unicode characters. + +@deftp Type uc_property_t +This data type denotes a boolean property on Unicode characters. It is an +immediate type that can be copied by simple assignment, without involving +memory allocation. It is not an array type. +@end deftp + +Many Unicode properties are predefined. + +The following are general properties. 
+ +@deftypevr Constant uc_property_t UC_PROPERTY_WHITE_SPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_ALPHABETIC +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_ALPHABETIC +@deftypevrx Constant uc_property_t UC_PROPERTY_NOT_A_CHARACTER +@deftypevrx Constant uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT +@deftypevrx Constant uc_property_t UC_PROPERTY_DEPRECATED +@deftypevrx Constant uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION +@deftypevrx Constant uc_property_t UC_PROPERTY_VARIATION_SELECTOR +@deftypevrx Constant uc_property_t UC_PROPERTY_PRIVATE_USE +@deftypevrx Constant uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE +@end deftypevr + +The following properties are related to case folding. + +@deftypevr Constant uc_property_t UC_PROPERTY_UPPERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_UPPERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_LOWERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_LOWERCASE +@deftypevrx Constant uc_property_t UC_PROPERTY_TITLECASE +@deftypevrx Constant uc_property_t UC_PROPERTY_SOFT_DOTTED +@end deftypevr + +The following properties are related to identifiers. + +@deftypevr Constant uc_property_t UC_PROPERTY_ID_START +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_ID_START +@deftypevrx Constant uc_property_t UC_PROPERTY_ID_CONTINUE +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE +@deftypevrx Constant uc_property_t UC_PROPERTY_XID_START +@deftypevrx Constant uc_property_t UC_PROPERTY_XID_CONTINUE +@deftypevrx Constant uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_PATTERN_SYNTAX +@end deftypevr + +The following properties have an influence on shaping and rendering. 
+ +@deftypevr Constant uc_property_t UC_PROPERTY_JOIN_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_GRAPHEME_BASE +@deftypevrx Constant uc_property_t UC_PROPERTY_GRAPHEME_EXTEND +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND +@deftypevrx Constant uc_property_t UC_PROPERTY_GRAPHEME_LINK +@end deftypevr + +The following properties relate to bidirectional reordering. + +@deftypevr Constant uc_property_t UC_PROPERTY_BIDI_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_WHITESPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_PDF +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE +@deftypevrx Constant uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL +@end deftypevr + +The following properties deal with number representations. + +@deftypevr Constant uc_property_t UC_PROPERTY_HEX_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT +@end deftypevr + +The following properties deal with CJK. 
+ +@deftypevr Constant uc_property_t UC_PROPERTY_IDEOGRAPHIC +@deftypevrx Constant uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH +@deftypevrx Constant uc_property_t UC_PROPERTY_RADICAL +@deftypevrx Constant uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR +@end deftypevr + +Other miscellaneous properties are: + +@deftypevr Constant uc_property_t UC_PROPERTY_ZERO_WIDTH +@deftypevrx Constant uc_property_t UC_PROPERTY_SPACE +@deftypevrx Constant uc_property_t UC_PROPERTY_NON_BREAK +@deftypevrx Constant uc_property_t UC_PROPERTY_ISO_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_FORMAT_CONTROL +@deftypevrx Constant uc_property_t UC_PROPERTY_DASH +@deftypevrx Constant uc_property_t UC_PROPERTY_HYPHEN +@deftypevrx Constant uc_property_t UC_PROPERTY_PUNCTUATION +@deftypevrx Constant uc_property_t UC_PROPERTY_LINE_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR +@deftypevrx Constant uc_property_t UC_PROPERTY_QUOTATION_MARK +@deftypevrx Constant uc_property_t UC_PROPERTY_SENTENCE_TERMINAL +@deftypevrx Constant uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION +@deftypevrx Constant uc_property_t UC_PROPERTY_CURRENCY_SYMBOL +@deftypevrx Constant uc_property_t UC_PROPERTY_MATH +@deftypevrx Constant uc_property_t UC_PROPERTY_OTHER_MATH +@deftypevrx Constant uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION +@deftypevrx Constant uc_property_t UC_PROPERTY_LEFT_OF_PAIR +@deftypevrx Constant uc_property_t UC_PROPERTY_COMBINING +@deftypevrx Constant uc_property_t UC_PROPERTY_COMPOSITE +@deftypevrx Constant uc_property_t UC_PROPERTY_DECIMAL_DIGIT +@deftypevrx Constant uc_property_t UC_PROPERTY_NUMERIC +@deftypevrx Constant uc_property_t UC_PROPERTY_DIACRITIC +@deftypevrx Constant uc_property_t UC_PROPERTY_EXTENDER +@deftypevrx Constant uc_property_t UC_PROPERTY_IGNORABLE_CONTROL +@end deftypevr + +The following function looks up a property by its name. 
+ +@deftypefun uc_property_t uc_property_byname (const char *@var{property_name}) +Returns the property given by name, e.g@. @code{"White space"}. If a property +with the given name exists, the result will satisfy the +@code{uc_property_is_valid} predicate. Otherwise the result will not satisfy +this predicate and must not be passed to functions that expect an +@code{uc_property_t} argument. + +This function references a big table of all predefined properties. Its use +can significantly increase the size of your application. +@end deftypefun + +@deftypefun bool uc_property_is_valid (uc_property_t @var{property}) +Returns @code{true} when the given property is valid, or @code{false} +otherwise. +@end deftypefun + +The following function views a property as a set of Unicode characters. + +@deftypefun bool uc_is_property (ucs4_t @var{uc}, uc_property_t @var{property}) +Tests whether the Unicode character @var{uc} has the given property. +@end deftypefun + +@node Properties as functions +@subsection Properties as functions -- the functional API + +The following are general properties. + +@deftypefun bool uc_is_property_white_space (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_alphabetic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_alphabetic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_not_a_character (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_default_ignorable_code_point (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_default_ignorable_code_point (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_deprecated (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_logical_order_exception (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_variation_selector (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_private_use (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_unassigned_code_value (ucs4_t @var{uc}) +@end deftypefun + +The following properties are related to case folding.
+ +@deftypefun bool uc_is_property_uppercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_uppercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_lowercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_lowercase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_titlecase (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_soft_dotted (ucs4_t @var{uc}) +@end deftypefun + +The following properties are related to identifiers. + +@deftypefun bool uc_is_property_id_start (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_id_start (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_id_continue (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_id_continue (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_xid_start (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_xid_continue (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_pattern_white_space (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_pattern_syntax (ucs4_t @var{uc}) +@end deftypefun + +The following properties have an influence on shaping and rendering. + +@deftypefun bool uc_is_property_join_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_grapheme_base (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_grapheme_extend (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_grapheme_extend (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_grapheme_link (ucs4_t @var{uc}) +@end deftypefun + +The following properties relate to bidirectional reordering. 
+ +@deftypefun bool uc_is_property_bidi_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_left_to_right (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_arabic_right_to_left (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_european_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_eur_num_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_eur_num_terminator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_arabic_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_common_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_block_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_segment_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_whitespace (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_non_spacing_mark (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_boundary_neutral (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_pdf (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_embedding_or_override (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_bidi_other_neutral (ucs4_t @var{uc}) +@end deftypefun + +The following properties deal with number representations. + +@deftypefun bool uc_is_property_hex_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ascii_hex_digit (ucs4_t @var{uc}) +@end deftypefun + +The following properties deal with CJK. 
+ +@deftypefun bool uc_is_property_ideographic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_unified_ideograph (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_radical (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ids_binary_operator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ids_trinary_operator (ucs4_t @var{uc}) +@end deftypefun + +Other miscellaneous properties are: + +@deftypefun bool uc_is_property_zero_width (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_space (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_non_break (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_iso_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_format_control (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_dash (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_hyphen (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_punctuation (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_line_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_paragraph_separator (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_quotation_mark (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_sentence_terminal (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_terminal_punctuation (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_currency_symbol (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_math (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_other_math (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_paired_punctuation (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_left_of_pair (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_combining (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_composite (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_decimal_digit (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_numeric (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_diacritic (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_extender (ucs4_t @var{uc}) +@deftypefunx bool uc_is_property_ignorable_control (ucs4_t 
@var{uc}) +@end deftypefun + +@node Scripts +@section Scripts + +@cindex scripts +The Unicode characters are subdivided into scripts. + +The following type is used to represent a script: + +@deftp Type uc_script_t +This data type is a structure type that refers to statically allocated +read-only data. It contains the following fields: +@smallexample +const char *name; +@end smallexample + +The @code{name} field contains the name of the script. +@end deftp + +@cindex Unicode character, script +The following functions look up a script. + +@deftypefun {const uc_script_t *} uc_script (ucs4_t @var{uc}) +Returns the script of a Unicode character. Returns NULL if @var{uc} does not +belong to any script. +@end deftypefun + +@deftypefun {const uc_script_t *} uc_script_byname (const char *@var{script_name}) +Returns the script given by its name, e.g@. @code{"HAN"}. Returns NULL if a +script with the given name does not exist. +@end deftypefun + +The following function views a script as a set of Unicode characters. + +@deftypefun bool uc_is_script (ucs4_t @var{uc}, const uc_script_t *@var{script}) +Tests whether a Unicode character belongs to a given script. +@end deftypefun + +The following gives a global picture of all scripts. + +@deftypefun void uc_all_scripts (const uc_script_t **@var{scripts}, size_t *@var{count}) +Get the list of all scripts. Stores a pointer to an array of all scripts in +@code{*@var{scripts}} and the length of this array in @code{*@var{count}}. +@end deftypefun + +@node Blocks +@section Blocks + +@cindex block +The Unicode characters are subdivided into blocks. A block is an interval of +Unicode code points. + +The following type is used to represent a block. + +@deftp Type uc_block_t +This data type is a structure type that refers to statically allocated data. +It contains the following fields: +@smallexample +ucs4_t start; +ucs4_t end; +const char *name; +@end smallexample + +The @code{start} field is the first Unicode code point in the block. 
+ +The @code{end} field is the last Unicode code point in the block. + +The @code{name} field is the name of the block. +@end deftp + +@cindex Unicode character, block +The following function looks up a block. + +@deftypefun {const uc_block_t *} uc_block (ucs4_t @var{uc}) +Returns the block a character belongs to. +@end deftypefun + +The following function views a block as a set of Unicode characters. + +@deftypefun bool uc_is_block (ucs4_t @var{uc}, const uc_block_t *@var{block}) +Tests whether a Unicode character belongs to a given block. +@end deftypefun + +The following gives a global picture of all blocks. + +@deftypefun void uc_all_blocks (const uc_block_t **@var{blocks}, size_t *@var{count}) +Get the list of all blocks. Stores a pointer to an array of all blocks in +@code{*@var{blocks}} and the length of this array in @code{*@var{count}}. +@end deftypefun + +@node ISO C and Java syntax +@section ISO C and Java syntax + +@cindex C, programming language +@cindex Java, programming language +@cindex identifiers +The following properties are taken from language standards. The supported +language standards are ISO C 99 and Java. + +@deftypefun bool uc_is_c_whitespace (ucs4_t @var{uc}) +Tests whether a Unicode character is considered whitespace in ISO C 99. +@end deftypefun + +@deftypefun bool uc_is_java_whitespace (ucs4_t @var{uc}) +Tests whether a Unicode character is considered whitespace in Java. +@end deftypefun + +The following enumerated values are the possible return values of the functions +@code{uc_c_ident_category} and @code{uc_java_ident_category}. + +@deftypevr Constant int UC_IDENTIFIER_START +This return value means that the given character is valid as first or +subsequent character in an identifier. +@end deftypevr + +@deftypevr Constant int UC_IDENTIFIER_VALID +This return value means that the given character is valid as subsequent +character only.
+@end deftypevr + +@deftypevr Constant int UC_IDENTIFIER_INVALID +This return value means that the given character is not valid in an identifier. +@end deftypevr + +@deftypevr Constant int UC_IDENTIFIER_IGNORABLE +This return value (only for Java) means that the given character is ignorable. +@end deftypevr + +The following functions determine whether a given character can be a constituent +of an identifier in the given programming language. + +@cindex Unicode character, validity in C identifiers +@deftypefun int uc_c_ident_category (ucs4_t @var{uc}) +Returns the categorization of a Unicode character with respect to the ISO C 99 +identifier syntax. +@end deftypefun + +@cindex Unicode character, validity in Java identifiers +@deftypefun int uc_java_ident_category (ucs4_t @var{uc}) +Returns the categorization of a Unicode character with respect to the Java +identifier syntax. +@end deftypefun + +@node Classifications like in ISO C +@section Classifications like in ISO C + +@cindex C-like API +@cindex Unicode character, classification like in C +The following character classifications mimic those declared in the ISO C +header files @code{<ctype.h>} and @code{<wctype.h>}. These functions are +deprecated, because this set of functions was designed with ASCII in mind and +cannot reflect the more diverse reality of the Unicode character set. But +they can be a quick-and-dirty porting aid when migrating from @code{wchar_t} +APIs to Unicode strings. + +@deftypefun bool uc_is_alnum (ucs4_t @var{uc}) +Tests for any character for which @code{uc_is_alpha} or @code{uc_is_digit} is +true. +@end deftypefun + +@deftypefun bool uc_is_alpha (ucs4_t @var{uc}) +Tests for any character for which @code{uc_is_upper} or @code{uc_is_lower} is +true, or any character that is one of a locale-specific set of characters for +which none of @code{uc_is_cntrl}, @code{uc_is_digit}, @code{uc_is_punct}, or +@code{uc_is_space} is true.
+@end deftypefun + +@deftypefun bool uc_is_cntrl (ucs4_t @var{uc}) +Tests for any control character. +@end deftypefun + +@deftypefun bool uc_is_digit (ucs4_t @var{uc}) +Tests for any character that corresponds to a decimal-digit character. +@end deftypefun + +@deftypefun bool uc_is_graph (ucs4_t @var{uc}) +Tests for any character for which @code{uc_is_print} is true and +@code{uc_is_space} is false. +@end deftypefun + +@deftypefun bool uc_is_lower (ucs4_t @var{uc}) +Tests for any character that corresponds to a lowercase letter or is one +of a locale-specific set of characters for which none of @code{uc_is_cntrl}, +@code{uc_is_digit}, @code{uc_is_punct}, or @code{uc_is_space} is true. +@end deftypefun + +@deftypefun bool uc_is_print (ucs4_t @var{uc}) +Tests for any printing character. +@end deftypefun + +@deftypefun bool uc_is_punct (ucs4_t @var{uc}) +Tests for any printing character that is one of a locale-specific set of +characters for which neither @code{uc_is_space} nor @code{uc_is_alnum} is true. +@end deftypefun + +@deftypefun bool uc_is_space (ucs4_t @var{uc}) +Tests for any character that corresponds to a locale-specific set of characters +for which none of @code{uc_is_alnum}, @code{uc_is_graph}, or @code{uc_is_punct} +is true. +@end deftypefun + +@deftypefun bool uc_is_upper (ucs4_t @var{uc}) +Tests for any character that corresponds to an uppercase letter or is one +of a locale-specific set of characters for which none of @code{uc_is_cntrl}, +@code{uc_is_digit}, @code{uc_is_punct}, or @code{uc_is_space} is true. +@end deftypefun + +@deftypefun bool uc_is_xdigit (ucs4_t @var{uc}) +Tests for any character that corresponds to a hexadecimal-digit character. +@end deftypefun + +@deftypefun bool uc_is_blank (ucs4_t @var{uc}) +Tests for any character that corresponds to a standard blank character or +a locale-specific set of characters for which @code{uc_is_alnum} is false.
+@end deftypefun diff --git a/doc/unilbrk.texi b/doc/unilbrk.texi new file mode 100644 index 00000000..5441f317 --- /dev/null +++ b/doc/unilbrk.texi @@ -0,0 +1,88 @@ +@node unilbrk.h +@chapter Line breaking @code{<unilbrk.h>} + +@cindex line breaks +@cindex breaks, line +@cindex wrapping +This include file declares functions for determining where in a string +line breaks could or should be introduced, in order to make the displayed +string fit into a column of given width. + +These functions are locale dependent. The @var{encoding} argument identifies +the encoding (e.g@. @code{"ISO-8859-2"} for Polish). + +The following enumerated values indicate whether, at a given position, a line +break is possible or not. Given a string @var{s} as an array +@code{@var{s}[0..@var{n}-1]} and a position @var{i}, the values have the +following meanings: + +@deftypevr Constant int UC_BREAK_MANDATORY +This value indicates that @code{@var{s}[@var{i}]} is a line break character. +@end deftypevr + +@deftypevr Constant int UC_BREAK_POSSIBLE +This value indicates that a line break may be inserted between +@code{@var{s}[@var{i}-1]} and @code{@var{s}[@var{i}]}. +@end deftypevr + +@deftypevr Constant int UC_BREAK_HYPHENATION +This value indicates that a hyphen and a line break may be inserted between +@code{@var{s}[@var{i}-1]} and @code{@var{s}[@var{i}]}. But beware of language +dependent hyphenation rules. +@end deftypevr + +@deftypevr Constant int UC_BREAK_PROHIBITED +This value indicates that @code{@var{s}[@var{i}-1]} and @code{@var{s}[@var{i}]} +must not be separated. +@end deftypevr + +@deftypevr Constant int UC_BREAK_UNDEFINED +This value is not used as a return value; rather, in the overriding argument of +the @code{u*_width_linebreaks} functions, it indicates the absence of an +override. +@end deftypevr + +The following functions determine the positions at which line breaks are +possible.
+ +@deftypefun void u8_possible_linebreaks (const uint8_t *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +@deftypefunx void u16_possible_linebreaks (const uint16_t *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +@deftypefunx void u32_possible_linebreaks (const uint32_t *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +@deftypefunx void ulc_possible_linebreaks (const char *@var{s}, size_t @var{n}, const char *@var{encoding}, char *@var{p}) +Determines the line break points in @var{s}, and stores the result at +@code{@var{p}[0..@var{n}-1]}. Every @code{@var{p}[@var{i}]} is assigned one of +the values @code{UC_BREAK_MANDATORY}, @code{UC_BREAK_POSSIBLE}, +@code{UC_BREAK_HYPHENATION}, @code{UC_BREAK_PROHIBITED}. +@end deftypefun + +The following functions determine where line breaks should be inserted so that +each line fits in a given width, when output to a device that uses +non-proportional fonts. + +@deftypefun int u8_width_linebreaks (const uint8_t *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +@deftypefunx int u16_width_linebreaks (const uint16_t *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +@deftypefunx int u32_width_linebreaks (const uint32_t *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +@deftypefunx int ulc_width_linebreaks (const char *@var{s}, size_t @var{n}, int @var{width}, int @var{start_column}, int @var{at_end_columns}, const char *@var{override}, const char *@var{encoding}, char *@var{p}) +Chooses the best line breaks, assuming that every character occupies a width +given by the @code{uc_width} function (see @ref{uniwidth.h}). 
+ +The string is @code{@var{s}[0..@var{n}-1]}. + +The maximum number of columns per line is given as @var{width}. +The starting column of the string is given as @var{start_column}. +If the algorithm shall keep room after the last piece, this amount of room can +be given as @var{at_end_columns}. + +@var{override} is an optional override; if +@code{@var{override}[@var{i}] != UC_BREAK_UNDEFINED}, +@code{@var{override}[@var{i}]} takes precedence over @code{@var{p}[@var{i}]} +as returned by the @code{u*_possible_linebreaks} function. + +The given @var{encoding} is used for disambiguating widths in @code{uc_width}. + +Returns the column after the end of the string, and stores the result at +@code{@var{p}[0..@var{n}-1]}. Every @code{@var{p}[@var{i}]} is assigned one of +the values @code{UC_BREAK_MANDATORY}, @code{UC_BREAK_POSSIBLE}, +@code{UC_BREAK_HYPHENATION}, @code{UC_BREAK_PROHIBITED}. Here the value +@code{UC_BREAK_POSSIBLE} indicates that a line break @emph{should} be inserted. +@end deftypefun diff --git a/doc/uniname.texi b/doc/uniname.texi new file mode 100644 index 00000000..66461be5 --- /dev/null +++ b/doc/uniname.texi @@ -0,0 +1,32 @@ +@node uniname.h +@chapter Names of Unicode characters @code{<uniname.h>} + +@cindex Unicode character, name +This include file implements the association between a Unicode character and +its name. + +The name of a Unicode character allows one to distinguish it from other, similar +looking characters. For example, the character @samp{x} has the name +@code{"LATIN SMALL LETTER X"} and is therefore different from the character +named @code{"MULTIPLICATION SIGN"}. + +@deftypevr Macro {unsigned int} UNINAME_MAX +This macro expands to a constant that is the required size of buffer for a +Unicode character name. +@end deftypevr + +@deftypefun {char *} unicode_character_name (ucs4_t @var{uc}, char *@var{buf}) +Looks up the name of a Unicode character, in uppercase ASCII. +@var{buf} must point to a buffer, at least @code{UNINAME_MAX} bytes in size.
+Returns the filled @var{buf}, or NULL if the character does not have a name. +@end deftypefun + +@deftypefun ucs4_t unicode_name_character (const char *@var{name}) +Looks up the Unicode character with a given name, in upper- or lowercase +ASCII. Returns the character if found, or @code{UNINAME_INVALID} if not found. +@end deftypefun + +@deftypevr Macro ucs4_t UNINAME_INVALID +This macro expands to a constant that is a special return value of the +@code{unicode_name_character} function. +@end deftypevr diff --git a/doc/uninorm.texi b/doc/uninorm.texi new file mode 100644 index 00000000..d4206d50 --- /dev/null +++ b/doc/uninorm.texi @@ -0,0 +1,299 @@ +@node uninorm.h +@chapter Normalization forms (composition and decomposition) @code{<uninorm.h>} + +@cindex normal forms +@cindex normalizing +This include file defines functions for transforming Unicode strings to one +of the four normal forms, known as NFC, NFD, NFKC, NFKD. These +transformations involve decomposition and --- for NFC and NFKC --- composition +of Unicode characters. + +@menu +* Decomposition of characters:: +* Composition of characters:: +* Normalization of strings:: +* Normalizing comparisons:: +* Normalization of streams:: +@end menu + +@node Decomposition of characters +@section Decomposition of Unicode characters + +@cindex decomposing +The following enumerated values are the possible types of decomposition of a +Unicode character. + +@deftypevr Constant int UC_DECOMP_CANONICAL +Denotes canonical decomposition. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_FONT +UCD marker: @code{<font>}. Denotes a font variant (e.g@. a blackletter form). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_NOBREAK +UCD marker: @code{<noBreak>}. +Denotes a no-break version of a space or hyphen. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_INITIAL +UCD marker: @code{<initial>}. +Denotes an initial presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_MEDIAL +UCD marker: @code{<medial>}.
+Denotes a medial presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_FINAL +UCD marker: @code{<final>}. +Denotes a final presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_ISOLATED +UCD marker: @code{<isolated>}. +Denotes an isolated presentation form (Arabic). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_CIRCLE +UCD marker: @code{<circle>}. +Denotes an encircled form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SUPER +UCD marker: @code{<super>}. +Denotes a superscript form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SUB +UCD marker: @code{<sub>}. +Denotes a subscript form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_VERTICAL +UCD marker: @code{<vertical>}. +Denotes a vertical layout presentation form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_WIDE +UCD marker: @code{<wide>}. +Denotes a wide (or zenkaku) compatibility character. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_NARROW +UCD marker: @code{<narrow>}. +Denotes a narrow (or hankaku) compatibility character. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SMALL +UCD marker: @code{<small>}. +Denotes a small variant form (CNS compatibility). +@end deftypevr + +@deftypevr Constant int UC_DECOMP_SQUARE +UCD marker: @code{<square>}. +Denotes a CJK squared font variant. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_FRACTION +UCD marker: @code{<fraction>}. +Denotes a vulgar fraction form. +@end deftypevr + +@deftypevr Constant int UC_DECOMP_COMPAT +UCD marker: @code{<compat>}. +Denotes an otherwise unspecified compatibility character. +@end deftypevr + +The following constant denotes the maximum size of decomposition of a single +Unicode character. + +@deftypevr Macro {unsigned int} UC_DECOMPOSITION_MAX_LENGTH +This macro expands to a constant that is the required size of buffer passed to +the @code{uc_decomposition} and @code{uc_canonical_decomposition} functions. +@end deftypevr + +The following functions decompose a Unicode character.
+ +@deftypefun int uc_decomposition (ucs4_t @var{uc}, int *@var{decomp_tag}, ucs4_t *@var{decomposition}) +Returns the character decomposition mapping of the Unicode character @var{uc}. +@var{decomposition} must point to an array of at least +@code{UC_DECOMPOSITION_MAX_LENGTH} @code{ucs4_t} elements. + +When a decomposition exists, @code{@var{decomposition}[0..@var{n}-1]} and +@code{*@var{decomp_tag}} are filled and @var{n} is returned. Otherwise -1 is +returned. +@end deftypefun + +@deftypefun int uc_canonical_decomposition (ucs4_t @var{uc}, ucs4_t *@var{decomposition}) +Returns the canonical character decomposition mapping of the Unicode character +@var{uc}. @var{decomposition} must point to an array of at least +@code{UC_DECOMPOSITION_MAX_LENGTH} @code{ucs4_t} elements. + +When a decomposition exists, @code{@var{decomposition}[0..@var{n}-1]} is filled +and @var{n} is returned. Otherwise -1 is returned. +@end deftypefun + +@node Composition of characters +@section Composition of Unicode characters + +@cindex composing, Unicode characters +@cindex combining, Unicode characters +The following function composes a Unicode character from two Unicode +characters. + +@deftypefun ucs4_t uc_composition (ucs4_t @var{uc1}, ucs4_t @var{uc2}) +Attempts to combine the Unicode characters @var{uc1}, @var{uc2}. +@var{uc1} is known to have canonical combining class 0. + +Returns the combination of @var{uc1} and @var{uc2}, if it exists. +Returns 0 otherwise. + +Not all decompositions can be recombined using this function. See the Unicode +file @file{CompositionExclusions.txt} for details. +@end deftypefun + +@node Normalization of strings +@section Normalization of strings + +The Unicode standard defines four normalization forms for Unicode strings. +The following type is used to denote a normalization form. + +@deftp Type uninorm_t +An object of type @code{uninorm_t} denotes a Unicode normalization form. +This is a scalar type; its values can be compared with @code{==}.
+@end deftp + +The following constants denote the four normalization forms. + +@deftypevr Macro uninorm_t UNINORM_NFD +Denotes Normalization form D: canonical decomposition. +@end deftypevr + +@deftypevr Macro uninorm_t UNINORM_NFC +Normalization form C: canonical decomposition, then canonical composition. +@end deftypevr + +@deftypevr Macro uninorm_t UNINORM_NFKD +Normalization form KD: compatibility decomposition. +@end deftypevr + +@deftypevr Macro uninorm_t UNINORM_NFKC +Normalization form KC: compatibility decomposition, then canonical composition. +@end deftypevr + +The following functions operate on @code{uninorm_t} objects. + +@deftypefun bool uninorm_is_compat_decomposing (uninorm_t @var{nf}) +Tests whether the normalization form @var{nf} does compatibility decomposition. +@end deftypefun + +@deftypefun bool uninorm_is_composing (uninorm_t @var{nf}) +Tests whether the normalization form @var{nf} includes canonical composition. +@end deftypefun + +@deftypefun uninorm_t uninorm_decomposing_form (uninorm_t @var{nf}) +Returns the decomposing variant of the normalization form @var{nf}. +This maps NFC,NFD @arrow{} NFD and NFKC,NFKD @arrow{} NFKD. +@end deftypefun + +The following functions apply a Unicode normalization form to a Unicode string. + +@deftypefun {uint8_t *} u8_normalize (uninorm_t @var{nf}, const uint8_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_normalize (uninorm_t @var{nf}, const uint16_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_normalize (uninorm_t @var{nf}, const uint32_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the specified normalization form of a string. +@end deftypefun + +@node Normalizing comparisons +@section Normalizing comparisons + +@cindex comparing, ignoring normalization +The following functions compare Unicode strings, ignoring differences in +normalization.
+ +@deftypefun int u8_normcmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_normcmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_normcmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in normalization. + +@var{nf} must be either @code{UNINORM_NFD} or @code{UNINORM_NFKD}. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@cindex comparing, ignoring normalization, with collation rules +@cindex comparing, with collation rules, ignoring normalization +@deftypefun {char *} u8_normxfrm (const uint8_t *@var{s}, size_t @var{n}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_normxfrm (const uint16_t *@var{s}, size_t @var{n}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_normxfrm (const uint32_t *@var{s}, size_t @var{n}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts the string @var{s} of length @var{n} to a NUL-terminated byte +sequence, in such a way that comparing @code{u8_normxfrm (@var{s1})} and +@code{u8_normxfrm (@var{s2})} with the @code{u8_cmp2} function is equivalent to +comparing @var{s1} and @var{s2} with the @code{u8_normcoll} function. + +@var{nf} must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. 
+@end deftypefun + +@deftypefun int u8_normcoll (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_normcoll (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_normcoll (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in normalization, using +the collation rules of the current locale. + +@var{nf} must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@node Normalization of streams +@section Normalization of streams of Unicode characters + +@cindex stream, normalizing a +A ``stream of Unicode characters'' is essentially a function that accepts an +@code{ucs4_t} argument repeatedly, optionally combined with a function that +``flushes'' the stream. + +@deftp Type {struct uninorm_filter} +This is the data type of a stream of Unicode characters that normalizes its +input according to a given normalization form and passes the normalized +character sequence to the encapsulated stream of Unicode characters. +@end deftp + +@deftypefun {struct uninorm_filter *} uninorm_filter_create (uninorm_t @var{nf}, int (*@var{stream_func}) (void *@var{stream_data}, ucs4_t @var{uc}), void *@var{stream_data}) +Creates and returns a normalization filter for Unicode characters. + +The pair (@var{stream_func}, @var{stream_data}) is the encapsulated stream. +@code{@var{stream_func} (@var{stream_data}, @var{uc})} receives the Unicode +character @var{uc} and returns 0 if successful, or -1 with @code{errno} set +upon failure. 
+ +Returns the new filter, or NULL with @code{errno} set upon failure. +@end deftypefun + +@deftypefun int uninorm_filter_write (struct uninorm_filter *@var{filter}, ucs4_t @var{uc}) +Stuffs a Unicode character into a normalizing filter. +Returns 0 if successful, or -1 with @code{errno} set upon failure. +@end deftypefun + +@deftypefun int uninorm_filter_flush (struct uninorm_filter *@var{filter}) +Brings data buffered in the filter to its destination, the encapsulated stream. + +Returns 0 if successful, or -1 with @code{errno} set upon failure. + +Note! If after calling this function, additional characters are written +into the filter, the resulting character sequence in the encapsulated stream +will not necessarily be normalized. +@end deftypefun + +@deftypefun int uninorm_filter_free (struct uninorm_filter *@var{filter}) +Brings data buffered in the filter to its destination, the encapsulated stream, +then closes and frees the filter. + +Returns 0 if successful, or -1 with @code{errno} set upon failure. +@end deftypefun diff --git a/doc/uniregex.texi b/doc/uniregex.texi new file mode 100644 index 00000000..ae290ffa --- /dev/null +++ b/doc/uniregex.texi @@ -0,0 +1,5 @@ +@node uniregex.h +@chapter Regular expressions @code{<uniregex.h>} + +@cindex regular expression +This include file is not yet implemented. diff --git a/doc/unistdio.texi b/doc/unistdio.texi new file mode 100644 index 00000000..e1fb9cfa --- /dev/null +++ b/doc/unistdio.texi @@ -0,0 +1,197 @@ +@node unistdio.h +@chapter Output with Unicode strings @code{<unistdio.h>} + +@cindex formatted output +@cindex output, formatted +This include file declares functions for doing formatted output with Unicode +strings. It defines a set of functions similar to @code{fprintf} and +@code{sprintf}, which are declared in @code{<stdio.h>}. + +These functions work like the @code{printf} function family. +In the format string: +@itemize +@item +The format directive @samp{U} takes an UTF-8 string (@code{const uint8_t *}).
+@item +The format directive @samp{lU} takes an UTF-16 string +(@code{const uint16_t *}). +@item +The format directive @samp{llU} takes an UTF-32 string +(@code{const uint32_t *}). +@end itemize + +A function name with an infix @samp{v} indicates that a @code{va_list} is +passed instead of multiple arguments. + +The functions @code{*sprintf} have a @var{buf} argument that is assumed to be +large enough. +(@emph{DANGEROUS! Overflowing the buffer will crash the program.}) + +The functions @code{*snprintf} have a @var{buf} argument that is assumed to be +@var{size} units large. (@emph{DANGEROUS! The resulting string might be +truncated in the middle of a multibyte character.}) + +The functions @code{*asprintf} have a @var{resultp} argument. The result will +be freshly allocated and stored in @code{*@var{resultp}}. + +The functions @code{*asnprintf} have a (@var{resultbuf}, @var{lengthp}) +argument pair. If @var{resultbuf} is not NULL and the result fits into +@code{*@var{lengthp}} units, it is put in @var{resultbuf}, and @var{resultbuf} +is returned. Otherwise, a freshly allocated string is returned. In both +cases, @code{*@var{lengthp}} is set to the length (number of units) of the +returned string. In case of error, NULL is returned and @code{errno} is set. + +The following functions take an ASCII format string and return a result that +is a @code{char *} string in locale encoding. + +@deftypefun int ulc_sprintf (char *@var{buf}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun int ulc_snprintf (char *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun int ulc_asprintf (char **@var{resultp}, const char *@var{format}, ...) +@end deftypefun + +@deftypefun {char *} ulc_asnprintf (char *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...)
+@end deftypefun + +@deftypefun int ulc_vsprintf (char *@var{buf}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +@deftypefun int ulc_vsnprintf (char *@var{buf}, size_t size, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +@deftypefun int ulc_vasprintf (char **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +@deftypefun {char *} ulc_vasnprintf (char *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and return a result in +UTF-8 format. + +@deftypefun int u8_sprintf (uint8_t *@var{buf}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u8_snprintf (uint8_t *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u8_asprintf (uint8_t **@var{resultp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun {uint8_t *} u8_asnprintf (uint8_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u8_vsprintf (uint8_t *@var{buf}, const char *@var{format}, va_list ap) +@end deftypefun +@deftypefun int u8_vsnprintf (uint8_t *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u8_vasprintf (uint8_t **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint8_t *} u8_vasnprintf (uint8_t *resultbuf, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an UTF-8 format string and return a result in +UTF-8 format. + +@deftypefun int u8_u8_sprintf (uint8_t *@var{buf}, const uint8_t *@var{format}, ...) +@end deftypefun +@deftypefun int u8_u8_snprintf (uint8_t *@var{buf}, size_t @var{size}, const uint8_t *@var{format}, ...) +@end deftypefun +@deftypefun int u8_u8_asprintf (uint8_t **@var{resultp}, const uint8_t *@var{format}, ...) 
+@end deftypefun +@deftypefun {uint8_t *} u8_u8_asnprintf (uint8_t *resultbuf, size_t *@var{lengthp}, const uint8_t *@var{format}, ...) +@end deftypefun +@deftypefun int u8_u8_vsprintf (uint8_t *@var{buf}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u8_u8_vsnprintf (uint8_t *@var{buf}, size_t @var{size}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u8_u8_vasprintf (uint8_t **@var{resultp}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint8_t *} u8_u8_vasnprintf (uint8_t *resultbuf, size_t *@var{lengthp}, const uint8_t *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and return a result in +UTF-16 format. + +@deftypefun int u16_sprintf (uint16_t *@var{buf}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u16_snprintf (uint16_t *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u16_asprintf (uint16_t **@var{resultp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun {uint16_t *} u16_asnprintf (uint16_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u16_vsprintf (uint16_t *@var{buf}, const char *@var{format}, va_list ap) +@end deftypefun +@deftypefun int u16_vsnprintf (uint16_t *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u16_vasprintf (uint16_t **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint16_t *} u16_vasnprintf (uint16_t *resultbuf, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an UTF-16 format string and return a result in +UTF-16 format. + +@deftypefun int u16_u16_sprintf (uint16_t *@var{buf}, const uint16_t *@var{format}, ...) 
+@end deftypefun +@deftypefun int u16_u16_snprintf (uint16_t *@var{buf}, size_t @var{size}, const uint16_t *@var{format}, ...) +@end deftypefun +@deftypefun int u16_u16_asprintf (uint16_t **@var{resultp}, const uint16_t *@var{format}, ...) +@end deftypefun +@deftypefun {uint16_t *} u16_u16_asnprintf (uint16_t *@var{resultbuf}, size_t *@var{lengthp}, const uint16_t *@var{format}, ...) +@end deftypefun +@deftypefun int u16_u16_vsprintf (uint16_t *@var{buf}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u16_u16_vsnprintf (uint16_t *@var{buf}, size_t @var{size}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u16_u16_vasprintf (uint16_t **@var{resultp}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint16_t *} u16_u16_vasnprintf (uint16_t *@var{resultbuf}, size_t *@var{lengthp}, const uint16_t *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and return a result in +UTF-32 format. + +@deftypefun int u32_sprintf (uint32_t *@var{buf}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u32_snprintf (uint32_t *@var{buf}, size_t @var{size}, const char *@var{format}, ...) +@end deftypefun +@deftypefun int u32_asprintf (uint32_t **@var{resultp}, const char *@var{format}, ...) +@end deftypefun +@deftypefun {uint32_t *} u32_asnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, ...)
+@end deftypefun +@deftypefun int u32_vsprintf (uint32_t *@var{buf}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_vsnprintf (uint32_t *@var{buf}, size_t @var{size}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_vasprintf (uint32_t **@var{resultp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint32_t *} u32_vasnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const char *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an UTF-32 format string and return a result in +UTF-32 format. + +@deftypefun int u32_u32_sprintf (uint32_t *@var{buf}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun int u32_u32_snprintf (uint32_t *@var{buf}, size_t @var{size}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun int u32_u32_asprintf (uint32_t **@var{resultp}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun {uint32_t *} u32_u32_asnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const uint32_t *@var{format}, ...) +@end deftypefun +@deftypefun int u32_u32_vsprintf (uint32_t *@var{buf}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_u32_vsnprintf (uint32_t *@var{buf}, size_t @var{size}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun int u32_u32_vasprintf (uint32_t **@var{resultp}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun +@deftypefun {uint32_t *} u32_u32_vasnprintf (uint32_t *@var{resultbuf}, size_t *@var{lengthp}, const uint32_t *@var{format}, va_list @var{ap}) +@end deftypefun + +The following functions take an ASCII format string and produce output in +locale encoding to a @code{FILE} stream. + +@deftypefun int ulc_fprintf (FILE *@var{stream}, const char *@var{format}, ...)
+@end deftypefun +@deftypefun int ulc_vfprintf (FILE *@var{stream}, const char *@var{format}, va_list @var{ap}) +@end deftypefun diff --git a/doc/unistr.texi b/doc/unistr.texi new file mode 100644 index 00000000..9c6261b2 --- /dev/null +++ b/doc/unistr.texi @@ -0,0 +1,493 @@ +@node unistr.h +@chapter Elementary Unicode string functions @code{<unistr.h>} + +This include file declares elementary functions for Unicode strings. It is +essentially the equivalent of what @code{<string.h>} is for C strings. + +@menu +* Elementary string checks:: +* Elementary string conversions:: +* Elementary string functions:: +* Elementary string functions with memory allocation:: +* Elementary string functions on NUL terminated strings:: +@end menu + +@node Elementary string checks +@section Elementary string checks + +@cindex validity +@cindex verification +The following function is available to verify the integrity of a Unicode string. + +@deftypefun {const uint8_t *} u8_check (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx {const uint16_t *} u16_check (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx {const uint32_t *} u32_check (const uint32_t *@var{s}, size_t @var{n}) +This function checks whether a Unicode string is well-formed. +It returns NULL if valid, or a pointer to the first invalid unit otherwise. +@end deftypefun + +@node Elementary string conversions +@section Elementary string conversions + +@cindex converting +The following functions perform conversions between the different forms of Unicode strings. + +@deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-8 string to an UTF-16 string. +@end deftypefun + +@deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-8 string to an UTF-32 string.
+@end deftypefun + +@deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-16 string to an UTF-8 string. +@end deftypefun + +@deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-16 string to an UTF-32 string. +@end deftypefun + +@deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-32 string to an UTF-8 string. +@end deftypefun + +@deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an UTF-32 string to an UTF-16 string. +@end deftypefun + +@node Elementary string functions +@section Elementary string functions + +@cindex iterating +The following functions inspect and return details about the first character +in a Unicode string. + +@deftypefun int u8_mblen (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mblen (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mblen (const uint32_t *@var{s}, size_t @var{n}) +Returns the length (number of units) of the first character in @var{s}, which +is no longer than @var{n}. Returns 0 if it is the NUL character. Returns -1 +upon failure. + +This function is similar to @posixfunc{mblen}, except that it operates on a +Unicode string and that @var{s} must not be NULL. +@end deftypefun + +@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +Returns the length (number of units) of the first character in @var{s}, +putting its @code{ucs4_t} representation in @code{*@var{puc}}. 
Upon failure, +@code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units +is returned. + +The number of available units, @var{n}, must be > 0. + +This function is similar to @posixfunc{mbtowc}, except that it operates on a +Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0, +and the NUL character is not treated specially. +@end deftypefun + +@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +This function is like @code{u8_mbtouc_unsafe}, except that it will detect an +invalid UTF-8 character, even if the library is compiled without +@option{--enable-safety}. +@end deftypefun + +@deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx int u16_mbtoucr (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx int u32_mbtoucr (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) +Returns the length (number of units) of the first character in @var{s}, +putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, +@code{*@var{puc}} is set to @code{0xfffd}, and -1 is returned for an invalid +sequence of units, -2 is returned for an incomplete sequence of units. + +The number of available units, @var{n}, must be > 0. + +This function is similar to @code{u8_mbtouc}, except that the return value +gives more details about the failure, similar to @posixfunc{mbrtowc}. +@end deftypefun + +The following function stores a Unicode character as a Unicode string in +memory. 
+ +@deftypefun int u8_uctomb (uint8_t *@var{s}, ucs4_t @var{uc}, int @var{n}) +@deftypefunx int u16_uctomb (uint16_t *@var{s}, ucs4_t @var{uc}, int @var{n}) +@deftypefunx int u32_uctomb (uint32_t *@var{s}, ucs4_t @var{uc}, int @var{n}) +Puts the multibyte character represented by @var{uc} in @var{s}, returning its +length. Returns -1 upon failure, -2 if the number of available units, @var{n}, +is too small. The latter case cannot occur if @var{n} >= 6/2/1, respectively. + +This function is similar to @posixfunc{wctomb}, except that it operates on +Unicode strings, @var{s} must not be NULL, and the argument @var{n} must be +specified. +@end deftypefun + +@cindex copying +The following functions copy Unicode strings in memory. + +@deftypefun {uint8_t *} u8_cpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_cpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_cpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies @var{n} units from @var{src} to @var{dest}. + +This function is similar to @posixfunc{memcpy}, except that it operates on +Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_move (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_move (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_move (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies @var{n} units from @var{src} to @var{dest}, guaranteeing correct +behavior for overlapping memory areas. + +This function is similar to @posixfunc{memmove}, except that it operates on +Unicode strings. +@end deftypefun + +The following function fills a Unicode string.
+ +@deftypefun {uint8_t *} u8_set (uint8_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_set (uint16_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_set (uint32_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) +Sets the first @var{n} characters of @var{s} to @var{uc}. @var{uc} should be +a character that occupies only 1 unit. + +This function is similar to @posixfunc{memset}, except that it operates on +Unicode strings. +@end deftypefun + +@cindex comparing +The following function compares two Unicode strings of the same length. + +@deftypefun int u8_cmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) +@deftypefunx int u16_cmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) +@deftypefunx int u32_cmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) +Compares @var{s1} and @var{s2}, each of length @var{n}, lexicographically. +Returns a negative value if @var{s1} compares smaller than @var{s2}, +a positive value if @var{s1} compares larger than @var{s2}, or 0 if +they compare equal. + +This function is similar to @posixfunc{memcmp}, except that it operates on +Unicode strings. +@end deftypefun + +The following function compares two Unicode strings of possibly different +lengths. + +@deftypefun int u8_cmp2 (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}) +@deftypefunx int u16_cmp2 (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}) +@deftypefunx int u32_cmp2 (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}) +Compares @var{s1} and @var{s2}, lexicographically. +Returns a negative value if @var{s1} compares smaller than @var{s2}, +a positive value if @var{s1} compares larger than @var{s2}, or 0 if +they compare equal. + +This function is similar to the gnulib function @func{memcmp2}, except that it +operates on Unicode strings. 
+@end deftypefun + +@cindex searching, for a character +The following function searches for a given Unicode character. + +@deftypefun {uint8_t *} u8_chr (const uint8_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) +@deftypefunx {uint16_t *} u16_chr (const uint16_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) +@deftypefunx {uint32_t *} u32_chr (const uint32_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) +Searches the string at @var{s} for @var{uc}. Returns a pointer to the first +occurrence of @var{uc} in @var{s}, or NULL if @var{uc} does not occur in +@var{s}. + +This function is similar to @posixfunc{memchr}, except that it operates on +Unicode strings. +@end deftypefun + +@cindex counting +The following function counts the number of Unicode characters. + +@deftypefun size_t u8_mbsnlen (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx size_t u16_mbsnlen (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx size_t u32_mbsnlen (const uint32_t *@var{s}, size_t @var{n}) +Counts and returns the number of Unicode characters in the @var{n} units +from @var{s}. + +This function is similar to the gnulib function @func{mbsnlen}, except that +it operates on Unicode strings. +@end deftypefun + +@node Elementary string functions with memory allocation +@section Elementary string functions with memory allocation + +@cindex duplicating +The following function copies a Unicode string. + +@deftypefun {uint8_t *} u8_cpy_alloc (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_cpy_alloc (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_cpy_alloc (const uint32_t *@var{s}, size_t @var{n}) +Makes a freshly allocated copy of @var{s}, of length @var{n}. +@end deftypefun + +@node Elementary string functions on NUL terminated strings +@section Elementary string functions on NUL terminated strings + +The following functions inspect and return details about the first character +in a Unicode string. 
+ +@deftypefun int u8_strmblen (const uint8_t *@var{s}) +@deftypefunx int u16_strmblen (const uint16_t *@var{s}) +@deftypefunx int u32_strmblen (const uint32_t *@var{s}) +Returns the length (number of units) of the first character in @var{s}. +Returns 0 if it is the NUL character. Returns -1 upon failure. +@end deftypefun + +@cindex iterating +@deftypefun int u8_strmbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}) +@deftypefunx int u16_strmbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}) +@deftypefunx int u32_strmbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}) +Returns the length (number of units) of the first character in @var{s}, +putting its @code{ucs4_t} representation in @code{*@var{puc}}. Returns 0 +if it is the NUL character. Returns -1 upon failure. +@end deftypefun + +@deftypefun {const uint8_t *} u8_next (ucs4_t *@var{puc}, const uint8_t *@var{s}) +@deftypefunx {const uint16_t *} u16_next (ucs4_t *@var{puc}, const uint16_t *@var{s}) +@deftypefunx {const uint32_t *} u32_next (ucs4_t *@var{puc}, const uint32_t *@var{s}) +Forward iteration step. Advances the pointer past the next character, +or returns NULL if the end of the string has been reached. Puts the +character's @code{ucs4_t} representation in @code{*@var{puc}}. +@end deftypefun + +The following function inspects and returns details about the previous +character in a Unicode string. + +@deftypefun {const uint8_t *} u8_prev (ucs4_t *@var{puc}, const uint8_t *@var{s}, const uint8_t *@var{start}) +@deftypefunx {const uint16_t *} u16_prev (ucs4_t *@var{puc}, const uint16_t *@var{s}, const uint16_t *@var{start}) +@deftypefunx {const uint32_t *} u32_prev (ucs4_t *@var{puc}, const uint32_t *@var{s}, const uint32_t *@var{start}) +Backward iteration step. Advances the pointer to point to the previous +character, or returns NULL if the beginning of the string has been reached. +Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}.
+@end deftypefun + +The following functions determine the length of a Unicode string. + +@deftypefun size_t u8_strlen (const uint8_t *@var{s}) +@deftypefunx size_t u16_strlen (const uint16_t *@var{s}) +@deftypefunx size_t u32_strlen (const uint32_t *@var{s}) +Returns the number of units in @var{s}. + +This function is similar to @posixfunc{strlen} and @posixfunc{wcslen}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun size_t u8_strnlen (const uint8_t *@var{s}, size_t @var{maxlen}) +@deftypefunx size_t u16_strnlen (const uint16_t *@var{s}, size_t @var{maxlen}) +@deftypefunx size_t u32_strnlen (const uint32_t *@var{s}, size_t @var{maxlen}) +Returns the number of units in @var{s}, but at most @var{maxlen}. + +This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex copying +The following functions copy portions of Unicode strings in memory. + +@deftypefun {uint8_t *} u8_strcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) +@deftypefunx {uint16_t *} u16_strcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) +@deftypefunx {uint32_t *} u32_strcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) +Copies @var{src} to @var{dest}. + +This function is similar to @posixfunc{strcpy} and @posixfunc{wcscpy}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_stpcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) +@deftypefunx {uint16_t *} u16_stpcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) +@deftypefunx {uint32_t *} u32_stpcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) +Copies @var{src} to @var{dest}, returning the address of the terminating NUL +in @var{dest}. + +This function is similar to @posixfunc{stpcpy}, except that it operates on +Unicode strings. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_strncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_strncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_strncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies no more than @var{n} units of @var{src} to @var{dest}. + +This function is similar to @posixfunc{strncpy} and @posixfunc{wcsncpy}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_stpncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_stpncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_stpncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Copies no more than @var{n} units of @var{src} to @var{dest}, returning the +address of the last unit written into @var{dest}. + +This function is similar to @posixfunc{stpncpy}, except that it operates on +Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strcat (uint8_t *@var{dest}, const uint8_t *@var{src}) +@deftypefunx {uint16_t *} u16_strcat (uint16_t *@var{dest}, const uint16_t *@var{src}) +@deftypefunx {uint32_t *} u32_strcat (uint32_t *@var{dest}, const uint32_t *@var{src}) +Appends @var{src} onto @var{dest}. + +This function is similar to @posixfunc{strcat} and @posixfunc{wcscat}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strncat (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) +@deftypefunx {uint16_t *} u16_strncat (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) +@deftypefunx {uint32_t *} u32_strncat (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) +Appends no more than @var{n} units of @var{src} onto @var{dest}. 
+ +This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex comparing +The following functions compare two Unicode strings. + +@deftypefun int u8_strcmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}) +@deftypefunx int u16_strcmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}) +@deftypefunx int u32_strcmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}) +Compares @var{s1} and @var{s2}, lexicographically. +Returns a negative value if @var{s1} compares smaller than @var{s2}, +a positive value if @var{s1} compares larger than @var{s2}, or 0 if +they compare equal. + +This function is similar to @posixfunc{strcmp} and @posixfunc{wcscmp}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex comparing, with collation rules +@deftypefun int u8_strcoll (const uint8_t *@var{s1}, const uint8_t *@var{s2}) +@deftypefunx int u16_strcoll (const uint16_t *@var{s1}, const uint16_t *@var{s2}) +@deftypefunx int u32_strcoll (const uint32_t *@var{s1}, const uint32_t *@var{s2}) +Compares @var{s1} and @var{s2} using the collation rules of the current +locale. +Returns -1 if @var{s1} < @var{s2}, 0 if @var{s1} = @var{s2}, 1 if +@var{s1} > @var{s2}. Upon failure, sets @code{errno} and returns any value. + +This function is similar to @posixfunc{strcoll} and @posixfunc{wcscoll}, except +that it operates on Unicode strings. + +Note that this function may consider different canonical normalizations +of the same string as having a large distance. It is therefore better to +use the function @code{u8_normcoll} instead of this one; see @ref{uninorm.h}. 
+@end deftypefun + +@deftypefun int u8_strncmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) +@deftypefunx int u16_strncmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) +@deftypefunx int u32_strncmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) +Compares no more than @var{n} units of @var{s1} and @var{s2}. + +This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex duplicating +The following function allocates a duplicate of a Unicode string. + +@deftypefun {uint8_t *} u8_strdup (const uint8_t *@var{s}) +@deftypefunx {uint16_t *} u16_strdup (const uint16_t *@var{s}) +@deftypefunx {uint32_t *} u32_strdup (const uint32_t *@var{s}) +Duplicates @var{s}, returning an identical malloc'd string. + +This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex searching, for a character +The following functions search for a given Unicode character. + +@deftypefun {uint8_t *} u8_strchr (const uint8_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint16_t *} u16_strchr (const uint16_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint32_t *} u32_strchr (const uint32_t *@var{str}, ucs4_t @var{uc}) +Finds the first occurrence of @var{uc} in @var{str}. + +This function is similar to @posixfunc{strchr} and @posixfunc{wcschr}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strrchr (const uint8_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint16_t *} u16_strrchr (const uint16_t *@var{str}, ucs4_t @var{uc}) +@deftypefunx {uint32_t *} u32_strrchr (const uint32_t *@var{str}, ucs4_t @var{uc}) +Finds the last occurrence of @var{uc} in @var{str}. + +This function is similar to @posixfunc{strrchr} and @posixfunc{wcsrchr}, except +that it operates on Unicode strings. 
+@end deftypefun + +The following functions search for the first occurrence of some Unicode +character in or outside a given set of Unicode characters. + +@deftypefun size_t u8_strcspn (const uint8_t *@var{str}, const uint8_t *@var{reject}) +@deftypefunx size_t u16_strcspn (const uint16_t *@var{str}, const uint16_t *@var{reject}) +@deftypefunx size_t u32_strcspn (const uint32_t *@var{str}, const uint32_t *@var{reject}) +Returns the length of the initial segment of @var{str} which consists entirely +of Unicode characters not in @var{reject}. + +This function is similar to @posixfunc{strcspn} and @posixfunc{wcscspn}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun size_t u8_strspn (const uint8_t *@var{str}, const uint8_t *@var{accept}) +@deftypefunx size_t u16_strspn (const uint16_t *@var{str}, const uint16_t *@var{accept}) +@deftypefunx size_t u32_strspn (const uint32_t *@var{str}, const uint32_t *@var{accept}) +Returns the length of the initial segment of @var{str} which consists entirely +of Unicode characters in @var{accept}. + +This function is similar to @posixfunc{strspn} and @posixfunc{wcsspn}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun {uint8_t *} u8_strpbrk (const uint8_t *@var{str}, const uint8_t *@var{accept}) +@deftypefunx {uint16_t *} u16_strpbrk (const uint16_t *@var{str}, const uint16_t *@var{accept}) +@deftypefunx {uint32_t *} u32_strpbrk (const uint32_t *@var{str}, const uint32_t *@var{accept}) +Finds the first occurrence in @var{str} of any character in @var{accept}. + +This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except +that it operates on Unicode strings. +@end deftypefun + +@cindex searching, for a substring +The following functions search whether a given Unicode string is a substring +of another Unicode string. 
+ +@deftypefun {uint8_t *} u8_strstr (const uint8_t *@var{haystack}, const uint8_t *@var{needle}) +@deftypefunx {uint16_t *} u16_strstr (const uint16_t *@var{haystack}, const uint16_t *@var{needle}) +@deftypefunx {uint32_t *} u32_strstr (const uint32_t *@var{haystack}, const uint32_t *@var{needle}) +Finds the first occurrence of @var{needle} in @var{haystack}. + +This function is similar to @posixfunc{strstr} and @posixfunc{wcsstr}, except +that it operates on Unicode strings. +@end deftypefun + +@deftypefun bool u8_startswith (const uint8_t *@var{str}, const uint8_t *@var{prefix}) +@deftypefunx bool u16_startswith (const uint16_t *@var{str}, const uint16_t *@var{prefix}) +@deftypefunx bool u32_startswith (const uint32_t *@var{str}, const uint32_t *@var{prefix}) +Tests whether @var{str} starts with @var{prefix}. +@end deftypefun + +@deftypefun bool u8_endswith (const uint8_t *@var{str}, const uint8_t *@var{suffix}) +@deftypefunx bool u16_endswith (const uint16_t *@var{str}, const uint16_t *@var{suffix}) +@deftypefunx bool u32_endswith (const uint32_t *@var{str}, const uint32_t *@var{suffix}) +Tests whether @var{str} ends with @var{suffix}. +@end deftypefun + +The following function does one step in tokenizing a Unicode string. + +@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr}) +@deftypefunx {uint16_t *} u16_strtok (uint16_t *@var{str}, const uint16_t *@var{delim}, uint16_t **@var{ptr}) +@deftypefunx {uint32_t *} u32_strtok (uint32_t *@var{str}, const uint32_t *@var{delim}, uint32_t **@var{ptr}) +Divides @var{str} into tokens separated by characters in @var{delim}. + +This function is similar to @posixfunc{strtok_r} and @posixfunc{wcstok}, except +that it operates on Unicode strings. Its interface is actually more similar to +@code{wcstok} than to @code{strtok}. 
+@end deftypefun diff --git a/doc/unitypes.texi b/doc/unitypes.texi new file mode 100644 index 00000000..696ba881 --- /dev/null +++ b/doc/unitypes.texi @@ -0,0 +1,15 @@ +@node unitypes.h +@chapter Elementary types @code{<unitypes.h>} + +The include file @code{<unitypes.h>} provides the following basic types. + +@deftp Type uint8_t +@deftpx Type uint16_t +@deftpx Type uint32_t +These are the storage units of UTF-8/16/32 strings, respectively. The definitions are +taken from @code{<stdint.h>}, on platforms where this include file is present. +@end deftp + +@deftp Type ucs4_t +This type represents a single Unicode character, outside of an UTF-32 string. +@end deftp diff --git a/doc/uniwbrk.texi b/doc/uniwbrk.texi new file mode 100644 index 00000000..6f06b926 --- /dev/null +++ b/doc/uniwbrk.texi @@ -0,0 +1,71 @@ +@node uniwbrk.h +@chapter Word breaks in strings @code{<uniwbrk.h>} + +@cindex word breaks +@cindex breaks, word +This include file declares functions for determining where in a string +``words'' start and end. Here ``words'' are not necessarily the same as +entities that can be looked up in dictionaries, but rather groups of +consecutive characters that should not be split by text processing +operations. + +@menu +* Word breaks in a string:: +* Word break property:: +@end menu + +@node Word breaks in a string +@section Word breaks in a string + +The following functions determine the word breaks in a string. + +@deftypefun void u8_wordbreaks (const uint8_t *@var{s}, size_t @var{n}, char *@var{p}) +@deftypefunx void u16_wordbreaks (const uint16_t *@var{s}, size_t @var{n}, char *@var{p}) +@deftypefunx void u32_wordbreaks (const uint32_t *@var{s}, size_t @var{n}, char *@var{p}) +@deftypefunx void ulc_wordbreaks (const char *@var{s}, size_t @var{n}, char *@var{p}) +Determines the word break points in @var{s}, an array of @var{n} units, and +stores the result at @code{@var{p}[0..@var{n}-1]}.
+@table @asis +@item @code{@var{p}[i] = 1} +means that there is a word boundary between @code{@var{s}[i-1]} and +@code{@var{s}[i]}. +@item @code{@var{p}[i] = 0} +means that @code{@var{s}[i-1]} and @code{@var{s}[i]} must not be separated. +@end table +@code{@var{p}[0]} is always set to 0. If an application wants to consider a +word break to be present at the beginning of the string (before +@code{@var{s}[0]}) or at the end of the string (after +@code{@var{s}[0..@var{n}-1]}), it has to treat these cases explicitly. +@end deftypefun + +@node Word break property +@section Word break property + +This is a more low-level API. The word break property is a property defined +in Unicode Standard Annex #29, section ``Word Boundaries'', see +@url{http://www.unicode.org/reports/tr29/#Word_Boundaries}.@texnl{} It is +used for determining the word breaks in a string. + +The following are the possible values of the word break property. More values +may be added in the future. + +@deftypevr Constant int WBP_OTHER +@deftypevrx Constant int WBP_CR +@deftypevrx Constant int WBP_LF +@deftypevrx Constant int WBP_NEWLINE +@deftypevrx Constant int WBP_EXTEND +@deftypevrx Constant int WBP_FORMAT +@deftypevrx Constant int WBP_KATAKANA +@deftypevrx Constant int WBP_ALETTER +@deftypevrx Constant int WBP_MIDNUMLET +@deftypevrx Constant int WBP_MIDLETTER +@deftypevrx Constant int WBP_MIDNUM +@deftypevrx Constant int WBP_NUMERIC +@deftypevrx Constant int WBP_EXTENDNUMLET +@end deftypevr + +The following function looks up the word break property of a character. + +@deftypefun int uc_wordbreak_property (ucs4_t @var{uc}) +Returns the Word_Break property of a Unicode character. 
+@end deftypefun diff --git a/doc/uniwidth.texi b/doc/uniwidth.texi new file mode 100644 index 00000000..a05d101d --- /dev/null +++ b/doc/uniwidth.texi @@ -0,0 +1,43 @@ +@node uniwidth.h +@chapter Display width @code{<uniwidth.h>} + +@cindex width +This include file declares functions that return the display width, measured +in columns, of characters or strings, when output to a device that uses +non-proportional fonts. + +@cindex ambiguous width +Note that for some rarely used characters the actual fonts or terminal +emulators can use a different width. There is no mechanism for communicating +the display width of characters across a Unix pseudo-terminal (tty). Also, +there are scripts with complex rendering, like the Indic scripts. For these +scripts, there is no such concept as non-proportional fonts. Therefore +the results of these functions usually work fine on most scripts and on +most characters but can fail to represent the actual display width. + +These functions are locale dependent. The @var{encoding} argument identifies +the encoding (e.g@. @code{"ISO-8859-2"} for Polish). + +@cindex Unicode character, width +@cindex halfwidth +@cindex fullwidth +@deftypefun int uc_width (ucs4_t @var{uc}, const char *@var{encoding}) +Determines and returns the number of column positions required for @var{uc}. +Returns -1 if @var{uc} is a control character that has an influence on the +column position when output. +@end deftypefun + +@deftypefun int u8_width (const uint8_t *@var{s}, size_t @var{n}, const char *@var{encoding}) +@deftypefunx int u16_width (const uint16_t *@var{s}, size_t @var{n}, const char *@var{encoding}) +@deftypefunx int u32_width (const uint32_t *@var{s}, size_t @var{n}, const char *@var{encoding}) +Determines and returns the number of column positions required for the first +@var{n} units (or fewer if @var{s} ends before this) in @var{s}. This +function ignores control characters in the string.
+@end deftypefun + +@deftypefun int u8_strwidth (const uint8_t *@var{s}, const char *@var{encoding}) +@deftypefunx int u16_strwidth (const uint16_t *@var{s}, const char *@var{encoding}) +@deftypefunx int u32_strwidth (const uint32_t *@var{s}, const char *@var{encoding}) +Determines and returns the number of column positions required for @var{s}. +This function ignores control characters in the string. +@end deftypefun diff --git a/doc/version.texi b/doc/version.texi new file mode 100644 index 00000000..ee4ba906 --- /dev/null +++ b/doc/version.texi @@ -0,0 +1,4 @@ +@set UPDATED 29 June 2009 +@set UPDATED-MONTH June 2009 +@set EDITION 0.9.1 +@set VERSION 0.9.1 -- cgit v1.2.3