diff options
author | Andreas Rottmann <a.rottmann@gmx.at> | 2009-09-14 12:32:44 +0200 |
---|---|---|
committer | Andreas Rottmann <a.rottmann@gmx.at> | 2009-09-14 12:32:44 +0200 |
commit | fa095a4504cbe668e4244547e2c141597bea4ecf (patch) | |
tree | 06135820a286ffec47804e75fbf8a147e92acd2e /doc/uniconv.texi |
Imported Upstream version 0.9.1upstream/0.9.1
Diffstat (limited to 'doc/uniconv.texi')
-rw-r--r-- | doc/uniconv.texi | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/doc/uniconv.texi b/doc/uniconv.texi new file mode 100644 index 00000000..07cfa1be --- /dev/null +++ b/doc/uniconv.texi @@ -0,0 +1,157 @@ +@node uniconv.h +@chapter Conversions between Unicode and encodings @code{<uniconv.h>} + +This include file declares functions for converting between Unicode strings +and @code{char *} strings in locale encoding or in other specified encodings. + +@cindex locale encoding +The following function returns the locale encoding. + +@deftypefun {const char *} locale_charset () +Determines the current locale's character encoding, and canonicalizes it +into one of the canonical names listed in @file{config.charset}. +If the canonical name cannot be determined, the result is a non-canonical +name. + +The result must not be freed; it is statically allocated. + +The result of this function can be used as an argument to the @code{iconv_open} +function in GNU libc, in GNU libiconv, or in the gnulib provided wrapper +around the native @code{iconv_open} function. It may not work as an argument +to the native @code{iconv_open} function directly. +@end deftypefun + +The handling of unconvertible characters during the conversions can be +parametrized through the following enumeration type: + +@deftp Type {enum iconv_ilseq_handler} +This type specifies how unconvertible characters in the input are handled. +@end deftp + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_error +This handler causes the function to return with @code{errno} set to +@code{EILSEQ}. +@end deftypevr + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_question_mark +This handler produces one question mark @samp{?} per unconvertible character. +@end deftypevr + +@deftypevr Constant {enum iconv_ilseq_handler} iconveh_escape_sequence +This handler produces an escape sequence @code{\u@var{xxxx}} or +@code{\U@var{xxxxxxxx}} for each unconvertible character. +@end deftypevr + +@cindex converting +The following functions convert between strings in a specified encoding and +Unicode strings. + +@deftypefun {uint8_t *} u8_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_conv_from_encoding (const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}, const char *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Converts an entire string, possibly including NUL bytes, from one encoding +to UTF-8 encoding. + +Converts a memory region given in encoding @var{fromcode}. @var{fromcode} is +as for the @code{iconv_open} function. + +The input is in the memory region between @var{src} (inclusive) and +@code{@var{src} + @var{srclen}} (exclusive). + +If @var{offsets} is not NULL, it should point to an array of @var{srclen} +integers; this array is filled with offsets into the result, i.e@. the +character starting at @code{@var{src}[i]} corresponds to the character starting +at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to +@code{(size_t)(-1)}. + +@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch +buffer and its size, or @code{@var{resultbuf}} can be NULL. + +May erase the contents of the memory at @code{@var{resultbuf}}. + +If successful: The resulting Unicode string (non-NULL) is returned and +its length stored in @code{*@var{lengthp}}. The resulting string is +@code{@var{resultbuf}} if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. + +In case of error: NULL is returned and @code{errno} is set. +Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint8_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint16_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_conv_to_encoding (const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}, const uint32_t *@var{src}, size_t @var{srclen}, size_t *@var{offsets}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts an entire Unicode string, possibly including NUL units, from UTF-8 +encoding to a given encoding. + +Converts a memory region to encoding @var{tocode}. @var{tocode} is as for +the @code{iconv_open} function. + +The input is in the memory region between @var{src} (inclusive) and +@code{@var{src} + @var{srclen}} (exclusive). + +If @var{offsets} is not NULL, it should point to an array of @var{srclen} +integers; this array is filled with offsets into the result, i.e@. the +character starting at @code{@var{src}[i]} corresponds to the character starting +at @code{@var{result}[@var{offsets}[i]]}, and other offsets are set to +@code{(size_t)(-1)}. + +@code{@var{resultbuf}} and @code{*@var{lengthp}} should be a scratch +buffer and its size, or @code{@var{resultbuf}} can be NULL. + +May erase the contents of the memory at @code{@var{resultbuf}}. + +If successful: The resulting Unicode string (non-NULL) is returned and +its length stored in @code{*@var{lengthp}}. The resulting string is +@code{@var{resultbuf}} if no dynamic memory allocation was necessary, +or a freshly allocated memory block otherwise. + +In case of error: NULL is returned and @code{errno} is set. +Particular @code{errno} values: @code{EINVAL}, @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +The following functions convert between NUL terminated strings in a specified +encoding and NUL terminated Unicode strings. + +@deftypefun {uint8_t *} u8_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {uint16_t *} u16_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {uint32_t *} u32_strconv_from_encoding (const char *@var{string}, const char *@var{fromcode}, enum iconv_ilseq_handler @var{handler}) +Converts a NUL terminated string from a given encoding. + +The result is @code{malloc} allocated, or NULL (with @var{errno} set) in case of error. + +Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_strconv_to_encoding (const uint8_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {char *} u16_strconv_to_encoding (const uint16_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +@deftypefunx {char *} u32_strconv_to_encoding (const uint32_t *@var{string}, const char *@var{tocode}, enum iconv_ilseq_handler @var{handler}) +Converts a NUL terminated string to a given encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{EILSEQ}, @code{ENOMEM}. +@end deftypefun + +The following functions are shorthands that convert between NUL terminated +strings in locale encoding and NUL terminated Unicode strings. + +@deftypefun {uint8_t *} u8_strconv_from_locale (const char *@var{string}) +@deftypefunx {uint16_t *} u16_strconv_from_locale (const char *@var{string}) +@deftypefunx {uint32_t *} u32_strconv_from_locale (const char *@var{string}) +Converts a NUL terminated string from the locale encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{ENOMEM}. +@end deftypefun + +@deftypefun {char *} u8_strconv_to_locale (const uint8_t *@var{string}) +@deftypefunx {char *} u16_strconv_to_locale (const uint16_t *@var{string}) +@deftypefunx {char *} u32_strconv_to_locale (const uint32_t *@var{string}) +Converts a NUL terminated string to the locale encoding. + +The result is @code{malloc} allocated, or NULL (with @code{errno} set) in case of error. + +Particular @code{errno} values: @code{ENOMEM}. +@end deftypefun |