diff options
| author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2020-08-08 11:53:00 +0200 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2020-08-08 11:53:00 +0200 | 
| commit | b623f5953691b2a0614e6f1f4def86bdbb9a4113 (patch) | |
| tree | 18102bd36f7e22eb2ba2b9f880e4cb29346f4cb8 /app/wlib/mswlib/utf8conv.c | |
| parent | 359b557176b9bb2ff1aed2082641eed39c358d0d (diff) | |
New upstream version 5.2.0Beta2.1upstream/5.2.0Beta2.1
Diffstat (limited to 'app/wlib/mswlib/utf8conv.c')
| -rw-r--r-- | app/wlib/mswlib/utf8conv.c | 210 | 
1 files changed, 210 insertions, 0 deletions
diff --git a/app/wlib/mswlib/utf8conv.c b/app/wlib/mswlib/utf8conv.c new file mode 100644 index 0000000..62ada76 --- /dev/null +++ b/app/wlib/mswlib/utf8conv.c @@ -0,0 +1,210 @@ +/** + * \file utf8conv.c. + * + * UTF-8 conversion functions + */ + +/*  XTrkCad - Model Railroad CAD + *  Copyright (C) 2020 Martin Fischer + * + *  This program is free software; you can redistribute it and/or modify + *  it under the terms of the GNU General Public License as published by + *  the Free Software Foundation; either version 2 of the License, or + *  (at your option) any later version. + * + *  This program is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + *  GNU General Public License for more details. + * + *  You should have received a copy of the GNU General Public License + *  along with this program; if not, write to the Free Software + *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <malloc.h> +#include <stdbool.h> +#include <string.h> + +#include <Windows.h> + +#include <wlib.h> + +/** + * Convert system codepage to UTF 8 + * + * \param 		   inString		   The input string. + * \param [in,out] outString	   The output string buffer. + * \param 		   outStringLength Length of the output buffer + * + * \returns FALSE if it fails. 
+ */ + +bool +wSystemToUTF8(const char *inString, char *outString, unsigned outStringLength) +{ +    unsigned int cnt = 2 * (strlen(inString) + 1); +    char *tempBuffer = malloc(cnt); + +    // convert to wide character (UTF16) +    MultiByteToWideChar(CP_ACP, +                        0, +                        inString, +                        -1, +                        (LPWSTR)tempBuffer, +                        cnt); + +    // convert from wide char to UTF-8 +    WideCharToMultiByte(CP_UTF8, +                        0, +                        (LPCWCH)tempBuffer, +                        -1, +                        (LPSTR)outString, +                        outStringLength, +                        NULL, +                        NULL); + +    free(tempBuffer); +    return true; +} + +/** + * Convert from UTF-8 to system codepage + * + * \param 		   inString		   The input string. + * \param [in,out] outString	   the output string. + * \param 		   outStringLength Length of the output buffer. + * + * \returns True if it succeeds, false if it fails. 
+ */ + +bool +wUTF8ToSystem(const char *inString, char *outString, unsigned outStringLength) +{ +    unsigned int cnt = 2 * (strlen(inString) + 1); +    char *tempBuffer = malloc(cnt); + +    // convert to wide character (UTF16) +    MultiByteToWideChar(CP_UTF8, +                        0, +                        inString, +                        -1, +                        (LPWSTR)tempBuffer, +                        cnt); + + +    cnt = WideCharToMultiByte(CP_ACP, +                              0, +                              (LPCWCH)tempBuffer, +                              -1, +                              (LPSTR)outString, +                              0L, +                              NULL, +                              NULL); + +    if (outStringLength <= cnt) { +        return (false); +    } + +    // convert from wide char to system codepage +    WideCharToMultiByte(CP_ACP, +                        0, +                        (LPCWCH)tempBuffer, +                        -1, +                        (LPSTR)outString, +                        outStringLength, +                        NULL, +                        NULL); + +    free(tempBuffer); +    return true; +} + +/** + * Is passed string in correct UTF-8 format? + * Taken from https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c + * + * \param  string The string to check. + * + * \returns True if UTF 8, false if not. 
/**
 * Is passed string in correct UTF-8 format?
 * Based on https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
 *
 * Accepted are TAB, LF, CR, the printable ASCII characters 0x20..0x7E and
 * well-formed 2, 3 and 4 byte sequences. Overlong encodings, UTF-16
 * surrogates, code points above U+10FFFF, truncated sequences, DEL and all
 * other ASCII control characters are rejected.
 *
 * \param  string The NUL-terminated string to check, may be NULL.
 *
 * \returns True if UTF 8, false if not or if string is NULL.
 */

/* Is value inside the closed interval [low, high]? */
static bool
utf8ByteInRange(unsigned char value, unsigned char low, unsigned char high)
{
    return value >= low && value <= high;
}

bool wIsUTF8(const char * string)
{
    const unsigned char *cursor = (const unsigned char *)string;

    if (string == NULL) {
        return false;
    }

    while (*cursor != '\0') {
        const unsigned char lead = cursor[0];
        // allowed range of the first trail byte, narrowed below for the
        // lead bytes that would otherwise permit invalid code points
        unsigned char secondLow = 0x80;
        unsigned char secondHigh = 0xBF;
        int trailCount;

        // ASCII: only TAB, LF, CR and the printable characters are accepted
        if (lead == 0x09 || lead == 0x0A || lead == 0x0D ||
                utf8ByteInRange(lead, 0x20, 0x7E)) {
            cursor += 1;
            continue;
        }

        if (utf8ByteInRange(lead, 0xC2, 0xDF)) {
            // non-overlong 2-byte
            trailCount = 1;
        } else if (utf8ByteInRange(lead, 0xE0, 0xEF)) {
            trailCount = 2;

            if (lead == 0xE0) {
                // excluding overlongs
                secondLow = 0xA0;
            } else if (lead == 0xED) {
                // excluding surrogates
                secondHigh = 0x9F;
            }
        } else if (utf8ByteInRange(lead, 0xF0, 0xF4)) {
            trailCount = 3;

            if (lead == 0xF0) {
                // planes 1-3, excluding overlongs
                secondLow = 0x90;
            } else if (lead == 0xF4) {
                // plane 16, nothing above U+10FFFF
                secondHigh = 0x8F;
            }
        } else {
            // control character, DEL, stray trail byte or invalid lead byte
            return false;
        }

        // Every trail byte is checked before the next one is read. A NUL
        // fails its range check, so nothing behind the terminator is read.
        if (!utf8ByteInRange(cursor[1], secondLow, secondHigh)) {
            return false;
        }

        for (int i = 2; i <= trailCount; i++) {
            if (!utf8ByteInRange(cursor[i], 0x80, 0xBF)) {
                return false;
            }
        }

        cursor += trailCount + 1;
    }

    return true;
}
\ No newline at end of file  | 
