diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2020-08-22 14:05:41 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2020-08-22 14:05:41 +0200 |
commit | b55285a77da0e0b829e4ce8d7e09debaabc68e15 (patch) | |
tree | f622559ef65bbdd3e1c5bdb06098a8f89eec0563 /app/wlib/mswlib/utf8conv.c | |
parent | d3897ce090dbeb220ed2c782f095597e417cf3cc (diff) | |
parent | d1ae75703e1ed81d65ea16946dcdb77e7a13adc9 (diff) |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'app/wlib/mswlib/utf8conv.c')
-rw-r--r-- | app/wlib/mswlib/utf8conv.c | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/app/wlib/mswlib/utf8conv.c b/app/wlib/mswlib/utf8conv.c new file mode 100644 index 0000000..62ada76 --- /dev/null +++ b/app/wlib/mswlib/utf8conv.c @@ -0,0 +1,210 @@ +/** + * \file utf8conv.c. + * + * UTF-8 conversion functions + */ + +/* XTrkCad - Model Railroad CAD + * Copyright (C) 2020 Martin Fischer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <malloc.h> +#include <stdbool.h> +#include <string.h> + +#include <Windows.h> + +#include <wlib.h> + +/** + * Convert system codepage to UTF 8 + * + * \param inString The input string. + * \param [in,out] outString The output string buffer. + * \param outStringLength Length of the output buffer + * + * \returns FALSE if it fails. + */ + +bool +wSystemToUTF8(const char *inString, char *outString, unsigned outStringLength) +{ + unsigned int cnt = 2 * (strlen(inString) + 1); + char *tempBuffer = malloc(cnt); + + // convert to wide character (UTF16) + MultiByteToWideChar(CP_ACP, + 0, + inString, + -1, + (LPWSTR)tempBuffer, + cnt); + + // convert from wide char to UTF-8 + WideCharToMultiByte(CP_UTF8, + 0, + (LPCWCH)tempBuffer, + -1, + (LPSTR)outString, + outStringLength, + NULL, + NULL); + + free(tempBuffer); + return true; +} + +/** + * Convert from UTF-8 to system codepage + * + * \param inString The input string. 
+ * \param [in,out] outString the output string. + * \param outStringLength Length of the output buffer. + * + * \returns True if it succeeds, false if it fails. + */ + +bool +wUTF8ToSystem(const char *inString, char *outString, unsigned outStringLength) +{ + unsigned int cnt = 2 * (strlen(inString) + 1); + char *tempBuffer = malloc(cnt); + + // convert to wide character (UTF16) + MultiByteToWideChar(CP_UTF8, + 0, + inString, + -1, + (LPWSTR)tempBuffer, + cnt); + + + cnt = WideCharToMultiByte(CP_ACP, + 0, + (LPCWCH)tempBuffer, + -1, + (LPSTR)outString, + 0L, + NULL, + NULL); + + if (outStringLength <= cnt) { + return (false); + } + + // convert from wide char to system codepage + WideCharToMultiByte(CP_ACP, + 0, + (LPCWCH)tempBuffer, + -1, + (LPSTR)outString, + outStringLength, + NULL, + NULL); + + free(tempBuffer); + return true; +} + +/** + * Is passed string in correct UTF-8 format? + * Taken from https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c + * + * \param string The string to check. + * + * \returns True if UTF 8, false if not. 
/**
 * Check whether a NUL-terminated string is well-formed UTF-8.
 * Based on https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
 *
 * Of the single-byte range only TAB, LF, CR and the printable characters
 * 0x20..0x7E are accepted. Overlong encodings, UTF-16 surrogates and
 * code points above U+10FFFF are rejected.
 *
 * \param string The string to check, may be NULL.
 *
 * \returns True if UTF 8, false if not or if string is NULL.
 */

bool wIsUTF8(const char * string)
{
	const unsigned char *cursor = (const unsigned char *)string;

	if (string == NULL) {
		return false;
	}

	while (*cursor != 0) {
		const unsigned char lead = cursor[0];
		unsigned char secondMin = 0x80;	// allowed range of the 2nd byte
		unsigned char secondMax = 0xBF;
		int trailing;			// number of bytes after the lead byte
		int i;

		// ASCII subset: TAB, LF, CR and printable characters
		if (lead == 0x09 || lead == 0x0A || lead == 0x0D ||
		    (lead >= 0x20 && lead <= 0x7E)) {
			cursor++;
			continue;
		}

		if (lead >= 0xC2 && lead <= 0xDF) {
			// non-overlong 2-byte sequence
			trailing = 1;
		} else if (lead == 0xE0) {
			// 3-byte sequence, excluding overlongs
			trailing = 2;
			secondMin = 0xA0;
		} else if (lead == 0xED) {
			// 3-byte sequence, excluding surrogates
			trailing = 2;
			secondMax = 0x9F;
		} else if (lead >= 0xE1 && lead <= 0xEF) {
			// straight 3-byte sequence (0xED was handled above)
			trailing = 2;
		} else if (lead == 0xF0) {
			// planes 1-3
			trailing = 3;
			secondMin = 0x90;
		} else if (lead >= 0xF1 && lead <= 0xF3) {
			// planes 4-15
			trailing = 3;
		} else if (lead == 0xF4) {
			// plane 16
			trailing = 3;
			secondMax = 0x8F;
		} else {
			// stray continuation byte or invalid lead byte
			return false;
		}

		if (cursor[1] < secondMin || cursor[1] > secondMax) {
			return false;
		}

		// each byte is only inspected after its predecessor was found
		// to be non-zero, so the terminator is never passed
		for (i = 2; i <= trailing; i++) {
			if (cursor[i] < 0x80 || cursor[i] > 0xBF) {
				return false;
			}
		}

		cursor += trailing + 1;
	}

	return true;
}
\ No newline at end of file |