My attempts seem hacky and overly convoluted. Is there a simple way to convert ASCII to UTF16 on Windows and macOS?
(note that I can't change the `prUTF16Char` type itself.)
Attempt (written via https://stackoverflow.com/a/54376330)
Prelude
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if defined(__APPLE__) && defined(__MACH__)
#include <xcselect.h>
#include <wchar.h>
#include <CoreFoundation/CoreFoundation.h>
typedef unsigned short int prUTF16Char;
#else
typedef wchar_t prUTF16Char;
#endif
#define WIDEN2(x) L ## x
#define WIDEN(x) WIDEN2(x)
#define PROJECT_NAME "foo"
Functions
/// Convert a wide-character string into the platform's prUTF16Char
/// representation, writing the result plus a null terminator into
/// `destination`.
///
/// @param inputString  null-terminated wide string to convert.
/// @param destination  caller-owned buffer with room for at least
///                     wcslen(inputString) + 1 prUTF16Char elements.
void copy2ConvertStringLiteralIntoUTF16(const wchar_t* inputString, prUTF16Char* destination) {
    const size_t length = wcslen(inputString);
#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
    // On Windows wchar_t is already UTF-16, so a bounded copy suffices.
    // NOTE: the original gated this on PLUGIN_MODE as well, which left
    // `destination` uninitialized in non-plugin Windows builds.
    wcscpy_s(destination, length + 1, inputString);
#elif defined(__APPLE__) && defined(__MACH__)
    // On macOS wchar_t is 4-byte, so transcode via CoreFoundation.
    // NOTE(review): assumes little-endian UTF-32 in wchar_t — confirm on
    // the target architectures.
    CFStringRef inputStringCFSR = CFStringCreateWithBytes(
        kCFAllocatorDefault, reinterpret_cast<const UInt8 *>(inputString),
        static_cast<CFIndex>(length * sizeof(wchar_t)),
        kCFStringEncodingUTF32LE, false);
    if (inputStringCFSR != NULL) {
        // Range covers exactly the converted characters (no magic "150").
        const CFRange range = {0, static_cast<CFIndex>(length)};
        CFStringGetBytes(inputStringCFSR, range, kCFStringEncodingUTF16, 0, false,
                         reinterpret_cast<UInt8 *>(destination),
                         static_cast<CFIndex>(length * sizeof(prUTF16Char)), NULL);
        CFRelease(inputStringCFSR);
    }
    destination[length] = 0; // Always null-terminate, even on conversion failure
#endif
}
/// Convert an ASCII/multibyte C string to a newly allocated prUTF16Char
/// string. The caller owns the returned buffer and must delete[] it.
///
/// @param message  null-terminated input string (assumed ASCII-safe).
/// @return heap-allocated, null-terminated prUTF16Char string.
const prUTF16Char * to_wchar(const char* message) {
    const size_t length = strlen(message);      // excludes the terminator
    wchar_t *wide = new wchar_t[length + 1];    // fix: +1 for L'\0' (original overflowed)
#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
    size_t converted = 0;
    // Convert up to `length` characters; mbstowcs_s appends the terminator.
    // (Original passed cSize-1, silently dropping the last character.)
    mbstowcs_s(&converted, wide, length + 1, message, length);
    return wide;
#else
    mbstowcs(wide, message, length + 1);
    wide[length] = L'\0';                       // guarantee termination
#endif
#if defined(__APPLE__) && defined(__MACH__)
    prUTF16Char *utf16 = new prUTF16Char[length + 1]; // +1 for terminator
    copy2ConvertStringLiteralIntoUTF16(wide, utf16);
    delete[] wide;                              // fix: intermediate buffer was leaked
    return utf16;
#else
    return wide;
#endif
}
Then I can just define a global var:
// Global UTF-16 copy of the project name.
// On macOS the initializer is a runtime function call, which is not a
// compile-time constant — that is what triggers the
// "initializer element is not a compile-time constant" error quoted below
// when this file is built as C.
static const prUTF16Char* PROJECT_NAME_W =
#if defined(__APPLE__) && defined(__MACH__)
to_wchar
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
WIDEN
#endif
(PROJECT_NAME);
And the body of a generic print function taking `message`:
// Pick the printf variant matching this platform's wchar_t width.
#if WCHAR_UTF16
// wchar_t is UTF-16 here: wide printf with a wide format string.
// NOTE(review): this branch omits the trailing '\n' that the #else
// branch prints — confirm whether that difference is intended.
wprintf(L"%s",
#else
// wchar_t is UTF-32 here: narrow printf, %ls handles the wide argument.
printf("%ls\n",
#endif
message);
Full attempt:
https://github.com/SamuelMarks/premiere-pro-cmake-plugin/blob/f0d2278/src/common/logger.cpp [rewriting from C++ to C]
Error:
error: initializer element is not a compile-time constant
EDIT: Super hacky, but with @barmak-shemirani's solution I can:
// Workaround: on macOS, declare PROJECT_NAME_W as `extern` (defined and
// initialised at runtime elsewhere); on Windows, define it `static` with a
// compile-time wide literal via WIDEN. This sidesteps the
// "not a compile-time constant" error on the macOS path.
#if defined(__APPLE__) && defined(__MACH__)
extern
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
static
#endif
const prUTF16Char* PROJECT_NAME_W
#if defined(__APPLE__) && defined(__MACH__)
; // declaration only — initialised (and freed) at runtime
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
WIDEN(PROJECT_NAME); // compile-time constant wide literal
#endif
…and only initialise and free it on the `extern` (macOS) variant.
`message` includes the null terminating character. `strlen` does not count this last character, so `cSize` has to increase by 1.

Usually you need to call `setlocale` if, for example, `message` was typed on a non-English computer. But it's okay if `message` is guaranteed to be ASCII.

Windows example:
Note that `wchar_t` is 2 bytes on Windows and 4 bytes on POSIX. UTF-16 also comes in two variants, little-endian and big-endian. UTF-16 uses 2 bytes per character for ASCII-equivalent codes, but 4 bytes for some non-Latin scripts.

You should consider UTF-8 output instead. Most Windows programs are prepared to read UTF-8 from a file or the network.
Windows byte output for `"123"`:

Linux byte output from the above code (this won't be recognized as UTF-16 by Windows):
You can write your own function if you are 100% certain that the message is ASCII