I am trying to implement cross-platform, locale-aware sorting with the ICU library, but the result doesn't match POSIX locale-aware sorting.
For example, here is a simple C program using ICU:
#include <stdio.h>
#include <unicode/ustring.h>
#include <unicode/ucol.h>
/*
 * Compare "a" and "A" with an ICU collator opened for each locale in the
 * list (with "@collation=posix" appended to the locale ID) and print the
 * two strings in the order the collator puts them, one line per locale.
 *
 * Returns 0 when every step succeeded, 1 if a string conversion failed or
 * any collator could not be created.
 */
int main(void) {
    /*
     * Keep the UTF-8 originals around for output. The UChar buffers hold
     * UTF-16 code units and must not be handed to printf("%s"), which
     * expects a NUL-terminated char string.
     */
    const char *lower = "a";
    const char *upper = "A";
    UChar u_a[2], u_A[2];
    UErrorCode status = U_ZERO_ERROR;
    int exit_code = 0;

    u_strFromUTF8(u_a, 2, NULL, lower, -1, &status);
    if (U_FAILURE(status)) {
        printf("failure: %s\n", u_errorName(status));
        return 1; /* u_a is unusable; comparing it would read garbage */
    }
    u_strFromUTF8(u_A, 2, NULL, upper, -1, &status);
    if (U_FAILURE(status)) {
        printf("failure: %s\n", u_errorName(status));
        return 1;
    }

    const char *localeList[] = {"en_US", "en_CA"};
    size_t localeListLen = sizeof(localeList) / sizeof(localeList[0]);

    for (size_t i = 0; i < localeListLen; i++) {
        char localeId[64];
        int written = snprintf(localeId, sizeof localeId,
                               "%s@collation=posix", localeList[i]);
        if (written < 0 || (size_t)written >= sizeof localeId) {
            printf("failure: locale id too long for %s\n", localeList[i]);
            exit_code = 1;
            continue;
        }

        /*
         * Reset the status for every locale: ICU functions return
         * immediately when the incoming status already signals a failure,
         * so one bad locale would otherwise poison all later ones.
         */
        status = U_ZERO_ERROR;
        UCollator *collator = ucol_open(localeId, &status);
        if (U_FAILURE(status)) {
            printf("failure: %s\n", u_errorName(status));
            exit_code = 1;
            continue;
        }

        UCollationResult result = ucol_strcoll(collator, u_a, -1, u_A, -1);
        printf("%s: ", localeList[i]);
        if (result == UCOL_LESS) {
            printf("%s %s\n", lower, upper);
        } else {
            /* UCOL_EQUAL and UCOL_GREATER both print "A" first. */
            printf("%s %s\n", upper, lower);
        }
        ucol_close(collator);
    }

    return exit_code;
}
This produces:
> gcc `pkg-config --libs --cflags icu-uc icu-io` -o ucol_tst ucol_tst.c && ./ucol_tst
en_US: a A
en_CA: a A
And here is a simple Python script that uses POSIX locale collation:
#!/usr/bin/env python3
import locale


def main():
    """Print 'a' and 'A' sorted by locale.strxfrm under each locale in turn."""
    letters = ('a', 'A')
    for name in ('en_US', 'en_CA'):
        # setlocale changes the process-wide locale that strxfrm consults.
        locale.setlocale(locale.LC_ALL, name)
        ordered = sorted(letters, key=locale.strxfrm)
        print("{}: {}".format(name, ordered))


if __name__ == '__main__':
    main()
This produces:
> python3 col_tst.py
en_US: ['a', 'A']
en_CA: ['A', 'a']
Why don't the sorting results for the en_CA locale match between ICU and POSIX? I suspect I am using the ICU library incorrectly; can anyone show the correct usage?