#include <stdio.h>
#include <string.h>

#include <unicode/brkiter.h>
#include <unicode/locid.h>
#include <unicode/unistr.h>
int main( void ) {
const char* iInput;
scanf("Enter the input string: %s", &iInput);
BreakIterator* boundary->setText(iInput);
int32_t iStartOffset = boundary->first();
int32_t iEndOffset = boundary->next(); ;
int32_t iStrLength = strlen(iInput);
printf("iStartOffset: %d, iEndOffset: %d, iStrLength: %d", iStartOffset, iEndOffset, iStrLength);
return 0;
}
Calling setText() as shown above gives me offsets counted in Unicode characters. Is there an API in the BreakIterator class that returns the token offsets in bytes instead?