Struct UTF8Decoder
UTF-8 decoder for use with dlib.text.encodings.transcode.
Fields
Name | Type | Description |
character
|
int | Current character index
|
index
|
ulong | Current index in the input string
|
input
|
string | Input string. Set it before decoding
|
Methods
Name | Description |
decode
()
|
Range interface.
|
decodeNext
()
|
Decode next character.
|
eos
()
|
Check if the decoder is at the end of the input.
|
Example
{
// Well-formed input: each decodeNext() call is expected to yield one code point,
// regardless of how many bytes that code point occupies in the UTF-8 string.
auto decoder = UTF8Decoder("Eng 日本語 Кир ©€\xF0\x90\x8D\x88");
// ASCII characters.
assert(decoder.decodeNext() == 'E');
assert(decoder.decodeNext() == 'n');
assert(decoder.decodeNext() == 'g');
assert(decoder.decodeNext() == ' ');
// CJK characters.
assert(decoder.decodeNext() == '日');
assert(decoder.decodeNext() == '本');
assert(decoder.decodeNext() == '語');
assert(decoder.decodeNext() == ' ');
// Cyrillic characters.
assert(decoder.decodeNext() == 'К');
assert(decoder.decodeNext() == 'и');
assert(decoder.decodeNext() == 'р');
assert(decoder.decodeNext() == ' ');
// Symbols.
assert(decoder.decodeNext() == '©');
assert(decoder.decodeNext() == '€');
// The escaped bytes F0 90 8D 88 form the four-byte UTF-8 encoding of U+10348.
assert(decoder.decodeNext() == 0x10348);
// Once the input is exhausted, decodeNext() returns UTF8_END and eos() is true.
assert(decoder.decodeNext() == UTF8_END);
// NOTE(review): get() is not listed in the Methods table above; presumably it
// returns the most recently decoded value — confirm against the struct source.
assert(decoder.get() == UTF8_END);
assert(decoder.eos());
}
{
// Malformed input: slicing off the final byte leaves the last character ('語')
// with an incomplete byte sequence.
auto decoder = UTF8Decoder("日本語"[0..$-1]);
// The two intact characters still decode normally.
assert(decoder.decodeNext() == '日');
assert(decoder.decodeNext() == '本');
// The truncated trailing sequence is reported as UTF8_ERROR.
assert(decoder.decodeNext() == UTF8_ERROR);
}