Struct UTF8Decoder

UTF-8 decoder to use with dlib.text.encodings.transcode

struct UTF8Decoder ;

Fields

Name | Type | Description
--- | --- | ---
character | int | Current character index
index | ulong | Current index in the input string
input | string | Input string. Set it before decoding.

Methods

Name | Description
--- | ---
decode() | Range interface.
decodeNext() | Decode the next character.
eos() | Check whether the decoder has reached the end of the input.

Example

{
    auto decoder = UTF8Decoder("Eng 日本語 Кир ©€\xF0\x90\x8D\x88");
    assert(decoder.decodeNext() == 'E');
    assert(decoder.decodeNext() == 'n');
    assert(decoder.decodeNext() == 'g');
    assert(decoder.decodeNext() == ' ');
    assert(decoder.decodeNext() == '日');
    assert(decoder.decodeNext() == '本');
    assert(decoder.decodeNext() == '語');
    assert(decoder.decodeNext() == ' ');
    assert(decoder.decodeNext() == 'К');
    assert(decoder.decodeNext() == 'и');
    assert(decoder.decodeNext() == 'р');
    assert(decoder.decodeNext() == ' ');
    assert(decoder.decodeNext() == '©');
    assert(decoder.decodeNext() == '€');
    assert(decoder.decodeNext() == 0x10348);
    assert(decoder.decodeNext() == UTF8_END);
    assert(decoder.get() == UTF8_END);
    assert(decoder.eos());
}
{
    auto decoder = UTF8Decoder("日本語"[0..$-1]);
    assert(decoder.decodeNext() == '日');
    assert(decoder.decodeNext() == '本');
    assert(decoder.decodeNext() == UTF8_ERROR);
}