25 #define INVAL_BUFSIZE (32)
31 #define READ_BUFSIZE (8)
37 #define WRITE_BUFSIZE (8)
52 const uint8_t **source,
size_t *sourcelen,
53 uint8_t **dest,
size_t *destlen);
56 const uint8_t **source,
size_t *sourcelen,
57 uint8_t **dest,
size_t *destlen);
62 const uint8_t **source,
size_t *sourcelen,
63 uint8_t **dest,
size_t *destlen);
66 uint32_t ucs4, uint8_t **dest,
size_t *destlen);
163 const uint8_t **source,
size_t *sourcelen,
164 uint8_t **dest,
size_t *destlen)
183 for (len = 0; len < c->
write_len; len++) {
196 while (*sourcelen > 0) {
202 while (towritelen > 0) {
215 for (len = 0; len < towritelen; len++)
279 const uint8_t **source,
size_t *sourcelen,
280 uint8_t **dest,
size_t *destlen)
290 *((uint32_t *) (
void *) *dest) =
300 if (*destlen < c->read_len * 4) {
326 (
const uint8_t **) &in, &l, dest, destlen);
332 *source +=
max((
signed) (orig_l - l), 0);
333 *sourcelen -=
max((
signed) (orig_l - l), 0);
338 assert((orig_l + ol) - l != 0);
346 while (*sourcelen > 0) {
348 source, sourcelen, dest, destlen);
409 const uint8_t **source,
size_t *sourcelen,
410 uint8_t **dest,
size_t *destlen)
418 const uint8_t *src = *source;
419 size_t srclen = *sourcelen;
420 uint32_t *uptr = &ucs4;
421 size_t *usptr = &sucs4;
427 ucs4, dest, destlen);
443 memmove(c->
inval_buf, *source, *sourcelen);
447 *source += *sourcelen;
469 const uint8_t *src = *source;
470 size_t srclen = *sourcelen;
472 uint32_t *ncptr = &nextchar;
481 memmove(c->
inval_buf, *source, *sourcelen);
485 *source += *sourcelen;
500 0xFFFD, dest, destlen);
504 *sourcelen -= nextchar;
524 uint32_t ucs4, uint8_t **dest,
size_t *destlen)
Codec factory component definition.
static parserutils_error charset_utf8_codec_decode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Decode a chunk of UTF-8 data into UCS-4 (big endian)
parserutils_error(* destroy)(parserutils_charset_codec *codec)
#define UTF8_NEXT_PARANOID(s, len, off, nextoff, error)
Skip to start of next sequence in UTF-8 input.
static uint32_t endian_host_to_big(uint32_t host)
parserutils_error(* encode)(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
static parserutils_error charset_utf8_codec_create(const char *charset, parserutils_charset_codec **codec)
Create a UTF-8 codec.
static parserutils_error charset_utf8_codec_encode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Encode a chunk of UCS-4 (big endian) data into UTF-8.
const parserutils_charset_handler charset_utf8_codec_handler
static parserutils_error charset_utf8_codec_reset(parserutils_charset_codec *codec)
Clear a UTF-8 codec's encoding state.
#define UTF8_TO_UCS4(s, len, ucs4, clen, error)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
static parserutils_error charset_utf8_codec_output_decoded_char(charset_utf8_codec *c, uint32_t ucs4, uint8_t **dest, size_t *destlen)
Output a UCS-4 character (big endian)
parserutils_charset_codec_errormode errormode
error mode
UTF-8 manipulation macros (implementation).
static uint32_t endian_big_to_host(uint32_t big)
uint32_t read_buf[READ_BUFSIZE]
Buffer for partial output sequences (decode) (host-endian)
static parserutils_error charset_utf8_codec_read_char(charset_utf8_codec *c, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Read a single character from the UTF-8 input and output it as UCS-4 (big endian)
struct parserutils_charset_codec::@3 handler
Vtable for handler code.
size_t read_len
Character length of read_buf.
Abort processing if an unrepresentable character is encountered.
Core charset codec definition; implementations extend this.
static bool charset_utf8_codec_handles_charset(const char *charset)
Determine whether this codec handles a specific charset.
size_t write_len
Character length of write_buf.
parserutils_error(* reset)(parserutils_charset_codec *codec)
uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len)
Retrieve the MIB enum value assigned to an encoding name.
#define UTF8_FROM_UCS4(ucs4, s, len, error)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
parserutils_error(* decode)(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
uint8_t inval_buf[INVAL_BUFSIZE]
Buffer for fixing up incomplete input sequences.
parserutils_charset_codec base
Base class.
uint32_t write_buf[WRITE_BUFSIZE]
Buffer for partial output sequences (encode) (host-endian)
struct charset_utf8_codec charset_utf8_codec
UTF-8 charset codec.
static parserutils_error charset_utf8_codec_destroy(parserutils_charset_codec *codec)
Destroy a UTF-8 codec.