Skip to content

Commit 989cdb4

Browse files
committed
tree data BUGFIX check for valid UTF-8 strings
Fixes sysrepo/sysrepo#3173
1 parent 8b4f22f commit 989cdb4

File tree

10 files changed

+239
-33
lines changed

10 files changed

+239
-33
lines changed

src/common.c

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,157 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
247247
return LY_SUCCESS;
248248
}
249249

250+
/**
 * @brief Check whether the masked bytes of an UTF-8 string are equal to the expected bytes.
 *
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string, at least @p bytes bytes of it are read.
 * @param[in] bytes Number of bytes to compare. Declared as int because the last named parameter
 * of a variadic function must not have a type that is changed by the default argument promotions.
 * @param[in] ... 2x @p bytes int arguments, alternating the mask for the bitwise and with the
 * expected value of the masked byte.
 * @return 1 if all the masked bytes match, 0 otherwise.
 */
static int
ly_utf8_and_equal(const char *input, int bytes, ...)
{
    va_list ap;
    int i, mask, byte, equal = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        byte = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)byte) {
            equal = 0;
            break;
        }
    }
    /* single exit point so that va_start() is always paired with va_end() */
    va_end(ap);

    return equal;
}
280+
281+
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * The bytes are compared as unsigned values from the first one, the first differing byte decides.
 *
 * @param[in] input UTF-8 string, at least @p bytes bytes of it may be read.
 * @param[in] bytes Number of bytes to compare. Declared as int because the last named parameter
 * of a variadic function must not have a type that is changed by the default argument promotions.
 * @param[in] ... @p bytes int arguments holding the bytes to compare with.
 * @return 1 if @p input is smaller, 0 if it is greater or equal.
 */
static int
ly_utf8_less(const char *input, int bytes, ...)
{
    va_list ap;
    int i, byte, less = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            less = ((uint8_t)input[i] < (uint8_t)byte) ? 1 : 0;
            break;
        }
    }
    /* single exit point so that va_start() is always paired with va_end() */
    va_end(ap);

    /* 0 is kept if all the bytes were equal */
    return less;
}
313+
314+
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * The bytes are compared as unsigned values from the first one, the first differing byte decides.
 *
 * @param[in] input UTF-8 string, at least @p bytes bytes of it may be read.
 * @param[in] bytes Number of bytes to compare. Declared as int because the last named parameter
 * of a variadic function must not have a type that is changed by the default argument promotions.
 * @param[in] ... @p bytes int arguments holding the bytes to compare with.
 * @return 1 if @p input is greater, 0 if it is smaller or equal.
 */
static int
ly_utf8_greater(const char *input, int bytes, ...)
{
    va_list ap;
    int i, byte, greater = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            greater = ((uint8_t)input[i] > (uint8_t)byte) ? 1 : 0;
            break;
        }
    }
    /* single exit point so that va_start() is always paired with va_end() */
    va_end(ap);

    /* 0 is kept if all the bytes were equal */
    return greater;
}
346+
347+
LY_ERR
348+
ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len)
349+
{
350+
size_t len;
351+
352+
if (!(input[0] & 0x80)) {
353+
/* one byte character */
354+
len = 1;
355+
356+
if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) {
357+
/* invalid control characters */
358+
return LY_EINVAL;
359+
}
360+
} else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) {
361+
/* two bytes character */
362+
len = 2;
363+
364+
/* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
365+
if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) ||
366+
!ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) {
367+
return LY_EINVAL;
368+
}
369+
} else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) {
370+
/* three bytes character */
371+
len = 3;
372+
373+
/* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
374+
if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) {
375+
/* reject UTF-16 surrogates */
376+
return LY_EINVAL;
377+
}
378+
379+
/* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
380+
if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) ||
381+
!ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) {
382+
return LY_EINVAL;
383+
}
384+
} else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) {
385+
/* four bytes character */
386+
len = 4;
387+
388+
/* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
389+
if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) ||
390+
!ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) {
391+
return LY_EINVAL;
392+
}
393+
} else {
394+
return LY_EINVAL;
395+
}
396+
397+
*utf8_len = len;
398+
return LY_SUCCESS;
399+
}
400+
250401
LY_ERR
251402
ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
252403
{

src/common.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,17 @@ LY_ERR ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t
539539
*/
540540
LY_ERR ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read);
541541

542+
/**
543+
* @brief Check whether a UTF-8 character is valid.
544+
*
545+
* @param[in] input Input string to process.
546+
* @param[in] in_len Bytes left to read in @p input.
547+
* @param[out] utf8_len Length of a valid UTF-8 character.
548+
* @return LY_SUCCESS on success
549+
* @return LY_EINVAL in case of invalid UTF-8 character.
550+
*/
551+
LY_ERR ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len);
552+
542553
/**
543554
* @brief Store UTF-8 character specified as 4byte integer into the dst buffer.
544555
*

src/parser_common.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ lyd_parser_create_term(struct lyd_ctx *lydctx, const struct lysc_node *schema, c
285285
LY_ERR r;
286286
ly_bool incomplete;
287287

288-
if ((r = lyd_create_term(schema, value, value_len, dynamic, format, prefix_data, hints, &incomplete, node))) {
288+
if ((r = lyd_create_term(schema, value, value_len, 1, dynamic, format, prefix_data, hints, &incomplete, node))) {
289289
if (lydctx->data_ctx->ctx != schema->module->ctx) {
290290
/* move errors to the main context */
291291
ly_err_move(schema->module->ctx, (struct ly_ctx *)lydctx->data_ctx->ctx);
@@ -323,8 +323,8 @@ lyd_parser_create_meta(struct lyd_ctx *lydctx, struct lyd_node *parent, struct l
323323
}
324324
LOG_LOCSET(NULL, NULL, path, NULL);
325325

326-
LY_CHECK_GOTO(rc = lyd_create_meta(parent, meta, mod, name, name_len, value, value_len, dynamic, format, prefix_data,
327-
hints, ctx_node, 0, &incomplete), cleanup);
326+
LY_CHECK_GOTO(rc = lyd_create_meta(parent, meta, mod, name, name_len, value, value_len, 1, dynamic, format,
327+
prefix_data, hints, ctx_node, 0, &incomplete), cleanup);
328328

329329
if (incomplete && !(lydctx->parse_opts & LYD_PARSE_ONLY)) {
330330
LY_CHECK_GOTO(rc = ly_set_add(&lydctx->meta_types, *meta, 1, NULL), cleanup);

src/path.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -698,8 +698,8 @@ ly_path_compile_predicate(const struct ly_ctx *ctx, const struct lysc_node *cur_
698698

699699
/* store the value */
700700
LOG_LOCSET(key, NULL, NULL, NULL);
701-
ret = lyd_value_store(ctx, &p->value, ((struct lysc_node_leaf *)key)->type, val, val_len, NULL, format,
702-
prefix_data, LYD_HINT_DATA, key, NULL);
701+
ret = lyd_value_store(ctx, &p->value, ((struct lysc_node_leaf *)key)->type, val, val_len, 0, NULL,
702+
format, prefix_data, LYD_HINT_DATA, key, NULL);
703703
LOG_LOCBACK(key ? 1 : 0, 0, 0, 0);
704704
LY_CHECK_ERR_GOTO(ret, p->value.realtype = NULL, cleanup);
705705

@@ -762,8 +762,8 @@ ly_path_compile_predicate(const struct ly_ctx *ctx, const struct lysc_node *cur_
762762

763763
/* store the value */
764764
LOG_LOCSET(ctx_node, NULL, NULL, NULL);
765-
ret = lyd_value_store(ctx, &p->value, ((struct lysc_node_leaflist *)ctx_node)->type, val, val_len, NULL, format,
766-
prefix_data, LYD_HINT_DATA, ctx_node, NULL);
765+
ret = lyd_value_store(ctx, &p->value, ((struct lysc_node_leaflist *)ctx_node)->type, val, val_len, 0, NULL,
766+
format, prefix_data, LYD_HINT_DATA, ctx_node, NULL);
767767
LOG_LOCBACK(ctx_node ? 1 : 0, 0, 0, 0);
768768
LY_CHECK_ERR_GOTO(ret, p->value.realtype = NULL, cleanup);
769769
++(*tok_idx);

src/plugins_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ LIBYANG_API_DECL LY_ERR lyplg_type_print_xpath10_value(const struct lyd_value_xp
473473
value after calling the type's store callback with this option. */
474474
#define LYPLG_TYPE_STORE_IMPLEMENT 0x02 /**< If a foreign module is needed to be implemented to successfully instantiate
475475
the value, make the module implemented. */
476+
#define LYPLG_TYPE_STORE_IS_UTF8 0x04 /**< The value is guaranteed to be a valid UTF-8 string, if applicable for the type. */
476477
/** @} plugintypestoreopts */
477478

478479
/**

src/plugins_types/string.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
/**
22
* @file string.c
33
* @author Radek Krejci <[email protected]>
4+
* @author Michal Vasko <[email protected]>
45
* @brief Built-in string type plugin.
56
*
6-
* Copyright (c) 2019-2021 CESNET, z.s.p.o.
7+
* Copyright (c) 2019 - 2023 CESNET, z.s.p.o.
78
*
89
* This source code is licensed under BSD 3-Clause License (the "License").
910
* You may not use this file except in compliance with the License.
@@ -33,6 +34,29 @@
3334
* | string length | yes | `char *` | string itself |
3435
*/
3536

37+
/**
38+
* @brief Check string value for invalid characters.
39+
*
40+
* @param[in] value String to check.
41+
* @param[in] value_len Length of @p value.
42+
* @param[out] err Generated error on error.
43+
* @return LY_ERR value.
44+
*/
45+
static LY_ERR
46+
string_check_chars(const char *value, size_t value_len, struct ly_err_item **err)
47+
{
48+
size_t len, parsed = 0;
49+
50+
while (value_len - parsed) {
51+
if (ly_checkutf8(value + parsed, value_len - parsed, &len)) {
52+
return ly_err_new(err, LY_EVALID, LYVE_DATA, NULL, NULL, "Invalid character 0x%hhx.", value[parsed]);
53+
}
54+
parsed += len;
55+
}
56+
57+
return LY_SUCCESS;
58+
}
59+
3660
LIBYANG_API_DEF LY_ERR
3761
lyplg_type_store_string(const struct ly_ctx *ctx, const struct lysc_type *type, const void *value, size_t value_len,
3862
uint32_t options, LY_VALUE_FORMAT UNUSED(format), void *UNUSED(prefix_data), uint32_t hints,
@@ -46,6 +70,12 @@ lyplg_type_store_string(const struct ly_ctx *ctx, const struct lysc_type *type,
4670
memset(storage, 0, sizeof *storage);
4771
storage->realtype = type;
4872

73+
if (!(options & LYPLG_TYPE_STORE_IS_UTF8)) {
74+
/* check the UTF-8 encoding */
75+
ret = string_check_chars(value, value_len, err);
76+
LY_CHECK_GOTO(ret, cleanup);
77+
}
78+
4979
/* check hints */
5080
ret = lyplg_type_check_hints(hints, value, value_len, type->basetype, NULL, err);
5181
LY_CHECK_GOTO(ret, cleanup);

src/tree_data.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ lyd_insert_meta(struct lyd_node *parent, struct lyd_meta *meta, ly_bool clear_df
991991

992992
LY_ERR
993993
lyd_create_meta(struct lyd_node *parent, struct lyd_meta **meta, const struct lys_module *mod, const char *name,
994-
size_t name_len, const char *value, size_t value_len, ly_bool *dynamic, LY_VALUE_FORMAT format,
994+
size_t name_len, const char *value, size_t value_len, ly_bool is_utf8, ly_bool *dynamic, LY_VALUE_FORMAT format,
995995
void *prefix_data, uint32_t hints, const struct lysc_node *ctx_node, ly_bool clear_dflt, ly_bool *incomplete)
996996
{
997997
LY_ERR ret = LY_SUCCESS;
@@ -1023,7 +1023,7 @@ lyd_create_meta(struct lyd_node *parent, struct lyd_meta **meta, const struct ly
10231023
mt->parent = parent;
10241024
mt->annotation = ant;
10251025
lyplg_ext_get_storage(ant, LY_STMT_TYPE, sizeof ant_type, (const void **)&ant_type);
1026-
ret = lyd_value_store(mod->ctx, &mt->value, ant_type, value, value_len, dynamic, format, prefix_data, hints,
1026+
ret = lyd_value_store(mod->ctx, &mt->value, ant_type, value, value_len, is_utf8, dynamic, format, prefix_data, hints,
10271027
ctx_node, incomplete);
10281028
LY_CHECK_ERR_GOTO(ret, free(mt), cleanup);
10291029
ret = lydict_insert(mod->ctx, name, name_len, &mt->name);
@@ -1798,7 +1798,7 @@ lyd_dup_r(const struct lyd_node *node, const struct ly_ctx *trg_ctx, struct lyd_
17981798
/* store canonical value in the target context */
17991799
val_can = lyd_get_value(node);
18001800
type = ((struct lysc_node_leaf *)term->schema)->type;
1801-
ret = lyd_value_store(trg_ctx, &term->value, type, val_can, strlen(val_can), NULL, LY_VALUE_CANON, NULL,
1801+
ret = lyd_value_store(trg_ctx, &term->value, type, val_can, strlen(val_can), 1, NULL, LY_VALUE_CANON, NULL,
18021802
LYD_HINT_DATA, term->schema, NULL);
18031803
LY_CHECK_GOTO(ret, error);
18041804
}
@@ -2727,7 +2727,7 @@ lyd_find_sibling_val(const struct lyd_node *siblings, const struct lysc_node *sc
27272727
/* create a data node and find the instance */
27282728
if (schema->nodetype == LYS_LEAFLIST) {
27292729
/* target used attributes: schema, hash, value */
2730-
rc = lyd_create_term(schema, key_or_value, val_len, NULL, LY_VALUE_JSON, NULL, LYD_HINT_DATA, NULL, &target);
2730+
rc = lyd_create_term(schema, key_or_value, val_len, 0, NULL, LY_VALUE_JSON, NULL, LYD_HINT_DATA, NULL, &target);
27312731
LY_CHECK_RET(rc);
27322732
} else {
27332733
/* target used attributes: schema, hash, child (all keys) */

src/tree_data_common.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -476,12 +476,12 @@ lyd_data_next_module(struct lyd_node **next, struct lyd_node **first)
476476

477477
LY_ERR
478478
lyd_value_store(const struct ly_ctx *ctx, struct lyd_value *val, const struct lysc_type *type, const void *value,
479-
size_t value_len, ly_bool *dynamic, LY_VALUE_FORMAT format, void *prefix_data, uint32_t hints,
479+
size_t value_len, ly_bool is_utf8, ly_bool *dynamic, LY_VALUE_FORMAT format, void *prefix_data, uint32_t hints,
480480
const struct lysc_node *ctx_node, ly_bool *incomplete)
481481
{
482482
LY_ERR ret;
483483
struct ly_err_item *err = NULL;
484-
uint32_t options = (dynamic && *dynamic ? LYPLG_TYPE_STORE_DYNAMIC : 0);
484+
uint32_t options = 0;
485485

486486
if (!value) {
487487
value = "";
@@ -490,6 +490,13 @@ lyd_value_store(const struct ly_ctx *ctx, struct lyd_value *val, const struct ly
490490
*incomplete = 0;
491491
}
492492

493+
if (dynamic && *dynamic) {
494+
options |= LYPLG_TYPE_STORE_DYNAMIC;
495+
}
496+
if (is_utf8) {
497+
options |= LYPLG_TYPE_STORE_IS_UTF8;
498+
}
499+
493500
ret = type->plugin->store(ctx, type, value, value_len, options, format, prefix_data, hints, ctx_node, val, NULL, &err);
494501
if (dynamic) {
495502
*dynamic = 0;
@@ -676,7 +683,7 @@ lyd_value_compare(const struct lyd_node_term *node, const char *value, size_t va
676683

677684
/* store the value */
678685
LOG_LOCSET(node->schema, &node->node, NULL, NULL);
679-
ret = lyd_value_store(ctx, &val, type, value, value_len, NULL, LY_VALUE_JSON, NULL, LYD_HINT_DATA, node->schema, NULL);
686+
ret = lyd_value_store(ctx, &val, type, value, value_len, 0, NULL, LY_VALUE_JSON, NULL, LYD_HINT_DATA, node->schema, NULL);
680687
LOG_LOCBACK(1, 1, 0, 0);
681688
LY_CHECK_RET(ret);
682689

0 commit comments

Comments
 (0)