Skip to content

Commit 51b5606

Browse files
Add support for parsing chunked strings
This is the last remaining parsing API. It provides a pair of functions that extract a chunk from either a byte string or a text string. They also operate on non-chunked strings, simulating a one-chunk string, so code can be generic. With it, it's possible to support zero-copy parsing of both types of strings with TinyCBOR. Signed-off-by: Thiago Macieira <[email protected]>
1 parent da8e35e commit 51b5606

File tree

4 files changed

+382
-22
lines changed

4 files changed

+382
-22
lines changed

src/cbor.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/****************************************************************************
22
**
3-
** Copyright (C) 2015 Intel Corporation
3+
** Copyright (C) 2017 Intel Corporation
44
**
55
** Permission is hereby granted, free of charge, to any person obtaining a copy
66
** of this software and associated documentation files (the "Software"), to deal
@@ -223,6 +223,7 @@ enum CborParserIteratorFlags
223223
{
224224
CborIteratorFlag_IntegerValueTooLarge = 0x01,
225225
CborIteratorFlag_NegativeInteger = 0x02,
226+
CborIteratorFlag_IteratingStringChunks = 0x02,
226227
CborIteratorFlag_UnknownLength = 0x04,
227228
CborIteratorFlag_ContainerIsMap = 0x20
228229
};
@@ -404,7 +405,20 @@ CBOR_INLINE_API CborError cbor_value_dup_byte_string(const CborValue *value, uin
404405
return _cbor_value_dup_string(value, (void **)buffer, buflen, next);
405406
}
406407

407-
/* ### TBD: partial reading API */
408+
CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr,
409+
size_t *len, CborValue *next);
410+
/* Text-string front end for chunk extraction: checks (via assert) that value
 * is a text string, then forwards value, bufferptr, len and next unchanged to
 * _cbor_value_get_string_chunk() and returns its result. */
CBOR_INLINE_API CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr,
411+
size_t *len, CborValue *next)
412+
{
413+
/* precondition: the caller must pass a text string; enforced by assert() only */
assert(cbor_value_is_text_string(value));
414+
/* the cast only adapts the typed pointer to the generic const void ** parameter */
return _cbor_value_get_string_chunk(value, (const void **)bufferptr, len, next);
415+
}
416+
/* Byte-string front end for chunk extraction: checks (via assert) that value
 * is a byte string, then forwards value, bufferptr, len and next unchanged to
 * _cbor_value_get_string_chunk() and returns its result. */
CBOR_INLINE_API CborError cbor_value_get_byte_string_chunk(const CborValue *value, const uint8_t **bufferptr,
417+
size_t *len, CborValue *next)
418+
{
419+
/* precondition: the caller must pass a byte string; enforced by assert() only */
assert(cbor_value_is_byte_string(value));
420+
/* the cast only adapts the typed pointer to the generic const void ** parameter */
return _cbor_value_get_string_chunk(value, (const void **)bufferptr, len, next);
421+
}
408422

409423
CBOR_API CborError cbor_value_text_string_equals(const CborValue *value, const char *string, bool *result);
410424

src/cborparser.c

Lines changed: 144 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/****************************************************************************
22
**
3-
** Copyright (C) 2016 Intel Corporation
3+
** Copyright (C) 2017 Intel Corporation
44
**
55
** Permission is hereby granted, free of charge, to any person obtaining a copy
66
** of this software and associated documentation files (the "Software"), to deal
@@ -932,14 +932,154 @@ CborError cbor_value_get_int_checked(const CborValue *value, int *result)
932932
* CborErrorDataTooLarge if the stream indicates a length that is too big to
933933
* fit in 32-bit.
934934
*
935-
* \sa cbor_value_get_string_length(), cbor_value_copy_string(), cbor_value_is_length_known()
935+
* \sa cbor_value_get_string_length(), cbor_value_copy_text_string(), cbor_value_copy_byte_string(), cbor_value_is_length_known()
936936
*/
937937
CborError cbor_value_calculate_string_length(const CborValue *value, size_t *len)
938938
{
939939
/* seed *len with the largest possible size before delegating */
*len = SIZE_MAX;
940940
/* NULL buffer and NULL next: presumably makes _cbor_value_copy_string() only
 * compute the length into *len without copying or advancing -- TODO confirm
 * against _cbor_value_copy_string() */
return _cbor_value_copy_string(value, NULL, len, NULL);
941941
}
942942

943+
/* Advances the iterator \a it by one string chunk.
 *
 * When a chunk is found, *bufferptr is set to the chunk's position inside the
 * input (no copy is made), *len receives its length, it->ptr is moved past
 * the chunk data, CborIteratorFlag_IteratingStringChunks is set in it->flags
 * and CborNoError is returned.
 *
 * When there are no more chunks, *bufferptr is set to NULL and the result of
 * preparse_next_value(it) is returned.
 *
 * Errors returned directly: CborErrorUnexpectedEOF if it->ptr is at
 * parser->end or the chunk length exceeds the remaining bytes;
 * CborErrorIllegalType if the next byte's major type differs from it->type;
 * plus any error reported by extract_length().
 */
static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len)
944+
{
945+
CborError err;
946+
947+
/* Possible states:
948+
* length known | iterating | meaning
949+
* no | no | before the first chunk of a chunked string
950+
* yes | no | at a non-chunked string
951+
* no | yes | second or later chunk
952+
* yes | yes | after a non-chunked string
953+
*/
954+
if (it->flags & CborIteratorFlag_IteratingStringChunks) {
955+
/* already iterating */
956+
if (cbor_value_is_length_known(it)) {
957+
/* if the length was known, it wasn't chunked, so finish iteration */
958+
goto last_chunk;
959+
}
960+
} else if (!cbor_value_is_length_known(it)) {
961+
/* chunked string, we're before the first chunk: step over one byte
 * (presumably the indefinite-length string header -- TODO confirm) */
962+
++it->ptr;
963+
}
964+
965+
/* are we at the end? (must be checked before *it->ptr is read below) */
966+
if (it->ptr == it->parser->end)
967+
return CborErrorUnexpectedEOF;
968+
969+
if (*it->ptr == BreakByte) {
970+
/* break byte found: step past it and finish the iteration */
971+
++it->ptr;
972+
/* also reached by goto from the "after a non-chunked string" state above */
last_chunk:
973+
/* NOTE(review): *len is not written on this path; callers that read it
 * after receiving a NULL *bufferptr see whatever value it held before */
*bufferptr = NULL;
974+
return preparse_next_value(it);
975+
} else if ((uint8_t)(*it->ptr & MajorTypeMask) == it->type) {
976+
/* chunk has the same major type as the string being iterated;
 * extract_length() receives &it->ptr, so it presumably advances the
 * pointer past the length header -- TODO confirm */
err = extract_length(it->parser, &it->ptr, len);
977+
if (err)
978+
return err;
979+
/* reject chunks whose declared length runs past the end of the input */
if (*len > (size_t)(it->parser->end - it->ptr))
980+
return CborErrorUnexpectedEOF;
981+
982+
/* hand out a pointer into the input itself, then skip the chunk data */
*bufferptr = it->ptr;
983+
it->ptr += *len;
984+
} else {
985+
/* next item is neither a break byte nor a chunk of the expected type */
return CborErrorIllegalType;
986+
}
987+
988+
/* remember that iteration has started so the next call takes the
 * "already iterating" branch */
it->flags |= CborIteratorFlag_IteratingStringChunks;
989+
return CborNoError;
990+
}
991+
992+
/**
993+
* \fn CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr, size_t *len, CborValue *next)
994+
*
995+
* Extracts one text string chunk pointed to by \a value and stores a pointer
996+
* to the data in \a bufferptr and the size in \a len, which must not be null. If
997+
* no more chunks are available, then \a bufferptr will be set to null. This
998+
* function may be used to iterate over any string without causing its contents
999+
* to be copied to a separate buffer, like the convenience function
1000+
* cbor_value_copy_text_string() does.
1001+
*
1002+
* It is designed to be used in code like:
1003+
*
1004+
* \code
1005+
* if (cbor_value_is_text_string(value)) {
1006+
* const char *ptr;
1007+
* size_t len;
1008+
* while (1) {
1009+
* err = cbor_value_get_text_string_chunk(value, &ptr, &len, value);
1010+
* if (err) return err;
1011+
* if (ptr == NULL) return CborNoError;
1012+
* consume(ptr, len);
1013+
* }
1014+
* }
1015+
* \endcode
1016+
*
1017+
* If the iterator \a value does not point to a text string, the behaviour is
1018+
* undefined, so checking with \ref cbor_value_get_type or \ref
1019+
* cbor_value_is_text_string is recommended.
1020+
*
1021+
* The \a next pointer, if not null, will be updated to point to the next item
1022+
* after this string. During iteration, the pointer must only be passed back
1023+
* again to this function; passing it to any other function in this library
1024+
* results in undefined behavior. If there are no more chunks to be read from
1025+
* \a value, then \a next will be set to the next item after this string; if \a
1026+
* value points to the last item, then \a next will be invalid.
1027+
*
1028+
* \note This function does not perform UTF-8 validation on the incoming text
1029+
* string.
1030+
*
1031+
* \sa cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_calculate_string_length(), cbor_value_get_byte_string_chunk()
1032+
*/
1033+
1034+
/**
1035+
* \fn CborError cbor_value_get_byte_string_chunk(const CborValue *value, const uint8_t **bufferptr, size_t *len, CborValue *next)
1036+
*
1037+
* Extracts one byte string chunk pointed to by \a value and stores a pointer
1038+
* to the data in \a bufferptr and the size in \a len, which must not be null. If
1039+
* no more chunks are available, then \a bufferptr will be set to null. This
1040+
* function may be used to iterate over any string without causing its contents
1041+
* to be copied to a separate buffer, like the convenience function
1042+
* cbor_value_copy_byte_string() does.
1043+
*
1044+
* It is designed to be used in code like:
1045+
*
1046+
* \code
1047+
* if (cbor_value_is_byte_string(value)) {
1048+
* const uint8_t *ptr;
1049+
* size_t len;
1050+
* while (1) {
1051+
* err = cbor_value_get_byte_string_chunk(value, &ptr, &len, value);
1052+
* if (err) return err;
1053+
* if (ptr == NULL) return CborNoError;
1054+
* consume(ptr, len);
1055+
* }
1056+
* }
1057+
* \endcode
1058+
*
1059+
* If the iterator \a value does not point to a byte string, the behaviour is
1060+
* undefined, so checking with \ref cbor_value_get_type or \ref
1061+
* cbor_value_is_byte_string is recommended.
1062+
*
1063+
* The \a next pointer, if not null, will be updated to point to the next item
1064+
* after this string. During iteration, the pointer must only be passed back
1065+
* again to this function; passing it to any other function in this library
1066+
* results in undefined behavior. If there are no more chunks to be read from
1067+
* \a value, then \a next will be set to the next item after this string; if \a
1068+
* value points to the last item, then \a next will be invalid.
1069+
*
1070+
* \sa cbor_value_dup_byte_string(), cbor_value_copy_byte_string(), cbor_value_calculate_string_length(), cbor_value_get_text_string_chunk()
1071+
*/
1072+
1073+
/* Shared implementation behind the text- and byte-string chunk getters.
 * Copies *value into *next (or into a local scratch iterator when next is
 * NULL) and runs get_string_chunk() on that copy, so the const input iterator
 * is never modified. Returns whatever get_string_chunk() returns. */
CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr,
1074+
size_t *len, CborValue *next)
1075+
{
1076+
/* scratch iterator used when the caller does not want the advanced position */
CborValue tmp;
1077+
if (!next)
1078+
next = &tmp;
1079+
/* work on a copy; when next == value this is a self-assignment */
*next = *value;
1080+
return get_string_chunk(next, bufferptr, len);
1081+
}
1082+
9431083
/* We return uintptr_t so that we can pass memcpy directly as the iteration
9441084
* function. The choice is to optimize for memcpy, which is used in the base
9451085
* parser API (cbor_value_copy_string), while memcmp is used in convenience API
@@ -1066,7 +1206,7 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz
10661206
* \note This function does not perform UTF-8 validation on the incoming text
10671207
* string.
10681208
*
1069-
* \sa cbor_value_dup_text_string(), cbor_value_copy_byte_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
1209+
* \sa cbor_value_get_text_string_chunk(), cbor_value_dup_text_string(), cbor_value_copy_byte_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
10701210
*/
10711211

10721212
/**
@@ -1097,7 +1237,7 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz
10971237
* This function may not run in constant time (it will run in O(n) time on the
10981238
* number of chunks). It requires constant memory (O(1)).
10991239
*
1100-
* \sa cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
1240+
* \sa cbor_value_get_byte_string_chunk(), cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
11011241
*/
11021242

11031243
CborError _cbor_value_copy_string(const CborValue *value, void *buffer,

src/cborparser_dup_string.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
* \note This function does not perform UTF-8 validation on the incoming text
6161
* string.
6262
*
63-
* \sa cbor_value_copy_text_string(), cbor_value_dup_byte_string()
63+
* \sa cbor_value_get_text_string_chunk(), cbor_value_copy_text_string(), cbor_value_dup_byte_string()
6464
*/
6565

6666
/**
@@ -88,7 +88,7 @@
8888
* number of chunks). It requires constant memory (O(1)) in addition to the
8989
* malloc'ed block.
9090
*
91-
* \sa cbor_value_copy_byte_string(), cbor_value_dup_text_string()
91+
* \sa cbor_value_get_byte_string_chunk(), cbor_value_copy_byte_string(), cbor_value_dup_text_string()
9292
*/
9393
CborError _cbor_value_dup_string(const CborValue *value, void **buffer, size_t *buflen, CborValue *next)
9494
{

0 commit comments

Comments
 (0)