Skip to content

Commit 8adc3cf

Browse files
Parser: modify the zero-copy string API
Instead of just one function (_cbor_value_get_string_chunk), we now have _cbor_value_begin_string_iteration, _cbor_value_finish_string_iteration, _cbor_value_get_string_chunk_size, and _cbor_value_get_string_chunk. The "begin" function positions the pointer at the first chunk. That's what makes "get_size" possible, since it doesn't need to check for any state. The "finish" function allows the caller to distinguish an error parsing the string from an error parsing the next value. Signed-off-by: Thiago Macieira <[email protected]>
1 parent 5159ec3 commit 8adc3cf

File tree

7 files changed

+139
-93
lines changed

7 files changed

+139
-93
lines changed

src/cbor.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ typedef enum CborError {
166166
CborErrorIllegalType, /* type not allowed here */
167167
CborErrorIllegalNumber,
168168
CborErrorIllegalSimpleType, /* types of value less than 32 encoded in two bytes */
169+
CborErrorNoMoreStringChunks,
169170

170171
/* parser errors in strict mode parsing only */
171172
CborErrorUnknownSimpleType = 512,
@@ -292,11 +293,23 @@ enum CborParserGlobalFlags
292293

293294
enum CborParserIteratorFlags
294295
{
296+
/* used for all types, but not during string chunk iteration
297+
* (values are static-asserted, don't change) */
295298
CborIteratorFlag_IntegerValueIs64Bit = 0x01,
296299
CborIteratorFlag_IntegerValueTooLarge = 0x02,
300+
301+
/* used only for CborIntegerType */
297302
CborIteratorFlag_NegativeInteger = 0x04,
303+
304+
/* used only during string iteration */
305+
CborIteratorFlag_BeforeFirstStringChunk = 0x04,
298306
CborIteratorFlag_IteratingStringChunks = 0x08,
307+
308+
/* used for arrays, maps and strings, including during chunk iteration */
299309
CborIteratorFlag_UnknownLength = 0x10,
310+
311+
/* used for maps, but must be kept for all types
312+
* (ContainerIsMap value must be CborMapType - CborArrayType) */
300313
CborIteratorFlag_ContainerIsMap = 0x20,
301314
CborIteratorFlag_NextIsMapKey = 0x40
302315
};
@@ -499,6 +512,34 @@ CBOR_INLINE_API CborError cbor_value_dup_byte_string(const CborValue *value, uin
499512
return _cbor_value_dup_string(value, (void **)buffer, buflen, next);
500513
}
501514

515+
CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk_size(const CborValue *value, size_t *len);
516+
CBOR_INLINE_API CborError cbor_value_get_string_chunk_size(const CborValue *value, size_t *len)
517+
{
518+
assert(value->flags & CborIteratorFlag_IteratingStringChunks);
519+
return _cbor_value_get_string_chunk_size(value, len);
520+
}
521+
522+
CBOR_INLINE_API bool cbor_value_string_iteration_at_end(const CborValue *value)
523+
{
524+
size_t dummy;
525+
return cbor_value_get_string_chunk_size(value, &dummy) == CborErrorNoMoreStringChunks;
526+
}
527+
528+
CBOR_PRIVATE_API CborError _cbor_value_begin_string_iteration(CborValue *value);
529+
CBOR_INLINE_API CborError cbor_value_begin_string_iteration(CborValue *value)
530+
{
531+
assert(cbor_value_is_text_string(value) || cbor_value_is_byte_string(value));
532+
assert(!(value->flags & CborIteratorFlag_IteratingStringChunks));
533+
return _cbor_value_begin_string_iteration(value);
534+
}
535+
536+
CBOR_PRIVATE_API CborError _cbor_value_finish_string_iteration(CborValue *value);
537+
CBOR_INLINE_API CborError cbor_value_finish_string_iteration(CborValue *value)
538+
{
539+
assert(cbor_value_string_iteration_at_end(value));
540+
return _cbor_value_finish_string_iteration(value);
541+
}
542+
502543
CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr,
503544
size_t *len, CborValue *next);
504545
CBOR_INLINE_API CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr,

src/cborerrorstrings.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ const char *cbor_error_string(CborError error)
119119
case CborErrorIllegalSimpleType:
120120
return _("illegal encoding of simple type smaller than 32");
121121

122+
case CborErrorNoMoreStringChunks:
123+
return _("no more byte or text strings available");
124+
122125
case CborErrorUnknownSimpleType:
123126
return _("unknown simple type");
124127

src/cborinternal_p.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,6 @@ enum {
161161
BreakByte = (unsigned)Break | (SimpleTypesType << MajorTypeShift)
162162
};
163163

164-
CBOR_INTERNAL_API CborError CBOR_INTERNAL_API_CC _cbor_value_prepare_string_iteration(CborValue *it);
165-
166164
static inline void copy_current_position(CborValue *dst, const CborValue *src)
167165
{
168166
/* This "if" is here for pedantry only: the two branches should perform

src/cborparser.c

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -966,103 +966,99 @@ CborError cbor_value_calculate_string_length(const CborValue *value, size_t *len
966966
return _cbor_value_copy_string(value, NULL, len, NULL);
967967
}
968968

969-
static inline void prepare_string_iteration(CborValue *it)
969+
CborError _cbor_value_begin_string_iteration(CborValue *it)
970970
{
971+
it->flags |= CborIteratorFlag_IteratingStringChunks |
972+
CborIteratorFlag_BeforeFirstStringChunk;
971973
if (!cbor_value_is_length_known(it)) {
972974
/* chunked string: we're before the first chunk;
973975
* advance to the first chunk */
974976
advance_bytes(it, 1);
975-
it->flags |= CborIteratorFlag_IteratingStringChunks;
976977
}
978+
979+
return CborNoError;
977980
}
978981

979-
CborError CBOR_INTERNAL_API_CC _cbor_value_prepare_string_iteration(CborValue *it)
982+
CborError _cbor_value_finish_string_iteration(CborValue *it)
980983
{
981-
cbor_assert((it->flags & CborIteratorFlag_IteratingStringChunks) == 0);
982-
prepare_string_iteration(it);
984+
if (!cbor_value_is_length_known(it))
985+
advance_bytes(it, 1); /* skip the Break */
983986

984-
/* are we at the end? */
985-
if (!can_read_bytes(it, 1))
986-
return CborErrorUnexpectedEOF;
987-
return CborNoError;
987+
return preparse_next_value(it);
988988
}
989989

990-
static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len)
990+
static CborError get_string_chunk_size(const CborValue *it, size_t *offset, size_t *len)
991991
{
992-
/* Possible states:
993-
* length known | iterating | meaning
994-
* no | no | before the first chunk of a chunked string
995-
* yes | no | at a non-chunked string
996-
* no | yes | second or later chunk
997-
* yes | yes | after a non-chunked string
998-
*/
999-
if (it->flags & CborIteratorFlag_IteratingStringChunks) {
1000-
/* already iterating */
1001-
if (cbor_value_is_length_known(it)) {
1002-
/* if the length was known, it wasn't chunked, so finish iteration */
1003-
goto last_chunk;
1004-
}
1005-
} else {
1006-
prepare_string_iteration(it);
1007-
}
992+
uint8_t descriptor;
993+
size_t bytesNeeded = 1;
994+
995+
if (cbor_value_is_length_known(it) && (it->flags & CborIteratorFlag_BeforeFirstStringChunk) == 0)
996+
return CborErrorNoMoreStringChunks;
1008997

1009998
/* are we at the end? */
1010-
uint8_t descriptor;
1011999
if (!read_bytes(it, &descriptor, 0, 1))
10121000
return CborErrorUnexpectedEOF;
10131001

1014-
if (descriptor == BreakByte) {
1015-
/* last chunk */
1016-
advance_bytes(it, 1);
1017-
last_chunk:
1018-
*bufferptr = NULL;
1019-
*len = 0;
1020-
return preparse_next_value(it);
1021-
} else if ((descriptor & MajorTypeMask) == it->type) {
1022-
/* find the string length */
1023-
size_t bytesNeeded = 1;
1024-
1025-
descriptor &= SmallValueMask;
1026-
if (descriptor < Value8Bit) {
1027-
*len = descriptor;
1028-
} else if (unlikely(descriptor > Value64Bit)) {
1029-
return CborErrorIllegalNumber;
1030-
} else {
1031-
uint64_t val;
1032-
bytesNeeded = (size_t)(1 << (descriptor - Value8Bit));
1033-
if (!can_read_bytes(it, 1 + bytesNeeded))
1034-
return CborErrorUnexpectedEOF;
1035-
1036-
if (descriptor <= Value16Bit) {
1037-
if (descriptor == Value16Bit)
1038-
val = read_uint16(it, 1);
1039-
else
1040-
val = read_uint8(it, 1);
1041-
} else {
1042-
if (descriptor == Value32Bit)
1043-
val = read_uint32(it, 1);
1044-
else
1045-
val = read_uint64(it, 1);
1046-
}
1002+
if (descriptor == BreakByte)
1003+
return CborErrorNoMoreStringChunks;
1004+
if ((descriptor & MajorTypeMask) != it->type)
1005+
return CborErrorIllegalType;
10471006

1048-
*len = val;
1049-
if (*len != val)
1050-
return CborErrorDataTooLarge;
1007+
/* find the string length */
1008+
descriptor &= SmallValueMask;
1009+
if (descriptor < Value8Bit) {
1010+
*len = descriptor;
1011+
} else if (unlikely(descriptor > Value64Bit)) {
1012+
return CborErrorIllegalNumber;
1013+
} else {
1014+
uint64_t val;
1015+
bytesNeeded = (size_t)(1 << (descriptor - Value8Bit));
1016+
if (!can_read_bytes(it, 1 + bytesNeeded))
1017+
return CborErrorUnexpectedEOF;
10511018

1052-
++bytesNeeded;
1019+
if (descriptor <= Value16Bit) {
1020+
if (descriptor == Value16Bit)
1021+
val = read_uint16(it, 1);
1022+
else
1023+
val = read_uint8(it, 1);
1024+
} else {
1025+
if (descriptor == Value32Bit)
1026+
val = read_uint32(it, 1);
1027+
else
1028+
val = read_uint64(it, 1);
10531029
}
10541030

1055-
if (*len != (size_t)*len)
1031+
*len = val;
1032+
if (*len != val)
10561033
return CborErrorDataTooLarge;
10571034

1058-
CborError err = transfer_string(it, bufferptr, bytesNeeded, *len);
1059-
if (err)
1060-
return err;
1061-
} else {
1062-
return CborErrorIllegalType;
1035+
++bytesNeeded;
10631036
}
10641037

1065-
it->flags |= CborIteratorFlag_IteratingStringChunks;
1038+
*offset = bytesNeeded;
1039+
return CborNoError;
1040+
}
1041+
1042+
CborError _cbor_value_get_string_chunk_size(const CborValue *value, size_t *len)
1043+
{
1044+
size_t offset;
1045+
return get_string_chunk_size(value, &offset, len);
1046+
}
1047+
1048+
static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len)
1049+
{
1050+
size_t offset;
1051+
CborError err = get_string_chunk_size(it, &offset, len);
1052+
if (err)
1053+
return err;
1054+
1055+
/* we're good, transfer the string now */
1056+
err = transfer_string(it, bufferptr, offset, *len);
1057+
if (err)
1058+
return err;
1059+
1060+
/* we've iterated at least once */
1061+
it->flags &= ~CborIteratorFlag_BeforeFirstStringChunk;
10661062
return CborNoError;
10671063
}
10681064

@@ -1195,14 +1191,18 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz
11951191
*next = *value;
11961192
*result = true;
11971193

1194+
err = _cbor_value_begin_string_iteration(next);
1195+
if (err)
1196+
return err;
1197+
11981198
while (1) {
11991199
size_t newTotal;
12001200
size_t chunkLen;
12011201
err = get_string_chunk(next, &ptr, &chunkLen);
1202+
if (err == CborErrorNoMoreStringChunks)
1203+
break;
12021204
if (err)
12031205
return err;
1204-
if (!ptr)
1205-
break;
12061206

12071207
if (unlikely(add_check_overflow(total, chunkLen, &newTotal)))
12081208
return CborErrorDataTooLarge;
@@ -1221,7 +1221,7 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz
12211221
*result = !!func(buffer + total, nul, 1);
12221222
}
12231223
*buflen = total;
1224-
return CborNoError;
1224+
return _cbor_value_finish_string_iteration(next);
12251225
}
12261226

12271227
/**

src/cborpretty.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -407,25 +407,24 @@ static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue
407407
open[1] = '\0';
408408
}
409409

410-
if (showingFragments) {
410+
if (showingFragments)
411411
err = stream(out, "(_ ");
412-
if (!err)
413-
err = _cbor_value_prepare_string_iteration(it);
414-
} else {
412+
else
415413
err = stream(out, "%s", open);
416-
}
417414

415+
if (!err)
416+
err = cbor_value_begin_string_iteration(it);
418417
while (!err) {
419418
if (showingFragments || indicator == NULL) {
420419
/* any iteration, except the second for a non-chunked string */
421420
indicator = resolve_indicator(it, flags);
422421
}
423422

424423
err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
425-
if (err)
426-
return err;
427-
if (!ptr)
424+
if (err == CborErrorNoMoreStringChunks) {
425+
err = cbor_value_finish_string_iteration(it);
428426
break;
427+
}
429428

430429
if (!err && showingFragments)
431430
err = stream(out, "%s%s", separator, open);

src/cborvalidation.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -543,24 +543,24 @@ static CborError validate_value(CborValue *it, uint32_t flags, int recursionLeft
543543
size_t n = 0;
544544
const void *ptr;
545545

546-
err = _cbor_value_prepare_string_iteration(it);
546+
err = cbor_value_begin_string_iteration(it);
547547
if (err)
548548
return err;
549549

550550
while (1) {
551551
CborValue next;
552552
err = _cbor_value_get_string_chunk(it, &ptr, &n, &next);
553-
if (err)
554-
return err;
555-
if (ptr) {
553+
if (!err) {
556554
err = validate_number(it, type, flags);
557555
if (err)
558556
return err;
559557
}
560558

561559
*it = next;
562-
if (!ptr)
563-
break;
560+
if (err == CborErrorNoMoreStringChunks)
561+
return cbor_value_finish_string_iteration(it);
562+
if (err)
563+
return err;
564564

565565
if (type == CborTextStringType && flags & CborValidateUtf8) {
566566
err = validate_utf8_string(ptr, n);

tests/parser/tst_parser.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -869,18 +869,23 @@ static void chunkedStringTest(const QByteArray &data, const QString &concatenate
869869

870870
CborValue copy = value;
871871

872+
err = cbor_value_begin_string_iteration(&value);
873+
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");
872874
forever {
873875
QString decoded;
874876
err = parseOneChunk(&value, &decoded);
875-
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");
876-
877-
if (decoded.isEmpty())
877+
if (err == CborErrorNoMoreStringChunks)
878878
break; // last chunk
879879

880+
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");
881+
880882
QVERIFY2(!chunks.isEmpty(), "Too many chunks");
881883
QString expected = chunks.takeFirst();
882884
QCOMPARE(decoded, expected);
883885
}
886+
887+
err = cbor_value_finish_string_iteration(&value);
888+
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");
884889
QVERIFY2(chunks.isEmpty(), "Too few chunks");
885890

886891
// compare to the concatenated data

0 commit comments

Comments
 (0)