Skip to content

Commit 0342d72

Browse files
Validation: Implement recognition of known tags
This implements: - CborValidateNoUnknownTagsSA - CborValidateNoUnknownTagsSR - CborValidateNoUnknownTags This commit adds a Perl script that parses a machine-readable list of known tags (also added in this commit). That allows us to more easily update the known tag list when more tags are added by IANA to the registry (see API documentation for the rationale on what tags we recognise). If there's an interest, we can change the file where the tag list is generated into code so that implementations can choose which tags they want to recognise at compile time. Signed-off-by: Thiago Macieira <[email protected]>
1 parent addf804 commit 0342d72

File tree

5 files changed

+255
-15
lines changed

5 files changed

+255
-15
lines changed

src/cbor.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ typedef enum CborType {
9797

9898
typedef uint64_t CborTag;
9999
typedef enum CborKnownTags {
100-
CborDateTimeStringTag = 0, /* RFC 3339 format: YYYY-MM-DD hh:mm:ss+zzzz */
100+
CborDateTimeStringTag = 0,
101101
CborUnixTime_tTag = 1,
102102
CborPositiveBignumTag = 2,
103103
CborNegativeBignumTag = 3,
@@ -106,11 +106,12 @@ typedef enum CborKnownTags {
106106
CborExpectedBase64urlTag = 21,
107107
CborExpectedBase64Tag = 22,
108108
CborExpectedBase16Tag = 23,
109-
CborUriTag = 32,
109+
CborEncodedCborTag = 24,
110+
CborUrlTag = 32,
110111
CborBase64urlTag = 33,
111112
CborBase64Tag = 34,
112113
CborRegularExpressionTag = 35,
113-
CborMimeMessageTag = 36, /* RFC 2045-2047 */
114+
CborMimeMessageTag = 36,
114115
CborSignatureTag = 55799
115116
} CborKnownTags;
116117

src/cborvalidation.c

Lines changed: 130 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,115 @@
116116
*
117117
* \par
118118
* These are the tags known to the current TinyCBOR release:
119-
[will be added in the next commit]
119+
<table>
120+
<tr>
121+
<th>Tag</th>
122+
<th>Data Item</th>
123+
<th>Semantics</th>
124+
</tr>
125+
<tr>
126+
<td>0</td>
127+
<td>UTF-8 text string</td>
128+
<td>Standard date/time string</td>
129+
</tr>
130+
<tr>
131+
<td>1</td>
132+
<td>integer</td>
133+
<td>Epoch-based date/time</td>
134+
</tr>
135+
<tr>
136+
<td>2</td>
137+
<td>byte string</td>
138+
<td>Positive bignum</td>
139+
</tr>
140+
<tr>
141+
<td>3</td>
142+
<td>byte string</td>
143+
<td>Negative bignum</td>
144+
</tr>
145+
<tr>
146+
<td>4</td>
147+
<td>array</td>
148+
<td>Decimal fraction</td>
149+
</tr>
150+
<tr>
151+
<td>5</td>
152+
<td>array</td>
153+
<td>Bigfloat</td>
154+
</tr>
155+
<tr>
156+
<td>21</td>
157+
<td>byte string, array, map</td>
158+
<td>Expected conversion to base64url encoding</td>
159+
</tr>
160+
<tr>
161+
<td>22</td>
162+
<td>byte string, array, map</td>
163+
<td>Expected conversion to base64 encoding</td>
164+
</tr>
165+
<tr>
166+
<td>23</td>
167+
<td>byte string, array, map</td>
168+
<td>Expected conversion to base16 encoding</td>
169+
</tr>
170+
<tr>
171+
<td>24</td>
172+
<td>byte string</td>
173+
<td>Encoded CBOR data item</td>
174+
</tr>
175+
<tr>
176+
<td>32</td>
177+
<td>UTF-8 text string</td>
178+
<td>URI</td>
179+
</tr>
180+
<tr>
181+
<td>33</td>
182+
<td>UTF-8 text string</td>
183+
<td>base64url</td>
184+
</tr>
185+
<tr>
186+
<td>34</td>
187+
<td>UTF-8 text string</td>
188+
<td>base64</td>
189+
</tr>
190+
<tr>
191+
<td>35</td>
192+
<td>UTF-8 text string</td>
193+
<td>Regular expression</td>
194+
</tr>
195+
<tr>
196+
<td>36</td>
197+
<td>UTF-8 text string</td>
198+
<td>MIME message</td>
199+
</tr>
200+
<tr>
201+
<td>55799</td>
202+
<td>any</td>
203+
<td>Self-describe CBOR</td>
204+
</tr>
205+
</table>
120206
*/
121207

208+
/*
 * Table of the tag numbers the validator treats as known.
 *
 * validate_tag() scans this table from the start and stops at the first
 * entry whose tag is greater than the one being looked up, so the entries
 * must remain sorted in ascending order.
 *
 * The contents match the "search table" section printed by parsetags.pl.
 */
struct KnownTagData { uint32_t tag; };
209+
static const struct KnownTagData knownTagData[] = {
210+
{ 0 },
211+
{ 1 },
212+
{ 2 },
213+
{ 3 },
214+
{ 4 },
215+
{ 5 },
216+
{ 21 },
217+
{ 22 },
218+
{ 23 },
219+
{ 24 },
220+
{ 32 },
221+
{ 33 },
222+
{ 34 },
223+
{ 35 },
224+
{ 36 },
225+
{ 55799 }
226+
};
227+
122228
static CborError validate_value(CborValue *it, int flags, int recursionLeft);
123229

124230
static inline CborError validate_simple_type(uint8_t simple_type, int flags)
@@ -135,20 +241,34 @@ static inline CborError validate_simple_type(uint8_t simple_type, int flags)
135241
static inline CborError validate_tag(CborValue *it, CborTag tag, int flags, int recursionLeft)
136242
{
137243
CborType type = cbor_value_get_type(it);
244+
const size_t knownTagCount = sizeof(knownTagData) / sizeof(knownTagData[0]);
245+
const struct KnownTagData *tagData = knownTagData;
246+
const struct KnownTagData * const knownTagDataEnd = knownTagData + knownTagCount;
247+
138248
if (!recursionLeft)
139249
return CborErrorNestingTooDeep;
140-
141250
if (flags & CborValidateNoTags)
142251
return CborErrorExcludedType;
143-
if (flags & CborValidateNoUnknownTags) {
144-
if (tag > 255 && (flags & CborValidateNoUnknownTagsSR) == 0)
252+
253+
/* find the tag data, if any */
254+
for ( ; tagData != knownTagDataEnd; ++tagData) {
255+
if (tagData->tag < tag)
256+
continue;
257+
if (tagData->tag > tag)
258+
tagData = NULL;
259+
break;
260+
}
261+
if (tagData == knownTagDataEnd)
262+
tagData = NULL;
263+
264+
if (flags & CborValidateNoUnknownTags && !tagData) {
265+
/* tag not found */
266+
if (flags & CborValidateNoUnknownTagsSA && tag < 24)
267+
return CborErrorUnknownTag;
268+
if ((flags & CborValidateNoUnknownTagsSR) == CborValidateNoUnknownTagsSR && tag < 256)
269+
return CborErrorUnknownTag;
270+
if ((flags & CborValidateNoUnknownTags) == CborValidateNoUnknownTags)
145271
return CborErrorUnknownTag;
146-
if (flags & CborValidateNoUnknownTagsSR) {
147-
if (tag > 23 && (flags & CborValidateNoUnknownTagsSA) == 0)
148-
return CborErrorUnknownTag;
149-
if (flags & CborValidateNoUnknownTagsSA)
150-
return CborErrorUnknownTag;
151-
}
152272
}
153273

154274
return validate_value(it, flags, recursionLeft);

src/parsetags.pl

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#!/usr/bin/perl -l
2+
## Copyright (C) 2017 Intel Corporation
3+
##
4+
## Permission is hereby granted, free of charge, to any person obtaining a copy
5+
## of this software and associated documentation files (the "Software"), to deal
6+
## in the Software without restriction, including without limitation the rights
7+
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
## copies of the Software, and to permit persons to whom the Software is
9+
## furnished to do so, subject to the following conditions:
10+
##
11+
## The above copyright notice and this permission notice shall be included in
12+
## all copies or substantial portions of the Software.
13+
##
14+
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20+
## THE SOFTWARE.
21+
##
22+
use strict;
23+
# The tag list to parse is the only command-line argument.
my $fname = shift @ARGV;
die("Usage: parsetags.pl tags.txt")
    unless $fname;

# TAGS is a package-level filehandle; the parsing loop further down reads
# from it and closes it.
open(TAGS, "<", $fname)
    or die("Cannot open $fname: $!");
27+
28+
# Maps the type identifiers used in the "applicable types" field of the tag
# list file to the human-readable descriptions printed in the HTML listing.
my %typedescriptions = (
    "Integer"    => "integer",
    "ByteString" => "byte string",
    "TextString" => "UTF-8 text string",
    "Array"      => "array",
    "Map"        => "map",
    "Tag"        => "tag",      # shouldn't happen
    "Simple"     => "any simple type",
    "Boolean"    => "boolean",
    "Null"       => "null",
    "Undefined"  => "undefined",
    "HalfFloat"  => "IEEE 754 half-precision floating point",
    "Float"      => "IEEE 754 single-precision floating point",
    # Key was misspelled "Dobule", which made a "Double" type resolve to undef.
    "Double"     => "IEEE 754 double-precision floating point"
);
43+
44+
# Parse the tag list: one record per line in the form
#   number;id;type[,type...];semantics
# Everything from a '#' to the end of the line is a comment.
my %tags;
while (my $line = <TAGS>) {
    $line =~ s/\s*#.*$//;       # drop comments and the whitespace before them
    next if $line =~ /^$/;      # nothing left on this line
    chomp $line;

    die("Could not parse line \"$line\"")
        unless $line =~ /^(\d+);(\w+);([\w,]*);(.*)$/;
    my ($number, $id, $typelist, $semantic) = ($1, $2, $3, $4);

    # An empty type list yields an empty array.
    $tags{$number} = {
        id       => $id,
        semantic => $semantic,
        types    => [ split(',', $typelist) ],
    };
}
close TAGS or die;

# Every listing below walks the tags in ascending numeric order.
my @tagnumbers = sort { $a <=> $b } keys %tags;
60+
61+
# Emit the tag table as HTML, one row per known tag. The output has the same
# shape as the table in the cborvalidation.c documentation comment.
print "==== HTML listing ====";
print "<table>\n <tr>\n <th>Tag</th>\n <th>Data Item</th>\n <th>Semantics</th>\n </tr>";
for my $n (@tagnumbers) {
    print " <tr>";
    print " <td>$n</td>";

    # Translate the type identifiers into descriptions; a tag with no type
    # restriction is listed as applying to "any" data item.
    my @types = @{$tags{$n}{types}};
    @types = map { $typedescriptions{$_}; } @types;
    unshift @types, "any"
        if (scalar @types == 0);
    printf " <td>%s</td>\n", join(', ', @types);
    printf " <td>%s</td>\n", $tags{$n}{semantic};
    # Close the row: this used to print "</td>", which left every <tr>
    # unterminated in the generated table.
    print " </tr>";
}
print "</table>";
76+
77+
# Emit the CborKnownTags enum for cbor.h. printf is not affected by the -l
# switch, so each enumerator supplies its own leading newline and the
# separator is written before every entry except the first.
print "\n==== enum listing for cbor.h ====\n";
printf "typedef enum CborKnownTags {";
my $comma = "";
foreach my $number (@tagnumbers) {
    my $id = $tags{$number}{id};
    # Pad after the name so that the '=' signs line up.
    my $padding = ' ' x (23 - length($id));
    printf "%s\n Cbor%sTag%s = %d", $comma, $id, $padding, $number;
    $comma = ",";
}
print "\n} CborKnownTags;";
88+
89+
# Emit the lookup table of known tag numbers for cborvalidation.c. The
# entries are written in the order of @tagnumbers; $comma is reset so that no
# separator precedes the first entry.
print "\n==== search table ====\n";
print "struct KnownTagData { uint32_t tag; };";
printf "static const struct KnownTagData knownTagData[] = {";
$comma = "";
for my $n (@tagnumbers) {
    printf "%s\n { %d }", $comma, $n;
    $comma = ",";
}
print "\n};";

src/tags.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Tag number; Tag ID; Applicable types (comma-separated); Semantics
2+
0;DateTimeString;TextString;Standard date/time string
3+
1;UnixTime_t;Integer;Epoch-based date/time
4+
2;PositiveBignum;ByteString;Positive bignum
5+
3;NegativeBignum;ByteString;Negative bignum
6+
4;Decimal;Array;Decimal fraction
7+
5;Bigfloat;Array;Bigfloat
8+
21;ExpectedBase64url;ByteString,Array,Map;Expected conversion to base64url encoding
9+
22;ExpectedBase64;ByteString,Array,Map;Expected conversion to base64 encoding
10+
23;ExpectedBase16;ByteString,Array,Map;Expected conversion to base16 encoding
11+
24;EncodedCbor;ByteString;Encoded CBOR data item
12+
32;Url;TextString;URI
13+
33;Base64url;TextString;base64url
14+
34;Base64;TextString;base64
15+
35;RegularExpression;TextString;Regular expression
16+
36;MimeMessage;TextString;MIME message
17+
55799;Signature;;Self-describe CBOR

tests/parser/tst_parser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1650,8 +1650,8 @@ void tst_Parser::strictValidation_data()
16501650
QTest::newRow("unknown-tag-256") << raw("\xd9\1\0\x60") << int(CborValidateNoUnknownTags) << CborErrorUnknownTag;
16511651
QTest::newRow("unknown-tag-65536") << raw("\xda\0\1\0\0\x60") << int(CborValidateNoUnknownTags) << CborErrorUnknownTag;
16521652
QTest::newRow("unknown-tag-4294967296") << raw("\xdb\0\0\0\1\0\0\0\0\x60") << int(CborValidateNoUnknownTags) << CborErrorUnknownTag;
1653-
// QTest::newRow("allowed-tag-31") << raw("\xd8\x1f\x60") << int(CborValidateNoUnknownTagsSA) << CborNoError;
1654-
// QTest::newRow("allowed-tag-256") << raw("\xd8\x1f\x60") << int(CborValidateNoUnknownTagsSR) << CborNoError;
1653+
QTest::newRow("allowed-tag-31") << raw("\xd8\x1f\x60") << int(CborValidateNoUnknownTagsSA) << CborNoError;
1654+
QTest::newRow("allowed-tag-256") << raw("\xd9\1\0\x60") << int(CborValidateNoUnknownTagsSR) << CborNoError;
16551655

16561656
// excluded tags
16571657
QTest::newRow("excluded-tag-0") << raw("\xc0\x60") << int(CborValidateNoTags) << CborErrorExcludedType;

0 commit comments

Comments
 (0)