Skip to content

Commit 8ad748e

Browse files
hanslenarxbot
authored andcommitted
Add a new CanonicalValue type
This allows programs to sort maps according to the canonical sorting rules. The Value itself is good with the other canonical rules. Fixes #28
1 parent 10b531e commit 8ad748e

File tree

4 files changed

+247
-0
lines changed

4 files changed

+247
-0
lines changed

ciborium/src/value/canonical.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
use crate::value::Value;
4+
use serde::{de, ser};
5+
use std::cmp::Ordering;
6+
7+
/// Manually serialize values to compare them.
8+
fn serialized_canonical_cmp(v1: &Value, v2: &Value) -> Ordering {
9+
// There is an optimization to be done here, but it would take a lot more code
10+
// and using mixing keys, Arrays or Maps as CanonicalValue is probably not the
11+
// best use of this type as it is meant mainly to be used as keys.
12+
13+
let mut bytes1 = Vec::new();
14+
let _ = crate::ser::into_writer(v1, &mut bytes1);
15+
let mut bytes2 = Vec::new();
16+
let _ = crate::ser::into_writer(v2, &mut bytes2);
17+
18+
match bytes1.len().cmp(&bytes2.len()) {
19+
Ordering::Equal => bytes1.cmp(&bytes2),
20+
x => x,
21+
}
22+
}
23+
24+
/// Compares two values uses canonical comparison, as defined in both
25+
/// RFC 7049 Section 3.9 (regarding key sorting) and RFC 8949 4.2.3 (as errata).
26+
///
27+
/// In short, the comparison follow the following rules:
28+
/// - If two keys have different lengths, the shorter one sorts earlier;
29+
/// - If two keys have the same length, the one with the lower value in
30+
/// (byte-wise) lexical order sorts earlier.
31+
///
32+
/// This specific comparison allows Maps and sorting that respect these two rules.
33+
pub fn cmp_value(v1: &Value, v2: &Value) -> Ordering {
34+
use Value::*;
35+
36+
match (v1, v2) {
37+
(Integer(i), Integer(o)) => {
38+
// Because of the first rule above, two numbers might be in a different
39+
// order than regular i128 comparison. For example, 10 < -1 in
40+
// canonical ordering, since 10 serializes to `0x0a` and -1 to `0x20`,
41+
// and -1 < -1000 because of their lengths.
42+
i.canonical_cmp(o)
43+
}
44+
(Text(s), Text(o)) => match s.len().cmp(&o.len()) {
45+
Ordering::Equal => s.cmp(o),
46+
x => x,
47+
},
48+
(Bool(s), Bool(o)) => s.cmp(o),
49+
(Null, Null) => Ordering::Equal,
50+
(Tag(t, v), Tag(ot, ov)) => match Value::from(*t).partial_cmp(&Value::from(*ot)) {
51+
Some(Ordering::Equal) | None => match v.partial_cmp(&ov) {
52+
Some(x) => x,
53+
None => serialized_canonical_cmp(v1, v2),
54+
},
55+
Some(x) => x,
56+
},
57+
(_, _) => serialized_canonical_cmp(v1, v2),
58+
}
59+
}
60+
61+
/// A CBOR Value that impl Ord and Eq to allow sorting of values as defined in both
62+
/// RFC 7049 Section 3.9 (regarding key sorting) and RFC 8949 4.2.3 (as errata).
63+
///
64+
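/// Since a regular [Value] does not order itself by these rules, wrap it in this type wherever canonical ordering is needed (for example as a `BTreeMap` key).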
65+
#[derive(Clone, Debug)]
66+
pub struct CanonicalValue(Value);
67+
68+
impl PartialEq for CanonicalValue {
69+
fn eq(&self, other: &Self) -> bool {
70+
self.cmp(other) == Ordering::Equal
71+
}
72+
}
73+
74+
impl Eq for CanonicalValue {}
75+
76+
impl From<Value> for CanonicalValue {
77+
fn from(v: Value) -> Self {
78+
Self(v)
79+
}
80+
}
81+
82+
// Implemented as `From` rather than `Into`: the standard blanket impl still
// provides `Into<Value> for CanonicalValue`, so existing `.into()` callers keep
// working, and `Value::from(canonical)` becomes available as well.
impl From<CanonicalValue> for Value {
    /// Unwraps the canonical wrapper, returning the inner [Value] unchanged.
    fn from(v: CanonicalValue) -> Self {
        v.0
    }
}
87+
88+
impl ser::Serialize for CanonicalValue {
89+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
90+
where
91+
S: ser::Serializer,
92+
{
93+
self.0.serialize(serializer)
94+
}
95+
}
96+
97+
impl<'de> de::Deserialize<'de> for CanonicalValue {
98+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
99+
where
100+
D: de::Deserializer<'de>,
101+
{
102+
Value::deserialize(deserializer).map(Into::into)
103+
}
104+
105+
fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error>
106+
where
107+
D: de::Deserializer<'de>,
108+
{
109+
Value::deserialize_in_place(deserializer, &mut place.0)
110+
}
111+
}
112+
113+
impl Ord for CanonicalValue {
114+
fn cmp(&self, other: &Self) -> Ordering {
115+
cmp_value(&self.0, &other.0)
116+
}
117+
}
118+
119+
impl PartialOrd for CanonicalValue {
120+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
121+
Some(cmp_value(&self.0, &other.0))
122+
}
123+
}

ciborium/src/value/integer.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// SPDX-License-Identifier: Apache-2.0
2+
use std::cmp::Ordering;
23

34
macro_rules! implfrom {
45
($( $(#[$($attr:meta)+])? $t:ident)+) => {
@@ -33,6 +34,67 @@ macro_rules! implfrom {
3334
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Integer(i128);

impl Integer {
    /// Returns the unsigned argument CBOR encodes for this integer: the value
    /// itself when it is non-negative, or `-1 - value` when it is negative.
    fn canonical_argument(&self) -> u128 {
        // For a two's-complement integer, `!x == -1 - x`.
        let argument = if self.0.is_negative() { !self.0 } else { self.0 };

        // `argument` is never negative at this point, so the cast is lossless.
        argument as u128
    }

    /// Returns the canonical length this integer will have when serialized to bytes.
    /// This is called `canonical` as it is only used for canonically comparing two
    /// values. It shouldn't be used in any other context.
    ///
    /// The length depends only on the encoded argument (see
    /// `canonical_argument`), not on the signed range the value fits in: -200
    /// has argument 199 and takes 2 bytes, just like +199.
    fn canonical_len(&self) -> usize {
        match self.canonical_argument() {
            0..=23 => 1,
            24..=0xff => 2,
            0x100..=0xffff => 3,
            0x1_0000..=0xffff_ffff => 5,
            0x1_0000_0000..=0xffff_ffff_ffff_ffff => 9,
            wide => {
                // Ciborium serializes u128/i128 as a bignum if they don't fit in
                // 64 bits: a 1-byte tag, a 1-byte byte-string header (the payload
                // is at most 16 bytes) and the payload itself.
                // NOTE(review): assumes the payload is written without leading
                // zero bytes (RFC 8949 Section 3.4.3) - confirm against the
                // serializer.
                let payload = 16 - (wide.leading_zeros() / 8) as usize;
                2 + payload
            }
        }
    }

    /// Compare two integers as if we were to serialize them, but more efficiently.
    ///
    /// The order is: shorter encoding first; then non-negative before negative
    /// (negative numbers are higher in byte-order than positive numbers); then
    /// by encoded argument. For negative numbers this means the one closer to
    /// zero sorts first, e.g. -1 (`0x20`) before -2 (`0x21`).
    pub fn canonical_cmp(&self, other: &Self) -> Ordering {
        self.canonical_len()
            .cmp(&other.canonical_len())
            .then_with(|| self.0.is_negative().cmp(&other.0.is_negative()))
            .then_with(|| self.canonical_argument().cmp(&other.canonical_argument()))
    }
}
97+
3698
implfrom! {
3799
u8 u16 u32 u64
38100
i8 i16 i32 i64

ciborium/src/value/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
//! A dynamic CBOR value
44
55
mod integer;
6+
mod canonical;
67

78
mod de;
89
mod error;
910
mod ser;
1011

1112
pub use error::Error;
1213
pub use integer::Integer;
14+
pub use canonical::CanonicalValue;
1315

1416
use alloc::{boxed::Box, string::String, vec::Vec};
1517

ciborium/tests/canonical.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
extern crate std;
4+
5+
use std::collections::BTreeMap;
6+
use ciborium::cbor;
7+
use ciborium::value::{ CanonicalValue, };
8+
use rand::prelude::*;
9+
10+
/// Builds a `CanonicalValue` by wrapping whatever `val!` produces for the
/// given expression.
macro_rules! cval {
    ($value:expr) => {
        CanonicalValue::from(val!($value))
    };
}
15+
16+
/// Runs the expression through `cbor!` and unwraps the result, panicking if
/// the conversion fails.
macro_rules! val {
    ($value:expr) => {
        cbor!($value).unwrap()
    };
}
21+
22+
#[test]
fn rfc8949_example() {
    // Listed in the expected (canonical) order; this is the reference result.
    let golden: Vec<CanonicalValue> = vec![
        cval!(10),
        cval!(-1),
        cval!(false),
        cval!(100),
        cval!("z"),
        cval!([-1]),
        cval!("aa"),
        cval!([100]),
    ];

    // Scramble a copy, then check that sorting restores the reference order.
    let mut array = golden.clone();
    array.shuffle(&mut rand::thread_rng());
    array.sort();

    assert_eq!(array, golden);
}
43+
44+
#[test]
fn map() {
    // Entries are supplied out of order on purpose; the map's key ordering
    // decides the order in which they are serialized.
    let entries = vec![
        (cval!(false), val!(2)),
        (cval!([-1]), val!(5)),
        (cval!(-1), val!(1)),
        (cval!(10), val!(0)),
        (cval!(100), val!(3)),
        (cval!([100]), val!(7)),
        (cval!("z"), val!(4)),
        (cval!("aa"), val!(6)),
    ];
    let map: BTreeMap<_, _> = entries.into_iter().collect();

    let mut encoded = Vec::new();
    ciborium::ser::into_writer(&map, &mut encoded).unwrap();

    assert_eq!(hex::encode(&encoded), "a80a002001f402186403617a048120056261610681186407");
}

0 commit comments

Comments
 (0)