Skip to content

Commit c2341f1

Browse files
committed
feat: Support more content encodings
Signed-off-by: Dmitry Dygalo <[email protected]>
1 parent 1c89b62 commit c2341f1

File tree

4 files changed

+186
-5
lines changed

4 files changed

+186
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
- `evaluate()` top-level function for convenient access to structured validation output.
88
- **CLI**: Schema-only validation now also validates all referenced schemas. [#804](https://github.com/Stranger6667/jsonschema/issues/804)
9+
- Support for additional `contentEncoding` values per RFC 4648: `base64url`, `base32`, `base32hex`, and `base16`. These encodings are now validated alongside the existing `base64` support in Draft 6 and 7. [#26](https://github.com/Stranger6667/jsonschema/issues/26)
910

1011
### Changed
1112

crates/jsonschema-py/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Added
66

77
- Support for `decimal.Decimal` type in both schemas and instances. [#319](https://github.com/Stranger6667/jsonschema/issues/319)
8+
- Support for additional `contentEncoding` values per RFC 4648: `base64url`, `base32`, `base32hex`, and `base16`. These encodings are now validated alongside the existing `base64` support in Draft 6 and 7. [#26](https://github.com/Stranger6667/jsonschema/issues/26)
89

910
### Performance
1011

crates/jsonschema/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ tokio = { version = "1.0", features = ["rt"], optional = true }
3939
[dependencies]
4040
ahash.workspace = true
4141
async-trait = { version = "0.1.86", optional = true }
42-
base64 = "0.22"
4342
bytecount = { version = "0.6", features = ["runtime-dispatch-simd"] }
43+
data-encoding = "2.9"
4444
email_address = "0.2.9"
4545
fancy-regex = "0.16"
4646
fraction = { version = "0.15", default-features = false, features = [
Lines changed: 183 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,209 @@
11
use crate::error::ValidationError;
22
use ahash::AHashMap;
3-
use base64::{engine::general_purpose, Engine as _};
3+
use data_encoding::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER};
44
use std::sync::LazyLock;
55

66
pub(crate) type ContentEncodingCheckType = fn(&str) -> bool;
77
pub(crate) type ContentEncodingConverterType =
88
fn(&str) -> Result<Option<String>, ValidationError<'static>>;
99

10+
// RFC 4648 §4: Base 64 Encoding
11+
// https://datatracker.ietf.org/doc/html/rfc4648#section-4
1012
pub(crate) fn is_base64(instance_string: &str) -> bool {
11-
general_purpose::STANDARD.decode(instance_string).is_ok()
13+
BASE64.decode(instance_string.as_bytes()).is_ok()
1214
}
1315

1416
pub(crate) fn from_base64(
1517
instance_string: &str,
1618
) -> Result<Option<String>, ValidationError<'static>> {
17-
match general_purpose::STANDARD.decode(instance_string) {
19+
match BASE64.decode(instance_string.as_bytes()) {
1820
Ok(value) => Ok(Some(String::from_utf8(value)?)),
1921
Err(_) => Ok(None),
2022
}
2123
}
2224

25+
// RFC 4648 §5: Base 64 Encoding with URL and Filename Safe Alphabet
26+
// https://datatracker.ietf.org/doc/html/rfc4648#section-5
27+
pub(crate) fn is_base64url(instance_string: &str) -> bool {
28+
BASE64URL.decode(instance_string.as_bytes()).is_ok()
29+
}
30+
31+
pub(crate) fn from_base64url(
32+
instance_string: &str,
33+
) -> Result<Option<String>, ValidationError<'static>> {
34+
match BASE64URL.decode(instance_string.as_bytes()) {
35+
Ok(value) => Ok(Some(String::from_utf8(value)?)),
36+
Err(_) => Ok(None),
37+
}
38+
}
39+
40+
// RFC 4648 §6: Base 32 Encoding
41+
// https://datatracker.ietf.org/doc/html/rfc4648#section-6
42+
pub(crate) fn is_base32(instance_string: &str) -> bool {
43+
BASE32.decode(instance_string.as_bytes()).is_ok()
44+
}
45+
46+
pub(crate) fn from_base32(
47+
instance_string: &str,
48+
) -> Result<Option<String>, ValidationError<'static>> {
49+
match BASE32.decode(instance_string.as_bytes()) {
50+
Ok(value) => Ok(Some(String::from_utf8(value)?)),
51+
Err(_) => Ok(None),
52+
}
53+
}
54+
55+
// RFC 4648 §7: Base 32 Encoding with Extended Hex Alphabet
56+
// https://datatracker.ietf.org/doc/html/rfc4648#section-7
57+
pub(crate) fn is_base32hex(instance_string: &str) -> bool {
58+
BASE32HEX.decode(instance_string.as_bytes()).is_ok()
59+
}
60+
61+
pub(crate) fn from_base32hex(
62+
instance_string: &str,
63+
) -> Result<Option<String>, ValidationError<'static>> {
64+
match BASE32HEX.decode(instance_string.as_bytes()) {
65+
Ok(value) => Ok(Some(String::from_utf8(value)?)),
66+
Err(_) => Ok(None),
67+
}
68+
}
69+
70+
// RFC 4648 §8: Base 16 Encoding
71+
// https://datatracker.ietf.org/doc/html/rfc4648#section-8
72+
pub(crate) fn is_base16(instance_string: &str) -> bool {
73+
HEXUPPER.decode(instance_string.as_bytes()).is_ok()
74+
|| HEXUPPER
75+
.decode(instance_string.to_uppercase().as_bytes())
76+
.is_ok()
77+
}
78+
79+
pub(crate) fn from_base16(
80+
instance_string: &str,
81+
) -> Result<Option<String>, ValidationError<'static>> {
82+
// Base16 is case-insensitive per RFC 4648
83+
let result = HEXUPPER
84+
.decode(instance_string.as_bytes())
85+
.or_else(|_| HEXUPPER.decode(instance_string.to_uppercase().as_bytes()));
86+
match result {
87+
Ok(value) => Ok(Some(String::from_utf8(value)?)),
88+
Err(_) => Ok(None),
89+
}
90+
}
91+
92+
// Supported in JSON Schema Draft 6, 7, 2019-09, and 2020-12
93+
// Per JSON Schema Validation spec §8.3, encoding values are defined in:
94+
// - RFC 4648 (base16, base32, base32hex, base64, base64url)
95+
// - RFC 2045 §6.7-6.8 (quoted-printable, 7bit, 8bit, binary)
96+
// We implement the RFC 4648 encodings as they are transformation encodings.
2397
pub(crate) static DEFAULT_CONTENT_ENCODING_CHECKS_AND_CONVERTERS: LazyLock<
2498
AHashMap<&'static str, (ContentEncodingCheckType, ContentEncodingConverterType)>,
2599
> = LazyLock::new(|| {
26100
let mut map: AHashMap<&'static str, (ContentEncodingCheckType, ContentEncodingConverterType)> =
27-
AHashMap::with_capacity(1);
101+
AHashMap::with_capacity(5);
28102
map.insert("base64", (is_base64, from_base64));
103+
map.insert("base64url", (is_base64url, from_base64url));
104+
map.insert("base32", (is_base32, from_base32));
105+
map.insert("base32hex", (is_base32hex, from_base32hex));
106+
map.insert("base16", (is_base16, from_base16));
29107
map
30108
});
109+
110+
#[cfg(test)]
mod tests {
    use super::*;
    use test_case::test_case;

    // Plain-text value that every positive fixture below encodes.
    const TEST_STRING: &str = "foobar";
    const TEST_BASE64: &str = "Zm9vYmFy";
    // Same text as the base64 fixture: the encoding of "foobar" contains no `+` or `/`.
    const TEST_BASE64URL: &str = "Zm9vYmFy";
    const TEST_BASE32: &str = "MZXW6YTBOI======";
    const TEST_BASE32HEX: &str = "CPNMUOJ1E8======";
    const TEST_BASE16_UPPER: &str = "666F6F626172";
    const TEST_BASE16_LOWER: &str = "666f6f626172";
    const TEST_BASE16_MIXED: &str = "666F6f626172";

    #[test_case(TEST_BASE64, true ; "valid base64")]
    #[test_case("not valid base64!!!", false ; "invalid base64 with special chars")]
    #[test_case("Zm9v====", false ; "invalid base64 padding")]
    fn test_is_base64(input: &str, expected: bool) {
        let actual = is_base64(input);
        assert_eq!(actual, expected);
    }

    #[test_case(TEST_BASE64, Some(TEST_STRING) ; "decode valid base64")]
    #[test_case("invalid!", None ; "decode invalid base64")]
    fn test_from_base64(input: &str, expected: Option<&str>) {
        let decoded = from_base64(input).unwrap();
        assert_eq!(decoded.as_deref(), expected);
    }

    #[test_case(TEST_BASE64URL, true ; "valid base64url")]
    #[test_case("PDw_Pz4-", true ; "base64url with url safe chars")]
    #[test_case("Zm9v+YmFy", false ; "base64 plus char invalid in base64url")]
    #[test_case("Zm9v/YmFy", false ; "base64 slash char invalid in base64url")]
    fn test_is_base64url(input: &str, expected: bool) {
        let actual = is_base64url(input);
        assert_eq!(actual, expected);
    }

    #[test_case(TEST_BASE64URL, Some(TEST_STRING) ; "decode valid base64url")]
    #[test_case("invalid!", None ; "decode invalid base64url")]
    fn test_from_base64url(input: &str, expected: Option<&str>) {
        let decoded = from_base64url(input).unwrap();
        assert_eq!(decoded.as_deref(), expected);
    }

    #[test_case(TEST_BASE32, true ; "valid base32")]
    #[test_case("not valid", false ; "invalid base32 text")]
    #[test_case("189", false ; "base32 invalid chars 1,8,9")]
    fn test_is_base32(input: &str, expected: bool) {
        let actual = is_base32(input);
        assert_eq!(actual, expected);
    }

    #[test_case(TEST_BASE32, Some(TEST_STRING) ; "decode valid base32")]
    #[test_case("189!!!", None ; "decode invalid base32")]
    fn test_from_base32(input: &str, expected: Option<&str>) {
        let decoded = from_base32(input).unwrap();
        assert_eq!(decoded.as_deref(), expected);
    }

    #[test_case(TEST_BASE32HEX, true ; "valid base32hex")]
    #[test_case("not valid", false ; "invalid base32hex text")]
    #[test_case("XYZ", false ; "base32hex invalid chars X,Y,Z")]
    fn test_is_base32hex(input: &str, expected: bool) {
        let actual = is_base32hex(input);
        assert_eq!(actual, expected);
    }

    #[test_case(TEST_BASE32HEX, Some(TEST_STRING) ; "decode valid base32hex")]
    #[test_case("XYZ!!!", None ; "decode invalid base32hex")]
    fn test_from_base32hex(input: &str, expected: Option<&str>) {
        let decoded = from_base32hex(input).unwrap();
        assert_eq!(decoded.as_deref(), expected);
    }

    #[test_case(TEST_BASE16_UPPER, true ; "valid base16 uppercase")]
    #[test_case(TEST_BASE16_LOWER, true ; "valid base16 lowercase")]
    #[test_case(TEST_BASE16_MIXED, true ; "valid base16 mixed case")]
    #[test_case("not valid", false ; "invalid base16 text")]
    #[test_case("GHIJ", false ; "base16 invalid chars G-J")]
    fn test_is_base16(input: &str, expected: bool) {
        let actual = is_base16(input);
        assert_eq!(actual, expected);
    }

    #[test_case(TEST_BASE16_UPPER, Some(TEST_STRING) ; "decode base16 uppercase")]
    #[test_case(TEST_BASE16_LOWER, Some(TEST_STRING) ; "decode base16 lowercase")]
    #[test_case(TEST_BASE16_MIXED, Some(TEST_STRING) ; "decode base16 mixed")]
    #[test_case("GHIJ", None ; "decode invalid base16")]
    fn test_from_base16(input: &str, expected: Option<&str>) {
        let decoded = from_base16(input).unwrap();
        assert_eq!(decoded.as_deref(), expected);
    }
}

0 commit comments

Comments
 (0)