Skip to content

Commit e7dd43d

Browse files
author
Eric Sessoms
committed
Add nameprep profile
Implements the stringprep profile described in RFC3491. This implementation does not currently handle case-folding. Case-folding is not currently supported by the Rust standard libraries (see rust-lang/rfcs#791), and this implementation compromises with `to_lowercase`, which works for ASCII and a few other random cases. A compliant implementation is TBD. We also do not properly handle unassigned code-points. This applies to both the `nameprep` and `saslprep` implementations. Fixing this is another TBD.
1 parent 061df0b commit e7dd43d

File tree

3 files changed

+340
-29
lines changed

3 files changed

+340
-29
lines changed

src/lib.rs

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub mod tables;
1616

1717
/// Describes why a string failed stringprep normalization.
1818
#[derive(Debug)]
19-
pub enum ErrorCause {
19+
enum ErrorCause {
2020
/// Contains stringprep prohibited characters.
2121
ProhibitedCharacter(char),
2222
/// Violates stringprep rules for bidirectional text.
@@ -25,7 +25,7 @@ pub enum ErrorCause {
2525

2626
/// An error performing the stringprep algorithm.
2727
#[derive(Debug)]
28-
pub struct Error(pub ErrorCause);
28+
pub struct Error(ErrorCause);
2929

3030
impl fmt::Display for Error {
3131
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
@@ -104,6 +104,61 @@ pub fn saslprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
104104
}
105105

106106
// 2.5 Unassigned Code Points
107+
// FIXME: Reject unassigned code points.
108+
109+
Ok(Cow::Owned(normalized))
110+
}
111+
112+
/// [RFC 3491]: https://tools.ietf.org/html/rfc3491
113+
pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
114+
// 3. Mapping
115+
let mapped = s.chars()
116+
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
117+
.collect::<String>();
118+
119+
// FIXME: using `to_lowercase` as proxy for case folding
120+
let mapped = mapped.to_lowercase();
121+
122+
// 4. Normalization
123+
let normalized = mapped.nfkc().collect::<String>();
124+
125+
// 5. Prohibited Output
126+
let prohibited = normalized
127+
.chars()
128+
.filter(|&c| {
129+
tables::non_ascii_space_character(c) /* C.1.2 */ ||
130+
tables::non_ascii_control_character(c) /* C.2.2 */ ||
131+
tables::private_use(c) /* C.3 */ ||
132+
tables::non_character_code_point(c) /* C.4 */ ||
133+
tables::surrogate_code(c) /* C.5 */ ||
134+
tables::inappropriate_for_plain_text(c) /* C.6 */ ||
135+
tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
136+
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
137+
tables::tagging_character(c) /* C.9 */
138+
})
139+
.next();
140+
if let Some(c) = prohibited {
141+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
142+
}
143+
144+
// RFC3454, 6. Bidirectional Characters
145+
if normalized.contains(tables::bidi_r_or_al) {
146+
// 2) If a string contains any RandALCat character, the string
147+
// MUST NOT contain any LCat character.
148+
if normalized.contains(tables::bidi_l) {
149+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
150+
}
151+
152+
// 3) If a string contains any RandALCat character, a RandALCat
153+
// character MUST be the first character of the string, and a
154+
// RandALCat character MUST be the last character of the string.
155+
if !tables::bidi_r_or_al(normalized.chars().next().unwrap()) ||
156+
!tables::bidi_r_or_al(normalized.chars().next_back().unwrap()) {
157+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
158+
}
159+
}
160+
161+
// 7 Unassigned Code Points
107162
// TODO: Reject unassigned code points.
108163

109164
Ok(Cow::Owned(normalized))
@@ -116,26 +171,9 @@ mod test {
116171
// RFC4013, 3. Examples
117172
#[test]
118173
fn saslprep_examples() {
119-
assert_eq!(saslprep("I\u{00AD}X").unwrap(), "IX");
120-
assert_eq!(saslprep("user").unwrap(), "user");
121-
assert_eq!(saslprep("USER").unwrap(), "USER");
122-
assert_eq!(saslprep("\u{00AA}").unwrap(), "a");
123-
assert_eq!(saslprep("\u{2168}").unwrap(), "IX");
124-
assert_prohibited_character(saslprep("\u{0007}"));
125-
assert_prohibited_bidirectional_text(saslprep("\u{0627}\u{0031}"));
126-
}
127-
128-
fn assert_prohibited_character<T>(result: Result<T, Error>) {
129-
match result {
174+
match saslprep("\u{0007}") {
130175
Err(Error(ErrorCause::ProhibitedCharacter(_))) => (),
131176
_ => assert!(false)
132177
}
133178
}
134-
135-
fn assert_prohibited_bidirectional_text<T>(result: Result<T, Error>) {
136-
match result {
137-
Err(Error(ErrorCause::ProhibitedBidirectionalText)) => (),
138-
_ => assert!(false)
139-
}
140-
}
141179
}

tests/nameprep_tests.rs

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
// Integration tests from https://tools.ietf.org/html/draft-josefsson-idn-test-vectors-00
2+
extern crate stringprep;
3+
4+
use stringprep::{Error, nameprep};
5+
6+
fn assert_prohibited_character<T>(result: Result<T, Error>) {
7+
assert!(result.is_err());
8+
}
9+
10+
fn assert_prohibited_bidirectional_text<T>(result: Result<T, Error>) {
11+
assert!(result.is_err());
12+
}
13+
14+
// 4.1 Map to nothing
15+
#[test]
16+
fn should_map_to_nothing() {
17+
let input = "foo\u{00ad}\u{034f}\u{1806}\u{180b}bar\u{200b}\u{2060}baz\u{fe00}\u{fe08}\u{fe0f}\u{feff}";
18+
assert_eq!("foobarbaz", nameprep(input).unwrap());
19+
}
20+
21+
// 4.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045
22+
#[test]
23+
fn should_case_fold_ascii() {
24+
assert_eq!("cafe", nameprep("CAFE").unwrap());
25+
}
26+
27+
// 4.3 Case folding 8bit U+00DF (german sharp s)
28+
#[ignore]
29+
#[test]
30+
fn should_case_fold_8bit() {
31+
assert_eq!("ss", nameprep("\u{00df}").unwrap());
32+
}
33+
34+
// 4.4 Case folding U+0130 (turkish capital I with dot)
35+
#[test]
36+
fn should_case_fold_16bit() {
37+
assert_eq!("\u{0069}\u{0307}", nameprep("\u{0130}").unwrap());
38+
}
39+
40+
// 4.5 Case folding multibyte U+0143 U+037A
41+
#[ignore]
42+
#[test]
43+
fn should_case_fold_multibyte() {
44+
let input = "\u{0143}\u{037a}";
45+
let output = "\u{0144} \u{03b9}";
46+
assert_eq!(output, nameprep(input).unwrap());
47+
}
48+
49+
// 4.6 Case folding U+2121 U+33C6 U+1D7BB
50+
#[ignore]
51+
#[test]
52+
fn should_case_fold() {
53+
let input = "\u{2121}\u{33c6}\u{1d7bb}";
54+
let output = "telc\u{2215}\u{006b}\u{0067}\u{03c3}";
55+
assert_eq!(output, nameprep(input).unwrap());
56+
}
57+
58+
// 4.7 Normalization of U+006a U+030c U+00A0 U+00AA
59+
#[test]
60+
fn should_normalize() {
61+
let input = "j\u{030c}\u{00a0}\u{00aa}";
62+
let output = "\u{01f0} a";
63+
assert_eq!(output, nameprep(input).unwrap());
64+
}
65+
66+
// 4.8 Case folding U+1FB7 and normalization
67+
#[ignore]
68+
#[test]
69+
fn should_case_fold_and_normalize() {
70+
let input = "\u{1fb7}";
71+
let output = "\u{1fb6}\u{03b9}";
72+
assert_eq!(output, nameprep(input).unwrap());
73+
}
74+
75+
// 4.9 Self-reverting case folding U+01F0 and normalization
76+
// 4.10 Self-reverting case folding U+0390 and normalization
77+
// 4.11 Self-reverting case folding U+03B0 and normalization
78+
// 4.12 Self-reverting case folding U+1E96 and normalization
79+
// 4.13 Self-reverting case folding U+1F56 and normalization
80+
#[test]
81+
fn should_revert_case_fold_and_normalization() {
82+
let inputs = ["\u{01f0}", "\u{0390}", "\u{03b0}", "\u{1e96}", "\u{1f56}"];
83+
for input in inputs.iter() {
84+
assert_eq!(input.clone(), nameprep(input).unwrap());
85+
}
86+
}
87+
88+
// 4.14 ASCII space character U+0020
89+
#[test]
90+
fn should_permit_ascii_space() {
91+
assert_eq!(" ", nameprep(" ").unwrap());
92+
}
93+
94+
// 4.15 Non-ASCII 8bit space character U+00A0
95+
#[test]
96+
fn should_map_8bit_space() {
97+
assert_eq!(" ", nameprep("\u{00a0}").unwrap());
98+
}
99+
100+
// 4.16 Non-ASCII multibyte space character U+1680
101+
#[test]
102+
fn should_prohibit_multibyte_space() {
103+
assert_prohibited_character(nameprep("\u{1680}"));
104+
}
105+
106+
// 4.17 Non-ASCII multibyte space character U+2000
107+
#[test]
108+
fn should_map_multibyte_space1() {
109+
assert_eq!(" ", nameprep("\u{2000}").unwrap());
110+
}
111+
112+
// 4.18 Zero Width Space U+200b
113+
#[test]
114+
fn should_drop_zero_width_space() {
115+
assert_eq!("", nameprep("\u{200b}").unwrap());
116+
}
117+
118+
// 4.19 Non-ASCII multibyte space character U+3000
119+
#[test]
120+
fn should_map_multibyte_space2() {
121+
assert_eq!(" ", nameprep("\u{3000}").unwrap());
122+
}
123+
124+
// 4.20 ASCII control characters U+0010 U+007F
125+
#[test]
126+
fn should_permit_ascii_control() {
127+
assert_eq!("\u{0010}\u{007f}", nameprep("\u{0010}\u{007f}").unwrap());
128+
}
129+
130+
// 4.21 Non-ASCII 8bit control character U+0085
131+
#[test]
132+
fn should_prohibit_8bit_control() {
133+
assert_prohibited_character(nameprep("\u{0085}"));
134+
}
135+
136+
// 4.22 Non-ASCII multibyte control character U+180E
137+
#[test]
138+
fn should_prohibit_multibyte_control() {
139+
assert_prohibited_character(nameprep("\u{180e}"));
140+
}
141+
142+
// 4.23 Zero Width No-Break Space U+FEFF
143+
#[test]
144+
fn should_drop_zero_width_no_break_space() {
145+
assert_eq!("", nameprep("\u{feff}").unwrap());
146+
}
147+
148+
// 4.24 Non-ASCII control character U+1D175
149+
#[test]
150+
fn should_prohibit_non_ascii_control() {
151+
assert_prohibited_character(nameprep("\u{1d175}"));
152+
}
153+
154+
// 4.25 Plane 0 private use character U+F123
155+
#[test]
156+
fn should_prohibit_plane0_private_use() {
157+
assert_prohibited_character(nameprep("\u{f123}"));
158+
}
159+
160+
// 4.26 Plane 15 private use character U+F1234
161+
#[test]
162+
fn should_prohibit_plane15_private_use() {
163+
assert_prohibited_character(nameprep("\u{f1234}"));
164+
}
165+
166+
// 4.27 Plane 16 private use character U+10F234
167+
#[test]
168+
fn should_prohibit_plane16_private_use() {
169+
assert_prohibited_character(nameprep("\u{10f234}"));
170+
}
171+
172+
// 4.28 Non-character code point U+8FFFE
173+
#[test]
174+
fn should_prohibit_non_character1() {
175+
assert_prohibited_character(nameprep("\u{8fffe}"));
176+
}
177+
178+
// 4.29 Non-character code point U+10FFFF
179+
#[test]
180+
fn should_prohibit_non_character2() {
181+
assert_prohibited_character(nameprep("\u{10ffff}"));
182+
}
183+
184+
// 4.31 Non-plain text character U+FFFD
185+
#[test]
186+
fn should_prohibit_non_plain_text() {
187+
assert_prohibited_character(nameprep("\u{fffd}"));
188+
}
189+
190+
// 4.32 Ideographic description character U+2FF5
191+
#[test]
192+
fn should_prohibit_ideographic_description() {
193+
assert_prohibited_character(nameprep("\u{2ff5}"));
194+
}
195+
196+
// 4.33 Display property character U+0341
197+
#[test]
198+
fn should_normalize_display_property() {
199+
assert_eq!("\u{0301}", nameprep("\u{0341}").unwrap());
200+
}
201+
202+
// 4.34 Left-to-right mark U+200E
203+
#[test]
204+
fn should_prohibit_left_to_right_mark() {
205+
assert_prohibited_character(nameprep("\u{200e}"));
206+
}
207+
208+
// 4.35 Deprecated U+202A
209+
#[test]
210+
fn should_prohibit_deprecated() {
211+
assert_prohibited_character(nameprep("\u{202a}"));
212+
}
213+
214+
// 4.36 Language tagging character U+E0001
215+
#[test]
216+
fn should_prohibit_language_tagging1() {
217+
assert_prohibited_character(nameprep("\u{e0001}"));
218+
}
219+
220+
// 4.37 Language tagging character U+E0042
221+
#[test]
222+
fn should_prohibit_language_tagging2() {
223+
assert_prohibited_character(nameprep("\u{e0042}"));
224+
}
225+
226+
// 4.38 Bidi: RandALCat character U+05BE and LCat characters
227+
#[test]
228+
fn should_prohibit_randalcat_with_lcat1() {
229+
assert_prohibited_bidirectional_text(nameprep("foo\u{05be}bar"));
230+
}
231+
232+
// 4.39 Bidi: RandALCat character U+FD50 and LCat characters
233+
#[test]
234+
fn should_prohibit_randalcat_with_lcat2() {
235+
assert_prohibited_bidirectional_text(nameprep("foo\u{fd50}bar"));
236+
}
237+
238+
// 4.40 Bidi: RandALCat character U+FB38 and LCat characters
239+
#[test]
240+
fn should_permit_randalcat1() {
241+
assert_eq!("foo \u{064e}bar", nameprep("foo\u{fe76}bar").unwrap());
242+
}
243+
244+
// 4.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031
245+
#[test]
246+
fn should_prohibit_mixed_randalcat() {
247+
assert_prohibited_bidirectional_text(nameprep("\u{0672}\u{0031}"));
248+
}
249+
250+
// 4.42 Bidi: RandALCat character U+0627 U+0031 U+0628
251+
#[test]
252+
fn should_permit_randalcat2() {
253+
assert_eq!("\u{0627}\u{0031}\u{0628}", nameprep("\u{0627}\u{0031}\u{0628}").unwrap());
254+
}
255+
256+
// 4.43 Unassigned code point U+E0002
257+
#[ignore]
258+
#[test]
259+
fn should_prohibit_unassigned_code_point() {
260+
assert_prohibited_character(nameprep("\u{e0002}"));
261+
}
262+
263+
// 4.44 Larger test (shrinking)
264+
#[ignore]
265+
#[test]
266+
fn should_shrink() {
267+
let input = "X\u{00ad}\u{00df}\u{0130}\u{2121}j\u{030c}\u{00a0}\u{00aa}\u{03b0}\u{2000}";
268+
let output = "xssi\u{0307}tel\u{01f0} a\u{03b0}\u{0020}";
269+
assert_eq!(output, nameprep(input).unwrap());
270+
}
271+
272+
// 4.45 Larger test (expanding)
273+
#[ignore]
274+
#[test]
275+
fn should_expand() {
276+
let input = "X\u{00df}\u{3316}\u{0130}\u{2121}\u{249f}\u{3300}";
277+
let output = "xss\u{30ad}\u{30ed}\u{30e1}\u{30fc}\u{30c8}\u{30eb}\u{0069}\u{0307}\u{0074}\u{0065}\u{006c}\u{0028}\u{0064}\u{0029}\u{30a2}\u{30d1}\u{30fc}\u{30c8}";
278+
assert_eq!(output, nameprep(input).unwrap());
279+
}

0 commit comments

Comments
 (0)