Skip to content

Commit d9030ff

Browse files
author
Eric Sessoms
committed
Add nodeprep and resourceprep profiles
Incomplete in that case-folding and unassigned code-points are not yet addressed, but otherwise follows the specification.
1 parent e7dd43d commit d9030ff

File tree

4 files changed

+168
-29
lines changed

4 files changed

+168
-29
lines changed

src/lib.rs

Lines changed: 135 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -87,26 +87,35 @@ pub fn saslprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
8787
}
8888

8989
// RFC3454, 6. Bidirectional Characters
90-
if normalized.contains(tables::bidi_r_or_al) {
90+
if is_prohibited_bidirectional_text(&normalized) {
91+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
92+
}
93+
94+
// 2.5 Unassigned Code Points
95+
// FIXME: Reject unassigned code points.
96+
97+
Ok(Cow::Owned(normalized))
98+
}
99+
100+
// RFC3454, 6. Bidirectional Characters
101+
fn is_prohibited_bidirectional_text(s: &str) -> bool {
102+
if s.contains(tables::bidi_r_or_al) {
91103
// 2) If a string contains any RandALCat character, the string
92104
// MUST NOT contain any LCat character.
93-
if normalized.contains(tables::bidi_l) {
94-
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
105+
if s.contains(tables::bidi_l) {
106+
return true;
95107
}
96108

97109
// 3) If a string contains any RandALCat character, a RandALCat
98110
// character MUST be the first character of the string, and a
99111
// RandALCat character MUST be the last character of the string.
100-
if !tables::bidi_r_or_al(normalized.chars().next().unwrap()) ||
101-
!tables::bidi_r_or_al(normalized.chars().next_back().unwrap()) {
102-
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
112+
if !tables::bidi_r_or_al(s.chars().next().unwrap()) ||
113+
!tables::bidi_r_or_al(s.chars().next_back().unwrap()) {
114+
return true;
103115
}
104116
}
105117

106-
// 2.5 Unassigned Code Points
107-
// FIXME: Reject unassigned code points.
108-
109-
Ok(Cow::Owned(normalized))
118+
false
110119
}
111120

112121
/// [RFC 3491]: https://tools.ietf.org/html/rfc3491
@@ -142,24 +151,105 @@ pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
142151
}
143152

144153
// RFC3454, 6. Bidirectional Characters
145-
if normalized.contains(tables::bidi_r_or_al) {
146-
// 2) If a string contains any RandALCat character, the string
147-
// MUST NOT contain any LCat character.
148-
if normalized.contains(tables::bidi_l) {
149-
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
150-
}
151-
152-
// 3) If a string contains any RandALCat character, a RandALCat
153-
// character MUST be the first character of the string, and a
154-
// RandALCat character MUST be the last character of the string.
155-
if !tables::bidi_r_or_al(normalized.chars().next().unwrap()) ||
156-
!tables::bidi_r_or_al(normalized.chars().next_back().unwrap()) {
157-
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
158-
}
154+
if is_prohibited_bidirectional_text(&normalized) {
155+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
159156
}
160157

161158
// 7 Unassigned Code Points
162-
// TODO: Reject unassigned code points.
159+
// FIXME: Reject unassigned code points.
160+
161+
Ok(Cow::Owned(normalized))
162+
}
163+
164+
/// [RFC 3920, Appendix A] https://tools.ietf.org/html/rfc3920#appendix-A
165+
pub fn nodeprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
166+
// A.3. Mapping
167+
let mapped = s.chars()
168+
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
169+
.collect::<String>();
170+
171+
// FIXME: using `to_lowercase` as proxy for case folding
172+
let mapped = mapped.to_lowercase();
173+
174+
// A.4. Normalization
175+
let normalized = mapped.nfkc().collect::<String>();
176+
177+
// A.5. Prohibited Output
178+
let prohibited = normalized
179+
.chars()
180+
.filter(|&c| {
181+
tables::ascii_space_character(c) /* C.1.1 */ ||
182+
tables::non_ascii_space_character(c) /* C.1.2 */ ||
183+
tables::ascii_control_character(c) /* C.2.1 */ ||
184+
tables::non_ascii_control_character(c) /* C.2.2 */ ||
185+
tables::private_use(c) /* C.3 */ ||
186+
tables::non_character_code_point(c) /* C.4 */ ||
187+
tables::surrogate_code(c) /* C.5 */ ||
188+
tables::inappropriate_for_plain_text(c) /* C.6 */ ||
189+
tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
190+
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
191+
tables::tagging_character(c) /* C.9 */ ||
192+
prohibited_node_character(c)
193+
})
194+
.next();
195+
if let Some(c) = prohibited {
196+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
197+
}
198+
199+
// RFC3454, 6. Bidirectional Characters
200+
if is_prohibited_bidirectional_text(&normalized) {
201+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
202+
}
203+
204+
// FIXME: Reject unassigned code points.
205+
206+
Ok(Cow::Owned(normalized))
207+
}
208+
209+
// Additional characters not allowed in JID nodes, by RFC3920.
210+
fn prohibited_node_character(c: char) -> bool {
211+
match c {
212+
'"' | '&' | '\'' | '/' | ':' | '<' | '>' | '@' => true,
213+
_ => false
214+
}
215+
}
216+
217+
/// [RFC 3920, Appendix B] https://tools.ietf.org/html/rfc3920#appendix-B
218+
pub fn resourceprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
219+
// B.3. Mapping
220+
let mapped = s.chars()
221+
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
222+
.collect::<String>();
223+
224+
// B.4. Normalization
225+
let normalized = mapped.nfkc().collect::<String>();
226+
227+
// B.5. Prohibited Output
228+
let prohibited = normalized
229+
.chars()
230+
.filter(|&c| {
231+
tables::non_ascii_space_character(c) /* C.1.2 */ ||
232+
tables::ascii_control_character(c) /* C.2.1 */ ||
233+
tables::non_ascii_control_character(c) /* C.2.2 */ ||
234+
tables::private_use(c) /* C.3 */ ||
235+
tables::non_character_code_point(c) /* C.4 */ ||
236+
tables::surrogate_code(c) /* C.5 */ ||
237+
tables::inappropriate_for_plain_text(c) /* C.6 */ ||
238+
tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
239+
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
240+
tables::tagging_character(c) /* C.9 */
241+
})
242+
.next();
243+
if let Some(c) = prohibited {
244+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
245+
}
246+
247+
// RFC3454, 6. Bidirectional Characters
248+
if is_prohibited_bidirectional_text(&normalized) {
249+
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
250+
}
251+
252+
// FIXME: Reject unassigned code points.
163253

164254
Ok(Cow::Owned(normalized))
165255
}
@@ -168,12 +258,28 @@ pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
168258
mod test {
169259
use super::*;
170260

261+
fn assert_prohibited_character<T>(result: Result<T, Error>) {
262+
match result {
263+
Err(Error(ErrorCause::ProhibitedCharacter(_))) => (),
264+
_ => assert!(false)
265+
}
266+
}
267+
171268
// RFC4013, 3. Examples
172269
#[test]
173270
fn saslprep_examples() {
174-
match saslprep("\u{0007}") {
175-
Err(Error(ErrorCause::ProhibitedCharacter(_))) => (),
176-
_ => assert!(false)
177-
}
271+
assert_prohibited_character(saslprep("\u{0007}"));
272+
}
273+
274+
#[test]
275+
fn nodeprep_examples() {
276+
assert_prohibited_character(nodeprep(" "));
277+
assert_prohibited_character(nodeprep("\u{00a0}"));
278+
assert_prohibited_character(nodeprep("foo@bar"));
279+
}
280+
281+
#[test]
282+
fn resourceprep_examples() {
283+
assert_eq!("foo@bar", resourceprep("foo@bar").unwrap());
178284
}
179285
}

src/tables.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ pub fn commonly_mapped_to_nothing(c: char) -> bool {
1313
}
1414
}
1515

16+
/// C.1.1 ASCII space characters
17+
pub fn ascii_space_character(c: char) -> bool {
18+
c == ' '
19+
}
20+
1621
/// C.1.2 Non-ASCII space characters
1722
pub fn non_ascii_space_character(c: char) -> bool {
1823
match c {

tests/nameprep_tests.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,24 @@ fn assert_prohibited_bidirectional_text<T>(result: Result<T, Error>) {
1111
assert!(result.is_err());
1212
}
1313

14+
// Additional examples from http://josefsson.org/idn.php
15+
#[test]
16+
fn test_nameprep() {
17+
assert_eq!("安室奈美恵-with-super-monkeys", nameprep("安室奈美恵-with-SUPER-MONKEYS").unwrap());
18+
assert_eq!("미술", nameprep("미술").unwrap()); // Korean
19+
assert_eq!("ليهمابتكلموشعربي؟", nameprep("ليهمابتكلموشعربي؟").unwrap()); // Egyptian
20+
assert_eq!("他们为什么不说中文", nameprep("他们为什么不说中文").unwrap()); // Chinese
21+
assert_eq!("למההםפשוטלאמדבריםעברית", nameprep("למההםפשוטלאמדבריםעברית").unwrap()); // Hebrew
22+
assert_eq!("почемужеонинеговорятпорусски", nameprep("почемужеонинеговорятпорусски").unwrap()); // Russian
23+
assert_eq!("tạisaohọkhôngthểchỉnóitiếngviệt", nameprep("TạisaohọkhôngthểchỉnóitiếngViệt").unwrap()); // Vietnamese
24+
assert_eq!("ひとつ屋根の下2", nameprep("ひとつ屋根の下2").unwrap()); // Japanese
25+
assert_eq!("pročprostěnemluvíčesky", nameprep("Pročprostěnemluvíčesky").unwrap()); // Czech
26+
assert_eq!("यहलोगहिन्दीक्योंनहींबोलसकतेहैं", nameprep("यहलोगहिन्दीक्योंनहींबोलसकतेहैं").unwrap()); // Hindi
27+
assert_eq!("ພາສາລາວ", nameprep("ພາສາລາວ").unwrap()); // Lao
28+
assert_eq!("bonġusaħħa", nameprep("bonġusaħħa").unwrap()); // Maltese
29+
assert_eq!("ελληνικά", nameprep("ελληνικά").unwrap()); // Greek
30+
}
31+
1432
// 4.1 Map to nothing
1533
#[test]
1634
fn should_map_to_nothing() {

tests/nodeprep_tests.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Examples from http://josefsson.org/idn.php
2+
extern crate stringprep;
3+
4+
use stringprep::nodeprep;
5+
6+
#[ignore]
7+
#[test]
8+
fn test_nodeprep() {
9+
assert_eq!("räksmörgås.josefsson.org", nodeprep("räksmörgås.josefßon.org").unwrap());
10+
}

0 commit comments

Comments
 (0)