11//! An implementation of the "stringprep" algorithm defined in [RFC 3454][].
22//!
33//! [RFC 3454]: https://tools.ietf.org/html/rfc3454
4- #![ doc( html_root_url= "https://docs.rs/stringprep/0.1.2" ) ]
4+ #![ doc( html_root_url = "https://docs.rs/stringprep/0.1.2" ) ]
55#![ warn( missing_docs) ]
6+ extern crate finl_unicode;
67extern crate unicode_bidi;
78extern crate unicode_normalization;
8- extern crate finl_unicode;
99
10+ use finl_unicode:: categories:: CharacterCategories ;
1011use std:: borrow:: Cow ;
1112use std:: fmt;
12- use finl_unicode:: categories:: CharacterCategories ;
1313use unicode_normalization:: UnicodeNormalization ;
1414
1515mod rfc3454;
@@ -37,7 +37,9 @@ impl fmt::Display for Error {
3737 match self . 0 {
3838 ErrorCause :: ProhibitedCharacter ( c) => write ! ( fmt, "prohibited character `{}`" , c) ,
3939 ErrorCause :: ProhibitedBidirectionalText => write ! ( fmt, "prohibited bidirectional text" ) ,
40- ErrorCause :: StartsWithCombiningCharacter => write ! ( fmt, "starts with combining character" ) ,
40+ ErrorCause :: StartsWithCombiningCharacter => {
41+ write ! ( fmt, "starts with combining character" )
42+ }
4143 ErrorCause :: EmptyString => write ! ( fmt, "empty string" ) ,
4244 }
4345 }
@@ -59,22 +61,23 @@ pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
5961 }
6062
6163 // 2.1 Mapping
62- let mapped = s. chars ( )
63- . map ( |c| if tables:: non_ascii_space_character ( c) {
64- ' '
65- } else {
66- c
67- } )
64+ let mapped = s
65+ . chars ( )
66+ . map ( |c| {
67+ if tables:: non_ascii_space_character ( c) {
68+ ' '
69+ } else {
70+ c
71+ }
72+ } )
6873 . filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) ) ;
6974
7075 // 2.2 Normalization
7176 let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
7277
7378 // 2.3 Prohibited Output
74- let prohibited = normalized
75- . chars ( )
76- . find ( |& c| {
77- tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
79+ let prohibited = normalized. chars ( ) . find ( |& c| {
80+ tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
7881 tables:: ascii_control_character ( c) /* C.2.1 */ ||
7982 tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
8083 tables:: private_use ( c) /* C.3 */ ||
@@ -84,7 +87,7 @@ pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
8487 tables:: inappropriate_for_canonical_representation ( c) /* C.7 */ ||
8588 tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
8689 tables:: tagging_character ( c) /* C.9 */
87- } ) ;
90+ } ) ;
8891 if let Some ( c) = prohibited {
8992 return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
9093 }
@@ -117,8 +120,9 @@ fn is_prohibited_bidirectional_text(s: &str) -> bool {
117120 // 3) If a string contains any RandALCat character, a RandALCat
118121 // character MUST be the first character of the string, and a
119122 // RandALCat character MUST be the last character of the string.
120- if !tables:: bidi_r_or_al ( s. chars ( ) . next ( ) . unwrap ( ) ) ||
121- !tables:: bidi_r_or_al ( s. chars ( ) . next_back ( ) . unwrap ( ) ) {
123+ if !tables:: bidi_r_or_al ( s. chars ( ) . next ( ) . unwrap ( ) )
124+ || !tables:: bidi_r_or_al ( s. chars ( ) . next_back ( ) . unwrap ( ) )
125+ {
122126 return true ;
123127 }
124128 }
@@ -140,18 +144,17 @@ pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
140144 }
141145
142146 // 3. Mapping
143- let mapped = s. chars ( )
147+ let mapped = s
148+ . chars ( )
144149 . filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) )
145150 . flat_map ( tables:: case_fold_for_nfkc) ;
146151
147152 // 4. Normalization
148153 let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
149154
150155 // 5. Prohibited Output
151- let prohibited = normalized
152- . chars ( )
153- . find ( |& c| {
154- tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
156+ let prohibited = normalized. chars ( ) . find ( |& c| {
157+ tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
155158 tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
156159 tables:: private_use ( c) /* C.3 */ ||
157160 tables:: non_character_code_point ( c) /* C.4 */ ||
@@ -160,7 +163,7 @@ pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
160163 tables:: inappropriate_for_canonical_representation ( c) /* C.7 */ ||
161164 tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
162165 tables:: tagging_character ( c) /* C.9 */
163- } ) ;
166+ } ) ;
164167 if let Some ( c) = prohibited {
165168 return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
166169 }
@@ -195,18 +198,17 @@ pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
195198 }
196199
197200 // A.3. Mapping
198- let mapped = s. chars ( )
201+ let mapped = s
202+ . chars ( )
199203 . filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) )
200204 . flat_map ( tables:: case_fold_for_nfkc) ;
201205
202206 // A.4. Normalization
203207 let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
204208
205209 // A.5. Prohibited Output
206- let prohibited = normalized
207- . chars ( )
208- . find ( |& c| {
209- tables:: ascii_space_character ( c) /* C.1.1 */ ||
210+ let prohibited = normalized. chars ( ) . find ( |& c| {
211+ tables:: ascii_space_character ( c) /* C.1.1 */ ||
210212 tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
211213 tables:: ascii_control_character ( c) /* C.2.1 */ ||
212214 tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
@@ -218,7 +220,7 @@ pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
218220 tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
219221 tables:: tagging_character ( c) /* C.9 */ ||
220222 prohibited_node_character ( c)
221- } ) ;
223+ } ) ;
222224 if let Some ( c) = prohibited {
223225 return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
224226 }
@@ -240,10 +242,7 @@ pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
240242
/// Reports whether `c` is one of the additional characters that RFC 3920
/// does not allow in the node part of a JID.
///
/// The rejected set is exactly `"`, `&`, `'`, `/`, `:`, `<`, `>` and `@`;
/// every other character yields `false`.
fn prohibited_node_character(c: char) -> bool {
    matches!(c, '"' | '&' | '\'' | '/' | ':' | '<' | '>' | '@')
}
248247
249248/// Prepares a string with the Resourceprep profile of the stringprep algorithm.
@@ -253,25 +252,22 @@ fn prohibited_node_character(c: char) -> bool {
253252/// [RFC 3920, Appendix B]: https://tools.ietf.org/html/rfc3920#appendix-B
254253pub fn resourceprep ( s : & str ) -> Result < Cow < ' _ , str > , Error > {
255254 // fast path for ascii text
256- if s. chars ( )
257- . all ( |c| matches ! ( c, ' ' ..='~' ) )
258- {
255+ if s. chars ( ) . all ( |c| matches ! ( c, ' ' ..='~' ) ) {
259256 return Ok ( Cow :: Borrowed ( s) ) ;
260257 }
261258
262259 // B.3. Mapping
263- let mapped = s. chars ( )
260+ let mapped = s
261+ . chars ( )
264262 . filter ( |& c| !tables:: commonly_mapped_to_nothing ( c) )
265263 . collect :: < String > ( ) ;
266264
267265 // B.4. Normalization
268266 let normalized = mapped. nfkc ( ) . collect :: < String > ( ) ;
269267
270268 // B.5. Prohibited Output
271- let prohibited = normalized
272- . chars ( )
273- . find ( |& c| {
274- tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
269+ let prohibited = normalized. chars ( ) . find ( |& c| {
270+ tables:: non_ascii_space_character ( c) /* C.1.2 */ ||
275271 tables:: ascii_control_character ( c) /* C.2.1 */ ||
276272 tables:: non_ascii_control_character ( c) /* C.2.2 */ ||
277273 tables:: private_use ( c) /* C.3 */ ||
@@ -281,7 +277,7 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
281277 tables:: inappropriate_for_canonical_representation ( c) /* C.7 */ ||
282278 tables:: change_display_properties_or_deprecated ( c) /* C.8 */ ||
283279 tables:: tagging_character ( c) /* C.9 */
284- } ) ;
280+ } ) ;
285281 if let Some ( c) = prohibited {
286282 return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
287283 }
@@ -301,48 +297,36 @@ pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
301297 Ok ( Cow :: Owned ( normalized) )
302298}
303299
304- /// Determines if `c` is to be removed according to section 7.2 of
305- /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
306- fn x520_mapped_to_nothing ( c : char ) -> bool {
307- match c {
308- '\u{00AD}' | '\u{1806}' | '\u{034F}' | '\u{180B}' ..='\u{180D}' |
309- '\u{FE00}' ..='\u{FE0F}' | '\u{FFFC}' | '\u{200B}' => true ,
310- // Technically control characters, but mapped to whitespace in X.520.
311- '\u{09}' | '\u{0A}' ..='\u{0D}' | '\u{85}' => false ,
312- _ => c. is_control ( ) ,
313- }
314- }
315-
316- /// Determines if `c` is to be replaced by SPACE (0x20) according to section 7.2 of
317- /// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
318- fn x520_mapped_to_space ( c : char ) -> bool {
319- match c {
320- '\u{09}' | '\u{0A}' ..='\u{0D}' | '\u{85}' => true ,
321- _ => c. is_separator ( ) ,
322- }
323- }
324-
325300/// Prepares a string according to the procedures described in Section 7 of
326301/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
327302///
328303/// Note that this function does _not_ remove leading, trailing, or inner
329304/// spaces as described in Section 7.6, because the characters needing removal
330305/// will vary across the matching rules and ASN.1 syntaxes used.
331306pub fn x520prep ( s : & str , case_fold : bool ) -> Result < Cow < ' _ , str > , Error > {
332- if s. len ( ) == 0 {
307+ if s. is_empty ( ) {
333308 return Err ( Error ( ErrorCause :: EmptyString ) ) ;
334309 }
335- if s. chars ( ) . all ( |c| matches ! ( c, ' ' ..='~' ) && ( !case_fold || c. is_ascii_lowercase ( ) ) ) {
310+ if s. chars ( )
311+ . all ( |c| matches ! ( c, ' ' ..='~' ) && ( !case_fold || c. is_ascii_lowercase ( ) ) )
312+ {
336313 return Ok ( Cow :: Borrowed ( s) ) ;
337314 }
338315
339316 // 1. Transcode
340317 // Already done because &str is enforced to be Unicode.
341318
342319 // 2. Map
343- let mapped = s. chars ( )
344- . filter ( |& c| !x520_mapped_to_nothing ( c) )
345- . map ( |c| if x520_mapped_to_space ( c) { ' ' } else { c } ) ;
320+ let mapped = s
321+ . chars ( )
322+ . filter ( |& c| !tables:: x520_mapped_to_nothing ( c) )
323+ . map ( |c| {
324+ if tables:: x520_mapped_to_space ( c) {
325+ ' '
326+ } else {
327+ c
328+ }
329+ } ) ;
346330
347331 // 3. Normalize
348332 let normalized = if case_fold {
@@ -354,24 +338,27 @@ pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
354338 } ;
355339
356340 // 4. Prohibit
357- let prohibited = normalized. chars ( ) . find ( |& c| tables:: unassigned_code_point ( c)
358- || tables:: private_use ( c)
359- || tables:: non_character_code_point ( c)
360- || tables:: surrogate_code ( c)
361- || c == '\u{FFFD}' // REPLACEMENT CHARACTER
341+ let prohibited = normalized. chars ( ) . find (
342+ |& c| {
343+ tables:: unassigned_code_point ( c)
344+ || tables:: private_use ( c)
345+ || tables:: non_character_code_point ( c)
346+ || tables:: surrogate_code ( c)
347+ || c == '\u{FFFD}'
348+ } , // REPLACEMENT CHARACTER
362349 ) ;
363350 if let Some ( c) = prohibited {
364351 return Err ( Error ( ErrorCause :: ProhibitedCharacter ( c) ) ) ;
365352 }
366353 // From ITU-T Recommendation X.520, Section 7.4:
367354 // "The first code point of a string is prohibited from being a combining character."
368- let first_char = s. chars ( ) . next ( ) ;
369- if let Some ( c) = first_char {
370- if c. is_mark ( ) {
371- return Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) ;
355+ match s. chars ( ) . next ( ) {
356+ Some ( c) => {
357+ if c. is_mark ( ) {
358+ return Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) ;
359+ }
372360 }
373- } else {
374- return Err ( Error ( ErrorCause :: EmptyString ) ) ;
361+ None => return Err ( Error ( ErrorCause :: EmptyString ) ) ,
375362 }
376363
377364 // 5. Check bidi
@@ -389,32 +376,32 @@ pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
389376mod test {
390377 use super :: * ;
391378
392- fn assert_prohibited_character < T > ( result : Result < T , Error > ) {
393- match result {
394- Err ( Error ( ErrorCause :: ProhibitedCharacter ( _) ) ) => ( ) ,
395- _ => assert ! ( false )
396- }
397- }
379+ fn assert_prohibited_character < T > ( result : Result < T , Error > ) {
380+ match result {
381+ Err ( Error ( ErrorCause :: ProhibitedCharacter ( _) ) ) => ( ) ,
382+ _ => panic ! ( ) ,
383+ }
384+ }
398385
399386 fn assert_starts_with_combining_char < T > ( result : Result < T , Error > ) {
400- match result {
401- Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) => ( ) ,
402- _ => assert ! ( false )
403- }
404- }
387+ match result {
388+ Err ( Error ( ErrorCause :: StartsWithCombiningCharacter ) ) => ( ) ,
389+ _ => panic ! ( ) ,
390+ }
391+ }
405392
// RFC 4013, section 3 (Examples): a string containing U+0007 must be
// rejected with a prohibited-character error.
#[test]
fn saslprep_examples() {
    assert_prohibited_character(saslprep("\u{0007}"));
}
411398
// Inputs that Nodeprep must reject with a prohibited-character error.
#[test]
fn nodeprep_examples() {
    // A plain ASCII space.
    assert_prohibited_character(nodeprep(" "));
    // U+00A0 NO-BREAK SPACE.
    assert_prohibited_character(nodeprep("\u{00a0}"));
    // '@' is on the extra JID node list checked by `prohibited_node_character`.
    assert_prohibited_character(nodeprep("foo@bar"));
}
418405
419406 #[ test]
420407 fn resourceprep_examples ( ) {
@@ -424,8 +411,14 @@ mod test {
424411 #[ test]
425412 fn x520prep_examples ( ) {
426413 assert_eq ! ( x520prep( "foo@bar" , true ) . unwrap( ) , "foo@bar" ) ;
427- assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , false ) . unwrap( ) , "J. W. wuz h\u{0115} re" ) ;
428- assert_eq ! ( x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , true ) . unwrap( ) , "j. w. wuz h\u{0115} re" ) ;
414+ assert_eq ! (
415+ x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , false ) . unwrap( ) ,
416+ "J. W. wuz h\u{0115} re"
417+ ) ;
418+ assert_eq ! (
419+ x520prep( "J.\u{FE00} \u{9} W. \u{B} wuz h\u{0115} re" , true ) . unwrap( ) ,
420+ "j. w. wuz h\u{0115} re"
421+ ) ;
429422 assert_eq ! ( x520prep( "UPPERCASED" , true ) . unwrap( ) , "uppercased" ) ;
430423 assert_starts_with_combining_char ( x520prep ( "\u{0306} hello" , true ) ) ;
431424 }
0 commit comments