|
| 1 | +// Integration tests from https://tools.ietf.org/html/draft-josefsson-idn-test-vectors-00 |
| 2 | +extern crate stringprep; |
| 3 | + |
| 4 | +use stringprep::{Error, nameprep}; |
| 5 | + |
| 6 | +fn assert_prohibited_character<T>(result: Result<T, Error>) { |
| 7 | + assert!(result.is_err()); |
| 8 | +} |
| 9 | + |
| 10 | +fn assert_prohibited_bidirectional_text<T>(result: Result<T, Error>) { |
| 11 | + assert!(result.is_err()); |
| 12 | +} |
| 13 | + |
// 4.1 Map to nothing
// Every non-ASCII code point woven into the input is expected to vanish,
// leaving only the plain ASCII letters behind.
#[test]
fn should_map_to_nothing() {
    let prepared = nameprep(
        "foo\u{00ad}\u{034f}\u{1806}\u{180b}bar\u{200b}\u{2060}baz\u{fe00}\u{fe08}\u{fe0f}\u{feff}",
    )
    .unwrap();
    assert_eq!("foobarbaz", prepared);
}
| 20 | + |
// 4.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045
// Upper-case ASCII input is expected to come back lower-cased.
#[test]
fn should_case_fold_ascii() {
    let folded = nameprep("CAFE").unwrap();
    assert_eq!("cafe", folded);
}
| 26 | + |
// 4.3 Case folding 8bit U+00DF (german sharp s)
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_case_fold_8bit() {
    let sharp_s = "\u{00df}";
    let expected = "ss";
    assert_eq!(expected, nameprep(sharp_s).unwrap());
}
| 33 | + |
// 4.4 Case folding U+0130 (turkish capital I with dot)
// The single input code point is expected to expand to U+0069 U+0307.
#[test]
fn should_case_fold_16bit() {
    let expected = "\u{0069}\u{0307}";
    let actual = nameprep("\u{0130}").unwrap();
    assert_eq!(expected, actual);
}
| 39 | + |
// 4.5 Case folding multibyte U+0143 U+037A
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_case_fold_multibyte() {
    assert_eq!("\u{0144} \u{03b9}", nameprep("\u{0143}\u{037a}").unwrap());
}
| 48 | + |
// 4.6 Case folding U+2121 U+33C6 U+1D7BB
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_case_fold() {
    let raw = "\u{2121}\u{33c6}\u{1d7bb}";
    let prepared = nameprep(raw).unwrap();
    assert_eq!("telc\u{2215}\u{006b}\u{0067}\u{03c3}", prepared);
}
| 57 | + |
// 4.7 Normalization of U+006a U+030c U+00A0 U+00AA
// Expects the first two code points to compose into U+01F0, followed by an
// ASCII space and the letter "a".
#[test]
fn should_normalize() {
    let expected = "\u{01f0} a";
    assert_eq!(expected, nameprep("j\u{030c}\u{00a0}\u{00aa}").unwrap());
}
| 65 | + |
// 4.8 Case folding U+1FB7 and normalization
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_case_fold_and_normalize() {
    let prepared = nameprep("\u{1fb7}").unwrap();
    assert_eq!("\u{1fb6}\u{03b9}", prepared);
}
| 74 | + |
// 4.9 Self-reverting case folding U+01F0 and normalization
// 4.10 Self-reverting case folding U+0390 and normalization
// 4.11 Self-reverting case folding U+03B0 and normalization
// 4.12 Self-reverting case folding U+1E96 and normalization
// 4.13 Self-reverting case folding U+1F56 and normalization
//
// Each listed code point is expected to come out of nameprep unchanged.
#[test]
fn should_revert_case_fold_and_normalization() {
    let inputs = ["\u{01f0}", "\u{0390}", "\u{03b0}", "\u{1e96}", "\u{1f56}"];
    // Destructure the `&&str` yielded by `iter()` so the comparison uses a
    // plain `&str` without cloning a double reference.
    for &input in inputs.iter() {
        assert_eq!(input, nameprep(input).unwrap());
    }
}
| 87 | + |
// 4.14 ASCII space character U+0020
// A lone ASCII space is expected to pass through unchanged.
#[test]
fn should_permit_ascii_space() {
    let space = " ";
    assert_eq!(" ", nameprep(space).unwrap());
}
| 93 | + |
// 4.15 Non-ASCII 8bit space character U+00A0
// U+00A0 is expected to be turned into an ASCII space.
#[test]
fn should_map_8bit_space() {
    let prepared = nameprep("\u{00a0}").unwrap();
    assert_eq!(" ", prepared);
}
| 99 | + |
// 4.16 Non-ASCII multibyte space character U+1680
// Unlike the other space vectors, this one is expected to be rejected.
#[test]
fn should_prohibit_multibyte_space() {
    let result = nameprep("\u{1680}");
    assert_prohibited_character(result);
}
| 105 | + |
// 4.17 Non-ASCII multibyte space character U+2000
// U+2000 is expected to be turned into an ASCII space.
#[test]
fn should_map_multibyte_space1() {
    let input = "\u{2000}";
    let expected = " ";
    assert_eq!(expected, nameprep(input).unwrap());
}
| 111 | + |
// 4.18 Zero Width Space U+200b
// The code point is expected to be removed, producing an empty string.
#[test]
fn should_drop_zero_width_space() {
    let prepared = nameprep("\u{200b}").unwrap();
    assert_eq!("", prepared);
}
| 117 | + |
// 4.19 Non-ASCII multibyte space character U+3000
// U+3000 is expected to be turned into an ASCII space.
#[test]
fn should_map_multibyte_space2() {
    let expected = " ";
    let actual = nameprep("\u{3000}").unwrap();
    assert_eq!(expected, actual);
}
| 123 | + |
// 4.20 ASCII control characters U+0010 U+007F
// Both ASCII control characters are expected to pass through unchanged.
#[test]
fn should_permit_ascii_control() {
    let controls = "\u{0010}\u{007f}";
    let prepared = nameprep(controls).unwrap();
    assert_eq!("\u{0010}\u{007f}", prepared);
}
| 129 | + |
// 4.21 Non-ASCII 8bit control character U+0085
// The input is expected to be rejected.
#[test]
fn should_prohibit_8bit_control() {
    let input = "\u{0085}";
    assert_prohibited_character(nameprep(input));
}
| 135 | + |
// 4.22 Non-ASCII multibyte control character U+180E
// The input is expected to be rejected.
#[test]
fn should_prohibit_multibyte_control() {
    let result = nameprep("\u{180e}");
    assert_prohibited_character(result);
}
| 141 | + |
// 4.23 Zero Width No-Break Space U+FEFF
// The code point is expected to be removed, producing an empty string.
#[test]
fn should_drop_zero_width_no_break_space() {
    let input = "\u{feff}";
    let expected = "";
    assert_eq!(expected, nameprep(input).unwrap());
}
| 147 | + |
// 4.24 Non-ASCII control character U+1D175
// The input is expected to be rejected.
#[test]
fn should_prohibit_non_ascii_control() {
    let input = "\u{1d175}";
    assert_prohibited_character(nameprep(input));
}
| 153 | + |
// 4.25 Plane 0 private use character U+F123
// The input is expected to be rejected.
#[test]
fn should_prohibit_plane0_private_use() {
    let result = nameprep("\u{f123}");
    assert_prohibited_character(result);
}
| 159 | + |
// 4.26 Plane 15 private use character U+F1234
// The input is expected to be rejected.
#[test]
fn should_prohibit_plane15_private_use() {
    let input = "\u{f1234}";
    assert_prohibited_character(nameprep(input));
}
| 165 | + |
// 4.27 Plane 16 private use character U+10F234
// The input is expected to be rejected.
#[test]
fn should_prohibit_plane16_private_use() {
    let result = nameprep("\u{10f234}");
    assert_prohibited_character(result);
}
| 171 | + |
// 4.28 Non-character code point U+8FFFE
// The input is expected to be rejected.
#[test]
fn should_prohibit_non_character1() {
    let input = "\u{8fffe}";
    assert_prohibited_character(nameprep(input));
}
| 177 | + |
// 4.29 Non-character code point U+10FFFF
// The input is expected to be rejected.
#[test]
fn should_prohibit_non_character2() {
    let result = nameprep("\u{10ffff}");
    assert_prohibited_character(result);
}
| 183 | + |
// 4.31 Non-plain text character U+FFFD
// The input is expected to be rejected.
#[test]
fn should_prohibit_non_plain_text() {
    let input = "\u{fffd}";
    assert_prohibited_character(nameprep(input));
}
| 189 | + |
// 4.32 Ideographic description character U+2FF5
// The input is expected to be rejected.
#[test]
fn should_prohibit_ideographic_description() {
    let result = nameprep("\u{2ff5}");
    assert_prohibited_character(result);
}
| 195 | + |
// 4.33 Display property character U+0341
// Rather than being rejected, U+0341 is expected to come back as U+0301.
#[test]
fn should_normalize_display_property() {
    let prepared = nameprep("\u{0341}").unwrap();
    assert_eq!("\u{0301}", prepared);
}
| 201 | + |
// 4.34 Left-to-right mark U+200E
// The input is expected to be rejected.
#[test]
fn should_prohibit_left_to_right_mark() {
    let input = "\u{200e}";
    assert_prohibited_character(nameprep(input));
}
| 207 | + |
// 4.35 Deprecated U+202A
// The input is expected to be rejected.
#[test]
fn should_prohibit_deprecated() {
    let result = nameprep("\u{202a}");
    assert_prohibited_character(result);
}
| 213 | + |
// 4.36 Language tagging character U+E0001
// The input is expected to be rejected.
#[test]
fn should_prohibit_language_tagging1() {
    let input = "\u{e0001}";
    assert_prohibited_character(nameprep(input));
}
| 219 | + |
// 4.37 Language tagging character U+E0042
// The input is expected to be rejected.
#[test]
fn should_prohibit_language_tagging2() {
    let result = nameprep("\u{e0042}");
    assert_prohibited_character(result);
}
| 225 | + |
// 4.38 Bidi: RandALCat character U+05BE and LCat characters
// U+05BE sits between two ASCII runs; the mixture is expected to be rejected.
#[test]
fn should_prohibit_randalcat_with_lcat1() {
    let mixed = "foo\u{05be}bar";
    assert_prohibited_bidirectional_text(nameprep(mixed));
}
| 231 | + |
// 4.39 Bidi: RandALCat character U+FD50 and LCat characters
// U+FD50 sits between two ASCII runs; the mixture is expected to be rejected.
#[test]
fn should_prohibit_randalcat_with_lcat2() {
    let result = nameprep("foo\u{fd50}bar");
    assert_prohibited_bidirectional_text(result);
}
| 237 | + |
// 4.40 Bidi: RandALCat character U+FB38 and LCat characters
// NOTE(review): the heading names U+FB38, but the input exercised here is
// U+FE76 — confirm against the test-vector draft. The expected output replaces
// it with an ASCII space followed by U+064E between the two ASCII runs.
#[test]
fn should_permit_randalcat1() {
    let expected = "foo \u{064e}bar";
    let actual = nameprep("foo\u{fe76}bar").unwrap();
    assert_eq!(expected, actual);
}
| 243 | + |
// 4.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031
// The string starts with U+0627 (the code point named in the heading) but
// ends with the digit U+0031, so it is expected to be rejected.
#[test]
fn should_prohibit_mixed_randalcat() {
    assert_prohibited_bidirectional_text(nameprep("\u{0627}\u{0031}"));
}
| 249 | + |
// 4.42 Bidi: RandALCat character U+0627 U+0031 U+0628
// The digit is enclosed by U+0627 and U+0628; the string is expected to be
// returned unchanged.
#[test]
fn should_permit_randalcat2() {
    let text = "\u{0627}\u{0031}\u{0628}";
    let prepared = nameprep(text).unwrap();
    assert_eq!("\u{0627}\u{0031}\u{0628}", prepared);
}
| 255 | + |
// 4.43 Unassigned code point U+E0002
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_prohibit_unassigned_code_point() {
    let result = nameprep("\u{e0002}");
    assert_prohibited_character(result);
}
| 262 | + |
// 4.44 Larger test (shrinking)
// Combines several of the earlier vectors into one input.
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_shrink() {
    let raw = "X\u{00ad}\u{00df}\u{0130}\u{2121}j\u{030c}\u{00a0}\u{00aa}\u{03b0}\u{2000}";
    let expected = "xssi\u{0307}tel\u{01f0} a\u{03b0}\u{0020}";
    let prepared = nameprep(raw).unwrap();
    assert_eq!(expected, prepared);
}
| 271 | + |
// 4.45 Larger test (expanding)
// The expected output contains more code points than the input.
// Marked #[ignore], so this vector only runs when ignored tests are requested.
#[ignore]
#[test]
fn should_expand() {
    let raw = "X\u{00df}\u{3316}\u{0130}\u{2121}\u{249f}\u{3300}";
    let expected = "xss\u{30ad}\u{30ed}\u{30e1}\u{30fc}\u{30c8}\u{30eb}\u{0069}\u{0307}\u{0074}\u{0065}\u{006c}\u{0028}\u{0064}\u{0029}\u{30a2}\u{30d1}\u{30fc}\u{30c8}";
    let prepared = nameprep(raw).unwrap();
    assert_eq!(expected, prepared);
}
0 commit comments