Skip to content

Commit 0b883c7

Browse files
author
Eric Sessoms
committed
Implement unassigned code-points and case-folding
This commit properly prohibits unassigned code points and correctly performs the case folding used with NFKC normalization. All tests pass. Note that this change is NOT backwards compatible: earlier versions did not prohibit unassigned code points and therefore incorrectly accepted some invalid strings.
1 parent d9030ff commit 0b883c7

File tree

6 files changed

+1878
-35
lines changed

6 files changed

+1878
-35
lines changed

src/lib.rs

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::error;
1212
use std::fmt;
1313
use unicode_normalization::UnicodeNormalization;
1414

15+
mod rfc3454;
1516
pub mod tables;
1617

1718
/// Describes why a string failed stringprep normalization.
@@ -69,7 +70,7 @@ pub fn saslprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
6970
// 2.3 Prohibited Output
7071
let prohibited = normalized
7172
.chars()
72-
.filter(|&c| {
73+
.find(|&c| {
7374
tables::non_ascii_space_character(c) /* C.1.2 */ ||
7475
tables::ascii_control_character(c) /* C.2.1 */ ||
7576
tables::non_ascii_control_character(c) /* C.2.2 */ ||
@@ -80,19 +81,23 @@ pub fn saslprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
8081
tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
8182
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
8283
tables::tagging_character(c) /* C.9 */
83-
})
84-
.next();
84+
});
8585
if let Some(c) = prohibited {
8686
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
8787
}
8888

89-
// RFC3454, 6. Bidirectional Characters
89+
// 2.4. Bidirectional Characters
9090
if is_prohibited_bidirectional_text(&normalized) {
9191
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
9292
}
9393

9494
// 2.5 Unassigned Code Points
95-
// FIXME: Reject unassigned code points.
95+
let unassigned = normalized
96+
.chars()
97+
.find(|&c| tables::unassigned_code_point(c));
98+
if let Some(c) = unassigned {
99+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
100+
}
96101

97102
Ok(Cow::Owned(normalized))
98103
}
@@ -125,16 +130,15 @@ pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
125130
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
126131
.collect::<String>();
127132

128-
// FIXME: using `to_lowercase` as proxy for case folding
129-
let mapped = mapped.to_lowercase();
133+
let mapped = tables::case_fold(&mapped);
130134

131135
// 4. Normalization
132136
let normalized = mapped.nfkc().collect::<String>();
133137

134138
// 5. Prohibited Output
135139
let prohibited = normalized
136140
.chars()
137-
.filter(|&c| {
141+
.find(|&c| {
138142
tables::non_ascii_space_character(c) /* C.1.2 */ ||
139143
tables::non_ascii_control_character(c) /* C.2.2 */ ||
140144
tables::private_use(c) /* C.3 */ ||
@@ -144,19 +148,23 @@ pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
144148
tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
145149
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
146150
tables::tagging_character(c) /* C.9 */
147-
})
148-
.next();
151+
});
149152
if let Some(c) = prohibited {
150153
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
151154
}
152155

153-
// RFC3454, 6. Bidirectional Characters
156+
// 6. Bidirectional Characters
154157
if is_prohibited_bidirectional_text(&normalized) {
155158
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
156159
}
157160

158161
// 7. Unassigned Code Points
159-
// FIXME: Reject unassigned code points.
162+
let unassigned = normalized
163+
.chars()
164+
.find(|&c| tables::unassigned_code_point(c));
165+
if let Some(c) = unassigned {
166+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
167+
}
160168

161169
Ok(Cow::Owned(normalized))
162170
}
@@ -168,16 +176,15 @@ pub fn nodeprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
168176
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
169177
.collect::<String>();
170178

171-
// FIXME: using `to_lowercase` as proxy for case folding
172-
let mapped = mapped.to_lowercase();
179+
let mapped = tables::case_fold(&mapped);
173180

174181
// A.4. Normalization
175182
let normalized = mapped.nfkc().collect::<String>();
176183

177184
// A.5. Prohibited Output
178185
let prohibited = normalized
179186
.chars()
180-
.filter(|&c| {
187+
.find(|&c| {
181188
tables::ascii_space_character(c) /* C.1.1 */ ||
182189
tables::non_ascii_space_character(c) /* C.1.2 */ ||
183190
tables::ascii_control_character(c) /* C.2.1 */ ||
@@ -190,18 +197,22 @@ pub fn nodeprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
190197
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
191198
tables::tagging_character(c) /* C.9 */ ||
192199
prohibited_node_character(c)
193-
})
194-
.next();
200+
});
195201
if let Some(c) = prohibited {
196202
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
197203
}
198204

199-
// RFC3454, 6. Bidirectional Characters
205+
// A.6. Bidirectional Characters
200206
if is_prohibited_bidirectional_text(&normalized) {
201207
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
202208
}
203209

204-
// FIXME: Reject unassigned code points.
210+
let unassigned = normalized
211+
.chars()
212+
.find(|&c| tables::unassigned_code_point(c));
213+
if let Some(c) = unassigned {
214+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
215+
}
205216

206217
Ok(Cow::Owned(normalized))
207218
}
@@ -227,7 +238,7 @@ pub fn resourceprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
227238
// B.5. Prohibited Output
228239
let prohibited = normalized
229240
.chars()
230-
.filter(|&c| {
241+
.find(|&c| {
231242
tables::non_ascii_space_character(c) /* C.1.2 */ ||
232243
tables::ascii_control_character(c) /* C.2.1 */ ||
233244
tables::non_ascii_control_character(c) /* C.2.2 */ ||
@@ -238,18 +249,22 @@ pub fn resourceprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
238249
tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
239250
tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
240251
tables::tagging_character(c) /* C.9 */
241-
})
242-
.next();
252+
});
243253
if let Some(c) = prohibited {
244254
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
245255
}
246256

247-
// RFC3454, 6. Bidirectional Characters
257+
// B.6. Bidirectional Characters
248258
if is_prohibited_bidirectional_text(&normalized) {
249259
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
250260
}
251261

252-
// FIXME: Reject unassigned code points.
262+
let unassigned = normalized
263+
.chars()
264+
.find(|&c| tables::unassigned_code_point(c));
265+
if let Some(c) = unassigned {
266+
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
267+
}
253268

254269
Ok(Cow::Owned(normalized))
255270
}

0 commit comments

Comments
 (0)