Skip to content

Commit ae4ca96

Browse files
committed
Improve Content-Disposition parameter parsing
1 parent bc11861 commit ae4ca96

2 files changed

Lines changed: 225 additions & 27 deletions

File tree

src/constants.rs

Lines changed: 212 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::borrow::Cow;
22

3+
use encoding_rs::Encoding;
4+
35
pub(crate) const DEFAULT_WHOLE_STREAM_SIZE_LIMIT: u64 = u64::MAX;
46
pub(crate) const DEFAULT_PER_FIELD_SIZE_LIMIT: u64 = u64::MAX;
57

@@ -31,47 +33,203 @@ fn trim_ascii_ws_then(bytes: &[u8], char: u8) -> Option<&[u8]> {
3133
}
3234
}
3335

36+
/// Returns `bytes` with every trailing ASCII-whitespace byte removed.
///
/// Leading and interior whitespace is left untouched. An empty input, or one
/// made up solely of whitespace, yields an empty slice.
fn trim_ascii_ws_end(bytes: &[u8]) -> &[u8] {
    let mut kept = bytes;
    // Peel bytes off the tail for as long as the last one is whitespace.
    while let [head @ .., last] = kept {
        if !last.is_ascii_whitespace() {
            break;
        }
        kept = head;
    }
    kept
}
42+
43+
/// Advances `*index` just past the next parameter separator (`;`) in `header`.
///
/// Scanning starts at `*index`. A `;` inside a double-quoted string belongs to
/// that parameter's value and is not treated as a separator, so a value such
/// as `name="a;filename=b"` cannot be mistaken for a second parameter. Inside
/// quotes a backslash escapes the byte that follows it.
///
/// If a quote is opened but never closed the header is malformed; the scan
/// then falls back to stopping after the first `;` it saw, quoted or not.
///
/// When no separator is found `*index` becomes `header.len()`. An `*index`
/// already at or beyond the end of `header` is left unchanged.
fn skip_to_next_parameter(header: &[u8], index: &mut usize) {
    let start = *index;
    if start >= header.len() {
        return;
    }

    let mut in_quotes = false;
    let mut escaped = false;
    // Remembered only for the unterminated-quote fallback below.
    let mut first_semicolon = None;

    for (offset, &byte) in header[start..].iter().enumerate() {
        if byte == b';' && first_semicolon.is_none() {
            first_semicolon = Some(offset);
        }

        if in_quotes {
            if escaped {
                // This byte was escaped by the preceding backslash.
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_quotes = false;
            }
        } else if byte == b'"' {
            in_quotes = true;
        } else if byte == b';' {
            *index = start + offset + 1;
            return;
        }
    }

    *index = match first_semicolon {
        // Unterminated quote: be lenient and split at the first `;` anyway.
        Some(offset) if in_quotes => start + offset + 1,
        _ => header.len(),
    };
}
51+
52+
/// Moves `*index` forward over any run of ASCII whitespace in `header`.
///
/// The cursor stops at the first non-whitespace byte or at `header.len()`,
/// whichever comes first. An `*index` that is already out of bounds is left
/// as it is.
fn skip_ascii_ws(header: &[u8], index: &mut usize) {
    if let Some(rest) = header.get(*index..) {
        *index += rest
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
    }
}
57+
58+
fn parse_quoted_value(mut header: &[u8]) -> Option<(&[u8], bool)> {
59+
header = trim_ascii_ws_then(header, b'"')?;
60+
let start = 0;
61+
let (mut index, mut escaped) = (start, false);
62+
63+
while index < header.len() {
64+
if header[index] == b'"' {
65+
let mut backslashes = 0;
66+
let mut cursor = index;
67+
while cursor > start && header[cursor - 1] == b'\\' {
68+
backslashes += 1;
69+
cursor -= 1;
70+
}
71+
72+
if backslashes % 2 == 0 {
73+
return Some((&header[..index], escaped));
74+
}
75+
76+
escaped = true;
77+
}
78+
79+
index += 1;
80+
}
81+
82+
None
83+
}
84+
85+
fn parse_unquoted_value(header: &[u8]) -> &[u8] {
86+
let value = trim_ascii_ws_start(header);
87+
trim_ascii_ws_end(&value[..memchr::memchr(b';', value).unwrap_or(value.len())])
88+
}
89+
90+
/// Decodes `%XX` percent-escapes in `bytes`.
///
/// Every `%` must be followed by exactly two hexadecimal digits (upper or
/// lower case); the three bytes are replaced by the byte they encode. All
/// other bytes are copied through unchanged.
///
/// Returns `None` when a `%` is followed by fewer than two bytes or by
/// anything that is not a hex digit. Sign characters are rejected too: `%+5`
/// is malformed, not byte `0x05`.
fn decode_percent_bytes(bytes: &[u8]) -> Option<Vec<u8>> {
    /// Numeric value of a single ASCII hex digit.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    // Fast path: nothing to decode, so a straight copy is enough.
    if !bytes.contains(&b'%') {
        return Some(bytes.to_vec());
    }

    let mut decoded = Vec::with_capacity(bytes.len());
    let mut rest = bytes;
    while let Some((&byte, tail)) = rest.split_first() {
        if byte == b'%' {
            let (&hi, tail) = tail.split_first()?;
            let (&lo, tail) = tail.split_first()?;
            decoded.push((hex_value(hi)? << 4) | hex_value(lo)?);
            rest = tail;
        } else {
            decoded.push(byte);
            rest = tail;
        }
    }

    Some(decoded)
}
112+
113+
fn decode_value<'h>(bytes: &'h [u8], is_escaped: bool) -> Option<Cow<'h, str>> {
114+
if bytes.contains(&b'%') {
115+
return Some(String::from_utf8(decode_percent_bytes(bytes)?).ok()?.into());
116+
}
117+
118+
let value = std::str::from_utf8(bytes).ok()?;
119+
if is_escaped {
120+
Some(value.replace(r#"\""#, "\"").into())
121+
} else {
122+
Some(value.into())
123+
}
124+
}
125+
126+
/// Decodes an extended parameter value of the form `charset'language'value`.
///
/// The input must be UTF-8 and contain at least two `'` separators. The
/// language tag between them is ignored. `value` is percent-decoded and the
/// resulting bytes are interpreted in the encoding named by `charset`.
///
/// The declared charset is always the one used: a byte-order mark is removed
/// only when it is the BOM of that same encoding, and a BOM belonging to a
/// different encoding never switches the decoder.
///
/// Returns `None` when the structure is wrong, the charset label is unknown,
/// the percent-encoding is malformed, or the bytes are not valid in the
/// declared encoding.
fn decode_extended_value(bytes: &[u8]) -> Option<String> {
    let value = std::str::from_utf8(bytes).ok()?;
    let mut parts = value.splitn(3, '\'');
    let charset = parts.next()?;
    let _language = parts.next()?;
    let encoded = parts.next()?;

    let encoding = Encoding::for_label(charset.as_bytes())?;
    let decoded = decode_percent_bytes(encoded.as_bytes())?;
    // `Encoding::decode` would sniff a BOM and could silently pick a different
    // encoding than the one the client declared.
    let (text, had_errors) = encoding.decode_with_bom_removal(&decoded);
    if had_errors {
        return None;
    }

    Some(text.into_owned())
}
142+
34143
impl ContentDispositionAttr {
35144
/// Extract ContentDisposition Attribute from header.
36145
///
37-
/// Some older clients may not quote the name or filename, so we allow them,
38-
/// but require them to be percent encoded. Only allocates if percent
39-
/// decoding, and there are characters that need to be decoded.
40-
pub fn extract_from<'h>(&self, mut header: &'h [u8]) -> Option<Cow<'h, str>> {
41-
// TODO: The prefix should be matched case-insensitively.
146+
/// Some older clients may not quote the name or filename, so we allow them.
147+
/// If they percent-encode the value, we decode it before returning.
148+
pub fn extract_from<'h>(&self, header: &'h [u8]) -> Option<Cow<'h, str>> {
149+
if self == &ContentDispositionAttr::FileName
150+
&& let Some(value) = self.extract_extended_from(header)
151+
{
152+
return Some(value);
153+
}
154+
42155
let prefix = match self {
43156
ContentDispositionAttr::Name => &b"name"[..],
44157
ContentDispositionAttr::FileName => &b"filename"[..],
45158
};
159+
let mut index = 0;
46160

47-
while let Some(i) = memchr::memmem::find(header, prefix) {
48-
// Check if we found a superstring of `prefix`; continue if so.
49-
let suffix = &header[(i + prefix.len())..];
50-
if i > 0 && !(header[i - 1].is_ascii_whitespace() || header[i - 1] == b';') {
51-
header = suffix;
52-
continue;
161+
while index < header.len() {
162+
skip_to_next_parameter(header, &mut index);
163+
skip_ascii_ws(header, &mut index);
164+
if index >= header.len() {
165+
break;
166+
}
167+
168+
let key_start = index;
169+
while index < header.len()
170+
&& !header[index].is_ascii_whitespace()
171+
&& header[index] != b'='
172+
&& header[index] != b';'
173+
{
174+
index += 1;
53175
}
54176

55-
// Now find and trim the `=`. Handle quoted strings first.
56-
let rest = trim_ascii_ws_then(suffix, b'=')?;
57-
let (bytes, is_escaped) = if let Some(rest) = trim_ascii_ws_then(rest, b'"') {
58-
let (mut k, mut escaped) = (memchr::memchr(b'"', rest)?, false);
59-
while k > 0 && rest[k - 1] == b'\\' {
60-
escaped = true;
61-
k = k + 1 + memchr::memchr(b'"', &rest[(k + 1)..])?;
62-
}
177+
let key = &header[key_start..index];
178+
skip_ascii_ws(header, &mut index);
179+
if index >= header.len() || header[index] != b'=' {
180+
continue;
181+
}
63182

64-
(&rest[..k], escaped)
183+
index += 1;
184+
let rest = &header[index..];
185+
let (bytes, is_escaped) = if let Some((value, escaped)) = parse_quoted_value(rest) {
186+
(value, escaped)
65187
} else {
66-
let rest = trim_ascii_ws_start(rest);
67-
let j = memchr::memchr2(b';', b' ', rest).unwrap_or(rest.len());
68-
(&rest[..j], false)
188+
(parse_unquoted_value(rest), false)
69189
};
70190

71-
return match std::str::from_utf8(bytes).ok()? {
72-
name if is_escaped => Some(name.replace(r#"\""#, "\"").into()),
73-
name => Some(name.into()),
74-
};
191+
if key.eq_ignore_ascii_case(prefix) {
192+
return decode_value(bytes, is_escaped);
193+
}
194+
}
195+
196+
None
197+
}
198+
199+
fn extract_extended_from<'h>(&self, header: &'h [u8]) -> Option<Cow<'h, str>> {
200+
let prefix = match self {
201+
ContentDispositionAttr::Name => return None,
202+
ContentDispositionAttr::FileName => &b"filename*"[..],
203+
};
204+
let mut index = 0;
205+
206+
while index < header.len() {
207+
skip_to_next_parameter(header, &mut index);
208+
skip_ascii_ws(header, &mut index);
209+
if index >= header.len() {
210+
break;
211+
}
212+
213+
let key_start = index;
214+
while index < header.len()
215+
&& !header[index].is_ascii_whitespace()
216+
&& header[index] != b'='
217+
&& header[index] != b';'
218+
{
219+
index += 1;
220+
}
221+
222+
let key = &header[key_start..index];
223+
skip_ascii_ws(header, &mut index);
224+
if index >= header.len() || header[index] != b'=' {
225+
continue;
226+
}
227+
228+
index += 1;
229+
if key.eq_ignore_ascii_case(prefix) {
230+
let value = parse_unquoted_value(&header[index..]);
231+
return Some(decode_extended_value(value)?.into());
232+
}
75233
}
76234

77235
None
@@ -220,4 +378,31 @@ mod tests {
220378
let name = ContentDispositionAttr::Name.extract_from(val);
221379
assert_eq!(name.unwrap(), r#"myfield"name"#);
222380
}
381+
382+
/// Parameter names written in upper case (`NAME`, `FILENAME`) are still found.
#[test]
fn test_content_disposition_case_insensitive_parameters() {
    let header = br#"form-data; NAME="my_field"; FILENAME="file-name.txt""#;

    assert_eq!(
        ContentDispositionAttr::Name.extract_from(header).as_deref(),
        Some("my_field")
    );
    assert_eq!(
        ContentDispositionAttr::FileName.extract_from(header).as_deref(),
        Some("file-name.txt")
    );
}
390+
391+
/// Unquoted values that carry percent-escapes come back decoded.
#[test]
fn test_content_disposition_percent_decoded_values() {
    let header = br#"form-data; name=my%20field; filename=file%20name.txt"#;

    assert_eq!(
        ContentDispositionAttr::Name.extract_from(header).as_deref(),
        Some("my field")
    );
    assert_eq!(
        ContentDispositionAttr::FileName.extract_from(header).as_deref(),
        Some("file name.txt")
    );
}
399+
400+
/// When both `filename` and `filename*` are present, the extended form wins.
#[test]
fn test_content_disposition_filename_star_preferred() {
    let header = br#"form-data; name="upload"; filename="fallback.txt"; filename*=UTF-8''%E4%BD%A0%E5%A5%BD.txt"#;

    assert_eq!(
        ContentDispositionAttr::Name.extract_from(header).as_deref(),
        Some("upload")
    );
    assert_eq!(
        ContentDispositionAttr::FileName.extract_from(header).as_deref(),
        Some("你好.txt")
    );
}
223408
}

tests/integration.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,19 @@ async fn test_multipart_transport_padding() {
8585
assert!(m.next_field().await.is_err());
8686
}
8787

88+
#[tokio::test]
89+
async fn test_multipart_content_disposition_compatibility() {
90+
let data = "--X-BOUNDARY\r\nContent-Disposition: form-data; NAME=\"my_file_field\"; FILENAME=\"fallback.txt\"; FILENAME*=UTF-8''%E4%BD%A0%E5%A5%BD.txt\r\nContent-Type: text/plain\r\n\r\nhello\r\n--X-BOUNDARY--\r\n";
91+
let stream = str_stream(data);
92+
let mut m = Multipart::new(stream, "X-BOUNDARY");
93+
94+
let field = m.next_field().await.unwrap().unwrap();
95+
assert_eq!(field.name(), Some("my_file_field"));
96+
assert_eq!(field.file_name(), Some("你好.txt"));
97+
assert_eq!(field.content_type(), Some(&mime::TEXT_PLAIN));
98+
assert_eq!(field.text().await, Ok("hello".to_owned()));
99+
}
100+
88101
#[tokio::test]
89102
async fn test_multipart_header() {
90103
let should_pass = [

0 commit comments

Comments
 (0)