|
1 | 1 | use std::borrow::Cow; |
2 | 2 |
|
| 3 | +use encoding_rs::Encoding; |
| 4 | + |
3 | 5 | pub(crate) const DEFAULT_WHOLE_STREAM_SIZE_LIMIT: u64 = u64::MAX; |
4 | 6 | pub(crate) const DEFAULT_PER_FIELD_SIZE_LIMIT: u64 = u64::MAX; |
5 | 7 |
|
@@ -31,47 +33,203 @@ fn trim_ascii_ws_then(bytes: &[u8], char: u8) -> Option<&[u8]> { |
31 | 33 | } |
32 | 34 | } |
33 | 35 |
|
/// Returns `bytes` with every trailing ASCII whitespace byte removed.
///
/// The result is empty when `bytes` is empty or consists solely of ASCII
/// whitespace. Leading and interior whitespace is left untouched.
fn trim_ascii_ws_end(mut bytes: &[u8]) -> &[u8] {
    // Peel whitespace off the tail one byte at a time until a
    // non-whitespace byte (or the start of the slice) is reached.
    while let [head @ .., last] = bytes {
        if !last.is_ascii_whitespace() {
            break;
        }
        bytes = head;
    }
    bytes
}
| 42 | + |
/// Advances `index` to the byte just past the next parameter separator (`;`)
/// in `header`, or to `header.len()` when no separator remains.
///
/// A `;` that appears inside a double-quoted string is part of that value and
/// is NOT treated as a separator. Inside a quoted string a backslash escapes
/// the byte that follows it, so `\"` does not terminate the string. Without
/// this, a value such as `name="a; filename=evil"` would be split in the
/// middle and the text after the embedded `;` would be parsed as a separate
/// parameter.
///
/// An unterminated quoted string consumes the remainder of the header.
/// If `index` is already at or past the end of `header`, it is left unchanged.
fn skip_to_next_parameter(header: &[u8], index: &mut usize) {
    let mut in_quotes = false;

    while let Some(&byte) = header.get(*index) {
        *index += 1;
        match byte {
            // Escaped byte inside a quoted string: step over it so an
            // escaped quote cannot close the string.
            b'\\' if in_quotes => {
                if *index < header.len() {
                    *index += 1;
                }
            }
            b'"' => in_quotes = !in_quotes,
            // A real separator; `index` already points just past it.
            b';' if !in_quotes => return,
            _ => {}
        }
    }
}
| 51 | + |
/// Advances `index` past the run of ASCII whitespace that starts at `index`
/// in `header`.
///
/// `index` is left unchanged when it already points at a non-whitespace byte
/// or lies at or beyond the end of `header`.
fn skip_ascii_ws(header: &[u8], index: &mut usize) {
    // `get` yields `None` for an out-of-range start, which counts as an
    // empty run.
    let run = header.get(*index..).map_or(0, |rest| {
        rest.iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count()
    });
    *index += run;
}
| 57 | + |
| 58 | +fn parse_quoted_value(mut header: &[u8]) -> Option<(&[u8], bool)> { |
| 59 | + header = trim_ascii_ws_then(header, b'"')?; |
| 60 | + let start = 0; |
| 61 | + let (mut index, mut escaped) = (start, false); |
| 62 | + |
| 63 | + while index < header.len() { |
| 64 | + if header[index] == b'"' { |
| 65 | + let mut backslashes = 0; |
| 66 | + let mut cursor = index; |
| 67 | + while cursor > start && header[cursor - 1] == b'\\' { |
| 68 | + backslashes += 1; |
| 69 | + cursor -= 1; |
| 70 | + } |
| 71 | + |
| 72 | + if backslashes % 2 == 0 { |
| 73 | + return Some((&header[..index], escaped)); |
| 74 | + } |
| 75 | + |
| 76 | + escaped = true; |
| 77 | + } |
| 78 | + |
| 79 | + index += 1; |
| 80 | + } |
| 81 | + |
| 82 | + None |
| 83 | +} |
| 84 | + |
| 85 | +fn parse_unquoted_value(header: &[u8]) -> &[u8] { |
| 86 | + let value = trim_ascii_ws_start(header); |
| 87 | + trim_ascii_ws_end(&value[..memchr::memchr(b';', value).unwrap_or(value.len())]) |
| 88 | +} |
| 89 | + |
/// Percent-decodes `bytes`, turning every `%XX` triplet into the byte whose
/// value is the two hexadecimal digits `XX` (either case).
///
/// All other bytes are copied through unchanged. Returns `None` when a `%` is
/// not followed by exactly two hexadecimal digits, including when the input
/// ends too early.
///
/// Digits are validated byte by byte. Parsing the pair with
/// `u8::from_str_radix` would also accept a leading `+` sign, so a malformed
/// sequence such as `%+1` would silently decode to `0x01`.
fn decode_percent_bytes(bytes: &[u8]) -> Option<Vec<u8>> {
    /// Value of a single ASCII hexadecimal digit, or `None` for any other byte.
    fn hex_digit(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    // Fast path: nothing to decode, a plain copy suffices.
    if !bytes.contains(&b'%') {
        return Some(bytes.to_vec());
    }

    let mut decoded = Vec::with_capacity(bytes.len());
    let mut rest = bytes;
    loop {
        match rest {
            [] => break,
            [b'%', hi, lo, tail @ ..] => {
                // Each digit is at most 15, so the shift cannot overflow a u8.
                decoded.push((hex_digit(*hi)? << 4) | hex_digit(*lo)?);
                rest = tail;
            }
            // A `%` with fewer than two bytes after it is a truncated escape.
            [b'%', ..] => return None,
            [byte, tail @ ..] => {
                decoded.push(*byte);
                rest = tail;
            }
        }
    }

    Some(decoded)
}
| 112 | + |
| 113 | +fn decode_value<'h>(bytes: &'h [u8], is_escaped: bool) -> Option<Cow<'h, str>> { |
| 114 | + if bytes.contains(&b'%') { |
| 115 | + return Some(String::from_utf8(decode_percent_bytes(bytes)?).ok()?.into()); |
| 116 | + } |
| 117 | + |
| 118 | + let value = std::str::from_utf8(bytes).ok()?; |
| 119 | + if is_escaped { |
| 120 | + Some(value.replace(r#"\""#, "\"").into()) |
| 121 | + } else { |
| 122 | + Some(value.into()) |
| 123 | + } |
| 124 | +} |
| 125 | + |
| 126 | +fn decode_extended_value(bytes: &[u8]) -> Option<String> { |
| 127 | + let value = std::str::from_utf8(bytes).ok()?; |
| 128 | + let mut parts = value.splitn(3, '\''); |
| 129 | + let charset = parts.next()?; |
| 130 | + let _language = parts.next()?; |
| 131 | + let encoded = parts.next()?; |
| 132 | + |
| 133 | + let encoding = Encoding::for_label(charset.as_bytes())?; |
| 134 | + let decoded = decode_percent_bytes(encoded.as_bytes())?; |
| 135 | + let (text, _, had_errors) = encoding.decode(&decoded); |
| 136 | + if had_errors { |
| 137 | + return None; |
| 138 | + } |
| 139 | + |
| 140 | + Some(text.into_owned()) |
| 141 | +} |
| 142 | + |
34 | 143 | impl ContentDispositionAttr { |
35 | 144 | /// Extract ContentDisposition Attribute from header. |
36 | 145 | /// |
37 | | - /// Some older clients may not quote the name or filename, so we allow them, |
38 | | - /// but require them to be percent encoded. Only allocates if percent |
39 | | - /// decoding, and there are characters that need to be decoded. |
40 | | - pub fn extract_from<'h>(&self, mut header: &'h [u8]) -> Option<Cow<'h, str>> { |
41 | | - // TODO: The prefix should be matched case-insensitively. |
| 146 | + /// Some older clients may not quote the name or filename, so we allow them. |
| 147 | + /// If they percent-encode the value, we decode it before returning. |
| 148 | + pub fn extract_from<'h>(&self, header: &'h [u8]) -> Option<Cow<'h, str>> { |
| 149 | + if self == &ContentDispositionAttr::FileName |
| 150 | + && let Some(value) = self.extract_extended_from(header) |
| 151 | + { |
| 152 | + return Some(value); |
| 153 | + } |
| 154 | + |
42 | 155 | let prefix = match self { |
43 | 156 | ContentDispositionAttr::Name => &b"name"[..], |
44 | 157 | ContentDispositionAttr::FileName => &b"filename"[..], |
45 | 158 | }; |
| 159 | + let mut index = 0; |
46 | 160 |
|
47 | | - while let Some(i) = memchr::memmem::find(header, prefix) { |
48 | | - // Check if we found a superstring of `prefix`; continue if so. |
49 | | - let suffix = &header[(i + prefix.len())..]; |
50 | | - if i > 0 && !(header[i - 1].is_ascii_whitespace() || header[i - 1] == b';') { |
51 | | - header = suffix; |
52 | | - continue; |
| 161 | + while index < header.len() { |
| 162 | + skip_to_next_parameter(header, &mut index); |
| 163 | + skip_ascii_ws(header, &mut index); |
| 164 | + if index >= header.len() { |
| 165 | + break; |
| 166 | + } |
| 167 | + |
| 168 | + let key_start = index; |
| 169 | + while index < header.len() |
| 170 | + && !header[index].is_ascii_whitespace() |
| 171 | + && header[index] != b'=' |
| 172 | + && header[index] != b';' |
| 173 | + { |
| 174 | + index += 1; |
53 | 175 | } |
54 | 176 |
|
55 | | - // Now find and trim the `=`. Handle quoted strings first. |
56 | | - let rest = trim_ascii_ws_then(suffix, b'=')?; |
57 | | - let (bytes, is_escaped) = if let Some(rest) = trim_ascii_ws_then(rest, b'"') { |
58 | | - let (mut k, mut escaped) = (memchr::memchr(b'"', rest)?, false); |
59 | | - while k > 0 && rest[k - 1] == b'\\' { |
60 | | - escaped = true; |
61 | | - k = k + 1 + memchr::memchr(b'"', &rest[(k + 1)..])?; |
62 | | - } |
| 177 | + let key = &header[key_start..index]; |
| 178 | + skip_ascii_ws(header, &mut index); |
| 179 | + if index >= header.len() || header[index] != b'=' { |
| 180 | + continue; |
| 181 | + } |
63 | 182 |
|
64 | | - (&rest[..k], escaped) |
| 183 | + index += 1; |
| 184 | + let rest = &header[index..]; |
| 185 | + let (bytes, is_escaped) = if let Some((value, escaped)) = parse_quoted_value(rest) { |
| 186 | + (value, escaped) |
65 | 187 | } else { |
66 | | - let rest = trim_ascii_ws_start(rest); |
67 | | - let j = memchr::memchr2(b';', b' ', rest).unwrap_or(rest.len()); |
68 | | - (&rest[..j], false) |
| 188 | + (parse_unquoted_value(rest), false) |
69 | 189 | }; |
70 | 190 |
|
71 | | - return match std::str::from_utf8(bytes).ok()? { |
72 | | - name if is_escaped => Some(name.replace(r#"\""#, "\"").into()), |
73 | | - name => Some(name.into()), |
74 | | - }; |
| 191 | + if key.eq_ignore_ascii_case(prefix) { |
| 192 | + return decode_value(bytes, is_escaped); |
| 193 | + } |
| 194 | + } |
| 195 | + |
| 196 | + None |
| 197 | + } |
| 198 | + |
| 199 | + fn extract_extended_from<'h>(&self, header: &'h [u8]) -> Option<Cow<'h, str>> { |
| 200 | + let prefix = match self { |
| 201 | + ContentDispositionAttr::Name => return None, |
| 202 | + ContentDispositionAttr::FileName => &b"filename*"[..], |
| 203 | + }; |
| 204 | + let mut index = 0; |
| 205 | + |
| 206 | + while index < header.len() { |
| 207 | + skip_to_next_parameter(header, &mut index); |
| 208 | + skip_ascii_ws(header, &mut index); |
| 209 | + if index >= header.len() { |
| 210 | + break; |
| 211 | + } |
| 212 | + |
| 213 | + let key_start = index; |
| 214 | + while index < header.len() |
| 215 | + && !header[index].is_ascii_whitespace() |
| 216 | + && header[index] != b'=' |
| 217 | + && header[index] != b';' |
| 218 | + { |
| 219 | + index += 1; |
| 220 | + } |
| 221 | + |
| 222 | + let key = &header[key_start..index]; |
| 223 | + skip_ascii_ws(header, &mut index); |
| 224 | + if index >= header.len() || header[index] != b'=' { |
| 225 | + continue; |
| 226 | + } |
| 227 | + |
| 228 | + index += 1; |
| 229 | + if key.eq_ignore_ascii_case(prefix) { |
| 230 | + let value = parse_unquoted_value(&header[index..]); |
| 231 | + return Some(decode_extended_value(value)?.into()); |
| 232 | + } |
75 | 233 | } |
76 | 234 |
|
77 | 235 | None |
@@ -220,4 +378,31 @@ mod tests { |
220 | 378 | let name = ContentDispositionAttr::Name.extract_from(val); |
221 | 379 | assert_eq!(name.unwrap(), r#"myfield"name"#); |
222 | 380 | } |
| 381 | + |
/// Parameter names written in upper case must still be recognised.
#[test]
fn test_content_disposition_case_insensitive_parameters() {
    let header = br#"form-data; NAME="my_field"; FILENAME="file-name.txt""#;

    assert_eq!(
        ContentDispositionAttr::Name.extract_from(header).as_deref(),
        Some("my_field")
    );
    assert_eq!(
        ContentDispositionAttr::FileName.extract_from(header).as_deref(),
        Some("file-name.txt")
    );
}
| 390 | + |
/// Unquoted, percent-encoded values are decoded before being returned.
#[test]
fn test_content_disposition_percent_decoded_values() {
    let header = br#"form-data; name=my%20field; filename=file%20name.txt"#;

    assert_eq!(
        ContentDispositionAttr::Name.extract_from(header).as_deref(),
        Some("my field")
    );
    assert_eq!(
        ContentDispositionAttr::FileName.extract_from(header).as_deref(),
        Some("file name.txt")
    );
}
| 399 | + |
/// When both `filename` and `filename*` are present, the extended form wins.
#[test]
fn test_content_disposition_filename_star_preferred() {
    let header = br#"form-data; name="upload"; filename="fallback.txt"; filename*=UTF-8''%E4%BD%A0%E5%A5%BD.txt"#;

    assert_eq!(
        ContentDispositionAttr::Name.extract_from(header).as_deref(),
        Some("upload")
    );
    assert_eq!(
        ContentDispositionAttr::FileName.extract_from(header).as_deref(),
        Some("你好.txt")
    );
}
223 | 408 | } |
0 commit comments