|
| 1 | +use std::borrow::Cow; |
| 2 | + |
| 3 | +/// https://tools.ietf.org/html/rfc7230#section-3.2.6 |
| 4 | +pub(crate) fn parse_token(input: &str) -> (Option<&str>, &str) { |
| 5 | + let mut end_of_token = 0; |
| 6 | + for (i, c) in input.char_indices() { |
| 7 | + if tchar(c) { |
| 8 | + end_of_token = i; |
| 9 | + } else { |
| 10 | + break; |
| 11 | + } |
| 12 | + } |
| 13 | + |
| 14 | + if end_of_token == 0 { |
| 15 | + (None, input) |
| 16 | + } else { |
| 17 | + (Some(&input[..end_of_token + 1]), &input[end_of_token + 1..]) |
| 18 | + } |
| 19 | +} |
| 20 | + |
/// Reports whether `c` is a `tchar`: an ASCII letter, an ASCII digit, or one
/// of the symbols ``! # $ % & ' * + - . ^ _ ` | ~``.
///
/// Grammar: <https://tools.ietf.org/html/rfc7230#section-3.2.6>
fn tchar(c: char) -> bool {
    // Every non-alphanumeric character a token may contain.
    const TOKEN_SYMBOLS: &str = "!#$%&'*+-.^_`|~";

    c.is_ascii_alphanumeric() || TOKEN_SYMBOLS.contains(c)
}
| 44 | + |
/// Reports whether `c` may follow the backslash of a `quoted-pair`, i.e.
/// whether it is HTAB, SP, a visible ASCII character (`0x21..=0x7E`) or
/// `obs-text`.
///
/// Grammar: <https://tools.ietf.org/html/rfc7230#section-3.2.6>
/// (`quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )`).
///
/// The classification is done on the `char` itself rather than on a
/// truncating `c as u8` cast, so code points above U+00FF are not judged by
/// their low byte. Any non-ASCII character is encoded in UTF-8 using only
/// bytes `>= 0x80`, so it is treated as `obs-text`.
fn vchar(c: char) -> bool {
    matches!(c, '\t' | ' '..='~') || !c.is_ascii()
}
| 49 | + |
| 50 | +/// https://tools.ietf.org/html/rfc7230#section-3.2.6 |
| 51 | +pub(crate) fn parse_quoted_string(input: &str) -> (Option<Cow<'_, str>>, &str) { |
| 52 | + // quoted-string must start with a DQUOTE |
| 53 | + if !input.starts_with('"') { |
| 54 | + return (None, input); |
| 55 | + } |
| 56 | + |
| 57 | + let mut end_of_string = None; |
| 58 | + let mut backslashes: Vec<usize> = vec![]; |
| 59 | + |
| 60 | + for (i, c) in input.char_indices().skip(1) { |
| 61 | + if i > 1 && backslashes.last() == Some(&(i - 2)) { |
| 62 | + if !vchar(c) { |
| 63 | + // only VCHARs can be escaped |
| 64 | + return (None, input); |
| 65 | + } |
| 66 | + // otherwise, we skip over this character while parsing |
| 67 | + } else { |
| 68 | + match c as u8 { |
| 69 | + // we have reached a quoted-pair |
| 70 | + b'\\' => { |
| 71 | + backslashes.push(i - 1); |
| 72 | + } |
| 73 | + |
| 74 | + // end of the string, DQUOTE |
| 75 | + b'"' => { |
| 76 | + end_of_string = Some(i + 1); |
| 77 | + break; |
| 78 | + } |
| 79 | + |
| 80 | + // qdtext |
| 81 | + b'\t' | b' ' | 15 | 35..=91 | 93..=126 | 128..=255 => {} |
| 82 | + |
| 83 | + // unexpected character, bail |
| 84 | + _ => return (None, input), |
| 85 | + } |
| 86 | + } |
| 87 | + } |
| 88 | + |
| 89 | + if let Some(end_of_string) = end_of_string { |
| 90 | + let value = &input[1..end_of_string - 1]; // strip DQUOTEs from start and end |
| 91 | + |
| 92 | + let value = if backslashes.is_empty() { |
| 93 | + // no backslashes means we don't need to allocate |
| 94 | + value.into() |
| 95 | + } else { |
| 96 | + backslashes.reverse(); // so that we can use pop. goes from low-to-high to high-to-low sorting |
| 97 | + |
| 98 | + value |
| 99 | + .char_indices() |
| 100 | + .filter_map(|(i, c)| { |
| 101 | + if Some(&i) == backslashes.last() { |
| 102 | + // they're already sorted highest to lowest, so we only need to check the last one |
| 103 | + backslashes.pop(); |
| 104 | + None // remove the backslash from the output |
| 105 | + } else { |
| 106 | + Some(c) |
| 107 | + } |
| 108 | + }) |
| 109 | + .collect::<String>() |
| 110 | + .into() |
| 111 | + }; |
| 112 | + |
| 113 | + (Some(value), &input[end_of_string..]) |
| 114 | + } else { |
| 115 | + // we never reached a closing DQUOTE, so we do not have a valid quoted-string |
| 116 | + (None, input) |
| 117 | + } |
| 118 | +} |
| 119 | + |
#[cfg(test)]
mod test {
    use super::*;

    /// Inputs that start with a token: the token is split off and the
    /// remainder is handed back untouched.
    #[test]
    fn token_successful_parses() {
        let cases = [
            ("key=value", "key", "=value"),
            ("KEY=value", "KEY", "=value"),
            ("0123)=value", "0123", ")=value"),
            ("!#$%&'*+-.^_`|~=value", "!#$%&'*+-.^_`|~", "=value"),
        ];

        for &(input, token, rest) in cases.iter() {
            assert_eq!(parse_token(input), (Some(token), rest));
        }
    }

    /// Inputs that do not start with a token, plus a check that every
    /// delimiter character terminates a preceding token.
    #[test]
    fn token_unsuccessful_parses() {
        for input in ["", "=value"].iter().copied() {
            assert_eq!(parse_token(input), (None, input));
        }

        for delimiter in r#"(),/:;<=>?@[\]{}"#.chars() {
            // A delimiter on its own is never a token.
            let alone = delimiter.to_string();
            assert_eq!(parse_token(&alone), (None, alone.as_str()));

            // A delimiter ends the token that precedes it.
            let embedded = format!("match{}rest", delimiter);
            let remainder = format!("{}rest", delimiter);
            assert_eq!(
                parse_token(&embedded),
                (Some("match"), remainder.as_str())
            );
        }
    }

    /// Well-formed quoted-strings, with and without quoted-pairs.
    #[test]
    fn qstring_successful_parses() {
        let plain = parse_quoted_string(r#""key"=value"#);
        assert_eq!(plain, (Some(Cow::Borrowed("key")), "=value"));

        let escaped = parse_quoted_string(r#""escaped \" quote \""rest"#);
        let expected = Cow::Owned(String::from(r#"escaped " quote ""#));
        assert_eq!(escaped, (Some(expected), r#"rest"#));
    }

    /// Malformed quoted-strings: nothing is parsed and the input is returned
    /// unchanged.
    #[test]
    fn qstring_unsuccessful_parses() {
        let rejected = [
            r#""abc"#,
            r#"hello""#,
            r#"=value\"#,
            r#"\""#,
            r#""\""#,
        ];

        for input in rejected.iter().copied() {
            assert_eq!(parse_quoted_string(input), (None, input));
        }
    }
}
0 commit comments