Skip to content

Commit b69d3bc

Browse files
committed
Treat crlf before boundary as part of the boundary
Fixes #127.
1 parent 1e084b5 commit b69d3bc

File tree

1 file changed

+35
-15
lines changed

1 file changed

+35
-15
lines changed

src/lib.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,7 @@ impl<'a> Iterator for PartsIterator<'a> {
927927
/// Some("This is a test email".to_string()));
928928
/// assert_eq!(parsed.subparts.len(), 2);
929929
/// assert_eq!(parsed.subparts[0].get_body().unwrap(),
930-
/// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}\r\n");
930+
/// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}");
931931
/// assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64");
932932
/// assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html");
933933
/// assert!(parsed.subparts[1].get_body().unwrap().starts_with("<html>"));
@@ -937,6 +937,20 @@ pub fn parse_mail(raw_data: &[u8]) -> Result<ParsedMail, MailParseError> {
937937
parse_mail_recursive(raw_data, false)
938938
}
939939

940+
/// Returns the end index of `raw_data[ix_start..ix]` with one trailing line
/// terminator (either `\n` or `\r\n`) removed, if the range ends with one.
///
/// The line break that immediately precedes a MIME boundary is considered part
/// of the boundary rather than part of the preceding body, so callers use this
/// to trim the body end found just before a boundary line. See
/// https://github.com/staktrace/mailparse/issues/127.
///
/// At most one terminator is removed, and the result never moves below
/// `ix_start`: a `\r` or `\n` located before `ix_start` is left untouched.
fn strip_trailing_crlf(raw_data: &[u8], ix_start: usize, ix: usize) -> usize {
    // True when the range `ix_start..end` is non-empty and its last byte is `byte`.
    let ends_with = |end: usize, byte: u8| end > ix_start && raw_data[end - 1] == byte;

    // No trailing LF means there is nothing to strip (a lone CR is kept).
    if !ends_with(ix, b'\n') {
        return ix;
    }

    // Drop the LF, then also drop a CR directly in front of it when present.
    let without_lf = ix - 1;
    if ends_with(without_lf, b'\r') {
        without_lf - 1
    } else {
        without_lf
    }
}
953+
940954
fn parse_mail_recursive(
941955
raw_data: &[u8],
942956
in_multipart_digest: bool,
@@ -961,23 +975,29 @@ fn parse_mail_recursive(
961975
{
962976
let in_multipart_digest = result.ctype.mimetype == "multipart/digest";
963977
let boundary = String::from("--") + &result.ctype.params["boundary"];
964-
if let Some(ix_body_end) = find_from_u8_line_prefix(raw_data, ix_body, boundary.as_bytes())
978+
if let Some(ix_boundary_start) =
979+
find_from_u8_line_prefix(raw_data, ix_body, boundary.as_bytes())
965980
{
981+
let ix_body_end = strip_trailing_crlf(raw_data, ix_body, ix_boundary_start);
966982
result.body_bytes = &raw_data[ix_body..ix_body_end];
967-
let mut ix_boundary_end = ix_body_end + boundary.len();
983+
let mut ix_boundary_end = ix_boundary_start + boundary.len();
968984
while let Some(ix_part_start) =
969985
find_from_u8(raw_data, ix_boundary_end, b"\n").map(|v| v + 1)
970986
{
971-
// if there is no terminating boundary, assume the part end is the end of the email
972-
let ix_part_end =
973-
find_from_u8_line_prefix(raw_data, ix_part_start, boundary.as_bytes())
974-
.unwrap_or(raw_data.len());
987+
let ix_part_boundary_start =
988+
find_from_u8_line_prefix(raw_data, ix_part_start, boundary.as_bytes());
989+
let ix_part_end = ix_part_boundary_start
990+
.map(|x| strip_trailing_crlf(raw_data, ix_part_start, x))
991+
// if there is no terminating boundary, assume the part end is the end of the email
992+
.unwrap_or(raw_data.len());
975993

976994
result.subparts.push(parse_mail_recursive(
977995
&raw_data[ix_part_start..ix_part_end],
978996
in_multipart_digest,
979997
)?);
980-
ix_boundary_end = ix_part_end + boundary.len();
998+
ix_boundary_end = ix_part_boundary_start
999+
.map(|x| x + boundary.len())
1000+
.unwrap_or(raw_data.len());
9811001
if ix_boundary_end + 2 > raw_data.len()
9821002
|| (raw_data[ix_boundary_end] == b'-' && raw_data[ix_boundary_end + 1] == b'-')
9831003
{
@@ -1653,7 +1673,7 @@ mod tests {
16531673
.as_bytes(),
16541674
)
16551675
.unwrap();
1656-
assert_eq!(mail.subparts[0].get_body().unwrap(), "part0\r\n");
1676+
assert_eq!(mail.subparts[0].get_body().unwrap(), "part0");
16571677
assert_eq!(mail.subparts[1].get_body().unwrap(), "part1\r\n");
16581678
}
16591679

@@ -2021,20 +2041,20 @@ mod tests {
20212041
part = parts.next().unwrap(); // mail.subparts[0]
20222042
assert_eq!(part.headers.len(), 0);
20232043
assert_eq!(part.ctype.mimetype, "text/plain");
2024-
assert_eq!(part.get_body_raw().unwrap(), b"blah blah blah\n");
2044+
assert_eq!(part.get_body_raw().unwrap(), b"blah blah blah");
20252045

20262046
part = parts.next().unwrap(); // mail.subparts[1]
20272047
assert_eq!(part.ctype.mimetype, "multipart/digest");
20282048

20292049
part = parts.next().unwrap(); // mail.subparts[1].subparts[0]
20302050
assert_eq!(part.headers.len(), 0);
20312051
assert_eq!(part.ctype.mimetype, "message/rfc822");
2032-
assert_eq!(part.get_body_raw().unwrap(), b"nested default part\n");
2052+
assert_eq!(part.get_body_raw().unwrap(), b"nested default part");
20332053

20342054
part = parts.next().unwrap(); // mail.subparts[1].subparts[1]
20352055
assert_eq!(part.headers.len(), 1);
20362056
assert_eq!(part.ctype.mimetype, "text/html");
2037-
assert_eq!(part.get_body_raw().unwrap(), b"nested html part\n");
2057+
assert_eq!(part.get_body_raw().unwrap(), b"nested html part");
20382058

20392059
part = parts.next().unwrap(); // mail.subparts[1].subparts[2]
20402060
assert_eq!(part.headers.len(), 1);
@@ -2043,7 +2063,7 @@ mod tests {
20432063
part = parts.next().unwrap(); // mail.subparts[1].subparts[2].subparts[0]
20442064
assert_eq!(part.headers.len(), 0);
20452065
assert_eq!(part.ctype.mimetype, "text/plain");
2046-
assert_eq!(part.get_body_raw().unwrap(), b"inside part\n");
2066+
assert_eq!(part.get_body_raw().unwrap(), b"inside part");
20472067

20482068
assert!(parts.next().is_none());
20492069
}
@@ -2088,13 +2108,13 @@ mod tests {
20882108
part = parts.next().unwrap(); // mail.subparts[0].subparts[0]
20892109
assert_eq!(part.headers.len(), 1);
20902110
assert_eq!(part.ctype.mimetype, "text/html");
2091-
assert_eq!(part.get_body_raw().unwrap(), b"<em>Good evening!</em>\n");
2111+
assert_eq!(part.get_body_raw().unwrap(), b"<em>Good evening!</em>");
20922112
assert_eq!(part.subparts.len(), 0);
20932113

20942114
part = parts.next().unwrap(); // mail.subparts[0].subparts[1]
20952115
assert_eq!(part.headers.len(), 1);
20962116
assert_eq!(part.ctype.mimetype, "text/plain");
2097-
assert_eq!(part.get_body_raw().unwrap(), b"Good evening!\n");
2117+
assert_eq!(part.get_body_raw().unwrap(), b"Good evening!");
20982118
assert_eq!(part.subparts.len(), 0);
20992119

21002120
assert!(parts.next().is_none());

0 commit comments

Comments
 (0)