@@ -927,7 +927,7 @@ impl<'a> Iterator for PartsIterator<'a> {
927927/// Some("This is a test email".to_string()));
928928/// assert_eq!(parsed.subparts.len(), 2);
929929/// assert_eq!(parsed.subparts[0].get_body().unwrap(),
930- /// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}\r\n ");
930+ /// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}");
931931/// assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64");
932932/// assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html");
933933/// assert!(parsed.subparts[1].get_body().unwrap().starts_with("<html>"));
@@ -937,6 +937,20 @@ pub fn parse_mail(raw_data: &[u8]) -> Result<ParsedMail, MailParseError> {
937937 parse_mail_recursive ( raw_data, false )
938938}
939939
/// Returns the end index of `raw_data[ix_start..ix]` after dropping one trailing line
/// terminator (either `\n` or `\r\n`), if the range ends with one.
///
/// The line break immediately preceding a multipart boundary is considered part of the
/// boundary rather than part of the body that comes before it, so callers use this to
/// trim that break off the preceding body. See the discussion in
/// https://github.com/staktrace/mailparse/issues/127.
///
/// Bytes before `ix_start` are never inspected, so the result is always `>= ix_start`
/// whenever `ix >= ix_start`. A lone `\r` with no following `\n` is left in place, and
/// at most one terminator is removed.
fn strip_trailing_crlf(raw_data: &[u8], ix_start: usize, mut ix: usize) -> usize {
    // True when the (non-empty) range `ix_start..end` finishes with `byte`.
    let range_ends_with = |end: usize, byte: u8| end > ix_start && raw_data[end - 1] == byte;

    // No trailing LF: nothing to strip.
    if !range_ends_with(ix, b'\n') {
        return ix;
    }
    ix -= 1;

    // The LF was the second half of a CRLF pair: drop the CR as well.
    if range_ends_with(ix, b'\r') {
        ix -= 1;
    }
    ix
}
953+
940954fn parse_mail_recursive (
941955 raw_data : & [ u8 ] ,
942956 in_multipart_digest : bool ,
@@ -961,23 +975,29 @@ fn parse_mail_recursive(
961975 {
962976 let in_multipart_digest = result. ctype . mimetype == "multipart/digest" ;
963977 let boundary = String :: from ( "--" ) + & result. ctype . params [ "boundary" ] ;
964- if let Some ( ix_body_end) = find_from_u8_line_prefix ( raw_data, ix_body, boundary. as_bytes ( ) )
978+ if let Some ( ix_boundary_start) =
979+ find_from_u8_line_prefix ( raw_data, ix_body, boundary. as_bytes ( ) )
965980 {
981+ let ix_body_end = strip_trailing_crlf ( raw_data, ix_body, ix_boundary_start) ;
966982 result. body_bytes = & raw_data[ ix_body..ix_body_end] ;
967- let mut ix_boundary_end = ix_body_end + boundary. len ( ) ;
983+ let mut ix_boundary_end = ix_boundary_start + boundary. len ( ) ;
968984 while let Some ( ix_part_start) =
969985 find_from_u8 ( raw_data, ix_boundary_end, b"\n " ) . map ( |v| v + 1 )
970986 {
971- // if there is no terminating boundary, assume the part end is the end of the email
972- let ix_part_end =
973- find_from_u8_line_prefix ( raw_data, ix_part_start, boundary. as_bytes ( ) )
974- . unwrap_or ( raw_data. len ( ) ) ;
987+ let ix_part_boundary_start =
988+ find_from_u8_line_prefix ( raw_data, ix_part_start, boundary. as_bytes ( ) ) ;
989+ let ix_part_end = ix_part_boundary_start
990+ . map ( |x| strip_trailing_crlf ( raw_data, ix_part_start, x) )
991+ // if there is no terminating boundary, assume the part end is the end of the email
992+ . unwrap_or ( raw_data. len ( ) ) ;
975993
976994 result. subparts . push ( parse_mail_recursive (
977995 & raw_data[ ix_part_start..ix_part_end] ,
978996 in_multipart_digest,
979997 ) ?) ;
980- ix_boundary_end = ix_part_end + boundary. len ( ) ;
998+ ix_boundary_end = ix_part_boundary_start
999+ . map ( |x| x + boundary. len ( ) )
1000+ . unwrap_or ( raw_data. len ( ) ) ;
9811001 if ix_boundary_end + 2 > raw_data. len ( )
9821002 || ( raw_data[ ix_boundary_end] == b'-' && raw_data[ ix_boundary_end + 1 ] == b'-' )
9831003 {
@@ -1653,7 +1673,7 @@ mod tests {
16531673 . as_bytes ( ) ,
16541674 )
16551675 . unwrap ( ) ;
1656- assert_eq ! ( mail. subparts[ 0 ] . get_body( ) . unwrap( ) , "part0\r \n " ) ;
1676+ assert_eq ! ( mail. subparts[ 0 ] . get_body( ) . unwrap( ) , "part0" ) ;
16571677 assert_eq ! ( mail. subparts[ 1 ] . get_body( ) . unwrap( ) , "part1\r \n " ) ;
16581678 }
16591679
@@ -2021,20 +2041,20 @@ mod tests {
20212041 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[0]
20222042 assert_eq ! ( part. headers. len( ) , 0 ) ;
20232043 assert_eq ! ( part. ctype. mimetype, "text/plain" ) ;
2024- assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"blah blah blah\n " ) ;
2044+ assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"blah blah blah" ) ;
20252045
20262046 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[1]
20272047 assert_eq ! ( part. ctype. mimetype, "multipart/digest" ) ;
20282048
20292049 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[1].subparts[0]
20302050 assert_eq ! ( part. headers. len( ) , 0 ) ;
20312051 assert_eq ! ( part. ctype. mimetype, "message/rfc822" ) ;
2032- assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"nested default part\n " ) ;
2052+ assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"nested default part" ) ;
20332053
20342054 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[1].subparts[1]
20352055 assert_eq ! ( part. headers. len( ) , 1 ) ;
20362056 assert_eq ! ( part. ctype. mimetype, "text/html" ) ;
2037- assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"nested html part\n " ) ;
2057+ assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"nested html part" ) ;
20382058
20392059 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[1].subparts[2]
20402060 assert_eq ! ( part. headers. len( ) , 1 ) ;
@@ -2043,7 +2063,7 @@ mod tests {
20432063 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[1].subparts[2].subparts[0]
20442064 assert_eq ! ( part. headers. len( ) , 0 ) ;
20452065 assert_eq ! ( part. ctype. mimetype, "text/plain" ) ;
2046- assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"inside part\n " ) ;
2066+ assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"inside part" ) ;
20472067
20482068 assert ! ( parts. next( ) . is_none( ) ) ;
20492069 }
@@ -2088,13 +2108,13 @@ mod tests {
20882108 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[0].subparts[0]
20892109 assert_eq ! ( part. headers. len( ) , 1 ) ;
20902110 assert_eq ! ( part. ctype. mimetype, "text/html" ) ;
2091- assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"<em>Good evening!</em>\n " ) ;
2111+ assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"<em>Good evening!</em>" ) ;
20922112 assert_eq ! ( part. subparts. len( ) , 0 ) ;
20932113
20942114 part = parts. next ( ) . unwrap ( ) ; // mail.subparts[0].subparts[1]
20952115 assert_eq ! ( part. headers. len( ) , 1 ) ;
20962116 assert_eq ! ( part. ctype. mimetype, "text/plain" ) ;
2097- assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"Good evening!\n " ) ;
2117+ assert_eq ! ( part. get_body_raw( ) . unwrap( ) , b"Good evening!" ) ;
20982118 assert_eq ! ( part. subparts. len( ) , 0 ) ;
20992119
21002120 assert ! ( parts. next( ) . is_none( ) ) ;
0 commit comments