@@ -104,37 +104,40 @@ def get_forwarded_email_parts(email_text: str) -> list[str]:
104104 parts : list [str ] = split_delimiter .split (email_text )
105105 return _remove_empty_strings (parts )
106106
107+ # Precompiled regex for reply/forward delimiters and quoted reply headers
108+ _THREAD_DELIMITERS = re .compile (
109+ "|" .join ([
110+ r"^from: .+$" , # From: someone
111+ r"^sent: .+$" , # Sent: ...
112+ r"^to: .+$" , # To: ...
113+ r"^subject: .+$" , # Subject: ...
114+ r"^-{2,}\s*Original Message\s*-{2,}$" , # -----Original Message-----
115+ r"^-{2,}\s*Forwarded by.*$" , # ----- Forwarded by
116+ r"^_{5,}$" , # _________
117+ r"^on .+wrote:\s*(?:\r?\n\s*)+>" , # On ... wrote: followed by quoted text
118+ ]),
119+ re .IGNORECASE | re .MULTILINE
120+ )
121+
122+ # Precompiled regex for trailing line delimiters (underscores, dashes, equals, spaces)
123+ _TRAILING_LINE_DELIMITERS = re .compile (r"[\r\n][_\-= ]+\s*$" )
107124
# Simple way to get the last response on an email thread in MIME format
def get_last_response_in_thread(email_text: str) -> str:
    """Return the most recent reply of an email thread, without quoted history.

    The text is cut at the first match of ``_THREAD_DELIMITERS`` (the
    module-level pattern for reply/forward markers), surrounding whitespace
    is stripped, and a trailing delimiter line matched by
    ``_TRAILING_LINE_DELIMITERS`` is removed.

    Args:
        email_text: Plain-text content of the email thread.

    Returns:
        The text that precedes the first thread delimiter, or the whole
        (stripped) text when no delimiter is found or when the first
        delimiter sits at offset 0. Returns ``""`` for empty input.
    """
    if not email_text:
        return ""

    match = _THREAD_DELIMITERS.search(email_text)
    # A delimiter at offset 0 means the text itself opens with a header or
    # separator line; cutting there would throw away the entire message, so
    # the text is kept whole in that case.
    if match and match.start() > 0:
        email_text = email_text[: match.start()]

    email_text = email_text.strip()
    # Remove trailing line delimiters (e.g. underscores, dashes, equals), then
    # drop any whitespace that was sitting in front of the removed line.
    return _TRAILING_LINE_DELIMITERS.sub("", email_text).rstrip()
136140
137-
138141# Extracts the plain text body from an email.message.Message object.
139142def _extract_email_body (msg : Message ) -> str :
140143 """Extracts the plain text body from an email.message.Message object."""
0 commit comments