Skip to content

Commit 3872ac3

Browse files
Merge pull request #124 from networktocode/gfm-non-ascii-subjects
Handle encoding of non-ASCII characters in email subjects
2 parents 5358ee7 + 48cba25 commit 3872ac3

File tree

6 files changed

+486
-1
lines changed

6 files changed

+486
-1
lines changed

circuit_maintenance_parser/data.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,16 @@ def init_from_emailmessage(cls: Type["NotificationData"], email_message) -> Opti
7575
cls.walk_email(email_message, data_parts)
7676

7777
# Adding extra headers that are interesting to be parsed
78-
data_parts.add(DataPart(EMAIL_HEADER_SUBJECT, email_message["Subject"].encode()))
78+
data_parts.add(
79+
DataPart(
80+
EMAIL_HEADER_SUBJECT,
81+
# decode_header() handles conversion from RFC2047 ASCII representation of non-ASCII content to
82+
# a list of (string, charset) tuples.
83+
# make_header() merges these back into a single Header object containing this text
84+
# str() gets the simple Unicode representation of the Header.
85+
str(email.header.make_header(email.header.decode_header(email_message["Subject"]))).encode(),
86+
)
87+
)
7988
data_parts.add(DataPart(EMAIL_HEADER_DATE, email_message["Date"].encode()))
8089
# Ensure the data parts are processed in a consistent order
8190
return cls(data_parts=sorted(data_parts, key=lambda part: part.type))

0 commit comments

Comments
 (0)