diff --git a/parse_emails/handle_eml.py b/parse_emails/handle_eml.py
index 67c9bd4..b33fcc2 100644
--- a/parse_emails/handle_eml.py
+++ b/parse_emails/handle_eml.py
@@ -358,6 +358,11 @@ def decode_content(mime):
     Decode content
     """
     charset = mime.get_content_charset()
+    # For 8bit UTF-8 parts, return the payload without transfer-decoding.
+    # The Content-Transfer-Encoding token is case-insensitive (RFC 2045), so normalise it first.
+    transfer_encoding = str(mime.get('content-transfer-encoding', '')).strip().lower()
+    if charset == 'utf-8' and transfer_encoding == '8bit':
+        return mime.get_payload()
     payload = mime.get_payload(decode=True)
     try:
         if payload:
diff --git a/parse_emails/tests/parse_emails_test.py b/parse_emails/tests/parse_emails_test.py
index efd642b..1bd1791 100644
--- a/parse_emails/tests/parse_emails_test.py
+++ b/parse_emails/tests/parse_emails_test.py
@@ -887,3 +887,20 @@ def test_multipart_defective():
     results = email_parser.parse()
     assert results.get('Text')
     assert results.get('HTML')
+
+
+def test_handle_eml_utf8_8bit():
+    """
+    Given:
+        - Chinese eml file encoded in utf-8, 8bit
+
+    When:
+        - parsing the file.
+
+    Then:
+        - make sure the Chinese characters were decoded successfully.
+    """
+    email_parser = EmailParser(file_path='parse_emails/tests/test_data/chinese_email_test.eml')
+    results = email_parser.parse()
+    expected_response = '这是一个示例邮件,用于演示指定的条件。\r\n它使用了 UTF-8 编码,可以支持多种语言的字符,包括中文。\r\n祝好,\r\n发件人'
+    assert results['Text'] == expected_response
diff --git a/parse_emails/tests/test_data/chinese_email_test.eml b/parse_emails/tests/test_data/chinese_email_test.eml
new file mode 100644
index 0000000..3408886
--- /dev/null
+++ b/parse_emails/tests/test_data/chinese_email_test.eml
@@ -0,0 +1,16 @@
+To: test@test.com
+From: "test@test.com"
+Subject: 示例邮件
+Message-ID: <5b4831d0-5322-ea23-6312-864ff419a1f1@test.com>
+Date: Wed, 23 Jan 2019 18:55:56 +0100
+User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101
+ Thunderbird/60.4.0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: 8bit
+Content-Language: en-US
+
+这是一个示例邮件,用于演示指定的条件。
+它使用了 UTF-8 编码,可以支持多种语言的字符,包括中文。
+祝好,
+发件人
diff --git a/pyproject.toml b/pyproject.toml
index 37f9cc8..178d92e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "parse-emails"
-version = "0.1.26"
+version = "0.1.27"
 description = "Parses an email message file and extracts the data from it."
 authors = ["Demisto"]
 license = "MIT"