Skip to content

Commit c3da88b

Browse files
committed
fix: support msg files as well as eml
1 parent cea9c92 commit c3da88b

File tree

3 files changed

+416
-253
lines changed

3 files changed

+416
-253
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ license = "Copyright (c) 2017-2025 Splunk Inc."
66
requires-python = ">=3.13, <3.15"
77
authors = []
88
dependencies = [
9-
"beautifulsoup4>=4.14.2",
9+
"beautifulsoup4>=4.13,<4.14",
1010
"msal>=1.31.0",
1111
"requests>=2.32.0",
12-
"splunk-soar-sdk>=3.15.1",
12+
"splunk-soar-sdk>=3.17.0",
1313
]
1414

1515
# [tool.uv.sources]

src/app.py

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,17 @@
3333
IP_REGEX,
3434
URI_REGEX,
3535
)
36-
from soar_sdk.extras.email.rfc5322 import extract_rfc5322_email_data
36+
from soar_sdk.extras.email.email_data import extract_email_data
3737
from soar_sdk.extras.email.utils import clean_url, is_ip
3838
from soar_sdk.logging import getLogger
3939
from soar_sdk.models.artifact import Artifact
4040
from soar_sdk.models.container import Container
41-
from soar_sdk.models.finding import Finding, FindingAttachment, FindingEmail
41+
from soar_sdk.models.finding import (
42+
Finding,
43+
FindingAttachment,
44+
FindingEmail,
45+
FindingEmailReporter,
46+
)
4247
from soar_sdk.params import OnESPollParams, OnPollParams
4348
from soar_sdk.webhooks.models import WebhookRequest, WebhookResponse
4449

@@ -787,18 +792,69 @@ def on_es_poll(
787792
if isinstance(eml_content, str)
788793
else eml_content.decode("utf-8", errors="replace")
789794
)
790-
attachments.append(
791-
FindingAttachment(
792-
file_name=f"{subject[:50]}.eml",
793-
data=raw_eml,
794-
is_raw_email=True,
795-
)
796-
)
797-
798795
try:
799-
parsed = extract_rfc5322_email_data(
796+
parsed = extract_email_data(
800797
eml_str, email_id, include_attachment_content=True
801798
)
799+
800+
# Check for a forwarded email attachment (.eml or .msg)
801+
reporter = None
802+
forwarded_att = None
803+
for att in parsed.attachments:
804+
if att.content and att.filename.lower().endswith((".eml", ".msg")):
805+
forwarded_att = att
806+
break
807+
808+
if forwarded_att is not None:
809+
# Build reporter from the outer (forwarding) email
810+
outer_body = parsed.body.plain_text or parsed.body.html or ""
811+
reporter = FindingEmailReporter(
812+
**{"from": parsed.headers.from_address or ""},
813+
to=parsed.headers.to,
814+
cc=parsed.headers.cc,
815+
bcc=parsed.headers.bcc,
816+
subject=parsed.headers.subject,
817+
message_id=parsed.headers.message_id,
818+
id=parsed.headers.email_id,
819+
body=outer_body or None,
820+
date=parsed.headers.date,
821+
)
822+
823+
# Optionally include the outer (reporter) EML
824+
if asset.ingest_eml:
825+
attachments.append(
826+
FindingAttachment(
827+
file_name=f"{subject[:50]}.eml",
828+
data=raw_eml,
829+
is_raw_email=False,
830+
)
831+
)
832+
833+
# Re-parse from the inner (forwarded) email attachment (.eml or .msg)
834+
inner_eml_bytes = forwarded_att.content
835+
attachments.append(
836+
FindingAttachment(
837+
file_name=forwarded_att.filename,
838+
data=inner_eml_bytes,
839+
is_raw_email=True,
840+
)
841+
)
842+
parsed = extract_email_data(
843+
inner_eml_bytes, include_attachment_content=True
844+
)
845+
if parsed.headers.subject:
846+
subject = parsed.headers.subject
847+
else:
848+
# No forwarded email — attach the outer EML as the raw email
849+
attachments.append(
850+
FindingAttachment(
851+
file_name=f"{subject[:50]}.eml",
852+
data=raw_eml,
853+
is_raw_email=True,
854+
)
855+
)
856+
857+
# Build finding from parsed email (inner if forwarded, outer otherwise)
802858
body_text = parsed.body.plain_text or parsed.body.html or ""
803859
email_headers = {
804860
k: v for k, v in parsed.to_dict()["headers"].items() if v
@@ -807,6 +863,7 @@ def on_es_poll(
807863
headers=email_headers or None,
808864
body=body_text or None,
809865
urls=parsed.urls or None,
866+
reporter=reporter,
810867
)
811868
for att in parsed.attachments:
812869
if att.content:

0 commit comments

Comments
 (0)