Skip to content

Commit 9f6121b

Browse files
authored
fix(ocsf): handle serialization errors for non-serializable resource metadata (#10129)
1 parent 9d4f68f commit 9f6121b

File tree

3 files changed: +192 additions, −12 deletions

prowler/CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,15 @@ All notable changes to the **Prowler SDK** are documented in this file.
5656

5757
---
5858

59-
## [5.18.3] (Prowler UNRELEASED)
59+
## [5.18.4] (Prowler v5.18.4)
60+
61+
### 🐞 Fixed
62+
63+
- Handle serialization errors in OCSF output for non-serializable resource metadata [(#10129)](https://github.com/prowler-cloud/prowler/pull/10129)
64+
65+
---
66+
67+
## [5.18.3] (Prowler v5.18.3)
6068

6169
### 🐞 Fixed
6270

prowler/lib/outputs/ocsf/ocsf.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import os
23
from datetime import datetime
34
from typing import List
@@ -115,10 +116,10 @@ def transform(self, findings: List[Finding]) -> None:
115116
# TODO: this should be included only if using the Cloud profile
116117
cloud_partition=finding.partition,
117118
region=finding.region,
118-
data={
119-
"details": finding.resource_details,
120-
"metadata": finding.resource_metadata,
121-
},
119+
data=self._sanitize_resource_data(
120+
finding.resource_details,
121+
finding.resource_metadata,
122+
),
122123
)
123124
]
124125
if finding.metadata.Provider != "kubernetes"
@@ -129,10 +130,10 @@ def transform(self, findings: List[Finding]) -> None:
129130
uid=finding.resource_uid,
130131
group=Group(name=finding.metadata.ServiceName),
131132
type=finding.metadata.ResourceType,
132-
data={
133-
"details": finding.resource_details,
134-
"metadata": finding.resource_metadata,
135-
},
133+
data=self._sanitize_resource_data(
134+
finding.resource_details,
135+
finding.resource_metadata,
136+
),
136137
namespace=finding.region.replace("namespace: ", ""),
137138
)
138139
]
@@ -200,9 +201,13 @@ def batch_write_data_to_file(self) -> None:
200201
self._file_descriptor.write("[")
201202
for finding in self._data:
202203
try:
203-
self._file_descriptor.write(
204-
finding.model_dump_json(exclude_none=True, indent=4)
205-
)
204+
if hasattr(finding, "model_dump_json"):
205+
json_output = finding.model_dump_json(
206+
exclude_none=True, indent=4
207+
)
208+
else:
209+
json_output = finding.json(exclude_none=True, indent=4)
210+
self._file_descriptor.write(json_output)
206211
self._file_descriptor.write(",")
207212
except Exception as error:
208213
logger.error(
@@ -221,6 +226,40 @@ def batch_write_data_to_file(self) -> None:
221226
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
222227
)
223228

229+
@staticmethod
230+
def _sanitize_resource_data(resource_details: str, resource_metadata: dict) -> dict:
231+
"""Ensures resource data is JSON-serializable.
232+
233+
The resource_metadata dict may contain non-serializable objects
234+
(e.g., Pydantic models passed as raw dicts with model values)
235+
from service resource conversion. This method converts them to
236+
plain dicts and roundtrips through JSON to guarantee serializability.
237+
"""
238+
239+
def _make_serializable(obj):
240+
if hasattr(obj, "model_dump") and callable(obj.model_dump):
241+
return _make_serializable(obj.model_dump())
242+
if hasattr(obj, "dict") and callable(obj.dict):
243+
return _make_serializable(obj.dict())
244+
if isinstance(obj, dict):
245+
return {str(k): _make_serializable(v) for k, v in obj.items()}
246+
if isinstance(obj, (list, tuple)):
247+
return [_make_serializable(v) for v in obj]
248+
return obj
249+
250+
try:
251+
converted = _make_serializable(resource_metadata)
252+
sanitized_metadata = json.loads(json.dumps(converted, default=str))
253+
except (TypeError, ValueError) as error:
254+
logger.warning(
255+
f"Failed to serialize resource metadata, defaulting to empty: {error}"
256+
)
257+
sanitized_metadata = {}
258+
return {
259+
"details": resource_details,
260+
"metadata": sanitized_metadata,
261+
}
262+
224263
@staticmethod
225264
def get_account_type_id_by_provider(provider: str) -> TypeID:
226265
"""

tests/lib/outputs/ocsf/ocsf_test.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
from datetime import datetime, timezone
33
from io import StringIO
4+
from typing import Optional
45

56
import requests
67
from freezegun import freeze_time
@@ -19,6 +20,7 @@
1920
from py_ocsf_models.objects.product import Product
2021
from py_ocsf_models.objects.remediation import Remediation
2122
from py_ocsf_models.objects.resource_details import ResourceDetails
23+
from pydantic.v1 import BaseModel as V1BaseModel
2224

2325
from prowler.config.config import prowler_version
2426
from prowler.lib.outputs.ocsf.ocsf import OCSF
@@ -461,3 +463,134 @@ def test_new_when_not_muted(self):
461463
def test_suppressed_when_muted(self):
462464
muted = True
463465
assert OCSF.get_finding_status_id(muted) == StatusID.Suppressed
466+
467+
def test_sanitize_resource_data_plain_dict(self):
    # An already-serializable metadata dict must pass through untouched.
    sanitized = OCSF._sanitize_resource_data("details", {"key": "value"})
    expected = {"details": "details", "metadata": {"key": "value"}}
    assert sanitized == expected
def test_sanitize_resource_data_empty_dict(self):
    # Empty metadata stays empty; details are passed through verbatim.
    sanitized = OCSF._sanitize_resource_data("details", {})
    assert sanitized == {"details": "details", "metadata": {}}
def test_sanitize_resource_data_with_pydantic_v1_models(self):
    """Reproduces the Trail serialization bug: resource_metadata is a
    dict[str, PydanticModel] when checks pass cloudtrail_client.trails."""

    # Minimal stand-ins for the service models that triggered the bug.
    class EventSelector(V1BaseModel):
        name: str = None
        is_all: bool = False

    class Trail(V1BaseModel):
        name: str = None
        region: str = "us-east-1"
        is_logging: bool = True
        latest_cloudwatch_delivery_time: datetime = None
        data_events: list = []
        tags: Optional[list] = []

    trails = {
        "arn:aws:cloudtrail:us-east-1:123456:trail/main": Trail(
            name="main",
            latest_cloudwatch_delivery_time=datetime(2026, 1, 15, 10, 30),
            data_events=[EventSelector(name="s3", is_all=True)],
        ),
        "arn:aws:cloudtrail:eu-west-1:123456:trail/secondary": Trail(
            name="secondary",
        ),
    }

    result = OCSF._sanitize_resource_data("resource details", trails)

    assert result["details"] == "resource details"
    metadata = result["metadata"]
    # Trail objects are converted to dicts, not strings
    main_trail = metadata["arn:aws:cloudtrail:us-east-1:123456:trail/main"]
    assert isinstance(main_trail, dict)
    assert main_trail["name"] == "main"
    assert main_trail["region"] == "us-east-1"
    assert main_trail["is_logging"] is True
    # datetime converted to string
    assert "2026-01-15" in main_trail["latest_cloudwatch_delivery_time"]
    # Nested models are also converted
    assert main_trail["data_events"] == [{"name": "s3", "is_all": True}]

    secondary_trail = metadata[
        "arn:aws:cloudtrail:eu-west-1:123456:trail/secondary"
    ]
    assert isinstance(secondary_trail, dict)
    assert secondary_trail["name"] == "secondary"
    assert secondary_trail["latest_cloudwatch_delivery_time"] is None

    # Entire result must be JSON-serializable
    json.dumps(result)
def test_sanitize_resource_data_with_nested_non_serializable_types(self):
    """Ensures datetimes and enums nested in dicts are handled."""
    resource_metadata = {
        "created_at": datetime(2026, 6, 15, 12, 0, 0),
        "nested": {
            "timestamp": datetime(2026, 1, 1),
            "values": [1, "two", datetime(2025, 12, 31)],
        },
    }

    result = OCSF._sanitize_resource_data("details", resource_metadata)

    # Datetimes at any depth are stringified; plain values are untouched.
    assert "2026-06-15" in result["metadata"]["created_at"]
    assert "2026-01-01" in result["metadata"]["nested"]["timestamp"]
    assert result["metadata"]["nested"]["values"][0] == 1
    assert result["metadata"]["nested"]["values"][1] == "two"
    assert "2025-12-31" in result["metadata"]["nested"]["values"][2]
    # The full payload must survive JSON serialization.
    json.dumps(result)
@freeze_time(datetime.now())
def test_batch_write_data_to_file_with_pydantic_model_in_resource_metadata(self):
    """End-to-end test: OCSF output succeeds when resource_metadata
    contains Pydantic v1 model objects (the Trail serialization bug)."""

    class Trail(V1BaseModel):
        name: str = None
        region: str = "us-east-1"
        is_logging: bool = True

    finding = generate_finding_output(
        status="FAIL",
        severity="low",
        muted=False,
        region=AWS_REGION_EU_WEST_1,
        timestamp=datetime.now(),
        resource_details="trail details",
        resource_name="main-trail",
        resource_uid="arn:aws:cloudtrail:eu-west-1:123456:trail/main",
        status_extended="CloudTrail trail is not logging",
    )
    # Simulate what happens when Check_Report receives
    # resource=cloudtrail_client.trails (a dict of Trail models)
    finding.resource_metadata = {
        "arn:trail/main": Trail(name="main"),
        "arn:trail/secondary": Trail(name="secondary", is_logging=False),
    }

    # Write to an in-memory buffer instead of a real file.
    mock_file = StringIO()
    output = OCSF([finding])
    output._file_descriptor = mock_file

    # close() is patched so the StringIO stays readable afterwards.
    with patch.object(mock_file, "close", return_value=None):
        output.batch_write_data_to_file()

    mock_file.seek(0)
    content = mock_file.read()
    parsed = json.loads(content)

    assert len(parsed) == 1
    resource_data = parsed[0]["resources"][0]["data"]
    assert resource_data["details"] == "trail details"
    # Trail models should be serialized as proper dicts
    assert resource_data["metadata"]["arn:trail/main"]["name"] == "main"
    assert resource_data["metadata"]["arn:trail/secondary"]["is_logging"] is False

0 commit comments

Comments (0)