|
1 | 1 | import json |
2 | 2 | from datetime import datetime, timezone |
3 | 3 | from io import StringIO |
| 4 | +from typing import Optional |
4 | 5 |
|
5 | 6 | import requests |
6 | 7 | from freezegun import freeze_time |
|
19 | 20 | from py_ocsf_models.objects.product import Product |
20 | 21 | from py_ocsf_models.objects.remediation import Remediation |
21 | 22 | from py_ocsf_models.objects.resource_details import ResourceDetails |
| 23 | +from pydantic.v1 import BaseModel as V1BaseModel |
22 | 24 |
|
23 | 25 | from prowler.config.config import prowler_version |
24 | 26 | from prowler.lib.outputs.ocsf.ocsf import OCSF |
@@ -461,3 +463,134 @@ def test_new_when_not_muted(self): |
def test_suppressed_when_muted(self):
    """A muted finding maps to the Suppressed status id."""
    status_id = OCSF.get_finding_status_id(True)
    assert status_id == StatusID.Suppressed
| 466 | + |
def test_sanitize_resource_data_plain_dict(self):
    """A plain dict comes back under "metadata" with equal contents."""
    plain_metadata = {"key": "value"}
    expected = {"details": "details", "metadata": {"key": "value"}}

    assert OCSF._sanitize_resource_data("details", plain_metadata) == expected
| 473 | + |
def test_sanitize_resource_data_empty_dict(self):
    """An empty metadata dict yields an empty "metadata" entry."""
    expected = {"details": "details", "metadata": {}}

    sanitized = OCSF._sanitize_resource_data("details", {})

    assert sanitized == expected
| 480 | + |
def test_sanitize_resource_data_with_pydantic_v1_models(self):
    """Reproduces the Trail serialization bug: resource_metadata is a
    dict[str, PydanticModel] when checks pass cloudtrail_client.trails.

    Builds two local Pydantic v1 models (one nesting the other and carrying
    a datetime), passes a dict of them as the metadata argument and checks
    that the result holds plain dicts/strings and is JSON-serializable.
    """

    # Fields whose default is None are annotated Optional so the annotation
    # matches the default (consistent with ``tags`` below).
    class EventSelector(V1BaseModel):
        name: Optional[str] = None
        is_all: bool = False

    class Trail(V1BaseModel):
        name: Optional[str] = None
        region: str = "us-east-1"
        is_logging: bool = True
        latest_cloudwatch_delivery_time: Optional[datetime] = None
        data_events: list = []
        tags: Optional[list] = []

    main_arn = "arn:aws:cloudtrail:us-east-1:123456:trail/main"
    secondary_arn = "arn:aws:cloudtrail:eu-west-1:123456:trail/secondary"
    trails = {
        main_arn: Trail(
            name="main",
            latest_cloudwatch_delivery_time=datetime(2026, 1, 15, 10, 30),
            data_events=[EventSelector(name="s3", is_all=True)],
        ),
        secondary_arn: Trail(
            name="secondary",
        ),
    }

    result = OCSF._sanitize_resource_data("resource details", trails)

    assert result["details"] == "resource details"
    metadata = result["metadata"]
    # Trail objects are converted to dicts, not strings
    main_trail = metadata[main_arn]
    assert isinstance(main_trail, dict)
    assert main_trail["name"] == "main"
    assert main_trail["region"] == "us-east-1"
    assert main_trail["is_logging"] is True
    # datetime converted to string
    assert "2026-01-15" in main_trail["latest_cloudwatch_delivery_time"]
    # Nested models are also converted
    assert main_trail["data_events"] == [{"name": "s3", "is_all": True}]

    secondary_trail = metadata[secondary_arn]
    assert isinstance(secondary_trail, dict)
    assert secondary_trail["name"] == "secondary"
    assert secondary_trail["latest_cloudwatch_delivery_time"] is None

    # Entire result must be JSON-serializable (json.dumps raises otherwise)
    json.dumps(result)
| 532 | + |
def test_sanitize_resource_data_with_nested_non_serializable_types(self):
    """Datetimes nested inside dicts and lists end up JSON-serializable."""
    inner = {
        "timestamp": datetime(2026, 1, 1),
        "values": [1, "two", datetime(2025, 12, 31)],
    }
    resource_metadata = {
        "created_at": datetime(2026, 6, 15, 12, 0, 0),
        "nested": inner,
    }

    result = OCSF._sanitize_resource_data("details", resource_metadata)

    metadata = result["metadata"]
    assert "2026-06-15" in metadata["created_at"]
    assert "2026-01-01" in metadata["nested"]["timestamp"]

    # Non-datetime list items keep their value; the datetime item is checked
    # by substring, as for the fields above.
    nested_values = metadata["nested"]["values"]
    assert nested_values[0] == 1
    assert nested_values[1] == "two"
    assert "2025-12-31" in nested_values[2]

    # Raises if anything non-serializable survived sanitization.
    json.dumps(result)
| 551 | + |
@freeze_time(datetime.now())
def test_batch_write_data_to_file_with_pydantic_model_in_resource_metadata(self):
    """End-to-end test: OCSF output succeeds when resource_metadata
    contains Pydantic v1 model objects (the Trail serialization bug).

    Writes one finding through ``batch_write_data_to_file`` into an
    in-memory buffer, parses the buffer back as JSON and checks that the
    models appear as nested dicts under the resource's ``data.metadata``.
    """

    class Trail(V1BaseModel):
        # The default is None, so the annotation must admit it.
        name: Optional[str] = None
        region: str = "us-east-1"
        is_logging: bool = True

    finding = generate_finding_output(
        status="FAIL",
        severity="low",
        muted=False,
        region=AWS_REGION_EU_WEST_1,
        timestamp=datetime.now(),
        resource_details="trail details",
        resource_name="main-trail",
        resource_uid="arn:aws:cloudtrail:eu-west-1:123456:trail/main",
        status_extended="CloudTrail trail is not logging",
    )
    # Simulate what happens when Check_Report receives
    # resource=cloudtrail_client.trails (a dict of Trail models)
    finding.resource_metadata = {
        "arn:trail/main": Trail(name="main"),
        "arn:trail/secondary": Trail(name="secondary", is_logging=False),
    }

    mock_file = StringIO()
    output = OCSF([finding])
    output._file_descriptor = mock_file

    # Stub out close() so the buffer can still be read back after the write.
    with patch.object(mock_file, "close", return_value=None):
        output.batch_write_data_to_file()

    mock_file.seek(0)
    content = mock_file.read()
    parsed = json.loads(content)

    assert len(parsed) == 1
    resource_data = parsed[0]["resources"][0]["data"]
    assert resource_data["details"] == "trail details"
    # Trail models should be serialized as proper dicts
    assert resource_data["metadata"]["arn:trail/main"]["name"] == "main"
    assert resource_data["metadata"]["arn:trail/secondary"]["is_logging"] is False
0 commit comments