Skip to content

Commit 2a4ee83

Browse files
authored
feat(sdk): add --export-ocsf flag for OCSF ingestion to Prowler Cloud (#10095)
1 parent 247bde1 commit 2a4ee83

File tree

9 files changed

+203
-11
lines changed

9 files changed

+203
-11
lines changed

poetry.lock

Lines changed: 20 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

prowler/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ All notable changes to the **Prowler SDK** are documented in this file.
2222
- OpenStack compute 7 new checks [(#9944)](https://github.com/prowler-cloud/prowler/pull/9944)
2323
- CSA CCM 4.0 for the Alibaba Cloud provider [(#10061)](https://github.com/prowler-cloud/prowler/pull/10061)
2424
- ECS Exec (ECS-006) privilege escalation detection via `ecs:ExecuteCommand` + `ecs:DescribeTasks` [(#10066)](https://github.com/prowler-cloud/prowler/pull/10066)
25+
- `--export-ocsf` CLI flag to upload OCSF scan results to Prowler Cloud [(#10095)](https://github.com/prowler-cloud/prowler/pull/10095)
26+
- `scan_id` field in OCSF `unmapped` output for ingestion correlation [(#10095)](https://github.com/prowler-cloud/prowler/pull/10095)
2527
- `defenderxdr_endpoint_privileged_user_exposed_credentials` check for M365 provider [(#10084)](https://github.com/prowler-cloud/prowler/pull/10084)
2628
- `defenderxdr_critical_asset_management_pending_approvals` check for M365 provider [(#10085)](https://github.com/prowler-cloud/prowler/pull/10085)
2729
- `entra_seamless_sso_disabled` check for M365 provider [(#10086)](https://github.com/prowler-cloud/prowler/pull/10086)

prowler/__main__.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
# -*- coding: utf-8 -*-
33

44
import sys
5+
import tempfile
56
from os import environ
67

8+
import requests
79
from colorama import Fore, Style
810
from colorama import init as colorama_init
911

1012
from prowler.config.config import (
1113
EXTERNAL_TOOL_PROVIDERS,
14+
cloud_api_base_url,
1215
csv_file_suffix,
1316
get_available_compliance_frameworks,
1417
html_file_suffix,
@@ -110,6 +113,7 @@
110113
from prowler.lib.outputs.csv.csv import CSV
111114
from prowler.lib.outputs.finding import Finding
112115
from prowler.lib.outputs.html.html import HTML
116+
from prowler.lib.outputs.ocsf.ingestion import send_ocsf_to_api
113117
from prowler.lib.outputs.ocsf.ocsf import OCSF
114118
from prowler.lib.outputs.outputs import extract_findings_statistics, report
115119
from prowler.lib.outputs.slack.slack import Slack
@@ -477,6 +481,7 @@ def streaming_callback(findings_batch):
477481
sys.exit(1)
478482

479483
generated_outputs = {"regular": [], "compliance": []}
484+
ocsf_output = None
480485

481486
if args.output_formats:
482487
for mode in args.output_formats:
@@ -507,6 +512,7 @@ def streaming_callback(findings_batch):
507512
file_path=f"{filename}{json_ocsf_file_suffix}",
508513
)
509514
generated_outputs["regular"].append(json_output)
515+
ocsf_output = json_output
510516
json_output.batch_write_data_to_file()
511517
if mode == "html":
512518
html_output = HTML(
@@ -518,6 +524,57 @@ def streaming_callback(findings_batch):
518524
provider=global_provider, stats=stats
519525
)
520526

527+
if getattr(args, "export_ocsf", False):
528+
if not ocsf_output or not getattr(ocsf_output, "file_path", None):
529+
tmp_ocsf = tempfile.NamedTemporaryFile(
530+
suffix=json_ocsf_file_suffix, delete=False
531+
)
532+
ocsf_output = OCSF(
533+
findings=finding_outputs,
534+
file_path=tmp_ocsf.name,
535+
)
536+
tmp_ocsf.close()
537+
ocsf_output.batch_write_data_to_file()
538+
print(
539+
f"{Style.BRIGHT}\nExporting OCSF to Prowler Cloud, please wait...{Style.RESET_ALL}"
540+
)
541+
try:
542+
response = send_ocsf_to_api(ocsf_output.file_path)
543+
except ValueError:
544+
logger.warning(
545+
"OCSF export skipped: no API key configured. "
546+
"Set the PROWLER_API_KEY environment variable to enable it. "
547+
f"Scan results were saved to {ocsf_output.file_path}"
548+
)
549+
except requests.ConnectionError:
550+
logger.warning(
551+
"OCSF export skipped: could not reach the Prowler Cloud API at "
552+
f"{cloud_api_base_url}. Check the URL and your network connection. "
553+
f"Scan results were saved to {ocsf_output.file_path}"
554+
)
555+
except requests.HTTPError as http_err:
556+
logger.warning(
557+
f"OCSF export failed: the API returned HTTP {http_err.response.status_code}. "
558+
"Verify your API key is valid and has the right permissions. "
559+
f"Scan results were saved to {ocsf_output.file_path}"
560+
)
561+
except Exception as error:
562+
logger.warning(
563+
f"OCSF export failed unexpectedly: {error}. "
564+
f"Scan results were saved to {ocsf_output.file_path}"
565+
)
566+
else:
567+
job_id = response.get("data", {}).get("id") if response else None
568+
if job_id:
569+
print(
570+
f"{Style.BRIGHT}{Fore.GREEN}\nOCSF export accepted. Ingestion job: {job_id}{Style.RESET_ALL}"
571+
)
572+
else:
573+
logger.warning(
574+
"OCSF export: unexpected API response (missing ingestion job ID). "
575+
f"Scan results were saved to {ocsf_output.file_path}"
576+
)
577+
521578
# Compliance Frameworks
522579
input_compliance_frameworks = set(output_options.output_modes).intersection(
523580
get_available_compliance_frameworks(provider)

prowler/config/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ def get_available_compliance_frameworks(provider=None):
120120
encoding_format_utf_8 = "utf-8"
121121
available_output_formats = ["csv", "json-asff", "json-ocsf", "html"]
122122

123+
# Prowler Cloud API settings
124+
cloud_api_base_url = os.getenv("PROWLER_CLOUD_API_BASE", "https://api.prowler.com")
125+
cloud_api_key = os.getenv("PROWLER_API_KEY", "")
126+
cloud_api_ingestion_path = "/api/v1/ingestions"
127+
123128

124129
def set_output_timestamp(
125130
new_timestamp: datetime,

prowler/lib/cli/parser.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,14 @@ def __init_outputs_parser__(self):
215215
default=False,
216216
help="Set the output timestamp format as unix timestamps instead of iso format timestamps (default mode).",
217217
)
218+
common_outputs_parser.add_argument(
219+
"--export-ocsf",
220+
action="store_true",
221+
help=(
222+
"Send OCSF output to Prowler Cloud ingestion endpoint. "
223+
"Requires PROWLER_API_KEY environment variable."
224+
),
225+
)
218226

219227
def __init_logging_parser__(self):
220228
# Logging Options
prowler/lib/outputs/ocsf/ingestion.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import os
2+
from typing import Any, Dict, Optional
3+
4+
import requests
5+
6+
from prowler.config.config import (
7+
cloud_api_base_url,
8+
cloud_api_ingestion_path,
9+
cloud_api_key,
10+
)
11+
12+
13+
def send_ocsf_to_api(
    file_path: str,
    *,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout: int = 60,
) -> Dict[str, Any]:
    """Send OCSF file to the Prowler Cloud ingestion endpoint.

    The file is uploaded as a multipart form field named ``file`` using an
    ``Api-Key`` authorization header.

    Args:
        file_path: Path to the OCSF JSON file to upload.
        base_url: API base URL. Falls back to the configured
            ``cloud_api_base_url``. A trailing ``/`` is stripped and
            ``https://`` is prepended when no HTTP(S) scheme is present.
        api_key: API key. Falls back to the configured ``cloud_api_key``.
        timeout: Request timeout in seconds.

    Returns:
        Parsed JSON:API response dict. An empty dict is returned when a
        successful response has an empty body, a body that is not valid
        JSON, or a JSON body that is not an object.

    Raises:
        FileNotFoundError: If the OCSF file does not exist.
        ValueError: If ``file_path`` is empty or no API key is available.
        requests.HTTPError: If the API returns an error status.
        requests.RequestException: For connection or timeout failures raised
            by ``requests.post``.
    """
    if not file_path:
        raise ValueError("No OCSF file path provided.")

    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"OCSF file not found: {file_path}")

    api_key = api_key or cloud_api_key
    if not api_key:
        raise ValueError("Missing API key. Set PROWLER_API_KEY environment variable.")

    base_url = (base_url or cloud_api_base_url).rstrip("/")
    if not base_url.lower().startswith(("http://", "https://")):
        base_url = f"https://{base_url}"

    url = f"{base_url}{cloud_api_ingestion_path}"

    with open(file_path, "rb") as fh:
        response = requests.post(
            url,
            headers={
                "Authorization": f"Api-Key {api_key}",
                "Accept": "application/vnd.api+json",
            },
            files={"file": (os.path.basename(file_path), fh, "application/json")},
            timeout=timeout,
        )
    response.raise_for_status()

    if not response.text:
        return {}

    try:
        payload = response.json()
    except ValueError:
        # requests' JSONDecodeError is a ValueError subclass. Letting it
        # escape would be indistinguishable from the "missing API key"
        # ValueError raised above, so a non-JSON 2xx body is treated like an
        # empty one instead.
        return {}

    # Honour the declared Dict return type: callers use ``.get`` on the result.
    return payload if isinstance(payload, dict) else {}

prowler/lib/outputs/ocsf/ocsf.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import os
3-
from datetime import datetime
3+
from datetime import datetime, timezone
4+
from random import getrandbits
45
from typing import List
56

67
from py_ocsf_models.events.base_event import SeverityID, StatusID
@@ -17,6 +18,7 @@
1718
from py_ocsf_models.objects.product import Product
1819
from py_ocsf_models.objects.remediation import Remediation
1920
from py_ocsf_models.objects.resource_details import ResourceDetails
21+
from uuid6 import UUID
2022

2123
from prowler.lib.logger import logger
2224
from prowler.lib.outputs.finding import Finding
@@ -52,6 +54,10 @@ def transform(self, findings: List[Finding]) -> None:
5254
findings (List[Finding]): a list of Finding objects
5355
"""
5456
try:
57+
if not findings:
58+
return
59+
60+
scan_id = _uuid7_from_timestamp(findings[0].timestamp)
5561
for finding in findings:
5662
finding_activity = ActivityID.Create
5763
cloud_account_type = self.get_account_type_id_by_provider(
@@ -163,6 +169,7 @@ def transform(self, findings: List[Finding]) -> None:
163169
"additional_urls": finding.metadata.AdditionalURLs,
164170
"notes": finding.metadata.Notes,
165171
"compliance": finding.compliance,
172+
"scan_id": str(scan_id),
166173
},
167174
)
168175
if finding.provider != "kubernetes":
@@ -295,3 +302,26 @@ def get_finding_status_id(muted: bool) -> StatusID:
295302
if muted:
296303
status_id = StatusID.Suppressed
297304
return status_id
305+
306+
307+
# NOTE: Copied from api/src/backend/api/uuid_utils.py (datetime_to_uuid7)
308+
# Adapted to accept datetime/epoch inputs.
309+
def _uuid7_from_timestamp(value) -> UUID:
310+
if isinstance(value, datetime):
311+
dt = value
312+
else:
313+
dt = datetime.fromtimestamp(int(value), tz=timezone.utc)
314+
if dt.tzinfo is None:
315+
dt = dt.replace(tzinfo=timezone.utc)
316+
317+
timestamp_ms = int(dt.timestamp() * 1000) & 0xFFFFFFFFFFFF
318+
rand_seq = getrandbits(12)
319+
rand_node = getrandbits(62)
320+
321+
uuid_int = timestamp_ms << 80
322+
uuid_int |= 0x7 << 76
323+
uuid_int |= rand_seq << 64
324+
uuid_int |= 0x2 << 62
325+
uuid_int |= rand_node
326+
327+
return UUID(int=uuid_int)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ dependencies = [
7070
"slack-sdk==3.39.0",
7171
"tabulate==0.9.0",
7272
"tzlocal==5.3.1",
73+
"uuid6==2024.7.10",
7374
"py-iam-expand==0.1.0",
7475
"h2==4.3.0",
7576
"oci==2.160.3",

tests/lib/outputs/ocsf/ocsf_test.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime, timezone
33
from io import StringIO
44
from typing import Optional
5+
from uuid import UUID
56

67
import requests
78
from freezegun import freeze_time
@@ -101,7 +102,10 @@ def test_transform(self):
101102
output_data.type_name
102103
== f"Detection Finding: {DetectionFindingTypeID.Create.name}"
103104
)
104-
assert output_data.unmapped == {
105+
unmapped = output_data.unmapped
106+
scan_id = unmapped.pop("scan_id")
107+
assert UUID(scan_id) # Valid UUID
108+
assert unmapped == {
105109
"related_url": findings[0].metadata.RelatedUrl,
106110
"categories": findings[0].metadata.Categories,
107111
"depends_on": findings[0].metadata.DependsOn,
@@ -260,7 +264,11 @@ def test_batch_write_data_to_file(self):
260264

261265
mock_file.seek(0)
262266
content = mock_file.read()
263-
assert json.loads(content) == expected_json_output
267+
actual_output = json.loads(content)
268+
# scan_id is non-deterministic (UUID7), validate and remove before comparison
269+
actual_scan_id = actual_output[0]["unmapped"].pop("scan_id")
270+
assert UUID(actual_scan_id)
271+
assert actual_output == expected_json_output
264272

265273
def test_batch_write_data_to_file_without_findings(self):
266274
assert not OCSF([])._file_descriptor
@@ -318,7 +326,10 @@ def test_finding_output_cloud_pass_low_muted(self):
318326
assert finding_ocsf.risk_details == finding_output.metadata.Risk
319327

320328
# Unmapped Data
321-
assert finding_ocsf.unmapped == {
329+
unmapped = finding_ocsf.unmapped
330+
scan_id = unmapped.pop("scan_id")
331+
assert UUID(scan_id) # Valid UUID
332+
assert unmapped == {
322333
"related_url": finding_output.metadata.RelatedUrl,
323334
"categories": finding_output.metadata.Categories,
324335
"depends_on": finding_output.metadata.DependsOn,

0 commit comments

Comments
 (0)