Skip to content

Commit 787a339

Browse files
authored
feat(attack-paths): scans add tenant and provider related labels to nodes (#10308)
1 parent 1cf6eaa commit 787a339

File tree

9 files changed

+66
-7
lines changed

9 files changed

+66
-7
lines changed

api/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ All notable changes to the **Prowler API** are documented in this file.
99
- Attack Paths: Migrate network exposure queries from APOC to standard openCypher for Neo4j and Neptune compatibility [(#10266)](https://github.com/prowler-cloud/prowler/pull/10266)
1010
- Attack Paths: Complete migration to private graph labels and properties, removing deprecated dual-write support [(#10268)](https://github.com/prowler-cloud/prowler/pull/10268)
1111
- `POST /api/v1/providers` returns `409 Conflict` if already exists [(#10293)](https://github.com/prowler-cloud/prowler/pull/10293)
12+
- Attack Paths: Add tenant- and provider-related labels to nodes so they can be easily filtered in custom queries [(#10308)](https://github.com/prowler-cloud/prowler/pull/10308)
1213

1314
### 🐞 Fixed
1415

api/poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/src/backend/api/attack_paths/views_helpers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
INTERNAL_LABELS,
1818
INTERNAL_PROPERTIES,
1919
PROVIDER_ID_PROPERTY,
20+
is_dynamic_isolation_label,
2021
)
2122

2223
logger = logging.getLogger(BackendLogger.API)
@@ -305,7 +306,11 @@ def _serialize_graph(graph, provider_id: str) -> dict[str, Any]:
305306

306307

307308
def _filter_labels(labels: Iterable[str]) -> list[str]:
308-
return [label for label in labels if label not in INTERNAL_LABELS]
309+
return [
310+
label
311+
for label in labels
312+
if label not in INTERNAL_LABELS and not is_dynamic_isolation_label(label)
313+
]
309314

310315

311316
def _serialize_properties(properties: dict[str, Any]) -> dict[str, Any]:

api/src/backend/api/tests/test_attack_paths.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,14 @@ def test_serialize_properties_filters_internal_fields():
363363
assert result == {"name": "prod"}
364364

365365

366+
def test_filter_labels_strips_dynamic_isolation_labels():
367+
labels = ["AWSRole", "_Tenant_abc123", "_Provider_def456", "_ProviderResource"]
368+
369+
result = views_helpers._filter_labels(labels)
370+
371+
assert result == ["AWSRole"]
372+
373+
366374
def test_serialize_graph_as_text_node_without_properties():
367375
graph = {
368376
"nodes": [{"id": "n1", "labels": ["AWSAccount"], "properties": {}}],

api/src/backend/tasks/jobs/attack_paths/config.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from dataclasses import dataclass
22
from typing import Callable
3+
from uuid import UUID
34

45
from config.env import env
56

@@ -17,6 +18,12 @@
1718
PROWLER_FINDING_LABEL = "ProwlerFinding"
1819
PROVIDER_RESOURCE_LABEL = "_ProviderResource"
1920

21+
# Dynamic isolation labels embed entity UUIDs and are added to every node during sync
22+
# Format: _Tenant_{uuid_no_hyphens}, _Provider_{uuid_no_hyphens}
23+
TENANT_LABEL_PREFIX = "_Tenant_"
24+
PROVIDER_LABEL_PREFIX = "_Provider_"
25+
DYNAMIC_ISOLATION_PREFIXES = [TENANT_LABEL_PREFIX, PROVIDER_LABEL_PREFIX]
26+
2027

2128
@dataclass(frozen=True)
2229
class ProviderConfig:
@@ -107,3 +114,27 @@ def get_provider_resource_label(provider_type: str) -> str:
107114
"""Get the resource label for a provider type (e.g., `_AWSResource`)."""
108115
config = PROVIDER_CONFIGS.get(provider_type)
109116
return config.resource_label if config else "_UnknownProviderResource"
117+
118+
119+
# Dynamic Isolation Label Helpers
120+
# --------------------------------
121+
122+
123+
def _normalize_uuid(value: str | UUID) -> str:
124+
"""Strip hyphens from a UUID string for use in Neo4j labels."""
125+
return str(value).replace("-", "")
126+
127+
128+
def get_tenant_label(tenant_id: str | UUID) -> str:
129+
"""Get the Neo4j label for a tenant (e.g., `_Tenant_019c41ee7df37deca684d839f95619f8`)."""
130+
return f"{TENANT_LABEL_PREFIX}{_normalize_uuid(tenant_id)}"
131+
132+
133+
def get_provider_label(provider_id: str | UUID) -> str:
134+
"""Get the Neo4j label for a provider (e.g., `_Provider_019c41ee7df37deca684d839f95619f8`)."""
135+
return f"{PROVIDER_LABEL_PREFIX}{_normalize_uuid(provider_id)}"
136+
137+
138+
def is_dynamic_isolation_label(label: str) -> bool:
139+
"""Check if a label is a dynamic tenant/provider isolation label."""
140+
return any(label.startswith(prefix) for prefix in DYNAMIC_ISOLATION_PREFIXES)

api/src/backend/tasks/jobs/attack_paths/scan.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ def run(tenant_id: str, scan_id: str, task_id: str) -> dict[str, Any]:
237237
sync.sync_graph(
238238
source_database=tmp_database_name,
239239
target_database=tenant_database_name,
240+
tenant_id=str(prowler_api_provider.tenant_id),
240241
provider_id=str(prowler_api_provider.id),
241242
)
242243
db_utils.set_graph_data_ready(attack_paths_scan, True)

api/src/backend/tasks/jobs/attack_paths/sync.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
BATCH_SIZE,
1616
PROVIDER_ISOLATION_PROPERTIES,
1717
PROVIDER_RESOURCE_LABEL,
18+
get_provider_label,
19+
get_tenant_label,
1820
)
1921
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
2022
from tasks.jobs.attack_paths.queries import (
@@ -36,6 +38,7 @@ def create_sync_indexes(neo4j_session) -> None:
3638
def sync_graph(
3739
source_database: str,
3840
target_database: str,
41+
tenant_id: str,
3942
provider_id: str,
4043
) -> dict[str, int]:
4144
"""
@@ -44,6 +47,7 @@ def sync_graph(
4447
Args:
4548
`source_database`: The temporary scan database
4649
`target_database`: The tenant database
50+
`tenant_id`: The tenant ID for isolation
4751
`provider_id`: The provider ID for isolation
4852
4953
Returns:
@@ -52,6 +56,7 @@ def sync_graph(
5256
nodes_synced = sync_nodes(
5357
source_database,
5458
target_database,
59+
tenant_id,
5560
provider_id,
5661
)
5762
relationships_synced = sync_relationships(
@@ -69,12 +74,14 @@ def sync_graph(
6974
def sync_nodes(
7075
source_database: str,
7176
target_database: str,
77+
tenant_id: str,
7278
provider_id: str,
7379
) -> int:
7480
"""
7581
Sync nodes from source to target database.
7682
7783
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
84+
Also adds dynamic `_Tenant_{id}` and `_Provider_{id}` isolation labels.
7885
"""
7986
last_id = -1
8087
total_synced = 0
@@ -112,6 +119,8 @@ def sync_nodes(
112119
for labels, batch in grouped.items():
113120
label_set = set(labels)
114121
label_set.add(PROVIDER_RESOURCE_LABEL)
122+
label_set.add(get_tenant_label(tenant_id))
123+
label_set.add(get_provider_label(provider_id))
115124
node_labels = ":".join(f"`{label}`" for label in sorted(label_set))
116125

117126
query = render_cypher_template(

api/src/backend/tasks/tests/test_attack_paths_scan.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def test_run_success_flow(
151151
mock_sync.assert_called_once_with(
152152
source_database="db-scan-id",
153153
target_database="tenant-db",
154+
tenant_id=str(provider.tenant_id),
154155
provider_id=str(provider.id),
155156
)
156157
mock_get_ingestion.assert_called_once_with(provider.provider)
@@ -1118,12 +1119,15 @@ def test_sync_nodes_adds_private_label(self):
11181119
"tasks.jobs.attack_paths.sync.graph_database.get_session",
11191120
side_effect=[source_ctx, target_ctx],
11201121
):
1121-
total = sync_module.sync_nodes("source-db", "target-db", "prov-1")
1122+
total = sync_module.sync_nodes(
1123+
"source-db", "target-db", "tenant-1", "prov-1"
1124+
)
11221125

11231126
assert total == 1
11241127
query = mock_target_session.run.call_args.args[0]
11251128
assert "_ProviderResource" in query
1126-
assert "ProviderResource" not in query.replace("_ProviderResource", "")
1129+
assert "_Tenant_tenant1" in query
1130+
assert "_Provider_prov1" in query
11271131

11281132

11291133
class TestInternetAnalysis:

skills/prowler-attack-paths-query/SKILL.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,13 +252,13 @@ https://raw.githubusercontent.com/cartography-cncf/cartography/refs/tags/0.126.0
252252

253253
**IMPORTANT**: Always match the schema version to the dependency version in `pyproject.toml`. Using master/main may reference node labels or properties that don't exist in the deployed version.
254254

255-
**Additional Prowler Labels**: The Attack Paths sync task adds extra labels:
255+
**Additional Prowler Labels**: The Attack Paths sync task adds labels that queries can reference:
256256

257257
- `ProwlerFinding` - Prowler finding nodes with `status`, `provider_uid` properties
258-
- `ProviderResource` - Generic resource marker
259-
- `{Provider}Resource` - Provider-specific marker (e.g., `AWSResource`)
260258
- `Internet` - Internet sentinel node with `_provider_id` property (used in network exposure queries)
261259

260+
Other internal labels (`_ProviderResource`, `_AWSResource`, `_Tenant_*`, `_Provider_*`) exist for isolation but should never be used in queries.
261+
262262
These are defined in `api/src/backend/tasks/jobs/attack_paths/config.py`.
263263

264264
### 3. Consult the Schema for Available Data

0 commit comments

Comments
 (0)