Skip to content

Commit 75c7f61

Browse files
authored
feat(api): private labels and properties in Attack Paths graph - phase 1 (#10124)
1 parent b5d2a75 commit 75c7f61

File tree

8 files changed

+133
-28
lines changed

8 files changed

+133
-28
lines changed

api/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ All notable changes to the **Prowler API** are documented in this file.
2525
- AI agent guidelines with TDD and testing skills references [(#9925)](https://github.com/prowler-cloud/prowler/pull/9925)
2626
- Attack Paths: Upgrade Cartography from fork 0.126.1 to upstream 0.129.0 and Neo4j driver from 5.x to 6.x [(#10110)](https://github.com/prowler-cloud/prowler/pull/10110)
2727
- Attack Paths: Query results now filtered by provider, preventing future cross-tenant and cross-provider data leakage [(#10118)](https://github.com/prowler-cloud/prowler/pull/10118)
28+
- Attack Paths: Add private labels and properties to Attack Paths graphs to avoid future overlap with Cartography's own [(#10124)](https://github.com/prowler-cloud/prowler/pull/10124)
2829

2930
### 🐞 Fixed
3031

api/src/backend/api/attack_paths/database.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
from django.conf import settings
1313

1414
from api.attack_paths.retryable_session import RetryableSession
15-
from tasks.jobs.attack_paths.config import BATCH_SIZE, PROVIDER_RESOURCE_LABEL
15+
from tasks.jobs.attack_paths.config import (
16+
BATCH_SIZE,
17+
DEPRECATED_PROVIDER_RESOURCE_LABEL,
18+
)
1619

1720
# Without this Celery goes crazy with Neo4j logging
1821
logging.getLogger("neo4j").setLevel(logging.ERROR)
@@ -128,7 +131,7 @@ def drop_subgraph(database: str, provider_id: str) -> int:
128131
while deleted_count > 0:
129132
result = session.run(
130133
f"""
131-
MATCH (n:{PROVIDER_RESOURCE_LABEL} {{provider_id: $provider_id}})
134+
MATCH (n:{DEPRECATED_PROVIDER_RESOURCE_LABEL} {{provider_id: $provider_id}})
132135
WITH n LIMIT $batch_size
133136
DETACH DELETE n
134137
RETURN COUNT(n) AS deleted_nodes_count

api/src/backend/tasks/jobs/attack_paths/config.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,17 @@
1010
BATCH_SIZE = env.int("ATTACK_PATHS_BATCH_SIZE", 1000)
1111

1212
# Neo4j internal labels (Prowler-specific, not provider-specific)
13-
# - `ProwlerFinding`: Label for finding nodes created by Prowler and linked to cloud resources.
14-
# - `ProviderResource`: Added to ALL synced nodes for provider isolation and drop/query ops.
15-
# - `Internet`: Singleton node representing external internet access for exposed-resource queries.
13+
# - `ProwlerFinding`: Label for finding nodes created by Prowler and linked to cloud resources
14+
# - `_ProviderResource`: Added to ALL synced nodes for provider isolation and drop/query ops
15+
# - `Internet`: Singleton node representing external internet access for exposed-resource queries
1616
PROWLER_FINDING_LABEL = "ProwlerFinding"
17-
PROVIDER_RESOURCE_LABEL = "ProviderResource"
17+
PROVIDER_RESOURCE_LABEL = "_ProviderResource"
1818
INTERNET_NODE_LABEL = "Internet"
1919

20+
# Phase 1 dual-write: deprecated label kept for drop_subgraph and infrastructure queries
21+
# Remove in Phase 2 once all nodes use the private label exclusively
22+
DEPRECATED_PROVIDER_RESOURCE_LABEL = "ProviderResource"
23+
2024

2125
@dataclass(frozen=True)
2226
class ProviderConfig:
@@ -26,7 +30,8 @@ class ProviderConfig:
2630
root_node_label: str # e.g., "AWSAccount"
2731
uid_field: str # e.g., "arn"
2832
# Label for resources connected to the account node, enabling indexed finding lookups.
29-
resource_label: str # e.g., "AWSResource"
33+
resource_label: str # e.g., "_AWSResource"
34+
deprecated_resource_label: str # e.g., "AWSResource"
3035
ingestion_function: Callable
3136

3237

@@ -37,7 +42,8 @@ class ProviderConfig:
3742
name="aws",
3843
root_node_label="AWSAccount",
3944
uid_field="arn",
40-
resource_label="AWSResource",
45+
resource_label="_AWSResource",
46+
deprecated_resource_label="AWSResource",
4147
ingestion_function=aws.start_aws_ingestion,
4248
)
4349

@@ -48,10 +54,12 @@ class ProviderConfig:
4854
# Labels added by Prowler that should be filtered from API responses
4955
# Derived from provider configs + common internal labels
5056
INTERNAL_LABELS: list[str] = [
51-
"Tenant",
57+
"Tenant", # Comes from Cartography, but it looks like a Prowler-internal label
5258
PROVIDER_RESOURCE_LABEL,
59+
DEPRECATED_PROVIDER_RESOURCE_LABEL,
5360
# Add all provider-specific resource labels
5461
*[config.resource_label for config in PROVIDER_CONFIGS.values()],
62+
*[config.deprecated_resource_label for config in PROVIDER_CONFIGS.values()],
5563
]
5664

5765

@@ -83,6 +91,12 @@ def get_node_uid_field(provider_type: str) -> str:
8391

8492

8593
def get_provider_resource_label(provider_type: str) -> str:
86-
"""Get the resource label for a provider type (e.g., `AWSResource`)."""
94+
"""Get the resource label for a provider type (e.g., `_AWSResource`)."""
95+
config = PROVIDER_CONFIGS.get(provider_type)
96+
return config.resource_label if config else "_UnknownProviderResource"
97+
98+
99+
def get_deprecated_provider_resource_label(provider_type: str) -> str:
100+
"""Get the deprecated resource label for a provider type (e.g., `AWSResource`)."""
87101
config = PROVIDER_CONFIGS.get(provider_type)
88-
return config.resource_label if config else "UnknownProviderResource"
102+
return config.deprecated_resource_label if config else "UnknownProviderResource"

api/src/backend/tasks/jobs/attack_paths/findings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from prowler.config import config as ProwlerConfig
2626
from tasks.jobs.attack_paths.config import (
2727
BATCH_SIZE,
28+
get_deprecated_provider_resource_label,
2829
get_node_uid_field,
2930
get_provider_resource_label,
3031
get_root_node_label,
@@ -152,6 +153,9 @@ def add_resource_label(
152153
{
153154
"__ROOT_LABEL__": get_root_node_label(provider_type),
154155
"__RESOURCE_LABEL__": get_provider_resource_label(provider_type),
156+
"__DEPRECATED_RESOURCE_LABEL__": get_deprecated_provider_resource_label(
157+
provider_type
158+
),
155159
},
156160
)
157161

api/src/backend/tasks/jobs/attack_paths/indexes.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from celery.utils.log import get_task_logger
77

88
from tasks.jobs.attack_paths.config import (
9+
DEPRECATED_PROVIDER_RESOURCE_LABEL,
910
INTERNET_NODE_LABEL,
1011
PROWLER_FINDING_LABEL,
1112
PROVIDER_RESOURCE_LABEL,
@@ -23,9 +24,11 @@ class IndexType(Enum):
2324

2425
# Indexes for Prowler findings and resource lookups
2526
FINDINGS_INDEX_STATEMENTS = [
26-
# Resources indexes for quick Prowler Finding lookups
27-
"CREATE INDEX aws_resource_arn IF NOT EXISTS FOR (n:AWSResource) ON (n.arn);",
28-
"CREATE INDEX aws_resource_id IF NOT EXISTS FOR (n:AWSResource) ON (n.id);",
27+
# Resource indexes for Prowler Finding lookups
28+
"CREATE INDEX aws_resource_arn IF NOT EXISTS FOR (n:_AWSResource) ON (n.arn);",
29+
"CREATE INDEX aws_resource_id IF NOT EXISTS FOR (n:_AWSResource) ON (n.id);",
30+
"CREATE INDEX deprecated_aws_resource_arn IF NOT EXISTS FOR (n:AWSResource) ON (n.arn);",
31+
"CREATE INDEX deprecated_aws_resource_id IF NOT EXISTS FOR (n:AWSResource) ON (n.id);",
2932
# Prowler Finding indexes
3033
f"CREATE INDEX prowler_finding_id IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.id);",
3134
f"CREATE INDEX prowler_finding_provider_uid IF NOT EXISTS FOR (n:{PROWLER_FINDING_LABEL}) ON (n.provider_uid);",
@@ -37,8 +40,10 @@ class IndexType(Enum):
3740

3841
# Indexes for provider resource sync operations
3942
SYNC_INDEX_STATEMENTS = [
40-
f"CREATE INDEX provider_element_id IF NOT EXISTS FOR (n:{PROVIDER_RESOURCE_LABEL}) ON (n.provider_element_id);",
41-
f"CREATE INDEX provider_resource_provider_id IF NOT EXISTS FOR (n:{PROVIDER_RESOURCE_LABEL}) ON (n.provider_id);",
43+
f"CREATE INDEX provider_element_id IF NOT EXISTS FOR (n:{PROVIDER_RESOURCE_LABEL}) ON (n._provider_element_id);",
44+
f"CREATE INDEX provider_resource_provider_id IF NOT EXISTS FOR (n:{PROVIDER_RESOURCE_LABEL}) ON (n._provider_id);",
45+
f"CREATE INDEX deprecated_provider_element_id IF NOT EXISTS FOR (n:{DEPRECATED_PROVIDER_RESOURCE_LABEL}) ON (n.provider_element_id);",
46+
f"CREATE INDEX deprecated_provider_resource_provider_id IF NOT EXISTS FOR (n:{DEPRECATED_PROVIDER_RESOURCE_LABEL}) ON (n.provider_id);",
4247
]
4348

4449

api/src/backend/tasks/jobs/attack_paths/queries.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def render_cypher_template(template: str, replacements: dict[str, str]) -> str:
2626
MATCH (account:__ROOT_LABEL__ {id: $provider_uid})-->(r)
2727
WHERE NOT r:__ROOT_LABEL__ AND NOT r:__RESOURCE_LABEL__
2828
WITH r LIMIT $batch_size
29-
SET r:__RESOURCE_LABEL__
29+
SET r:__RESOURCE_LABEL__:__DEPRECATED_RESOURCE_LABEL__
3030
RETURN COUNT(r) AS labeled_count
3131
"""
3232

@@ -151,16 +151,20 @@ def render_cypher_template(template: str, replacements: dict[str, str]) -> str:
151151

152152
NODE_SYNC_TEMPLATE = """
153153
UNWIND $rows AS row
154-
MERGE (n:__NODE_LABELS__ {provider_element_id: row.provider_element_id})
154+
MERGE (n:__NODE_LABELS__ {_provider_element_id: row.provider_element_id})
155155
SET n += row.props
156+
SET n._provider_id = $provider_id
157+
SET n.provider_element_id = row.provider_element_id
156158
SET n.provider_id = $provider_id
157-
"""
159+
""" # The last two lines are deprecated properties
158160

159161
RELATIONSHIP_SYNC_TEMPLATE = f"""
160162
UNWIND $rows AS row
161-
MATCH (s:{PROVIDER_RESOURCE_LABEL} {{provider_element_id: row.start_element_id}})
162-
MATCH (t:{PROVIDER_RESOURCE_LABEL} {{provider_element_id: row.end_element_id}})
163-
MERGE (s)-[r:__REL_TYPE__ {{provider_element_id: row.provider_element_id}}]->(t)
163+
MATCH (s:{PROVIDER_RESOURCE_LABEL} {{_provider_element_id: row.start_element_id}})
164+
MATCH (t:{PROVIDER_RESOURCE_LABEL} {{_provider_element_id: row.end_element_id}})
165+
MERGE (s)-[r:__REL_TYPE__ {{_provider_element_id: row.provider_element_id}}]->(t)
164166
SET r += row.props
167+
SET r._provider_id = $provider_id
168+
SET r.provider_element_id = row.provider_element_id
165169
SET r.provider_id = $provider_id
166-
"""
170+
""" # The last two lines are deprecated properties

api/src/backend/tasks/jobs/attack_paths/sync.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@
1111
from celery.utils.log import get_task_logger
1212

1313
from api.attack_paths import database as graph_database
14-
from tasks.jobs.attack_paths.config import BATCH_SIZE, PROVIDER_RESOURCE_LABEL
14+
from tasks.jobs.attack_paths.config import (
15+
BATCH_SIZE,
16+
DEPRECATED_PROVIDER_RESOURCE_LABEL,
17+
PROVIDER_RESOURCE_LABEL,
18+
)
1519
from tasks.jobs.attack_paths.indexes import IndexType, create_indexes
1620
from tasks.jobs.attack_paths.queries import (
1721
NODE_FETCH_QUERY,
@@ -70,7 +74,7 @@ def sync_nodes(
7074
"""
7175
Sync nodes from source to target database.
7276
73-
Adds `ProviderResource` label and `provider_id` property to all nodes.
77+
Adds `_ProviderResource` label and `_provider_id` property to all nodes.
7478
"""
7579
last_id = -1
7680
total_synced = 0
@@ -108,6 +112,7 @@ def sync_nodes(
108112
for labels, batch in grouped.items():
109113
label_set = set(labels)
110114
label_set.add(PROVIDER_RESOURCE_LABEL)
115+
label_set.add(DEPRECATED_PROVIDER_RESOURCE_LABEL)
111116
node_labels = ":".join(f"`{label}`" for label in sorted(label_set))
112117

113118
query = render_cypher_template(
@@ -137,7 +142,7 @@ def sync_relationships(
137142
"""
138143
Sync relationships from source to target database.
139144
140-
Adds `provider_id` property to all relationships.
145+
Adds `_provider_id` property to all relationships.
141146
"""
142147
last_id = -1
143148
total_synced = 0
@@ -196,7 +201,9 @@ def sync_relationships(
196201
def _strip_internal_properties(props: dict[str, Any]) -> None:
197202
"""Remove internal properties that shouldn't be copied during sync."""
198203
for key in [
199-
"provider_element_id",
200-
"provider_id",
204+
"_provider_element_id",
205+
"_provider_id",
206+
"provider_element_id", # Deprecated
207+
"provider_id", # Deprecated
201208
]:
202209
props.pop(key, None)

api/src/backend/tasks/tests/test_attack_paths_scan.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
import pytest
66
from tasks.jobs.attack_paths import findings as findings_module
77
from tasks.jobs.attack_paths import internet as internet_module
8+
from tasks.jobs.attack_paths import sync as sync_module
9+
from tasks.jobs.attack_paths.config import (
10+
get_deprecated_provider_resource_label,
11+
)
812
from tasks.jobs.attack_paths.scan import run as attack_paths_run
913

1014
from api.models import (
@@ -1073,6 +1077,69 @@ def empty_gen():
10731077
mock_session.run.assert_not_called()
10741078

10751079

1080+
class TestProviderConfigAccessors:
1081+
def test_get_deprecated_provider_resource_label_known_provider(self):
1082+
assert get_deprecated_provider_resource_label("aws") == "AWSResource"
1083+
1084+
def test_get_deprecated_provider_resource_label_unknown_provider(self):
1085+
assert (
1086+
get_deprecated_provider_resource_label("unknown")
1087+
== "UnknownProviderResource"
1088+
)
1089+
1090+
1091+
class TestAddResourceLabel:
1092+
def test_add_resource_label_applies_both_labels(self):
1093+
mock_session = MagicMock()
1094+
1095+
first_result = MagicMock()
1096+
first_result.single.return_value = {"labeled_count": 5}
1097+
second_result = MagicMock()
1098+
second_result.single.return_value = {"labeled_count": 0}
1099+
mock_session.run.side_effect = [first_result, second_result]
1100+
1101+
total = findings_module.add_resource_label(mock_session, "aws", "123456789012")
1102+
1103+
assert total == 5
1104+
assert mock_session.run.call_count == 2
1105+
query = mock_session.run.call_args_list[0].args[0]
1106+
assert "_AWSResource" in query
1107+
assert "AWSResource" in query
1108+
1109+
1110+
class TestSyncNodes:
1111+
def test_sync_nodes_adds_both_labels(self):
1112+
mock_source_session = MagicMock()
1113+
mock_target_session = MagicMock()
1114+
1115+
row = {
1116+
"internal_id": 1,
1117+
"element_id": "elem-1",
1118+
"labels": ["SomeLabel"],
1119+
"props": {"key": "value"},
1120+
}
1121+
mock_source_session.run.side_effect = [[row], []]
1122+
1123+
source_ctx = MagicMock()
1124+
source_ctx.__enter__ = MagicMock(return_value=mock_source_session)
1125+
source_ctx.__exit__ = MagicMock(return_value=False)
1126+
1127+
target_ctx = MagicMock()
1128+
target_ctx.__enter__ = MagicMock(return_value=mock_target_session)
1129+
target_ctx.__exit__ = MagicMock(return_value=False)
1130+
1131+
with patch(
1132+
"tasks.jobs.attack_paths.sync.graph_database.get_session",
1133+
side_effect=[source_ctx, target_ctx],
1134+
):
1135+
total = sync_module.sync_nodes("source-db", "target-db", "prov-1")
1136+
1137+
assert total == 1
1138+
query = mock_target_session.run.call_args.args[0]
1139+
assert "_ProviderResource" in query
1140+
assert "ProviderResource" in query
1141+
1142+
10761143
class TestInternetAnalysis:
10771144
def _make_provider_and_config(self):
10781145
provider = MagicMock()

0 commit comments

Comments
 (0)