Skip to content

Commit 5a85db1

Browse files
authored
feat(attack-paths): Task and endpoints (#9344)
- Added support for Neo4j - Added Cartography as Attack Paths Scan - Added Attack Paths Scan endpoints for managing scans and running queries on those scans
1 parent 2b86078 commit 5a85db1

40 files changed

+4424
-144
lines changed

.env

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,25 @@ POSTGRES_DB=prowler_db
4141
# POSTGRES_REPLICA_MAX_ATTEMPTS=3
4242
# POSTGRES_REPLICA_RETRY_BASE_DELAY=0.5
4343

44+
# Neo4j auth
45+
NEO4J_HOST=neo4j
46+
NEO4J_PORT=7687
47+
NEO4J_USER=neo4j
48+
NEO4J_PASSWORD=neo4j_password
49+
# Neo4j settings
50+
NEO4J_SERVER_MEMORY_PAGECACHE_SIZE=1G
51+
NEO4J_SERVER_MEMORY_HEAP_INITIAL__SIZE=1G
52+
NEO4J_SERVER_MEMORY_HEAP_MAX__SIZE=1G
53+
NEO4J_APOC_EXPORT_FILE_ENABLED=true
54+
NEO4J_APOC_IMPORT_FILE_ENABLED=true
55+
NEO4J_APOC_IMPORT_FILE_USE_NEO4J_CONFIG=true
56+
NEO4J_PLUGINS=["apoc"]
57+
NEO4J_DBMS_SECURITY_PROCEDURES_ALLOWLIST=apoc.*
58+
NEO4J_DBMS_SECURITY_PROCEDURES_UNRESTRICTED=apoc.*
59+
NEO4J_DBMS_CONNECTOR_BOLT_LISTEN_ADDRESS=0.0.0.0:7687
60+
# Neo4j Prowler settings
61+
NEO4J_INSERT_BATCH_SIZE=500
62+
4463
# Celery-Prowler task settings
4564
TASK_RETRY_DELAY_SECONDS=0.1
4665
TASK_RETRY_ATTEMPTS=5

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,23 @@ prowler dashboard
7575
```
7676
![Prowler Dashboard](docs/images/products/dashboard.png)
7777

78+
79+
## Attack Paths
80+
81+
Attack Paths automatically extends every completed AWS scan with a Neo4j graph that combines Cartography's cloud inventory with Prowler findings. The feature runs in the API worker after each scan and therefore requires:
82+
83+
- An accessible Neo4j instance (the Docker Compose files already ship a `neo4j` service).
84+
- The following environment variables so Django and Celery can connect:
85+
86+
| Variable | Description | Default |
87+
| --- | --- | --- |
88+
| `NEO4J_HOST` | Hostname used by the API containers. | `neo4j` |
89+
| `NEO4J_PORT` | Bolt port exposed by Neo4j. | `7687` |
90+
| `NEO4J_USER` / `NEO4J_PASSWORD` | Credentials with rights to create per-tenant databases. | `neo4j` / `neo4j_password` |
91+
92+
Every AWS provider scan will enqueue an Attack Paths ingestion job automatically. Other cloud providers will be added in future iterations.
93+
94+
7895
# Prowler at a Glance
7996
> [!Tip]
8097
> For the most accurate and up-to-date information about checks, services, frameworks, and categories, visit [**Prowler Hub**](https://hub.prowler.com).

api/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ All notable changes to the **Prowler API** are documented in this file.
44

55
## [1.16.0] (Unreleased)
66

7+
### Added
8+
- Attack Paths backend support [(#9344)](https://github.com/prowler-cloud/prowler/pull/9344)
9+
710
### Changed
811
- Restore the compliance overview endpoint's mandatory filters [(#9330)](https://github.com/prowler-cloud/prowler/pull/9330)
912

api/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ FROM python:3.12.10-slim-bookworm AS build
22

33
LABEL maintainer="https://github.com/prowler-cloud/api"
44

5+
ARG CARTOGRAPHY_VERSION=0.117.0
6+
ENV CARTOGRAPHY_VERSION=${CARTOGRAPHY_VERSION}
7+
58
ARG POWERSHELL_VERSION=7.5.0
69
ENV POWERSHELL_VERSION=${POWERSHELL_VERSION}
710

@@ -79,6 +82,8 @@ ENV PATH="/home/prowler/.local/bin:$PATH"
7982
RUN poetry install --no-root && \
8083
rm -rf ~/.cache/pip
8184

85+
RUN poetry run python -m pip install cartography==${CARTOGRAPHY_VERSION}
86+
8287
RUN poetry run python "$(poetry env info --path)/src/prowler/prowler/providers/m365/lib/powershell/m365_powershell.py"
8388

8489
COPY src/backend/ ./backend/

api/poetry.lock

Lines changed: 22 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ dependencies = [
3535
"markdown (>=3.9,<4.0)",
3636
"drf-simple-apikey (==2.2.1)",
3737
"matplotlib (>=3.10.6,<4.0.0)",
38-
"reportlab (>=4.4.4,<5.0.0)"
38+
"reportlab (>=4.4.4,<5.0.0)",
39+
"neo4j (<6.0.0)",
3940
]
4041
description = "Prowler's API (Django/DRF)"
4142
license = "Apache-2.0"

api/src/backend/api/apps.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import atexit
23
import os
34
import sys
45
from pathlib import Path
@@ -30,6 +31,7 @@ class ApiConfig(AppConfig):
3031
def ready(self):
3132
from api import schema_extensions # noqa: F401
3233
from api import signals # noqa: F401
34+
from api.attack_paths import database as graph_database
3335
from api.compliance import load_prowler_compliance
3436

3537
# Generate required cryptographic keys if not present, but only if:
@@ -39,6 +41,10 @@ def ready(self):
3941
if "manage.py" not in sys.argv or os.environ.get("RUN_MAIN"):
4042
self._ensure_crypto_keys()
4143

44+
if not getattr(settings, "TESTING", False):
45+
graph_database.init_driver()
46+
atexit.register(graph_database.close_driver)
47+
4248
load_prowler_compliance()
4349

4450
def _ensure_crypto_keys(self):
@@ -54,7 +60,7 @@ def _ensure_crypto_keys(self):
5460
global _keys_initialized
5561

5662
# Skip key generation if running tests
57-
if hasattr(settings, "TESTING") and settings.TESTING:
63+
if getattr(settings, "TESTING", False):
5864
return
5965

6066
# Skip if already initialized in this process
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from api.attack_paths.query_definitions import (
2+
AttackPathsQueryDefinition,
3+
AttackPathsQueryParameterDefinition,
4+
get_queries_for_provider,
5+
get_query_by_id,
6+
)
7+
8+
__all__ = [
9+
"AttackPathsQueryDefinition",
10+
"AttackPathsQueryParameterDefinition",
11+
"get_queries_for_provider",
12+
"get_query_by_id",
13+
]
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import logging
2+
import threading
3+
4+
from contextlib import contextmanager
5+
from typing import Iterator
6+
from uuid import UUID
7+
8+
import neo4j
9+
10+
from django.conf import settings
11+
12+
import neo4j.exceptions
13+
14+
# Without this Celery goes crazy with Neo4j logging
15+
logging.getLogger("neo4j").setLevel(logging.ERROR)
16+
logging.getLogger("neo4j").propagate = False
17+
18+
# Module-level process-wide driver singleton
19+
_driver: neo4j.Driver | None = None
20+
_lock = threading.Lock()
21+
22+
# Base Neo4j functions
23+
24+
25+
def get_uri() -> str:
    """Build the Bolt URI for the Neo4j server.

    Returns:
        A ``bolt://<host>:<port>`` string built from the ``HOST`` and ``PORT``
        entries of ``settings.DATABASES["neo4j"]``.
    """
    neo4j_settings = settings.DATABASES["neo4j"]
    host, port = neo4j_settings["HOST"], neo4j_settings["PORT"]
    return f"bolt://{host}:{port}"
29+
30+
31+
def init_driver() -> neo4j.Driver:
    """Return the module-level Neo4j driver, creating it on first use.

    The driver is only stored in the module-level ``_driver`` after
    ``verify_connectivity()`` succeeds. If the check fails, the freshly
    created driver is closed and the error propagates, so a later call
    retries instead of silently returning an unverified driver.

    Returns:
        The shared ``neo4j.Driver`` instance.

    Raises:
        Whatever ``neo4j.GraphDatabase.driver`` or ``verify_connectivity``
        raises when the server cannot be reached or authenticated against.
    """
    global _driver
    # Skip the lock entirely once a driver has been stored.
    if _driver is not None:
        return _driver

    with _lock:
        # Re-check under the lock: another thread may have stored it meanwhile.
        if _driver is None:
            config = settings.DATABASES["neo4j"]
            candidate = neo4j.GraphDatabase.driver(
                get_uri(), auth=(config["USER"], config["PASSWORD"])
            )

            try:
                candidate.verify_connectivity()
            except Exception:
                # Never cache a driver that failed its connectivity check.
                candidate.close()
                raise

            _driver = candidate

    return _driver
47+
48+
49+
def get_driver() -> neo4j.Driver:
    """Return the shared Neo4j driver by delegating to ``init_driver``."""
    driver = init_driver()
    return driver
51+
52+
53+
def close_driver() -> None:
    """Close the shared Neo4j driver, if one exists, and forget it.

    The module-level ``_driver`` is reset to ``None`` even when ``close()``
    raises, so a subsequent ``init_driver`` call builds a fresh driver.
    """
    global _driver
    with _lock:
        if _driver is None:
            return

        try:
            _driver.close()
        finally:
            _driver = None
62+
63+
64+
@contextmanager
def get_session(database: str | None = None) -> Iterator[neo4j.Session]:
    """Yield a Neo4j session and translate Neo4j errors.

    Args:
        database: Name of the database to open the session against, or
            ``None`` to pass no explicit database to the driver.

    Yields:
        An open ``neo4j.Session`` that is closed when the ``with`` body exits.

    Raises:
        GraphDatabaseQueryException: When opening the session or the body of
            the ``with`` statement raises ``neo4j.exceptions.Neo4jError``.
            The original error is attached as ``__cause__``.
    """
    try:
        with get_driver().session(database=database) as session:
            yield session

    except neo4j.exceptions.Neo4jError as exc:
        # Fall back to str(exc) so the wrapper always carries a string message.
        raise GraphDatabaseQueryException(
            message=exc.message or str(exc), code=exc.code
        ) from exc
72+
73+
74+
def create_database(database: str) -> None:
    """Create a Neo4j database unless it already exists.

    Args:
        database: Name of the database to create; sent as the ``$database``
            query parameter.
    """
    with get_session() as session:
        session.run(
            "CREATE DATABASE $database IF NOT EXISTS",
            {"database": database},
        )
80+
81+
82+
def drop_database(database: str) -> None:
    """Drop a Neo4j database and destroy its data, if the database exists.

    The name is sent as the ``$database`` query parameter, mirroring
    ``create_database``, instead of being interpolated into the query text.
    A name containing a backtick can therefore no longer break out of the
    identifier.

    Args:
        database: Name of the database to drop.
    """
    query = "DROP DATABASE $database IF EXISTS DESTROY DATA"
    parameters = {"database": database}

    with get_session() as session:
        session.run(query, parameters)
87+
88+
89+
def drop_subgraph(database: str, root_node_label: str, root_node_id: str) -> int:
    """Delete the subgraph reachable from a root node and count the deletions.

    Args:
        database: Name of the database to run the deletion in.
        root_node_label: Label of the root node.
        root_node_id: Value of the root node's ``id`` property; sent as the
            ``$root_node_id`` query parameter.

    Returns:
        The ``deleted_nodes_count`` reported by the query, or ``0`` when the
        query yields no record or the result has already been consumed.
    """
    # NOTE(review): root_node_label is substituted into the query text
    # unescaped, so callers must only pass trusted label names.
    query = """
        MATCH (a:__ROOT_NODE_LABEL__ {id: $root_node_id})
        CALL apoc.path.subgraphNodes(a, {})
        YIELD node
        DETACH DELETE node
        RETURN COUNT(node) AS deleted_nodes_count
    """.replace("__ROOT_NODE_LABEL__", root_node_label)
    parameters = {"root_node_id": root_node_id}

    with get_session(database) as session:
        result = session.run(query, parameters)

        try:
            record = result.single()

        except neo4j.exceptions.ResultConsumedError:
            return 0  # As there are no nodes to delete, the result is empty

        # single() returns None for an empty result; avoid subscripting None.
        if record is None:
            return 0

        return record["deleted_nodes_count"]
107+
108+
109+
# Neo4j functions related to Prowler + Cartography
110+
# Naming pattern for the Neo4j database that belongs to one attack paths scan.
DATABASE_NAME_TEMPLATE = "db-{attack_paths_scan_id}"


def get_database_name(attack_paths_scan_id: UUID) -> str:
    """Return the Neo4j database name for an attack paths scan.

    Args:
        attack_paths_scan_id: Identifier of the attack paths scan.

    Returns:
        ``DATABASE_NAME_TEMPLATE`` filled in with the lower-cased string form
        of the identifier.
    """
    normalized_id = f"{attack_paths_scan_id}".lower()
    return DATABASE_NAME_TEMPLATE.format(attack_paths_scan_id=normalized_id)
116+
117+
118+
# Exceptions
119+
120+
121+
class GraphDatabaseQueryException(Exception):
122+
def __init__(self, message: str, code: str | None = None) -> None:
123+
super().__init__(message)
124+
self.message = message
125+
self.code = code
126+
127+
def __str__(self) -> str:
128+
if self.code:
129+
return f"{self.code}: {self.message}"
130+
131+
return self.message

0 commit comments

Comments
 (0)