"""
Benchmark script for Gap Analysis performance (Issue #587)
===========================================================

Measures wall-clock time and peak memory for:
  - MODE A: Original exhaustive traversal (always runs both [*..20] queries,
    including the unfiltered wildcard)
  - MODE B: Optimized tiered pruning (early exit on strong/medium links)

Usage:
    # List available standards in Neo4j:
    python scripts/benchmark_gap.py --list-standards

    # Run benchmark on two standards:
    python scripts/benchmark_gap.py --standard1 "OWASP Top 10 2021" --standard2 "NIST 800-53"

Requirements:
    Neo4j must be running (use: make docker-neo4j)
    NEO4J_URL env var or default: neo4j://neo4j:password@localhost:7687
"""

import argparse
import os
import sys
import time
import tracemalloc

# Bootstrap project root onto sys.path
_project_root = os.path.join(os.path.dirname(__file__), "..")
sys.path.insert(0, os.path.abspath(_project_root))

try:
    from neomodel import config as neo_config, db as neomodel_db

    # Must import the project's DB models so neomodel registers NeoStandard,
    # NeoCRE etc. — otherwise resolve_objects=True raises NodeClassNotDefined.
    import application.database.db  # noqa: F401
except ImportError as exc:
    print(f"[ERROR] Could not import project modules: {exc}")
    print("        Make sure you run from the project root with venv activated.")
    sys.exit(1)


def connect_neo4j():
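    # neomodel connects lazily: setting DATABASE_URL only configures the
    # driver, and the first cypher_query() call opens the actual connection.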
    url = os.environ.get("NEO4J_URL", "neo4j://neo4j:password@localhost:7687")
    neo_config.DATABASE_URL = url
    print(f" → Using Neo4j at: {url}\n")


def list_available_standards():
    connect_neo4j()
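    # neomodel's cypher_query returns (rows, column_names); each row is a
    # list of values, here a single-element list holding the standard's name.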
    results, _ = neomodel_db.cypher_query(
        "MATCH (n:NeoStandard) RETURN DISTINCT n.name ORDER BY n.name"
    )
    if not results:
        print(" [!] No NeoStandard nodes found. Import data first:")
        print("     make import-neo4j")
        return
    print(f"Found {len(results)} standards:")
    for row in results:
        print(f"  • {row[0]}")


def run_original(name_1, name_2):
    """Original pre-PR #716 approach: always runs BOTH queries unconditionally."""
    denylist = ["Cross-cutting concerns"]

    # Query 1 — wildcard (the expensive one)
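    # An untyped variable-length pattern ([*..20]) makes Cypher expand every
    # relationship type at each hop, so allShortestPaths explores a much
    # larger frontier than a typed traversal; this is the cost MODE B's
    # tiered pruning tries to avoid.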
    r1, _ = neomodel_db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE
            (n:NeoCRE OR n = BaseStandard OR n = CompareStandard)
            AND NOT n.name IN $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )

    # Query 2 — filtered (also always ran)
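    # Restricting the traversal to named relationship types shrinks the
    # expansion frontier, but in the original code path both queries still
    # ran unconditionally.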
    r2, _ = neomodel_db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|CONTAINS)*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE
            (n:NeoCRE OR n = BaseStandard OR n = CompareStandard)
            AND NOT n.name IN $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )

    return len(r1) + len(r2), 2  # paths, num_queries_run


def run_optimized(name_1, name_2):
    """Tiered pruning from PR #716/#717: exits early when strong/medium links found."""
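    # NOTE: each tier returns only the class of paths it matched, so the
    # total path count can legitimately differ from MODE A's exhaustive
    # result; the results table reports both for comparison.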
    denylist = ["Cross-cutting concerns"]

    # Tier 1 — strong links only
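    # Most standard pairs that are linked at all should resolve here, so the
    # expensive wildcard query (Tier 3) rarely needs to run.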
    r, _ = neomodel_db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|SAME)*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE
            (n:NeoCRE OR n = BaseStandard OR n = CompareStandard)
            AND NOT n.name IN $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )
    if r:
        return len(r), 1, "Tier 1 — strong links (LINKED_TO/SAME/AUTO)"

    # Tier 2 — adds CONTAINS
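    # CONTAINS is assumed here (from the relationship name) to carry the CRE
    # hierarchy, giving medium-strength paths.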
    r, _ = neomodel_db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|AUTOMATICALLY_LINKED_TO|SAME|CONTAINS)*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE
            (n:NeoCRE OR n = BaseStandard OR n = CompareStandard)
            AND NOT n.name IN $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )
    if r:
        return len(r), 2, "Tier 2 — medium links (adds CONTAINS)"

    # Tier 3 — wildcard fallback
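    # Same query as MODE A's expensive Query 1; only reached when no typed
    # path exists between the two standards.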
    r, _ = neomodel_db.cypher_query(
        """
        MATCH (BaseStandard:NeoStandard {name: $name1})
        MATCH (CompareStandard:NeoStandard {name: $name2})
        MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
        WITH p
        WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE
            (n:NeoCRE OR n = BaseStandard OR n = CompareStandard)
            AND NOT n.name IN $denylist)
        RETURN p
        """,
        {"name1": name_1, "name2": name_2, "denylist": denylist},
        resolve_objects=True,
    )
    return len(r), 3, "Tier 3 — wildcard fallback (no strong/medium paths found)"

def benchmark(name_1, name_2, runs=3):
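    # Timing uses time.perf_counter(), a monotonic high-resolution clock.
    # The first run per mode may be slower while Neo4j's page cache warms up;
    # averaging over `runs` partially smooths this out.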
    connect_neo4j()
    print(f"Benchmarking gap analysis: '{name_1}' ↔ '{name_2}'")
    print(f"Averaging over {runs} run(s) per mode\n")
    print("=" * 68)

    # MODE A — Original
    a_times, a_mems, a_paths, a_queries = [], [], 0, 0
    print("▶ MODE A — Original exhaustive (pre-PR #716 behaviour)...")
    for i in range(runs):
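        # tracemalloc tracks Python-heap allocations only (the client-side
        # cost of materialising result rows), not Neo4j server memory.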
        tracemalloc.start()
        t0 = time.perf_counter()
        a_paths, a_queries = run_original(name_1, name_2)
        elapsed = time.perf_counter() - t0
        _, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        a_times.append(elapsed)
        a_mems.append(peak / 1024 / 1024)
        print(f"   Run {i+1}: {elapsed:.3f}s | peak mem {a_mems[-1]:.2f} MB")

    avg_a_t = sum(a_times) / runs
    avg_a_m = sum(a_mems) / runs
    print()

    # MODE B — Optimized
    b_times, b_mems, b_paths, b_queries, b_tier = [], [], 0, 0, ""
    print("▶ MODE B — Optimized tiered pruning (GAP_ANALYSIS_OPTIMIZED=true)...")
    for i in range(runs):
        tracemalloc.start()
        t0 = time.perf_counter()
        b_paths, b_queries, b_tier = run_optimized(name_1, name_2)
        elapsed = time.perf_counter() - t0
        _, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        b_times.append(elapsed)
        b_mems.append(peak / 1024 / 1024)
        print(
            f"   Run {i+1}: {elapsed:.3f}s | peak mem {b_mems[-1]:.2f} MB | queries run: {b_queries}"
        )

    avg_b_t = sum(b_times) / runs
    avg_b_m = sum(b_mems) / runs

    t_pct = ((avg_a_t - avg_b_t) / avg_a_t * 100) if avg_a_t > 0 else 0
    m_pct = ((avg_a_m - avg_b_m) / avg_a_m * 100) if avg_a_m > 0 else 0
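    # Positive percentages mean MODE B improved on MODE A; the guards above
    # avoid division by zero on degenerate (empty or instant) runs.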

    direction = "faster" if t_pct >= 0 else "slower"
    print()
    print("=" * 68)
    print("RESULTS")
    print("=" * 68)
    print(f" Pair: '{name_1}' ↔ '{name_2}' | {runs} run(s)\n")
    print(f" {'Metric':<26} {'MODE A (original)':>18} {'MODE B (optimized)':>18}")
    print(f" {'-'*26} {'-'*18} {'-'*18}")
    print(f" {'Avg time (s)':<26} {avg_a_t:>18.3f} {avg_b_t:>18.3f}")
    print(f" {'Avg peak memory (MB)':<26} {avg_a_m:>18.2f} {avg_b_m:>18.2f}")
    print(f" {'Total paths returned':<26} {a_paths:>18} {b_paths:>18}")
    print(f" {'DB queries executed':<26} {a_queries:>18} {b_queries:>18}")
    print()
    print(f" ⚡ Mode B is {abs(t_pct):.1f}% {direction} than Mode A")
    print(
        f" 🧠 Mode B used {abs(m_pct):.1f}% {'less' if m_pct >= 0 else 'more'} peak memory"
    )
    print(f" 🔍 Mode B exited at: {b_tier}")
    print("=" * 68)

    # GitHub-ready table
    print()
    print("### GitHub-ready Benchmark Table\n")
    print(
        "| Metric | Original (`GAP_ANALYSIS_OPTIMIZED=false`) | Optimized (`GAP_ANALYSIS_OPTIMIZED=true`) | Δ |"
    )
    print(
        "|--------|------------------------------------------|------------------------------------------|---|"
    )
    print(
        f"| Avg query time | `{avg_a_t:.3f}s` | `{avg_b_t:.3f}s` | **{abs(t_pct):.1f}% {direction}** |"
    )
    print(
        f"| Peak memory | `{avg_a_m:.2f} MB` | `{avg_b_m:.2f} MB` | **{abs(m_pct):.1f}% {'less' if m_pct >= 0 else 'more'}** |"
    )
    print(f"| Paths returned | `{a_paths}` | `{b_paths}` | — |")
    print(
        f"| DB queries run | `{a_queries}` (always both) | `{b_queries}` (early exit at {b_tier.split('—')[0].strip()}) | — |"
    )


if __name__ == "__main__":
    p = argparse.ArgumentParser(description="Gap analysis benchmark — Issue #587")
    p.add_argument("--standard1", default="OWASP Top 10 2021")
    p.add_argument("--standard2", default="NIST 800-53")
    p.add_argument("--runs", type=int, default=3)
    p.add_argument("--list-standards", action="store_true")
    args = p.parse_args()

    if args.list_standards:
        list_available_standards()
    else:
        benchmark(args.standard1, args.standard2, args.runs)