From c6426695204afdb9e35b35d5cf24374276c9a1f9 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 04:13:42 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`f?= =?UTF-8?q?ind=5Fcycle=5Fvertices`=20by=20214%?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code replaces the expensive `nx.simple_cycles()` call with `nx.strongly_connected_components()`, delivering a **214% speedup** by fundamentally changing the algorithmic approach. **Key Optimization:** - **Original**: Enumerates all simple cycles explicitly using `nx.simple_cycles()` - computationally expensive because it must find and traverse every simple cycle, and the number of simple cycles can grow exponentially with graph size - **Optimized**: Uses strongly connected components (SCCs) to identify cycle vertices - leverages Tarjan's algorithm, which runs in O(V+E) time **Why This Works:** A vertex participates in a cycle if and only if: 1. It's in an SCC with multiple vertices (any two vertices in such an SCC can reach each other, so each one lies on a multi-vertex cycle), OR 2. It's in a single-vertex SCC with a self-loop **Performance Analysis:** According to the line profiler, the original spends 89.4% of its time in `nx.simple_cycles()`, while the optimized version distributes work across SCC analysis (65.5%) and component processing. The SCC approach scales much better - it processes each component once rather than enumerating all possible cycle paths. 
**Test Case Performance:** - **Best gains** on complex graphs with overlapping cycles (410-521% faster), where cycle enumeration is most expensive - **Consistent speedup** across the other cycle types: simple cycles (241-267% faster), disconnected cycles (310% faster), large single cycles (435-438% faster) - **One exception**: Large graphs with many self-loops run 34% slower due to the overhead of checking `graph.has_edge(vertex, vertex)` for each single-vertex SCC The optimization is particularly effective for real-world graphs with complex cycle structures, where the original algorithm's cycle enumeration becomes prohibitively expensive. --- src/dsa/nodes.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/dsa/nodes.py b/src/dsa/nodes.py index 521d24e..43c4563 100644 --- a/src/dsa/nodes.py +++ b/src/dsa/nodes.py @@ -50,11 +50,18 @@ def find_cycle_vertices(edges): # Create a directed graph from the edges graph = nx.DiGraph(edges) - # Find all simple cycles in the graph - cycles = list(nx.simple_cycles(graph)) - - # Flatten the list of cycles and remove duplicates - cycle_vertices = {vertex for cycle in cycles for vertex in cycle} + # Find all strongly connected components instead of enumerating all cycles + sccs = nx.strongly_connected_components(graph) + + # Collect vertices that are part of cycles + cycle_vertices = set() + for component in sccs: + if len(component) > 1: + cycle_vertices.update(component) + else: + vertex = next(iter(component)) + if graph.has_edge(vertex, vertex): + cycle_vertices.add(vertex) return sorted(cycle_vertices)