From ffae18d9058f72a88b0190932487643200b30161 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Wed, 30 Jul 2025 03:09:08 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`f?=
 =?UTF-8?q?ind=5Fnode=5Fwith=5Fhighest=5Fdegree`=20by=202,940%?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimized code achieves a **2939% speedup** by eliminating the nested loop that was causing O(n²) behavior in the original implementation.

**Key Optimization: Precomputing In-Degrees**

The original code counted incoming connections by iterating through all connection entries for every node:
```python
for src, targets in connections.items():  # O(C) connections
    if node in targets:  # O(T) targets per connection
        degree += 1
```
This created O(N × C × T) complexity where N=nodes, C=connections, T=targets per connection.

The optimized version precomputes all in-degrees in a single pass:
```python
in_degree = {}
for targets in connections.values():  # O(C) - once only
    for tgt in targets:  # O(T)
        in_degree[tgt] = in_degree.get(tgt, 0) + 1
```
Then simply looks up each node's in-degree in O(1): `in_deg = in_degree.get(node, 0)`

Note that the two counts agree only when no target list contains duplicate entries: `node in targets` adds at most one per source, whereas the precomputed tally adds one per occurrence.

**Performance Impact Analysis:**

From the line profiler results, the bottleneck was eliminated:
- **Original**: Lines with nested loops consumed 98.4% of runtime (52.2% + 46.2%)
- **Optimized**: The precomputation phase takes only 58.3% of total time (29% + 29.3%), but runs once instead of N times

**Complexity Improvement:**
- **Original**: O(N × C × T) where each node triggers a full scan of all connections
- **Optimized**: O(C × T + N) - single precomputation pass plus linear node processing

**Test Case Performance Patterns:**

The optimization shows dramatic improvements on larger, denser graphs:
- **Large complete graph (100 nodes)**: 1151% faster - eliminates O(n³) behavior
- **Large chain graph (1000 nodes)**: 13940% faster -
reduces O(n²) to O(n)
- **Large sparse graphs**: 2513% faster - benefits from single-pass preprocessing

For small graphs (≤3 nodes), the optimization shows modest 5-15% gains or even slight regressions due to preprocessing overhead, but this is negligible compared to the massive gains on realistic graph sizes.

The optimization is particularly effective for graphs with many connections or high in-degrees, where the original nested loop would perform many redundant scans.
---
 src/dsa/nodes.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/dsa/nodes.py b/src/dsa/nodes.py
index 521d24e..31ff6b0 100644
--- a/src/dsa/nodes.py
+++ b/src/dsa/nodes.py
@@ -93,15 +93,18 @@ def find_node_with_highest_degree(
     max_degree = -1
     max_degree_node = None
 
+    # Precompute in-degree for each node
+    in_degree = {}
+    for targets in connections.values():
+        for tgt in targets:
+            in_degree[tgt] = in_degree.get(tgt, 0) + 1
+
     for node in nodes:
-        degree = 0
-        # Count outgoing connections
-        degree += len(connections.get(node, []))
-
-        # Count incoming connections
-        for src, targets in connections.items():
-            if node in targets:
-                degree += 1
+        # Outgoing degree (connections from this node)
+        out_deg = len(connections.get(node, []))
+        # Incoming degree (connections to this node)
+        in_deg = in_degree.get(node, 0)
+        degree = out_deg + in_deg
 
         if degree > max_degree:
             max_degree = degree