Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented May 28, 2025

📄 3,914% (39.14x) speedup for find_node_with_highest_degree in src/dsa/nodes.py

⏱️ Runtime: 42.2 milliseconds → 1.05 milliseconds (best of 1092 runs)

📝 Explanation and details

Here is a rewritten version for improved speed.
Optimizations applied:

  • Precompute the incoming degree of each node in a single pass over connections, instead of scanning every target list once per node (O(N+E) instead of O(N^2) time).
  • Use a single loop to compute total degree (outgoing + incoming) for each node.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 41 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 1 Passed
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests Details
import pytest  # used for our unit tests
from src.dsa.nodes import find_node_with_highest_degree

# unit tests

# ---------------- BASIC TEST CASES ----------------

def test_single_node_no_connections():
    """Call the function with a single node and an empty connection map."""
    codeflash_output = find_node_with_highest_degree(["A"], {})

def test_two_nodes_one_connection():
    """Two nodes joined by a single directed edge A -> B.

    Stated expectation: A (1 outgoing) and B (1 incoming) tie at degree 1,
    and A is preferred because it comes first in the node list.
    """
    edges = {"A": ["B"]}
    codeflash_output = find_node_with_highest_degree(["A", "B"], edges)

def test_three_nodes_varied_connections():
    """Three nodes with edges A->B, A->C and B->C; C has an empty target list.

    Stated expectation (degree = outgoing + incoming):
    A = 2 + 0, B = 1 + 1, C = 0 + 2, so all tie at 2 and A, the first node
    in the list, should be returned.
    """
    vertices = ["A", "B", "C"]
    adjacency = {"A": ["B", "C"], "B": ["C"], "C": []}
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_disconnected_nodes():
    """Graph in which node C takes part in no connection at all.

    Stated expectation: A = 1 (out), B = 1 (in), C = 0; A and B tie and A
    comes first.
    """
    vertices = ["A", "B", "C"]
    codeflash_output = find_node_with_highest_degree(vertices, {"A": ["B"]})

def test_multiple_highest_degree_nodes():
    """Directed 3-cycle X -> Y -> Z -> X, used to exercise tie-breaking.

    Stated expectation: every node has 1 outgoing + 1 incoming = 2, so the
    tie is resolved by order in the node list.
    """
    cycle = ["X", "Y", "Z"]
    ring_edges = {"X": ["Y"], "Y": ["Z"], "Z": ["X"]}
    codeflash_output = find_node_with_highest_degree(cycle, ring_edges)

# ---------------- EDGE TEST CASES ----------------

def test_empty_nodes_list():
    """Call the function with no nodes and no connections."""
    codeflash_output = find_node_with_highest_degree([], {})

def test_empty_connections_dict():
    """Three nodes with an empty connection map.

    Stated expectation: every node has degree 0, so the first node is
    returned.
    """
    vertices = ["A", "B", "C"]
    codeflash_output = find_node_with_highest_degree(vertices, {})

def test_self_loop():
    """Node A points at itself; node B has no connections.

    Stated expectation: A = 1 (out) + 1 (in) = 2, B = 0.
    """
    self_edge = {"A": ["A"]}
    codeflash_output = find_node_with_highest_degree(["A", "B"], self_edge)

def test_node_with_only_incoming_connections():
    """Node C appears only as a target (of A and B), never as a source.

    Stated expectation: C = 0 + 2 = 2, A = 1 + 0 = 1, B = 1 + 0 = 1.
    """
    vertices = ["A", "B", "C"]
    adjacency = {"A": ["C"], "B": ["C"]}
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_node_with_only_outgoing_connections():
    """Node A points at B and C but is never a target itself.

    Stated expectation: A = 2 + 0 = 2, B = 0 + 1 = 1, C = 0 + 1 = 1.
    """
    vertices = ["A", "B", "C"]
    adjacency = {"A": ["B", "C"]}
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_node_not_in_connections_but_in_nodes():
    """Node C is listed as a node but absent from the connection map.

    Stated expectation: C = 0, A = 1 (out), B = 1 (in).
    """
    adjacency = {"A": ["B"]}
    codeflash_output = find_node_with_highest_degree(["A", "B", "C"], adjacency)

def test_duplicate_connections():
    """A's target list names B three times.

    Stated expectation: A = 3 (out), B = 3 (in); both tie at 3 and A comes
    first. This presumes each repeated occurrence is counted separately.
    """
    repeated_targets = {"A": ["B", "B", "B"]}
    codeflash_output = find_node_with_highest_degree(["A", "B"], repeated_targets)

def test_connections_to_nonexistent_nodes():
    """A's targets include C and D, which are not in the node list.

    Stated expectation: only A and B are candidates;
    A = 3 (out) + 0 (in) = 3, B = 0 (out) + 1 (in) = 1.
    """
    known_nodes = ["A", "B"]
    adjacency = {"A": ["B", "C", "D"]}
    codeflash_output = find_node_with_highest_degree(known_nodes, adjacency)

def test_all_nodes_with_zero_degree():
    """The only connection (D -> E) involves nodes outside the node list.

    Stated expectation: A, B and C all have degree 0, so A comes first.
    """
    unrelated_edges = {"D": ["E"]}
    codeflash_output = find_node_with_highest_degree(["A", "B", "C"], unrelated_edges)

def test_large_number_of_self_loops():
    """Every node has exactly one edge, pointing at itself.

    Stated expectation: each node = 1 (out) + 1 (in) = 2, so A comes first.
    """
    vertices = ["A", "B", "C"]
    loops = {"A": ["A"], "B": ["B"], "C": ["C"]}
    codeflash_output = find_node_with_highest_degree(vertices, loops)

# ---------------- LARGE SCALE TEST CASES ----------------

def test_large_complete_graph():
    """100 nodes where each one targets every node, itself included.

    Stated expectation: every node has n outgoing and n incoming edges
    (degree 2n), so all tie and the first node is returned.
    """
    size = 100
    vertices = [f"N{i}" for i in range(size)]
    # Give every source its own copy of the full node list as targets.
    adjacency = {}
    for source in vertices:
        adjacency[source] = list(vertices)
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_large_sparse_graph():
    """Chain of 1000 nodes in which each node targets only its successor.

    Stated expectation: the two end nodes have degree 1, every interior node
    has 1 (out) + 1 (in) = 2, and N1 is the first node with degree 2.
    """
    size = 1000
    vertices = [f"N{i}" for i in range(size)]
    # Pair each node with the next one; the last node gets no entry.
    chain = {src: [dst] for src, dst in zip(vertices, vertices[1:])}
    codeflash_output = find_node_with_highest_degree(vertices, chain)

def test_large_graph_with_hub():
    """A single "HUB" node targets each of 500 otherwise unconnected nodes.

    Stated expectation: HUB = n (out) + 0 (in) = n, every other node
    = 0 (out) + 1 (in) = 1.
    """
    spoke_count = 500
    spokes = [f"N{i}" for i in range(spoke_count)]
    adjacency = {"HUB": spokes}
    # HUB is placed at the front of the node list, ahead of the spokes.
    vertices = ["HUB", *spokes]
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_large_graph_many_isolated_nodes():
    """1000 nodes with only two edges: N0 -> N1 and N2 -> N3.

    Stated expectation: N0, N1, N2 and N3 each have degree 1, the rest 0,
    and N0 comes first among the degree-1 nodes.
    """
    vertices = [f"N{i}" for i in range(1000)]
    few_edges = {"N0": ["N1"], "N2": ["N3"]}
    codeflash_output = find_node_with_highest_degree(vertices, few_edges)

def test_large_graph_with_duplicate_and_self_loops():
    """300-node ring where each node lists itself and its successor twice.

    Each target list is [self, next, self, next], with the last node wrapping
    around to the first. Every node is built the same way, so the original
    test expects a tie that is resolved in favour of the first node.

    NOTE(review): the original comment tallied 4 outgoing + 2 incoming = 6
    per node; the exact figure depends on how the function counts self-loops
    and duplicate targets -- confirm against the implementation.
    """
    size = 300
    vertices = [f"N{i}" for i in range(size)]
    adjacency = {}
    for idx in range(size):
        me = f"N{idx}"
        successor = f"N{(idx + 1) % size}"
        adjacency[me] = [me, successor, me, successor]
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

import random  # used for large scale randomized tests

# imports
import pytest  # used for our unit tests
from src.dsa.nodes import find_node_with_highest_degree

# unit tests

# 1. Basic Test Cases

def test_single_node_no_connections():
    """Call the function with one node and an empty connection map."""
    codeflash_output = find_node_with_highest_degree(['A'], {})

def test_two_nodes_one_connection():
    """Two nodes with one directed edge A -> B.

    Stated expectation: A has 1 outgoing and B has 1 incoming, so both have
    degree 1 and A is chosen because it is first in the list.
    """
    single_edge = {'A': ['B']}
    codeflash_output = find_node_with_highest_degree(['A', 'B'], single_edge)

def test_three_nodes_varied_connections():
    """Three nodes with edges A->B, A->C, B->C and an empty list for C.

    Stated expectation: A = 2 out + 0 in, B = 1 out + 1 in, C = 0 out + 2 in;
    all have degree 2, so 'A' (first in the list) should be returned.
    """
    vertices = ['A', 'B', 'C']
    adjacency = {'A': ['B', 'C'], 'B': ['C'], 'C': []}
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_simple_bidirectional():
    """Two nodes that point at each other.

    Stated expectation: both have 1 in + 1 out = 2, so 'A' should be
    returned.
    """
    both_ways = {'A': ['B'], 'B': ['A']}
    codeflash_output = find_node_with_highest_degree(['A', 'B'], both_ways)

def test_node_with_self_loop():
    """A has a self-loop; B is present with an empty target list.

    Stated expectation: A = 1 out (self) + 1 in (self) = 2, B = 0.
    """
    adjacency = {'A': ['A'], 'B': []}
    codeflash_output = find_node_with_highest_degree(['A', 'B'], adjacency)

def test_tie_breaker_returns_first():
    """Directed 3-cycle X -> Y -> Z -> X, so all degrees are equal.

    Stated expectation: each node has 1 in + 1 out = 2 and the first node in
    the list should be returned.
    """
    ring = ['X', 'Y', 'Z']
    ring_edges = {'X': ['Y'], 'Y': ['Z'], 'Z': ['X']}
    codeflash_output = find_node_with_highest_degree(ring, ring_edges)

# 2. Edge Test Cases

def test_empty_nodes_list():
    """Empty node list combined with a non-empty connection map (A -> B)."""
    orphan_edges = {'A': ['B']}
    codeflash_output = find_node_with_highest_degree([], orphan_edges)

def test_nodes_not_in_connections():
    """Three nodes, none of which appears in the (empty) connection map.

    Stated expectation: all have degree 0, so 'A' should be returned.
    """
    vertices = ['A', 'B', 'C']
    codeflash_output = find_node_with_highest_degree(vertices, {})

def test_connections_to_nonexistent_nodes():
    """The connection map references C, which is not in the node list.

    C appears both as a source (C -> A) and as a target (of A and B).
    Stated expectation: only A and B are candidates;
    A = 2 out + 1 in (from C) = 3, B = 1 out + 1 in (from A) = 2.
    """
    known_nodes = ['A', 'B']
    adjacency = {'A': ['B', 'C'], 'B': ['C'], 'C': ['A']}
    codeflash_output = find_node_with_highest_degree(known_nodes, adjacency)

def test_duplicate_connections():
    """A lists B three times; B has an empty target list.

    Stated expectation: duplicates count once per occurrence, giving
    A = 3 out and B = 3 in; the tie should return 'A'.
    """
    adjacency = {'A': ['B', 'B', 'B'], 'B': []}
    codeflash_output = find_node_with_highest_degree(['A', 'B'], adjacency)

def test_self_loop_and_external():
    """X targets both itself and Y; Y has an empty target list.

    Stated expectation: X = 2 out + 1 in (self) = 3,
    Y = 0 out + 1 in (from X) = 1.
    """
    adjacency = {'X': ['X', 'Y'], 'Y': []}
    codeflash_output = find_node_with_highest_degree(['X', 'Y'], adjacency)

def test_all_nodes_disconnected():
    """Four nodes and an empty connection map.

    Stated expectation: all have degree 0, so 'A' should be returned.
    """
    isolated = ['A', 'B', 'C', 'D']
    codeflash_output = find_node_with_highest_degree(isolated, {})

def test_large_number_of_zero_degree_nodes():
    """100 nodes named node0..node99 with an empty connection map.

    Stated expectation: all have degree 0, so 'node0' should be returned.
    """
    isolated = [f'node{idx}' for idx in range(100)]
    codeflash_output = find_node_with_highest_degree(isolated, {})

def test_node_only_in_targets():
    """B appears only inside A's target list, never as a key of the map.

    Stated expectation: B = 1 in + 0 out, A = 1 out + 0 in; the tie should
    return 'A'.
    """
    adjacency = {'A': ['B']}
    codeflash_output = find_node_with_highest_degree(['A', 'B'], adjacency)

def test_node_with_multiple_incoming_from_same_source():
    """B is targeted three times by the same source A; B is not a map key.

    Stated expectation: B = 0 out + 3 in, A = 3 out + 0 in; the tie should
    return 'A'. This presumes each duplicate occurrence is counted.
    """
    repeated = {'A': ['B', 'B', 'B']}
    codeflash_output = find_node_with_highest_degree(['A', 'B'], repeated)

def test_nodes_with_non_string_names():
    """Same shape as the three-node test, but with integer node names.

    Stated expectation: 1 = 2 out + 0 in, 2 = 1 out + 1 in, 3 = 0 out + 2 in;
    the tie should return 1.
    """
    int_nodes = [1, 2, 3]
    int_edges = {1: [2, 3], 2: [3], 3: []}
    codeflash_output = find_node_with_highest_degree(int_nodes, int_edges)

# 3. Large Scale Test Cases

def test_large_complete_graph():
    """Complete directed graph on 100 nodes, without self-loops.

    Stated expectation: each node has N-1 out + N-1 in = 2*N-2, so the tie
    should return 'node0'.
    """
    count = 100
    vertices = [f'node{i}' for i in range(count)]
    adjacency = {}
    for source in vertices:
        # Every node except the source itself is a target.
        adjacency[source] = [other for other in vertices if other != source]
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)

def test_large_sparse_graph():
    """Chain of 1000 nodes where node i targets only node i+1.

    Stated expectation: node0 and the last node have degree 1, all others
    have 1 out + 1 in = 2, and node1 is the first node with degree 2.
    """
    count = 1000
    vertices = [f'node{i}' for i in range(count)]
    # zip stops at the shorter sequence, so the last node gets no entry.
    chain = {src: [dst] for src, dst in zip(vertices, vertices[1:])}
    codeflash_output = find_node_with_highest_degree(vertices, chain)

def test_large_star_graph():
    """Star on 500 nodes: 'node0' targets every other node.

    Stated expectation: the center has N-1 out + 0 in = N-1, every other
    node has 0 out + 1 in = 1.
    """
    count = 500
    vertices = [f'node{i}' for i in range(count)]
    hub = 'node0'
    leaves = [name for name in vertices if name != hub]
    codeflash_output = find_node_with_highest_degree(vertices, {hub: leaves})

def test_large_random_graph():
    """Run the function on a reproducible pseudo-random graph of 200 nodes.

    Each node gets between 0 and 5 distinct targets sampled from the full
    node list, so a node may target itself. No specific winner is expected;
    the call only has to complete. Per the original comment, the result is
    meant to be one of the nodes.
    """
    N = 200
    nodes = [f'node{i}' for i in range(N)]
    # A private, fixed-seed generator makes the graph identical on every run.
    # That keeps outputs from separate runs (e.g. original vs. optimized
    # implementation) comparable and leaves the global `random` state alone.
    rng = random.Random(0)
    connections = {}
    for node in nodes:
        connections[node] = rng.sample(nodes, rng.randint(0, 5))
    codeflash_output = find_node_with_highest_degree(nodes, connections)

def test_large_graph_with_tie():
    """100 nodes where only node0 and node1 have outgoing edges.

    node0 and node1 each target every node except themselves; all remaining
    nodes map to an empty list.
    Stated expectation: node0 and node1 have N-1 out + 1 in (from each
    other) = N, the others have 0 out + 2 in = 2, and the tie between node0
    and node1 should return 'node0'.
    """
    count = 100
    vertices = [f'node{i}' for i in range(count)]
    adjacency = {
        name: [other for other in vertices if other != name] if idx < 2 else []
        for idx, name in enumerate(vertices)
    }
    codeflash_output = find_node_with_highest_degree(vertices, adjacency)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

from src.dsa.nodes import find_node_with_highest_degree

def test_find_node_with_highest_degree():
    """Smoke-call the function with control-character node names.

    Only one target ('\\x02') matches an entry of the node list, and none of
    the connection keys do. The return value is not inspected.
    """
    vertices = ['\x02', '\x01\x00']
    adjacency = {
        '\x00\x00': [],
        '\x00\x00\x00': ['\x02'],
        '\x00': [],
        '\x01': [],
    }
    find_node_with_highest_degree(vertices, adjacency)

To edit these changes, run `git checkout codeflash/optimize-find_node_with_highest_degree-mb8doay0`, commit your edits, and push.

Codeflash

Here is a rewritten version for improved speed.  
**Optimizations applied:**
- Precompute the incoming degree of each node in a single pass over `connections`, instead of scanning every target list once per node (O(N+E) instead of O(N^2) time).
- Use a single loop to compute total degree (outgoing + incoming) for each node.
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label May 28, 2025
@codeflash-ai codeflash-ai bot requested a review from aseembits93 May 28, 2025 20:08
@KRRT7 KRRT7 closed this Jun 4, 2025
@codeflash-ai codeflash-ai bot deleted the codeflash/optimize-find_node_with_highest_degree-mb8doay0 branch June 4, 2025 07:33
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant