Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Dec 22, 2025

📄 18,874% (188.74x) speedup for find_last_node in src/algorithms/graph.py

⏱️ Runtime: 64.7 milliseconds → 341 microseconds (best of 250 runs)

📝 Explanation and details

Correctness verification report:

| Test | Status |
| --- | --- |
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | ✅ 41 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage | 100.0% |
🌀 Click to see Generated Regression Tests
from __future__ import annotations

# imports
import pytest  # used for our unit tests
from src.algorithms.graph import find_last_node

# unit tests

# Basic Test Cases


def test_single_node_no_edges():
    """A lone node with no edges should be returned as the last node."""
    nodes = [{"id": 1, "data": "A"}]
    edges = []
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.29μs -> 1.00μs (29.2% faster)
    # Pin the expected value so the test fails on a regression instead of
    # silently passing.
    assert result == {"id": 1, "data": "A"}


def test_two_nodes_one_edge():
    """With a single edge 1 -> 2, the second node should be returned."""
    nodes = [{"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.88μs -> 1.12μs (66.7% faster)
    assert result == {"id": 2}


def test_three_nodes_chain():
    """In the chain 1 -> 2 -> 3, node 3 is the last node."""
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}]
    edges = [{"source": 1, "target": 2}, {"source": 2, "target": 3}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 2.21μs -> 1.25μs (76.6% faster)
    assert result == {"id": 3}


def test_multiple_terminal_nodes():
    """When several nodes have no outgoing edge, the first in list order wins.

    Only node 1 is an edge source here, so nodes 2 and 3 are both terminal;
    node 2 comes first in ``nodes`` and is the expected result.
    """
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}]
    edges = [{"source": 1, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.79μs -> 1.12μs (59.2% faster)
    assert result == {"id": 2}


def test_no_terminal_nodes():
    """In a two-node cycle every node has an outgoing edge, so None is expected."""
    nodes = [{"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2}, {"source": 2, "target": 1}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.83μs -> 1.17μs (57.1% faster)
    assert result is None


def test_node_with_multiple_outgoing_edges():
    """Node 1 fans out to 2 and 3; both targets are terminal.

    Neither node 2 nor node 3 appears as an edge source, so the first of
    them in ``nodes`` order (id=2) is the expected result.
    """
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}]
    edges = [{"source": 1, "target": 2}, {"source": 1, "target": 3}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.96μs -> 1.17μs (67.8% faster)
    assert result == {"id": 2}


def test_nodes_with_non_integer_ids():
    """String ids behave like integer ids: with A -> B, node B is last."""
    nodes = [{"id": "A"}, {"id": "B"}]
    edges = [{"source": "A", "target": "B"}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.92μs -> 1.17μs (64.2% faster)
    assert result == {"id": "B"}


# Edge Test Cases


def test_empty_nodes_and_edges():
    """With no nodes and no edges there is nothing to return, so None is expected."""
    nodes = []
    edges = []
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 791ns -> 875ns (9.60% slower)
    assert result is None


def test_edges_with_nonexistent_nodes():
    """Edges whose endpoints are not in ``nodes`` should not affect the result."""
    nodes = [{"id": 1}]
    edges = [{"source": 2, "target": 3}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.54μs -> 1.12μs (37.0% faster)
    # Node 1 is never an edge source, so it is the terminal node.
    assert result == {"id": 1}


def test_duplicate_node_ids():
    """Duplicate ids: the first terminal node in list order is still returned.

    Both nodes with id 1 match the edge source, leaving node 2 as the only
    node without an outgoing edge.
    """
    nodes = [{"id": 1}, {"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 2.04μs -> 1.17μs (75.0% faster)
    assert result == {"id": 2}


def test_edges_with_extra_keys():
    """Extra keys on an edge (here ``weight``) should be ignored."""
    nodes = [{"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2, "weight": 5}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.79μs -> 1.08μs (65.5% faster)
    assert result == {"id": 2}


def test_nodes_with_extra_keys():
    """Nodes carrying extra keys should be returned as-is, extra keys included."""
    nodes = [{"id": 1, "label": "start"}, {"id": 2, "label": "end"}]
    edges = [{"source": 1, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.83μs -> 1.12μs (62.9% faster)
    assert result == {"id": 2, "label": "end"}


def test_all_nodes_are_terminal():
    """With no edges every node is terminal; the first node should be returned."""
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}]
    edges = []
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.29μs -> 1.00μs (29.1% faster)
    assert result == {"id": 1}


def test_node_id_is_none():
    """A node whose id is None is the target of the only edge.

    None never appears as an edge source, so the None-id node is the first
    node without an outgoing edge.
    NOTE(review): expected value inferred from the "first node that is not
    an edge source" rule the other tests describe -- confirm against
    find_last_node.
    """
    nodes = [{"id": None}, {"id": 2}]
    edges = [{"source": 2, "target": None}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.54μs -> 1.08μs (42.4% faster)
    assert result == {"id": None}


def test_edge_source_is_none():
    """An edge whose source is None matches the node whose id is None.

    That leaves node 2 as the only node without an outgoing edge.
    NOTE(review): expected value inferred from the "first node that is not
    an edge source" rule the other tests describe -- confirm against
    find_last_node.
    """
    nodes = [{"id": None}, {"id": 2}]
    edges = [{"source": None, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 2.00μs -> 1.17μs (71.4% faster)
    assert result == {"id": 2}


def test_nodes_with_mixed_types():
    """Ids of mixed types (int, str, tuple) in the chain 1 -> "2" -> (3,)."""
    nodes = [{"id": 1}, {"id": "2"}, {"id": (3,)}]
    edges = [{"source": 1, "target": "2"}, {"source": "2", "target": (3,)}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 2.62μs -> 1.38μs (90.9% faster)
    # The tuple-id node is the only one that is never an edge source.
    assert result == {"id": (3,)}


# Large Scale Test Cases


def test_large_linear_chain():
    """Chain of 1000 nodes (0 -> 1 -> ... -> 999): node 999 is terminal."""
    nodes = [{"id": i} for i in range(1000)]
    edges = [{"source": i, "target": i + 1} for i in range(999)]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 18.5ms -> 55.7μs (33122% faster)
    assert result == {"id": 999}


def test_large_star_graph():
    """Star graph: node 0 points at every other node.

    Every node except 0 is terminal, so the first of them in list order
    (id=1) is expected.
    """
    nodes = [{"id": 0}] + [{"id": i} for i in range(1, 1000)]
    edges = [{"source": 0, "target": i} for i in range(1, 1000)]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 38.1μs -> 20.2μs (88.1% faster)
    assert result == {"id": 1}


def test_large_all_terminal():
    """1000 nodes and no edges: all are terminal, the first node is returned."""
    nodes = [{"id": i} for i in range(1000)]
    edges = []
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.33μs -> 1.08μs (23.2% faster)
    assert result == {"id": 0}


def test_large_no_terminal():
    """1000-node cycle: every node has an outgoing edge, so None is expected."""
    nodes = [{"id": i} for i in range(1000)]
    edges = [{"source": i, "target": (i + 1) % 1000} for i in range(1000)]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 18.4ms -> 55.5μs (33138% faster)
    assert result is None


def test_large_multiple_terminal_nodes():
    """Nodes 0..499 each have an outgoing edge; nodes 500..999 are terminal.

    The first terminal node in list order is id=500.
    """
    nodes = [{"id": i} for i in range(1000)]
    edges = [{"source": i, "target": i + 1} for i in range(500)]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 4.57ms -> 28.3μs (16041% faster)
    assert result == {"id": 500}


# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
from __future__ import annotations

# imports
import pytest  # used for our unit tests
from src.algorithms.graph import find_last_node

# unit tests

# -------------------------
# Basic Test Cases
# -------------------------


def test_single_node_no_edges():
    """One node and no edges: the only node should be returned."""
    nodes = [{"id": 1}]
    edges = []
    codeflash_output = find_last_node(nodes, edges)  # 1.33μs -> 958ns (39.1% faster)
    # Pin the expected value so the test fails on a regression.
    assert codeflash_output == {"id": 1}


def test_two_nodes_one_edge():
    """Two nodes joined by 1 -> 2: the last node is 2."""
    nodes = [{"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)  # 1.92μs -> 1.08μs (76.8% faster)
    assert codeflash_output == {"id": 2}


def test_three_nodes_linear_chain():
    """Chain 1 -> 2 -> 3: the last node is 3."""
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}]
    edges = [{"source": 1, "target": 2}, {"source": 2, "target": 3}]
    codeflash_output = find_last_node(nodes, edges)  # 2.21μs -> 1.25μs (76.6% faster)
    assert codeflash_output == {"id": 3}


def test_three_nodes_branch():
    """Tree 1 -> 2, 1 -> 3: both 2 and 3 are leaves; 2 is found first."""
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}]
    edges = [{"source": 1, "target": 2}, {"source": 1, "target": 3}]
    codeflash_output = find_last_node(nodes, edges)
    result = codeflash_output  # 1.92μs -> 1.17μs (64.3% faster)
    assert result == {"id": 2}


def test_no_edges_multiple_nodes():
    """Several nodes and no edges: the first node should be returned."""
    nodes = [{"id": "A"}, {"id": "B"}, {"id": "C"}]
    edges = []
    codeflash_output = find_last_node(nodes, edges)  # 1.25μs -> 958ns (30.5% faster)
    assert codeflash_output == {"id": "A"}


# -------------------------
# Edge Test Cases
# -------------------------


def test_empty_nodes_and_edges():
    """No nodes and no edges: None is expected."""
    nodes = []
    edges = []
    codeflash_output = find_last_node(nodes, edges)  # 833ns -> 875ns (4.80% slower)
    assert codeflash_output is None


def test_edges_but_no_nodes():
    """Edges without any nodes: there is no node to return, so None is expected."""
    nodes = []
    edges = [{"source": 1, "target": 2}]
    codeflash_output = find_last_node(nodes, edges)  # 708ns -> 1.00μs (29.2% slower)
    assert codeflash_output is None


def test_node_with_self_loop():
    """A single node with a self-loop has an outgoing edge, so None is expected."""
    nodes = [{"id": 1}]
    edges = [{"source": 1, "target": 1}]
    codeflash_output = find_last_node(nodes, edges)  # 1.42μs -> 1.04μs (35.9% faster)
    assert codeflash_output is None


def test_cycle_two_nodes():
    """Two-node cycle 1 -> 2 -> 1: there is no last node, so None is expected."""
    nodes = [{"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2}, {"source": 2, "target": 1}]
    codeflash_output = find_last_node(nodes, edges)  # 1.88μs -> 1.21μs (55.2% faster)
    assert codeflash_output is None


def test_multiple_leaves():
    """Tree 1 -> 2, 1 -> 3, 2 -> 4, 3 -> 5: leaves are 4 and 5.

    Node 4 comes first in ``nodes`` and is the expected result.
    """
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]
    edges = [
        {"source": 1, "target": 2},
        {"source": 1, "target": 3},
        {"source": 2, "target": 4},
        {"source": 3, "target": 5},
    ]
    codeflash_output = find_last_node(nodes, edges)  # 2.75μs -> 1.38μs (100% faster)
    assert codeflash_output == {"id": 4}


def test_disconnected_graph():
    """Two disconnected subgraphs 1 -> 2 and 3 -> 4: leaves are 2 and 4.

    Node 2 comes first in ``nodes`` and is the expected result.
    """
    nodes = [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}]
    edges = [
        {"source": 1, "target": 2},
        {"source": 3, "target": 4},
    ]
    codeflash_output = find_last_node(nodes, edges)  # 1.92μs -> 1.17μs (64.3% faster)
    assert codeflash_output == {"id": 2}


def test_edge_with_missing_source_key():
    """An edge dict that lacks the 'source' key must surface as a KeyError."""
    edges = [{"target": 2}]
    nodes = [{"id": 1}, {"id": 2}]
    expected_error = pytest.raises(KeyError)
    with expected_error:
        find_last_node(nodes, edges)  # 2.25μs -> 875ns (157% faster)


def test_edge_with_extra_keys():
    """Extra keys on an edge (here ``weight``) should not affect the result."""
    nodes = [{"id": 1}, {"id": 2}]
    edges = [{"source": 1, "target": 2, "weight": 10}]
    codeflash_output = find_last_node(nodes, edges)  # 1.92μs -> 1.17μs (64.3% faster)
    assert codeflash_output == {"id": 2}


def test_node_ids_are_strings():
    """String ids: with x -> y, node y is the last node."""
    nodes = [{"id": "x"}, {"id": "y"}]
    edges = [{"source": "x", "target": "y"}]
    codeflash_output = find_last_node(nodes, edges)  # 1.96μs -> 1.12μs (74.1% faster)
    assert codeflash_output == {"id": "y"}


def test_node_ids_are_mixed_types():
    """Ids must match exactly: the int 1 and the str "1" are different nodes.

    The only edge has source 1 (int), so the "1" (str) node has no outgoing
    edge and is the expected result.
    """
    nodes = [{"id": 1}, {"id": "1"}]
    edges = [{"source": 1, "target": "1"}]
    codeflash_output = find_last_node(nodes, edges)  # 2.00μs -> 1.08μs (84.7% faster)
    assert codeflash_output == {"id": "1"}


# -------------------------
# Large Scale Test Cases
# -------------------------


def test_large_linear_chain():
    """Large chain 1 -> 2 -> ... -> 1000: node 1000 is the only terminal node."""
    nodes = [{"id": i} for i in range(1, 1001)]
    edges = [{"source": i, "target": i + 1} for i in range(1, 1000)]
    codeflash_output = find_last_node(nodes, edges)  # 18.4ms -> 56.2μs (32605% faster)
    assert codeflash_output == {"id": 1000}


def test_large_star_graph():
    """Star 1 -> 2, 1 -> 3, ..., 1 -> 1000: leaves are 2..1000; 2 is expected."""
    nodes = [{"id": i} for i in range(1, 1001)]
    edges = [{"source": 1, "target": i} for i in range(2, 1001)]
    codeflash_output = find_last_node(nodes, edges)  # 38.1μs -> 20.5μs (85.8% faster)
    assert codeflash_output == {"id": 2}


def test_large_fully_connected():
    """Fully connected 20-node graph: every node points to every other node."""
    nodes = [{"id": i} for i in range(1, 21)]
    edges = [
        {"source": i, "target": j} for i in range(1, 21) for j in range(1, 21) if i != j
    ]
    # No node is a leaf, so nothing qualifies as the last node.
    codeflash_output = find_last_node(nodes, edges)  # 132μs -> 10.5μs (1156% faster)
    assert codeflash_output is None


def test_large_graph_with_many_leaves():
    """Node 1 points at 2..1000, so 2..1000 are all leaves; 2 is expected."""
    nodes = [{"id": i} for i in range(1, 1001)]
    edges = [{"source": 1, "target": i} for i in range(2, 1001)]
    codeflash_output = find_last_node(nodes, edges)  # 36.7μs -> 20.1μs (82.8% faster)
    assert codeflash_output == {"id": 2}


def test_large_graph_disconnected():
    """Two disconnected chains: 1 -> ... -> 500 and 501 -> ... -> 1000."""
    nodes = [{"id": i} for i in range(1, 1001)]
    edges = [{"source": i, "target": i + 1} for i in range(1, 500)] + [
        {"source": i, "target": i + 1} for i in range(501, 1000)
    ]
    # Leaves are 500 and 1000; 500 comes first in list order.
    codeflash_output = find_last_node(nodes, edges)  # 4.54ms -> 38.7μs (11643% faster)
    assert codeflash_output == {"id": 500}


# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run `git checkout codeflash/optimize-find_last_node-mjhqf7pb`, commit your edits, and push.

Codeflash

@codeflash-ai codeflash-ai bot requested a review from KRRT7 December 22, 2025 22:33
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Dec 22, 2025
@KRRT7 KRRT7 closed this Dec 23, 2025
@codeflash-ai codeflash-ai bot deleted the codeflash/optimize-find_last_node-mjhqf7pb branch December 23, 2025 05:48
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants