Skip to content

Conversation

@codeflash-ai
Copy link
Contributor

@codeflash-ai codeflash-ai bot commented May 21, 2025

📄 90% (0.90x) speedup for function_has_return_statement in codeflash/discovery/functions_to_optimize.py

⏱️ Runtime: 2.54 milliseconds → 1.33 milliseconds (best of 220 runs)

📝 Explanation and details

Here’s a faster rewrite. The original code uses ast.walk, which traverses the entire subtree, yielding all nodes. Our target is to determine if any ast.Return exists; we can short-circuit the search as soon as we find one, so a custom DFS traversal is much faster.

This custom loop avoids constructing and yielding the full list of nodes, and stops immediately once a return is found, improving both speed and memory usage especially for large ASTs.
All existing comments preserved (none required updating).

Correctness verification report:

Test Status
⏪ Replay Tests 🔘 None Found
⚙️ Existing Unit Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
🌀 Generated Regression Tests 48 Passed
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests Details
from __future__ import annotations

import ast
from _ast import AsyncFunctionDef, FunctionDef

# imports
import pytest  # used for our unit tests
from codeflash.discovery.functions_to_optimize import \
    function_has_return_statement

# unit tests

# ---- Helper function to extract the first function node from source code ----
def get_first_function_node(src: str):
    """
    Parse *src* and return the first function definition found in it.

    "First" means the first ``ast.FunctionDef`` or ``ast.AsyncFunctionDef``
    yielded by ``ast.walk``, which is not guaranteed to be source order.

    Raises:
        ValueError: if the parsed module contains no function definition.
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    matches = (n for n in ast.walk(ast.parse(src)) if isinstance(n, function_types))
    found = next(matches, None)
    if found is None:
        raise ValueError("No function definition found in source.")
    return found

# ---- Basic Test Cases ----

def test_simple_function_with_return():
    # Smallest positive input: the body is a single `return 42`.
    source = """
def foo():
    return 42
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_simple_function_without_return():
    # Smallest negative input: the body is only `pass`.
    source = """
def bar():
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_multiple_returns():
    # Both arms of an if/else end in a return.
    source = """
def baz(x):
    if x > 0:
        return 1
    else:
        return -1
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_async_function_with_return():
    # `async def` whose body is a single return.
    source = """
async def afunc():
    return "async"
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_async_function_without_return():
    # `async def` whose body is only `pass`.
    source = """
async def afunc():
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

# ---- Edge Test Cases ----

def test_function_with_return_in_nested_function():
    # `outer` has no return of its own; the only return lives in the nested
    # `inner`. Whether that counts depends on whether the implementation
    # descends into nested definitions, which is not visible from this test.
    source = """
def outer():
    def inner():
        return 1
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_class_method():
    # The function under inspection is a method defined inside a class body.
    source = """
class MyClass:
    def method(self):
        return 123
"""
    # Look the method up by name instead of taking the first function found.
    method_node = next(
        (
            candidate
            for candidate in ast.walk(ast.parse(source))
            if isinstance(candidate, ast.FunctionDef) and candidate.name == "method"
        ),
        None,
    )
    if method_node is None:
        raise AssertionError("Method not found")
    codeflash_output = function_has_return_statement(method_node)

def test_function_with_return_in_lambda():
    # The only "returning" construct is a lambda, whose body is an expression
    # rather than a return statement.
    source = """
def foo():
    x = lambda y: y + 1
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_try_except_finally():
    # A return appears in each of the try, except and finally suites.
    source = """
def foo(x):
    try:
        return 1
    except Exception:
        return 2
    finally:
        return 3
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_yield_but_no_return():
    # Generator: it yields inside a loop but never uses `return`.
    source = """
def gen():
    for i in range(5):
        yield i
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_none():
    # Explicit `return None` is still a return statement in the source.
    source = """
def foo():
    return None
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_if_elif_else():
    # Every arm of an if/elif/else chain returns.
    source = """
def foo(x):
    if x == 1:
        return "a"
    elif x == 2:
        return "b"
    else:
        return "c"
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_only_docstring():
    # The body is nothing but a docstring expression.
    source = '''
def foo():
    """This is a docstring."""
'''
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_comprehension():
    # The body holds a list comprehension and a comment mentioning "return".
    # The word appears only in that comment, never as a statement.
    source = '''
def foo():
    x = [i for i in range(5)]
    # return is not a statement here
'''
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_nested_class():
    # `foo` defines a local class whose method returns; `foo` itself has no
    # direct return statement.
    source = """
def foo():
    class Bar:
        def baz(self):
            return 1
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_deeply_nested_function():
    # Two levels of nesting: `bar` and `baz` both return, while the outermost
    # `foo` ends in `pass`.
    source = """
def foo():
    def bar():
        def baz():
            return 42
        return baz
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_deeply_nested_scope_but_not_outer():
    # One level of nesting: only the inner `bar` returns.
    source = """
def foo():
    def bar():
        return 1
    pass
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_if_without_else():
    # The return is reachable only through the `if` branch; there is no else.
    source = """
def foo(x):
    if x:
        return 1
    # no return in else
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_return_in_while_loop():
    # The return sits inside the body of a `while True` loop.
    source = """
def foo():
    while True:
        return 1
"""
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_function_with_no_body():
    # Hand-built FunctionDef with an empty `body` list. The parser would never
    # produce this, so it exercises robustness against synthetic nodes.
    no_arguments = ast.arguments(
        posonlyargs=[],
        args=[],
        kwonlyargs=[],
        kw_defaults=[],
        defaults=[],
    )
    empty_def = ast.FunctionDef(
        name="empty",
        args=no_arguments,
        body=[],
        decorator_list=[],
        lineno=1,
        col_offset=0,
        end_lineno=1,
        end_col_offset=0,
        type_comment=None,
    )
    codeflash_output = function_has_return_statement(empty_def)

# ---- Large Scale Test Cases ----

def test_large_function_with_many_statements_and_one_return():
    # 999 `pass` lines followed by a single return as the last statement.
    filler = "\n".join("    pass" for _ in range(999))
    source = f"def foo():\n{filler}\n    return 1"
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_large_function_with_no_return():
    # 1000 `pass` lines and nothing else.
    filler = "\n".join("    pass" for _ in range(1000))
    source = f"def foo():\n{filler}"
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_large_function_with_return_in_middle():
    # The return is sandwiched between 500 leading and 499 trailing `pass` lines.
    leading = "\n".join("    pass" for _ in range(500))
    trailing = "\n".join("    pass" for _ in range(499))
    source = f"def foo():\n{leading}\n    return 42\n{trailing}"
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_large_nested_functions_with_return_in_innermost():
    # `foo` wraps a chain of 10 nested defs (f0 .. f9). Only the innermost body
    # returns; every enclosing level is closed with a `pass`.
    indent = "    "
    depth = 10
    nested_headers = [indent * (level + 1) + f"def f{level}():" for level in range(depth)]
    innermost_body = indent * (depth + 1) + "return 99"
    closing_passes = [indent * (level + 1) + "pass" for level in reversed(range(depth))]
    source = "\n".join(["def foo():", *nested_headers, innermost_body, *closing_passes])
    codeflash_output = function_has_return_statement(get_first_function_node(source))

def test_large_function_with_many_branches_and_returns():
    # 500 independent `if x == i: return i` statements (not an elif chain),
    # followed by a fallback return.
    branches = "".join(f"    if x == {i}:\n        return {i}\n" for i in range(500))
    source = "def foo(x):\n" + branches + "    return -1"
    codeflash_output = function_has_return_statement(get_first_function_node(source))



from __future__ import annotations

import ast
from _ast import AsyncFunctionDef, FunctionDef

# imports
import pytest  # used for our unit tests
from codeflash.discovery.functions_to_optimize import \
    function_has_return_statement

# unit tests

# Helper function to extract the first (Async)FunctionDef node from code
def get_first_function_node(source: str) -> FunctionDef | AsyncFunctionDef:
    """
    Parse *source* and return the first function definition found in it.

    "First" means the first ``FunctionDef`` or ``AsyncFunctionDef`` yielded by
    ``ast.walk``, which is not guaranteed to be source order.

    Raises:
        ValueError: if the parsed module contains no function definition.
    """
    for candidate in ast.walk(ast.parse(source)):
        if not isinstance(candidate, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        return candidate
    raise ValueError("No function definition found in source code.")

# 1. Basic Test Cases

def test_function_with_simple_return():
    # Body is a single `return 42`.
    code = "def foo():\n    return 42"
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_without_return():
    # Body is only `pass`.
    code = "def foo():\n    pass"
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_multiple_returns():
    # Both arms of an if/else return.
    code = "def foo(x):\n    if x:\n        return 1\n    else:\n        return 0"
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_none():
    # Explicit `return None`.
    code = "def foo():\n    return None"
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_async_function_with_return():
    # `async def` with a single return.
    code = "async def foo():\n    return 'async'"
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_async_function_without_return():
    # `async def` whose body is only `pass`.
    code = "async def foo():\n    pass"
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

# 2. Edge Test Cases

def test_function_with_return_in_nested_function():
    # `outer` has no return statement of its own; the only one is in the nested
    # `inner`. How that is classified depends on the implementation under test.
    code = """
def outer():
    def inner():
        return 1
    pass
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_in_nested_if():
    # One return is two `if` levels deep; a bare `return` ends the function.
    code = """
def foo(x):
    if x > 0:
        if x < 10:
            return x
    return
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_in_loop():
    # A conditional return inside a for loop, plus a trailing `return None`.
    code = """
def foo(lst):
    for item in lst:
        if item == 0:
            return item
    return None
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_yield_but_no_return():
    # Generator that yields in a loop and never uses `return`.
    code = """
def foo():
    for i in range(10):
        yield i
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_in_try_except_finally():
    # A return appears in each of the try, except and finally suites.
    code = """
def foo(x):
    try:
        return x
    except Exception:
        return -1
    finally:
        return 0
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_only_docstring():
    # The body is nothing but a docstring expression.
    code = '''
def foo():
    """This is a docstring."""
'''
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_in_comprehension():
    # The body is a bare list comprehension; no return statement is present.
    code = """
def foo():
    [x for x in range(10)]
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_in_lambda_inside():
    # The body assigns a lambda (an expression, not a return statement) and
    # then passes.
    code = """
def foo():
    x = lambda y: y + 1
    pass
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_in_class_method():
    # The function under inspection is a method defined inside a class body.
    code = """
class Bar:
    def foo(self):
        return 123
"""
    # Select the method by name from the parsed module.
    target = next(
        (
            candidate
            for candidate in ast.walk(ast.parse(code))
            if isinstance(candidate, ast.FunctionDef) and candidate.name == "foo"
        ),
        None,
    )
    if target is None:
        raise AssertionError("Method not found")
    codeflash_output = function_has_return_statement(target)

def test_function_with_return_as_expression_statement():
    # Variable names merely start with "return_"; the body contains
    # assignments only, no return statement.
    code = """
def foo():
    return_var = 5
    return_var2 = 10
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_early_return_and_code_after():
    # An early return inside an `if`, followed by more statements.
    code = """
def foo():
    if True:
        return 1
    x = 2
    y = 3
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

# 3. Large Scale Test Cases

def test_large_function_with_many_statements_and_one_return():
    # 500 indented assignments followed by one return as the last statement.
    assignments = "\n".join(f"    x{i} = {i}" for i in range(500))
    code = f"""
def foo():
{assignments}
    return 'done'
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_large_function_with_no_return():
    # 500 indented assignments followed by `pass`; no return anywhere.
    assignments = "\n".join(f"    x{i} = {i}" for i in range(500))
    code = f"""
def foo():
{assignments}
    pass
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_large_function_with_many_returns():
    # 100 independent `if x == i: return i` statements, then a fallback return.
    # Each generated chunk ends with a newline, so a blank line precedes the
    # final return in the assembled source.
    branches = "".join(f"    if x == {i}:\n        return {i}\n" for i in range(100))
    code = f"""
def foo(x):
{branches}
    return -1
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_large_async_function_with_return():
    # Async variant: 500 indented assignments followed by one return.
    assignments = "\n".join(f"    x{i} = {i}" for i in range(500))
    code = f"""
async def foo():
{assignments}
    return 'async done'
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_many_nested_functions_and_only_inner_returns():
    # `outer` defines 10 sibling inner functions that each return; `outer`
    # itself ends in `pass`.
    inner_defs = [f"    def inner_{k}():\n        return {k}" for k in range(10)]
    inner_block = "\n".join(inner_defs)
    code = f"""
def outer():
{inner_block}
    pass
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)

def test_function_with_many_nested_functions_and_outer_return():
    # `outer` defines 10 sibling inner functions that each return, and also
    # has a direct return of its own.
    inner_defs = [f"    def inner_{k}():\n        return {k}" for k in range(10)]
    inner_block = "\n".join(inner_defs)
    code = f"""
def outer():
{inner_block}
    return 42
"""
    target = get_first_function_node(code)
    codeflash_output = function_has_return_statement(target)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes git checkout codeflash/optimize-function_has_return_statement-maxgutzd and push.

Codeflash

Here’s a faster rewrite. The original code uses `ast.walk`, which traverses the entire subtree, yielding all nodes. Our target is to determine if any `ast.Return` exists; we can short-circuit the search as soon as we find one, so a custom DFS traversal is much faster.


This custom loop avoids constructing and yielding the full list of nodes, and stops immediately once a return is found, improving both speed and memory usage especially for large ASTs.  
All existing comments preserved (none required updating).
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label May 21, 2025
@codeflash-ai codeflash-ai bot requested a review from KRRT7 May 21, 2025 04:52
@KRRT7
Copy link
Contributor

KRRT7 commented May 21, 2025

@misrasaurabh1 what do you think? I like it.

@misrasaurabh1
Copy link
Contributor

yeah the testing seems strong too, we can accept

@KRRT7 KRRT7 merged commit 165b5fb into tracer-optimization May 21, 2025
16 checks passed
@KRRT7 KRRT7 deleted the codeflash/optimize-function_has_return_statement-maxgutzd branch May 21, 2025 05:09
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants