Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Jun 1, 2025

📄 29% (0.29x) speedup for downscale_height_and_width in src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py

⏱️ Runtime : 40.0 microseconds → 31.0 microseconds (best of 593 runs)

📝 Explanation and details

Explanation of optimizations:

  • Store scale_factor * scale_factor in a local variable sf2, to avoid redundant exponentiation.
  • Use the "ceiling division" trick -(-x // y) to combine floor division and increment for remainder into a single (and branchless) step, thus reducing the need for modulo and if-statements.
  • This code is faster due to minimized operations and no branching on the critical path.
  • Return value and behavior remain identical to the original.
  • Comment retained with added note for the optimization applied.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 52 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests Details
import pytest  # used for our unit tests
from src.diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2_controlnet import \
    downscale_height_and_width

# unit tests

# ========== BASIC TEST CASES ==========

@pytest.mark.parametrize(
    "height, width, scale_factor, expected",
    [
        # Each dimension maps to ceil(dim / scale_factor**2) * scale_factor.
        # Exact multiple of 8**2 = 64: 64 // 64 = 1 -> 1 * 8
        (64, 64, 8, (8, 8)),
        # Not divisible, rounds up: 65 // 64 = 1, remainder 1 -> 2 * 8
        (65, 65, 8, (16, 16)),
        # Different height and width: 128 -> 2 * 8, 64 -> 1 * 8
        (128, 64, 8, (16, 8)),
        # scale_factor=1: divisor and multiplier are both 1, input is returned unchanged
        (32, 32, 1, (32, 32)),
        # scale_factor=2, divisible: 16 // 4 = 4 -> 4 * 2
        (16, 16, 2, (8, 8)),
        # scale_factor=2, not divisible: 17 // 4 = 4, remainder 1 -> 5 * 2
        (17, 17, 2, (10, 10)),
        # scale_factor=4, divisible: 64 // 16 = 4 -> 4 * 4, 32 // 16 = 2 -> 2 * 4
        (64, 32, 4, (16, 8)),
        # scale_factor=4, not divisible: 65 -> 5 * 4, 33 -> 3 * 4
        (65, 33, 4, (20, 12)),
        # Large but still basic: 256 // 64 = 4 -> 4 * 8
        (256, 256, 8, (32, 32)),
    ]
)
def test_downscale_height_and_width_basic(height, width, scale_factor, expected):
    """Check typical inputs against hand-computed results.

    The expected tuples follow the arithmetic spelled out elsewhere in this
    module: divide each dimension by ``scale_factor**2``, round up, then
    multiply by ``scale_factor``.
    """
    # The capture line is kept verbatim: codeflash_output is what the harness
    # compares between the original and the optimized implementation.
    codeflash_output = downscale_height_and_width(height, width, scale_factor); result = codeflash_output
    assert result == expected

# ========== EDGE TEST CASES ==========

@pytest.mark.parametrize(
    "height, width, scale_factor, expected",
    [
        # Each dimension maps to ceil(dim / scale_factor**2) * scale_factor.
        # Minimum size, height and width = 0
        (0, 0, 8, (0, 0)),
        # Minimum non-zero size: 1 // 64 = 0, remainder 1 -> 1 * 8
        (1, 1, 8, (8, 8)),
        # Height and width < scale_factor**2 (16): rounds up to 1 -> 1 * 4
        (10, 10, 4, (4, 4)),
        # scale_factor**2 - 1: 63 // 64 = 0, remainder 63 -> 1 * 8
        (63, 63, 8, (8, 8)),
        # Exactly scale_factor**2: 64 // 64 = 1, remainder 0 -> 1 * 8
        (64, 64, 8, (8, 8)),
        # scale_factor**2 + 1: 65 // 64 = 1, remainder 1 -> 2 * 8
        (65, 65, 8, (16, 16)),
        # Large scale_factor, small input: 5 // 256 = 0, remainder 5 -> 1 * 16
        (5, 5, 16, (16, 16)),
        # scale_factor=1, small input: returned unchanged
        (3, 7, 1, (3, 7)),
        # scale_factor larger than input: 5 // 100 = 0, remainder 5 -> 1 * 10
        (5, 5, 10, (10, 10)),
        # scale_factor=2, height=0, width=1
        (0, 1, 2, (0, 2)),
        # scale_factor=2, height=1, width=0
        (1, 0, 2, (2, 0)),
        # scale_factor=2, height=0, width=0
        (0, 0, 2, (0, 0)),
    ]
)
def test_downscale_height_and_width_edge(height, width, scale_factor, expected):
    """Check edge cases: zeros, tiny inputs, and scale_factor**2 larger than the input."""
    # The capture line is kept verbatim: codeflash_output is what the harness
    # compares between the original and the optimized implementation.
    codeflash_output = downscale_height_and_width(height, width, scale_factor); result = codeflash_output
    assert result == expected

def test_downscale_height_and_width_invalid_scale_factor():
    """Exercise degenerate scale factors: zero and negative.

    Only ``ZeroDivisionError`` is checked (for ``scale_factor=0``). The call
    with a negative ``scale_factor`` is expected to complete; its result is
    captured in ``codeflash_output`` but not asserted here.
    """
    # scale_factor=0: division by zero
    with pytest.raises(ZeroDivisionError):
        downscale_height_and_width(10, 10, 0)
    # scale_factor < 0: scale_factor**2 is positive, so the division itself is well-defined
    codeflash_output = downscale_height_and_width(10, 10, -2); result = codeflash_output

@pytest.mark.parametrize(
    "height, width, scale_factor",
    [
        ("10", 10, 2),  # str height
        (10, "10", 2),  # str width
        (10, 10, "2"),  # str scale_factor
    ]
)
def test_downscale_height_and_width_type_errors(height, width, scale_factor):
    """Check that string inputs raise TypeError.

    Only ``str`` arguments are listed: strings do not support the integer
    arithmetic the function performs. Floats do support ``//``, ``%`` and
    ``*``, so they cannot raise ``TypeError`` and are covered separately below.
    """
    with pytest.raises(TypeError):
        downscale_height_and_width(height, width, scale_factor)


@pytest.mark.parametrize(
    "height, width, scale_factor, expected",
    [
        # 10.5 // 4 = 2.0 with a non-zero remainder -> 3.0 * 2; 10 // 4 = 2, remainder 2 -> 3 * 2
        (10.5, 10, 2, (6.0, 6)),  # float height
        (10, 10.5, 2, (6, 6.0)),  # float width
    ]
)
def test_downscale_height_and_width_float_inputs(height, width, scale_factor, expected):
    """Check that float dimensions are processed arithmetically rather than rejected."""
    # The capture line mirrors the other tests so the harness can compare
    # the original and optimized implementations on these inputs too.
    codeflash_output = downscale_height_and_width(height, width, scale_factor); result = codeflash_output
    assert result == expected

# ========== LARGE SCALE TEST CASES ==========

@pytest.mark.parametrize(
    "height, width, scale_factor",
    [
        # Just under 1000 in each dimension; 999 is not a multiple of 8**2 = 64
        (999, 999, 8),
        # 1000 is not a multiple of 64 either
        (1000, 1000, 8),
        # Large, non-square (512 is a multiple of 64, 987 is not)
        (512, 987, 8),
        # Large, scale_factor=16 (divisor is 16**2 = 256)
        (960, 960, 16),
        # 1024 x 768 (original note: sized to stay under 100MB if used as a tensor shape -- TODO confirm)
        (1024, 768, 8),
    ]
)
def test_downscale_height_and_width_large(height, width, scale_factor):
    """Run the function on larger dimensions.

    No expected values are listed; the result is only captured in
    ``codeflash_output`` so the harness can compare the original and
    optimized implementations.
    """
    codeflash_output = downscale_height_and_width(height, width, scale_factor); result = codeflash_output

def test_downscale_height_and_width_large_variety():
    """Sweep a grid of large inputs: 4 heights x 4 widths x 3 scale factors = 48 calls.

    Results are only captured in ``codeflash_output``; nothing is asserted here.
    """
    # range(900, 1000, 25) yields 900, 925, 950, 975
    for h in range(900, 1000, 25):
        for w in range(900, 1000, 25):
            for sf in (4, 8, 16):
                codeflash_output = downscale_height_and_width(h, w, sf); res = codeflash_output

# ========== FUNCTIONALITY TESTS FOR MUTATION RESISTANCE ==========

def test_mutation_resistance_divisibility():
    """Exercise inputs around a multiple of scale_factor**2 (floor division plus round-up).

    Outputs are only captured in ``codeflash_output``; the expected values are
    given in the comments for reference.
    """
    # scale_factor=8, so the divisor is scale_factor**2 = 64.
    # 65//64=1, 65%64=1 -> rounds up to 2, result=2*8=16, i.e. (16, 16)
    codeflash_output = downscale_height_and_width(65, 65, 8)
    # 128//64=2, 128%64=0 -> stays 2, result=2*8=16, i.e. (16, 16)
    codeflash_output = downscale_height_and_width(128, 128, 8)  # exact multiple of 64: no round-up
    # The result is new_height * scale_factor, not the input size, so 128 mapping to 16 is expected.
    # 127//64=1, 127%64=63 -> rounds up to 2, result=2*8=16, i.e. (16, 16)
    codeflash_output = downscale_height_and_width(127, 127, 8)

def test_mutation_resistance_rounding():
    """Exercise round-up behaviour for inputs that are not multiples of 64.

    Outputs are only captured in ``codeflash_output``; the expected values are
    given in the comments for reference.
    """
    # 70//64=1, 70%64=6, so new_height=2, result=2*8=16 -> (16, 16)
    codeflash_output = downscale_height_and_width(70, 70, 8)
    # 130//64=2, 130%64=2, so new_height=3, result=3*8=24 -> (24, 24)
    codeflash_output = downscale_height_and_width(130, 130, 8)
    # 255//64=3, 255%64=63, so new_height=4, result=4*8=32 -> (32, 32)
    codeflash_output = downscale_height_and_width(255, 255, 8)

def test_mutation_resistance_asymmetry():
    """Exercise height and width with different divisibility in each position.

    Outputs are only captured in ``codeflash_output``; the expected values are
    given in the comments for reference.
    """
    # height=70, width=128, scale_factor=8
    # height: 70//64=1, 70%64=6 -> new_height=2, 2*8=16
    # width: 128//64=2, 128%64=0 -> new_width=2, 2*8=16
    codeflash_output = downscale_height_and_width(70, 128, 8)
    # height=128, width=70 -> (16, 16) by the same arithmetic, swapped
    codeflash_output = downscale_height_and_width(128, 70, 8)
    # height=64, width=70 -> 64//64=1, 64%64=0 -> 8; 70 -> 16; i.e. (8, 16)
    codeflash_output = downscale_height_and_width(64, 70, 8)
    # height=70, width=64 -> (16, 8)
    codeflash_output = downscale_height_and_width(70, 64, 8)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

import pytest  # used for our unit tests
from src.diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2_controlnet import \
    downscale_height_and_width

# unit tests

# ---------------------------
# 1. BASIC TEST CASES
# ---------------------------

def test_exact_multiple_of_scale_factor_squared():
    # height and width are exact multiples of scale_factor**2 (8**2=64)
    # No round-up applies: each dimension becomes (dim // 64) * 8, not the input value
    # 64 -> 8, 128 -> 16, so (8, 16)
    codeflash_output = downscale_height_and_width(64, 128, scale_factor=8)
    # (16, 8)
    codeflash_output = downscale_height_and_width(128, 64, scale_factor=8)
    # 256 // 64 = 4 -> 4*8 = 32, so (32, 32)
    codeflash_output = downscale_height_and_width(256, 256, scale_factor=8)

def test_not_multiple_of_scale_factor_squared():
    # height and width not multiples of scale_factor**2 (64): the quotient is rounded up
    # Outputs are only captured in codeflash_output; expected values are noted per call.
    # 70 // 64 = 1, 70 % 64 = 6 -> new_height = 2*8 = 16
    codeflash_output = downscale_height_and_width(70, 70, scale_factor=8)
    # 130 // 64 = 2, 130 % 64 = 2 -> new_height = 3*8 = 24
    codeflash_output = downscale_height_and_width(130, 130, scale_factor=8)
    # 65 // 64 = 1, 65 % 64 = 1 -> new_height = 2*8 = 16
    codeflash_output = downscale_height_and_width(65, 65, scale_factor=8)

def test_scale_factor_1():
    # scale_factor=1: the divisor (1**2) and the multiplier are both 1,
    # so the input is returned unchanged: (10, 20) and (100, 200)
    codeflash_output = downscale_height_and_width(10, 20, scale_factor=1)
    codeflash_output = downscale_height_and_width(100, 200, scale_factor=1)

def test_scale_factor_2():
    # scale_factor=2, scale_factor**2=4
    # 7 // 4 = 1, 7 % 4 = 3 -> new_height = 2*2=4, so (4, 4)
    codeflash_output = downscale_height_and_width(7, 7, scale_factor=2)
    # 8 // 4 = 2, 8%4=0 -> new_height = 2*2=4, so (4, 4)
    codeflash_output = downscale_height_and_width(8, 8, scale_factor=2)

def test_non_square_input():
    # Non-square input, different height and width (divisor is 8**2 = 64)
    # 70 -> 2*8 = 16, 130 -> 3*8 = 24, so (16, 24)
    codeflash_output = downscale_height_and_width(70, 130, scale_factor=8)
    # Swapped: (24, 16)
    codeflash_output = downscale_height_and_width(130, 70, scale_factor=8)

# ---------------------------
# 2. EDGE TEST CASES
# ---------------------------

def test_zero_height_and_width():
    # Zero height and width: 0 // 64 = 0, 0 % 64 = 0 -> no round-up, so (0, 0)
    codeflash_output = downscale_height_and_width(0, 0, scale_factor=8)

def test_height_or_width_zero():
    # One dimension zero, one nonzero; the zero dimension stays 0
    # 64 // 64 = 1 -> 1*8 = 8, so (0, 8)
    codeflash_output = downscale_height_and_width(0, 64, scale_factor=8)
    # (8, 0)
    codeflash_output = downscale_height_and_width(64, 0, scale_factor=8)

def test_height_or_width_less_than_scale_factor_squared():
    # Both less than scale_factor**2 (64): any non-zero value rounds up to one block
    # 1 // 64 = 0, 1%64=1 -> new_height = 1*8=8
    codeflash_output = downscale_height_and_width(1, 1, scale_factor=8)
    # 63 // 64 = 0, 63%64=63 -> new_height = 1*8=8
    codeflash_output = downscale_height_and_width(63, 63, scale_factor=8)
    # 0 // 64 = 0, 0%64=0 -> new_height = 0*8=0, so (0, 8)
    codeflash_output = downscale_height_and_width(0, 63, scale_factor=8)
    # Swapped: (8, 0)
    codeflash_output = downscale_height_and_width(63, 0, scale_factor=8)

def test_height_and_width_just_above_multiple():
    # Just above a multiple of scale_factor**2: a remainder of 1 is enough to round up
    # 65 // 64 = 1, 65%64=1 -> new_height = 2*8=16
    codeflash_output = downscale_height_and_width(65, 65, scale_factor=8)
    # 129 // 64 = 2, 129%64=1 -> new_height = 3*8=24
    codeflash_output = downscale_height_and_width(129, 129, scale_factor=8)

def test_minimum_scale_factor():
    # scale_factor=1: divisor and multiplier are both 1, so the input is returned
    # unchanged: (0, 0), (1, 1) and (100, 100)
    codeflash_output = downscale_height_and_width(0, 0, scale_factor=1)
    codeflash_output = downscale_height_and_width(1, 1, scale_factor=1)
    codeflash_output = downscale_height_and_width(100, 100, scale_factor=1)

def test_large_scale_factor():
    # Very large scale_factor (100, so the divisor is 100**2 = 10000)
    # 10 // 100**2 = 0, 10%10000=10 -> new_height = 1*100=100, so (100, 100)
    codeflash_output = downscale_height_and_width(10, 10, scale_factor=100)
    # 10000 // 100**2 = 1, 10000%10000=0 -> new_height = 1*100=100, so (100, 100)
    codeflash_output = downscale_height_and_width(10000, 10000, scale_factor=100)

def test_negative_inputs():
    # Negative height or width: no exception is expected; results are only captured.
    # Python floor division rounds toward negative infinity, so with divisor 64:
    # -64 // 64 = -1, -64 % 64 = 0 -> -1*8 = -8, so (-8, 8)
    codeflash_output = downscale_height_and_width(-64, 64, scale_factor=8)
    # (8, -8)
    codeflash_output = downscale_height_and_width(64, -64, scale_factor=8)
    # -65 // 64 = -2, -65 % 64 = 63 -> rounds up to -1 -> -1*8 = -8, so (-8, -8)
    codeflash_output = downscale_height_and_width(-65, -65, scale_factor=8)

def test_zero_scale_factor():
    # scale_factor=0 makes the divisor (scale_factor squared) zero,
    # so the call should raise ZeroDivisionError
    with pytest.raises(ZeroDivisionError):
        downscale_height_and_width(10, 10, scale_factor=0)


def test_non_numeric_inputs():
    # String arguments should raise TypeError; each position is checked on its own
    # so one failing argument cannot mask another.
    with pytest.raises(TypeError):
        downscale_height_and_width("10", 20, scale_factor=8)  # str height
    with pytest.raises(TypeError):
        downscale_height_and_width(10, "20", scale_factor=8)  # str width
    with pytest.raises(TypeError):
        downscale_height_and_width(10, 20, scale_factor="8")  # str scale_factor

# ---------------------------
# 3. LARGE SCALE TEST CASES
# ---------------------------

def test_large_inputs_exact_multiple():
    # Large input, exact multiple of scale_factor**2 (8**2 = 64), so no round-up applies
    # 640 // 64 = 10 -> new_height = 10*8 = 80
    # 960 // 64 = 15 -> new_width = 15*8 = 120
    height = 64 * 10
    width = 64 * 15
    scale_factor = 8
    # The result is the block count times scale_factor, not the input size.
    expected = (10 * scale_factor, 15 * scale_factor)
    codeflash_output = downscale_height_and_width(height, width, scale_factor=scale_factor)
    assert codeflash_output == expected

def test_large_inputs_not_multiple():
    # Large input, not a multiple of scale_factor**2 (64), should round up
    # Expected result per the arithmetic below: (128, 112); it is captured, not asserted.
    height = 999
    width = 888
    scale_factor = 8
    # 999//64=15, 999%64=39 -> new_height=16*8=128
    # 888//64=13, 888%64=56 -> new_width=14*8=112
    codeflash_output = downscale_height_and_width(height, width, scale_factor)

def test_maximum_acceptable_input_size():
    # Height and width of 999, just under 1000
    # NOTE(review): the original comment calls this the "largest allowed" size;
    # no such limit is visible in this test -- confirm where it comes from.
    height = 999
    width = 999
    scale_factor = 8
    # 999//64=15, 999%64=39 -> new_height=16*8=128, so (128, 128)
    codeflash_output = downscale_height_and_width(height, width, scale_factor)

def test_large_scale_factor_with_large_input():
    # Large scale_factor, large input: 900 is exactly 30**2, so no round-up applies
    height = 900
    width = 900
    scale_factor = 30
    # 900//900=1, 900%900=0 -> new_height=1*30=30, so (30, 30)
    codeflash_output = downscale_height_and_width(height, width, scale_factor)

def test_large_prime_input_and_scale():
    # Prime numbers for height/width and scale_factor, so neither dimension
    # can be a multiple of scale_factor**2 and both must round up
    height = 997
    width = 991
    scale_factor = 7
    # scale_factor**2 = 49
    # 997//49=20, 997%49=17 -> new_height=21*7=147
    # 991//49=20, 991%49=11 -> new_width=21*7=147
    # Expected result: (147, 147); it is captured, not asserted.
    codeflash_output = downscale_height_and_width(height, width, scale_factor)

# ---------------------------
# 4. PARAMETRIZED TESTS FOR COVERAGE
# ---------------------------

@pytest.mark.parametrize(
    "height,width,scale_factor,expected",
    [
        # Each dimension maps to ceil(dim / scale_factor**2) * scale_factor.
        # multiples of 8**2 = 64: 64 -> 1*8, 128 -> 2*8
        (64, 64, 8, (8, 8)),
        (128, 128, 8, (16, 16)),
        # not multiples: quotient rounds up
        (65, 65, 8, (16, 16)),
        (130, 130, 8, (24, 24)),
        # edge
        (0, 0, 8, (0, 0)),
        (1, 1, 8, (8, 8)),
        (999, 999, 8, (128, 128)),
        # scale_factor=2 (divisor 4)
        (7, 7, 2, (4, 4)),
        (8, 8, 2, (4, 4)),
        # negative: floor division rounds toward negative infinity
        # -64 // 64 = -1, remainder 0 -> -1*8
        (-64, -64, 8, (-8, -8)),
        # -65 // 64 = -2, remainder 63 -> rounds up to -1 -> -1*8
        (-65, -65, 8, (-8, -8)),
    ]
)
def test_parametrized_cases(height, width, scale_factor, expected):
    # Parametrized test for various scenarios; the capture line is kept verbatim
    # because codeflash_output is what the harness compares between the original
    # and the optimized implementation.
    codeflash_output = downscale_height_and_width(height, width, scale_factor)
    assert codeflash_output == expected

# ---------------------------
# 5. TYPE CHECKING (optional, for mutation testing)
# ---------------------------

def test_type_checking():
    # Should raise TypeError for types that do not support the arithmetic used:
    # a list, a dict and None are passed in turn, one per argument position.
    with pytest.raises(TypeError):
        downscale_height_and_width([10], 20, 8)  # list height
    with pytest.raises(TypeError):
        downscale_height_and_width(10, {20: 1}, 8)  # dict width
    with pytest.raises(TypeError):
        downscale_height_and_width(10, 20, None)  # None scale_factor
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes git checkout codeflash/optimize-downscale_height_and_width-mbdxg4km and push.

Codeflash

**Explanation of optimizations:**

- Store `scale_factor * scale_factor` in a local variable `sf2`, to avoid redundant exponentiation.
- Use the "ceiling division" trick `-(-x // y)` to combine floor division and increment for remainder into a single (and branchless) step, thus reducing the need for modulo and if-statements.
- This code is faster due to minimized operations and no branching on the critical path.  
- Return value and behavior remain identical to the original.  
- Comment retained with added note for the optimization applied.
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Jun 1, 2025
@codeflash-ai codeflash-ai bot requested a review from aseembits93 June 1, 2025 17:21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

0 participants