Conversation

codeflash-ai bot commented Jun 1, 2025

📄 87% (0.87x) speedup for retrieve_timesteps in src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py

⏱️ Runtime: 627 microseconds → 336 microseconds (best of 284 runs)

📝 Explanation and details

Here’s an optimized rewrite that keeps the exact signature, preserves the original comments, and maximizes efficiency.

Key optimizations:

  • Avoid repeating set(inspect.signature(...).parameters.keys()) and computing the full signature on every call.
  • Use a local params = scheduler.set_timesteps.__code__.co_varnames, which is very fast and covers almost all cases (it works for recent PyTorch/HuggingFace pipelines).
  • Only fall back to the slower inspect.signature when this fails (edge cases: bound methods or function wrappers that disable __code__ access).
  • Combine post-call lines to reduce stack operations.
  • Avoid unnecessary intermediate references.
  • Minor local variable improvements.

This is functionally identical, preserves all comments, and is notably faster for typical scheduler objects because the _accepts_kw helper avoids repeated slow inspect.signature calls.
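For illustration, here is a minimal sketch of the fast-path check described above, assuming a helper named _accepts_kw (the name mentioned in this summary); the actual diff may differ in details:

import inspect

def _accepts_kw(set_timesteps, name):
    """Return True if the callable accepts a parameter called `name` (sketch)."""
    try:
        # Fast path: read parameter names straight off the code object instead of
        # building a full inspect.Signature on every call. co_varnames begins with
        # the declared parameters, so slicing by the argument counts keeps locals out.
        code = set_timesteps.__code__
        return name in code.co_varnames[: code.co_argcount + code.co_kwonlyargcount]
    except AttributeError:
        # Fallback for callables that hide __code__ (e.g. functools.partial or
        # C-implemented functions): use the slower but fully general signature check.
        return name in inspect.signature(set_timesteps).parameters

Inside retrieve_timesteps, the checks would then look roughly like _accepts_kw(scheduler.set_timesteps, "timesteps") and _accepts_kw(scheduler.set_timesteps, "sigmas"), replacing the per-call signature construction.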

Correctness verification report:

| Test | Status |
| --- | --- |
| ⚙️ Existing Unit Tests | 🔘 None Found |
| 🌀 Generated Regression Tests | 22 Passed |
| ⏪ Replay Tests | 🔘 None Found |
| 🔎 Concolic Coverage Tests | 🔘 None Found |
| 📊 Tests Coverage | 100.0% |
🌀 Generated Regression Tests Details
import inspect
from typing import List, Optional, Union

# imports
import pytest  # used for our unit tests
import torch
from src.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_inpaint import \
    retrieve_timesteps

# ---- Mock Scheduler Classes ----

class BasicScheduler:
    """A scheduler that only supports num_inference_steps, not timesteps or sigmas."""
    def set_timesteps(self, num_inference_steps, device=None, **kwargs):
        # Save timesteps as a torch tensor of range(num_inference_steps)
        self.timesteps = torch.arange(num_inference_steps, dtype=torch.long, device=device)

class TimestepsScheduler:
    """A scheduler that supports custom timesteps."""
    def set_timesteps(self, timesteps, device=None, **kwargs):
        # Save timesteps as a torch tensor of the provided list
        self.timesteps = torch.tensor(timesteps, dtype=torch.long, device=device)

class SigmasScheduler:
    """A scheduler that supports custom sigmas."""
    def set_timesteps(self, sigmas, device=None, **kwargs):
        # Save timesteps as a torch tensor of the provided sigmas (for test, treat as float timesteps)
        self.timesteps = torch.tensor(sigmas, dtype=torch.float, device=device)

class FullScheduler:
    """A scheduler that supports all: num_inference_steps, timesteps, sigmas."""
    def set_timesteps(self, num_inference_steps=None, timesteps=None, sigmas=None, device=None, **kwargs):
        if timesteps is not None:
            self.timesteps = torch.tensor(timesteps, dtype=torch.long, device=device)
        elif sigmas is not None:
            self.timesteps = torch.tensor(sigmas, dtype=torch.float, device=device)
        elif num_inference_steps is not None:
            self.timesteps = torch.arange(num_inference_steps, dtype=torch.long, device=device)
        else:
            raise ValueError("No valid argument provided to set_timesteps")

# ---- Unit Tests ----

# 1. Basic Test Cases

def test_basic_num_inference_steps():
    # Test with BasicScheduler and num_inference_steps
    scheduler = BasicScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=5)

def test_basic_timesteps():
    # Test with TimestepsScheduler and custom timesteps
    scheduler = TimestepsScheduler()
    custom = [3, 1, 4, 2]
    timesteps, n = retrieve_timesteps(scheduler, timesteps=custom)

def test_basic_sigmas():
    # Test with SigmasScheduler and custom sigmas
    scheduler = SigmasScheduler()
    custom = [0.1, 0.5, 1.0]
    timesteps, n = retrieve_timesteps(scheduler, sigmas=custom)

def test_full_scheduler_with_num_inference_steps():
    # Test with FullScheduler and num_inference_steps
    scheduler = FullScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=4)

def test_full_scheduler_with_timesteps():
    # Test with FullScheduler and custom timesteps
    scheduler = FullScheduler()
    custom = [7, 8, 9]
    timesteps, n = retrieve_timesteps(scheduler, timesteps=custom)

def test_full_scheduler_with_sigmas():
    # Test with FullScheduler and custom sigmas
    scheduler = FullScheduler()
    custom = [0.2, 0.4]
    timesteps, n = retrieve_timesteps(scheduler, sigmas=custom)

# 2. Edge Test Cases

def test_timesteps_and_sigmas_raises():
    # Should raise ValueError if both timesteps and sigmas are provided
    scheduler = FullScheduler()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, timesteps=[1,2], sigmas=[0.1, 0.2])

def test_timesteps_not_supported_raises():
    # Should raise ValueError if scheduler does not support timesteps
    scheduler = BasicScheduler()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, timesteps=[1,2,3])

def test_sigmas_not_supported_raises():
    # Should raise ValueError if scheduler does not support sigmas
    scheduler = TimestepsScheduler()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, sigmas=[0.1, 0.2])

def test_no_num_inference_steps_provided():
    # Should raise error if no argument is provided and scheduler expects num_inference_steps
    scheduler = BasicScheduler()
    with pytest.raises(TypeError):
        # set_timesteps will be called with None, which is invalid for range()
        retrieve_timesteps(scheduler)

def test_empty_timesteps():
    # Should handle empty timesteps
    scheduler = FullScheduler()
    timesteps, n = retrieve_timesteps(scheduler, timesteps=[])

def test_empty_sigmas():
    # Should handle empty sigmas
    scheduler = FullScheduler()
    timesteps, n = retrieve_timesteps(scheduler, sigmas=[])

def test_device_cpu():
    # Should move tensor to CPU if device is 'cpu'
    scheduler = FullScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=3, device='cpu')

@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
def test_device_cuda():
    # Should move tensor to CUDA if device is 'cuda'
    scheduler = FullScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=2, device='cuda')

def test_kwargs_passed_to_scheduler():
    # Should pass kwargs to scheduler.set_timesteps
    class KwargsScheduler:
        def set_timesteps(self, num_inference_steps=None, foo=None, **kwargs):
            # Save foo as attribute for test
            self.foo = foo
            self.timesteps = torch.arange(num_inference_steps, dtype=torch.long)
    scheduler = KwargsScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=2, foo='bar')

def test_sigmas_and_num_inference_steps_mutual_exclusion():
    # Should ignore num_inference_steps if sigmas is provided
    scheduler = FullScheduler()
    sigmas = [0.1, 0.2, 0.3]
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=10, sigmas=sigmas)

def test_timesteps_and_num_inference_steps_mutual_exclusion():
    # Should ignore num_inference_steps if timesteps is provided
    scheduler = FullScheduler()
    timesteps_list = [5, 7, 8]
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=10, timesteps=timesteps_list)

def test_sigmas_and_timesteps_mutual_exclusion():
    # Should ignore sigmas if timesteps is provided (but raises ValueError instead)
    scheduler = FullScheduler()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, timesteps=[1], sigmas=[0.1])

# 3. Large Scale Test Cases

def test_large_num_inference_steps():
    # Test with large num_inference_steps (1000)
    scheduler = BasicScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=1000)

def test_large_timesteps():
    # Test with large custom timesteps (1000 elements)
    scheduler = FullScheduler()
    custom = list(range(1000, 2000))
    timesteps, n = retrieve_timesteps(scheduler, timesteps=custom)

def test_large_sigmas():
    # Test with large custom sigmas (1000 elements)
    scheduler = FullScheduler()
    custom = [float(i)/1000 for i in range(1000)]
    timesteps, n = retrieve_timesteps(scheduler, sigmas=custom)

def test_large_device_cpu():
    # Large scale with device specified
    scheduler = FullScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=999, device='cpu')

def test_large_kwargs():
    # Pass large kwargs to scheduler
    class LargeKwargsScheduler:
        def set_timesteps(self, num_inference_steps=None, foo=None, bar=None, **kwargs):
            self.foo = foo
            self.bar = bar
            self.timesteps = torch.arange(num_inference_steps, dtype=torch.long)
    scheduler = LargeKwargsScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=500, foo='foo', bar='bar')
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
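For context, the generated tests above call retrieve_timesteps but omit the captured-output assertions (per the note above, codeflash_output compares original and optimized outputs separately). A hand-written, asserted variant of the first basic test might look like this (a sketch; it assumes retrieve_timesteps returns the scheduler's timesteps tensor together with the inferred step count):

def test_basic_num_inference_steps_asserted():
    # Not part of the generated suite: checks both returned values explicitly.
    scheduler = BasicScheduler()
    timesteps, n = retrieve_timesteps(scheduler, num_inference_steps=5)
    assert n == 5
    assert torch.equal(timesteps, torch.arange(5, dtype=torch.long))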

import inspect
from typing import List, Optional, Union

# imports
import pytest  # used for our unit tests
import torch
from src.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_inpaint import \
    retrieve_timesteps

# ---------------------------
# Unit Tests for retrieve_timesteps
# ---------------------------

# Mock scheduler classes for testing
class SchedulerBasic:
    def __init__(self):
        self.timesteps = None
        self.called_args = None

    def set_timesteps(self, num_inference_steps, device=None, **kwargs):
        # Sets timesteps as a tensor from num_inference_steps down to 1
        self.called_args = (num_inference_steps, device)
        self.timesteps = torch.arange(num_inference_steps-1, -1, -1, dtype=torch.long)
        if device is not None:
            self.timesteps = self.timesteps.to(device)

class SchedulerTimesteps:
    def __init__(self):
        self.timesteps = None
        self.called_args = None

    def set_timesteps(self, timesteps, device=None, **kwargs):
        # Sets timesteps as a tensor from the provided list
        self.called_args = (timesteps, device)
        self.timesteps = torch.tensor(timesteps, dtype=torch.long)
        if device is not None:
            self.timesteps = self.timesteps.to(device)

class SchedulerSigmas:
    def __init__(self):
        self.timesteps = None
        self.called_args = None

    def set_timesteps(self, sigmas, device=None, **kwargs):
        # Sets timesteps as a tensor from the provided sigmas (as floats)
        self.called_args = (sigmas, device)
        self.timesteps = torch.tensor(sigmas, dtype=torch.float)
        if device is not None:
            self.timesteps = self.timesteps.to(device)

class SchedulerTimestepsAndSigmas:
    def __init__(self):
        self.timesteps = None
        self.called_args = None

    def set_timesteps(self, num_inference_steps=None, timesteps=None, sigmas=None, device=None, **kwargs):
        # Accepts all three, but only one is used at a time
        if timesteps is not None:
            self.timesteps = torch.tensor(timesteps, dtype=torch.long)
        elif sigmas is not None:
            self.timesteps = torch.tensor(sigmas, dtype=torch.float)
        elif num_inference_steps is not None:
            self.timesteps = torch.arange(num_inference_steps-1, -1, -1, dtype=torch.long)
        else:
            self.timesteps = torch.tensor([], dtype=torch.long)
        if device is not None:
            self.timesteps = self.timesteps.to(device)

class SchedulerNoTimesteps:
    def __init__(self):
        self.timesteps = None

    def set_timesteps(self, num_inference_steps, device=None, **kwargs):
        # Only accepts num_inference_steps, not timesteps or sigmas
        self.timesteps = torch.arange(num_inference_steps-1, -1, -1, dtype=torch.long)
        if device is not None:
            self.timesteps = self.timesteps.to(device)

class SchedulerNoSigmas:
    def __init__(self):
        self.timesteps = None

    def set_timesteps(self, num_inference_steps=None, timesteps=None, device=None, **kwargs):
        # Accepts timesteps but not sigmas
        if timesteps is not None:
            self.timesteps = torch.tensor(timesteps, dtype=torch.long)
        elif num_inference_steps is not None:
            self.timesteps = torch.arange(num_inference_steps-1, -1, -1, dtype=torch.long)
        else:
            self.timesteps = torch.tensor([], dtype=torch.long)
        if device is not None:
            self.timesteps = self.timesteps.to(device)

# ---------------------------
# Basic Test Cases
# ---------------------------

def test_basic_num_inference_steps():
    # Test with a basic scheduler and num_inference_steps
    scheduler = SchedulerBasic()
    timesteps, n_steps = retrieve_timesteps(scheduler, num_inference_steps=5)

def test_basic_timesteps_list():
    # Test with a scheduler that accepts custom timesteps
    scheduler = SchedulerTimesteps()
    custom_timesteps = [10, 5, 1]
    timesteps, n_steps = retrieve_timesteps(scheduler, timesteps=custom_timesteps)

def test_basic_sigmas_list():
    # Test with a scheduler that accepts custom sigmas
    scheduler = SchedulerSigmas()
    custom_sigmas = [0.1, 0.2, 0.3]
    timesteps, n_steps = retrieve_timesteps(scheduler, sigmas=custom_sigmas)

def test_basic_all_args_none():
    # Test with all optional arguments as None (should raise TypeError)
    scheduler = SchedulerBasic()
    with pytest.raises(TypeError):
        retrieve_timesteps(scheduler)

def test_basic_device_cpu():
    # Test device argument: ensure timesteps are on CPU
    scheduler = SchedulerBasic()
    timesteps, _ = retrieve_timesteps(scheduler, num_inference_steps=4, device="cpu")

def test_basic_kwargs_passed():
    # Test that kwargs are passed through to set_timesteps
    class SchedulerWithKwargs:
        def __init__(self):
            self.timesteps = None
            self.kwargs = None
        def set_timesteps(self, num_inference_steps, device=None, foo=None, **kwargs):
            self.timesteps = torch.arange(num_inference_steps-1, -1, -1, dtype=torch.long)
            self.kwargs = foo
    scheduler = SchedulerWithKwargs()
    retrieve_timesteps(scheduler, num_inference_steps=3, foo="bar")

# ---------------------------
# Edge Test Cases
# ---------------------------

def test_edge_timesteps_and_sigmas_both():
    # Should raise ValueError if both timesteps and sigmas are provided
    scheduler = SchedulerTimestepsAndSigmas()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, timesteps=[1,2,3], sigmas=[0.1,0.2,0.3])

def test_edge_timesteps_not_supported():
    # Should raise ValueError if scheduler does not accept timesteps
    scheduler = SchedulerNoTimesteps()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, timesteps=[1,2,3])

def test_edge_sigmas_not_supported():
    # Should raise ValueError if scheduler does not accept sigmas
    scheduler = SchedulerNoSigmas()
    with pytest.raises(ValueError):
        retrieve_timesteps(scheduler, sigmas=[0.1,0.2,0.3])

def test_edge_empty_timesteps():
    # Should work and return empty tensor and n_steps=0
    scheduler = SchedulerTimesteps()
    timesteps, n_steps = retrieve_timesteps(scheduler, timesteps=[])

def test_edge_empty_sigmas():
    # Should work and return empty tensor and n_steps=0
    scheduler = SchedulerSigmas()
    timesteps, n_steps = retrieve_timesteps(scheduler, sigmas=[])

def test_edge_zero_inference_steps():
    # num_inference_steps=0 should return empty tensor and n_steps=0
    scheduler = SchedulerBasic()
    timesteps, n_steps = retrieve_timesteps(scheduler, num_inference_steps=0)


def test_edge_non_integer_timesteps():
    # Should work if timesteps are floats (should cast to long)
    scheduler = SchedulerTimesteps()
    custom_timesteps = [1.5, 2.5, 3.5]
    timesteps, n_steps = retrieve_timesteps(scheduler, timesteps=custom_timesteps)

def test_edge_non_integer_sigmas():
    # Should work if sigmas are integers (should cast to float)
    scheduler = SchedulerSigmas()
    custom_sigmas = [1, 2, 3]
    timesteps, n_steps = retrieve_timesteps(scheduler, sigmas=custom_sigmas)

def test_edge_device_cuda_if_available():
    # Only run this test if CUDA is available
    if torch.cuda.is_available():
        scheduler = SchedulerBasic()
        timesteps, _ = retrieve_timesteps(scheduler, num_inference_steps=3, device="cuda")

def test_edge_device_torch_device_obj():
    # Pass a torch.device object as device argument
    scheduler = SchedulerBasic()
    device = torch.device("cpu")
    timesteps, _ = retrieve_timesteps(scheduler, num_inference_steps=2, device=device)

def test_edge_kwargs_override():
    # Ensure kwargs can override device argument in scheduler
    class SchedulerWithDeviceOverride:
        def __init__(self):
            self.timesteps = None
            self.device = None
        def set_timesteps(self, num_inference_steps, device=None, **kwargs):
            # device can be overridden by kwargs['device']
            self.device = kwargs.get('device', device)
            self.timesteps = torch.arange(num_inference_steps-1, -1, -1, dtype=torch.long)
            if self.device is not None:
                self.timesteps = self.timesteps.to(self.device)
    scheduler = SchedulerWithDeviceOverride()
    retrieve_timesteps(scheduler, num_inference_steps=2, device="cpu", device2="cpu")

# ---------------------------
# Large Scale Test Cases
# ---------------------------

def test_large_num_inference_steps():
    # Test with a large number of inference steps (e.g., 1000)
    scheduler = SchedulerBasic()
    timesteps, n_steps = retrieve_timesteps(scheduler, num_inference_steps=1000)

def test_large_custom_timesteps():
    # Test with a large custom timesteps list (length 1000)
    scheduler = SchedulerTimesteps()
    custom_timesteps = list(range(1000))
    timesteps, n_steps = retrieve_timesteps(scheduler, timesteps=custom_timesteps)

def test_large_custom_sigmas():
    # Test with a large custom sigmas list (length 1000)
    scheduler = SchedulerSigmas()
    custom_sigmas = [float(i)/1000.0 for i in range(1000)]
    timesteps, n_steps = retrieve_timesteps(scheduler, sigmas=custom_sigmas)

def test_large_device_cpu():
    # Test that large tensors are placed on CPU as requested
    scheduler = SchedulerBasic()
    timesteps, _ = retrieve_timesteps(scheduler, num_inference_steps=1000, device="cpu")

def test_large_device_cuda_if_available():
    # Test that large tensors can be placed on CUDA if available
    if torch.cuda.is_available():
        scheduler = SchedulerBasic()
        timesteps, _ = retrieve_timesteps(scheduler, num_inference_steps=1000, device="cuda")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run git checkout codeflash/optimize-retrieve_timesteps-mbdvurzg and push.

Codeflash

codeflash-ai bot added the ⚡️ codeflash label (Optimization PR opened by Codeflash AI) on Jun 1, 2025
codeflash-ai bot requested a review from aseembits93 on Jun 1, 2025 at 16:36