diff --git a/docs/source/en/api/schedulers/overview.md b/docs/source/en/api/schedulers/overview.md index 2150357cc2b9..af287454e15d 100644 --- a/docs/source/en/api/schedulers/overview.md +++ b/docs/source/en/api/schedulers/overview.md @@ -52,6 +52,7 @@ Many schedulers are implemented from the [k-diffusion](https://github.com/crowso | sgm_uniform | init with `timestep_spacing="trailing"` | | simple | init with `timestep_spacing="trailing"` | | exponential | init with `timestep_spacing="linspace"`, `use_exponential_sigmas=True` | +| beta | init with `timestep_spacing="linspace"`, `use_beta_sigmas=True` | All schedulers are built from the base [`SchedulerMixin`] class which implements low level utilities shared by all schedulers. diff --git a/src/diffusers/schedulers/scheduling_deis_multistep.py b/src/diffusers/schedulers/scheduling_deis_multistep.py index 3b26befac64e..6fe8474aab87 100644 --- a/src/diffusers/schedulers/scheduling_deis_multistep.py +++ b/src/diffusers/schedulers/scheduling_deis_multistep.py @@ -22,10 +22,14 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import deprecate +from ..utils import deprecate, is_scipy_available from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -113,6 +117,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. timestep_spacing (`str`, defaults to `"linspace"`): The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. @@ -141,11 +148,16 @@ def __init__( lower_order_final: bool = True, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." 
+ ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -263,6 +275,9 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) else: sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 @@ -396,6 +411,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def convert_model_output( self, model_output: torch.Tensor, diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py index 924eefb0e98d..7677e37e9426 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -21,11 +21,15 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import deprecate +from ..utils import deprecate, is_scipy_available from ..utils.torch_utils import randn_tensor from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -163,6 +167,9 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. 
use_lu_lambdas (`bool`, *optional*, defaults to `False`): Whether to use the uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the noise schedule during the sampling process. If `True`, the sigmas and time steps are determined according to a sequence of @@ -209,6 +216,7 @@ def __init__( euler_at_final: bool = False, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, use_lu_lambdas: Optional[bool] = False, final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min" lambda_min_clipped: float = -float("inf"), @@ -217,8 +225,12 @@ def __init__( steps_offset: int = 0, rescale_betas_zero_snr: bool = False, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if algorithm_type in ["dpmsolver", "sde-dpmsolver"]: deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead" deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message) @@ -337,6 +349,8 @@ def set_timesteps( raise ValueError("Cannot use `timesteps` with `config.use_lu_lambdas = True`") if timesteps is not None and self.config.use_exponential_sigmas: raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") if timesteps is not None: timesteps = np.array(timesteps).astype(np.int64) @@ -388,6 +402,9 @@ def set_timesteps( elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) else: sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) @@ -542,6 +559,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def convert_model_output( self, model_output: torch.Tensor, diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py index 4f024b8c4c75..c26a464518f0 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py @@ -21,11 +21,15 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import deprecate +from ..utils import deprecate, is_scipy_available from ..utils.torch_utils import randn_tensor from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -126,6 +130,9 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. lambda_min_clipped (`float`, defaults to `-inf`): Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the cosine (`squaredcos_cap_v2`) noise schedule. @@ -161,13 +168,18 @@ def __init__( euler_at_final: bool = False, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, lambda_min_clipped: float = -float("inf"), variance_type: Optional[str] = None, timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if algorithm_type in ["dpmsolver", "sde-dpmsolver"]: deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. 
Choose from `dpmsolver++` or `sde-dpmsolver++` instead" deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message) @@ -219,6 +231,7 @@ def __init__( self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication self.use_karras_sigmas = use_karras_sigmas self.use_exponential_sigmas = use_exponential_sigmas + self.use_beta_sigmas = use_beta_sigmas @property def step_index(self): @@ -276,6 +289,9 @@ def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torc elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) else: sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigma_max = ( @@ -416,6 +432,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.convert_model_output def convert_model_output( self, diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_sde.py b/src/diffusers/schedulers/scheduling_dpmsolver_sde.py index 3748de63388a..610e8d2d765c 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_sde.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_sde.py @@ -20,9 +20,14 @@ import torchsde from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import is_scipy_available from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + class BatchedBrownianTree: """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" @@ -162,6 +167,9 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. 
+ use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. noise_sampler_seed (`int`, *optional*, defaults to `None`): The random seed to use for the noise sampler. If `None`, a random seed is generated. timestep_spacing (`str`, defaults to `"linspace"`): @@ -185,12 +193,17 @@ def __init__( prediction_type: str = "epsilon", use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, noise_sampler_seed: Optional[int] = None, timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -349,6 +362,9 @@ def set_timesteps( elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) second_order_timesteps = self._second_order_timesteps(sigmas, log_sigmas) @@ -451,6 +467,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + @property def state_in_first_order(self): return self.sample is None diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py index 353baf08e81d..3329919cfb02 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py @@ -21,11 +21,14 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import deprecate, logging +from ..utils import deprecate, is_scipy_available, logging from ..utils.torch_utils import randn_tensor from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -125,6 +128,9 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. final_sigmas_type (`str`, *optional*, defaults to `"zero"`): The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0. @@ -157,12 +163,17 @@ def __init__( lower_order_final: bool = False, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min" lambda_min_clipped: float = -float("inf"), variance_type: Optional[str] = None, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if algorithm_type == "dpmsolver": deprecation_message = "algorithm_type `dpmsolver` is deprecated and will be removed in a future version. 
Choose from `dpmsolver++` or `sde-dpmsolver++` instead" deprecate("algorithm_types=dpmsolver", "1.0.0", deprecation_message) @@ -307,6 +318,8 @@ def set_timesteps( raise ValueError("Cannot use `timesteps` when `config.use_karras_sigmas=True`.") if timesteps is not None and self.config.use_exponential_sigmas: raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") num_inference_steps = num_inference_steps or len(timesteps) self.num_inference_steps = num_inference_steps @@ -333,6 +346,9 @@ def set_timesteps( elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) else: sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) @@ -484,6 +500,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def convert_model_output( self, model_output: torch.Tensor, diff --git a/src/diffusers/schedulers/scheduling_heun_discrete.py b/src/diffusers/schedulers/scheduling_heun_discrete.py index efcfdeb1d5ef..cb995df4af59 100644 --- a/src/diffusers/schedulers/scheduling_heun_discrete.py +++ b/src/diffusers/schedulers/scheduling_heun_discrete.py @@ -19,9 +19,14 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import is_scipy_available from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -99,6 +104,9 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. 
+ use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. timestep_spacing (`str`, defaults to `"linspace"`): The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. @@ -120,13 +128,18 @@ def __init__( prediction_type: str = "epsilon", use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, clip_sample: Optional[bool] = False, clip_sample_range: float = 1.0, timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -258,6 +271,8 @@ def set_timesteps( raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`") if timesteps is not None and self.config.use_exponential_sigmas: raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") num_inference_steps = num_inference_steps or len(timesteps) self.num_inference_steps = num_inference_steps @@ -296,6 +311,9 @@ def set_timesteps( elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) sigmas = torch.from_numpy(sigmas).to(device=device) @@ -386,6 +404,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + @property def state_in_first_order(self): return self.dt is None diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py index 038aa19603ea..b1ec244e5a79 100644 --- a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py @@ -19,10 +19,15 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import is_scipy_available from ..utils.torch_utils import randn_tensor from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -93,6 +98,9 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. prediction_type (`str`, defaults to `epsilon`, *optional*): Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen @@ -117,12 +125,17 @@ def __init__( trained_betas: Optional[Union[np.ndarray, List[float]]] = None, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, prediction_type: str = "epsilon", timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." 
+ ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -258,6 +271,9 @@ def set_timesteps( elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) self.log_sigmas = torch.from_numpy(log_sigmas).to(device) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) @@ -376,6 +392,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + @property def state_in_first_order(self): return self.sample is None diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py index 8fbf66832668..422fe40556f0 100644 --- a/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py +++ b/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py @@ -19,9 +19,14 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import is_scipy_available from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -92,6 +97,9 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. 
prediction_type (`str`, defaults to `epsilon`, *optional*): Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen @@ -116,12 +124,17 @@ def __init__( trained_betas: Optional[Union[np.ndarray, List[float]]] = None, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, prediction_type: str = "epsilon", timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -257,6 +270,9 @@ def set_timesteps( elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) self.log_sigmas = torch.from_numpy(log_sigmas).to(device=device) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) @@ -389,6 +405,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def step( self, model_output: Union[torch.Tensor, np.ndarray], diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index 5ef8ffb0dcbf..aed8c5828c75 100644 --- a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -17,6 +17,7 @@ from typing import List, Optional, Tuple, Union import numpy as np +import scipy.stats import torch from scipy import integrate @@ -113,6 +114,9 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. prediction_type (`str`, defaults to `epsilon`, *optional*): Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen @@ -137,12 +141,15 @@ def __init__( trained_betas: Optional[Union[np.ndarray, List[float]]] = None, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, prediction_type: str = "epsilon", timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." 
+ ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -297,6 +304,9 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) @@ -392,6 +402,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def step( self, model_output: torch.Tensor, diff --git a/src/diffusers/schedulers/scheduling_sasolver.py b/src/diffusers/schedulers/scheduling_sasolver.py index ad79c69fc714..7188be5caaea 100644 --- a/src/diffusers/schedulers/scheduling_sasolver.py +++ b/src/diffusers/schedulers/scheduling_sasolver.py @@ -22,11 +22,15 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import deprecate +from ..utils import deprecate, is_scipy_available from ..utils.torch_utils import randn_tensor from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -124,6 +128,9 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. lambda_min_clipped (`float`, defaults to `-inf`): Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the cosine (`squaredcos_cap_v2`) noise schedule. 
@@ -159,13 +166,18 @@ def __init__( lower_order_final: bool = True, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, lambda_min_clipped: float = -float("inf"), variance_type: Optional[str] = None, timestep_spacing: str = "linspace", steps_offset: int = 0, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -292,6 +304,9 @@ def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torc elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) else: sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 @@ -425,6 +440,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def convert_model_output( self, model_output: torch.Tensor, diff --git a/src/diffusers/schedulers/scheduling_unipc_multistep.py b/src/diffusers/schedulers/scheduling_unipc_multistep.py index 78cf0b6d16a7..195e9c8477a2 100644 --- a/src/diffusers/schedulers/scheduling_unipc_multistep.py +++ b/src/diffusers/schedulers/scheduling_unipc_multistep.py @@ -22,10 +22,14 @@ import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import deprecate +from ..utils import deprecate, is_scipy_available from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +if is_scipy_available(): + import scipy.stats + + # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar def betas_for_alpha_bar( num_diffusion_timesteps, @@ -161,6 +165,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): the sigmas are determined according to a sequence of noise levels {σi}. use_exponential_sigmas (`bool`, *optional*, defaults to `False`): Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. timestep_spacing (`str`, defaults to `"linspace"`): The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. @@ -198,13 +205,18 @@ def __init__( solver_p: SchedulerMixin = None, use_karras_sigmas: Optional[bool] = False, use_exponential_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, timestep_spacing: str = "linspace", steps_offset: int = 0, final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min" rescale_betas_zero_snr: bool = False, ): - if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: - raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.") + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." 
+ ) if trained_betas is not None: self.betas = torch.tensor(trained_betas, dtype=torch.float32) elif beta_schedule == "linear": @@ -337,6 +349,9 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic elif self.config.use_exponential_sigmas: sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) else: sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) if self.config.final_sigmas_type == "sigma_min": @@ -480,6 +495,38 @@ def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)""" + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + def convert_model_output( self, model_output: torch.Tensor,
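
For reference, here is a minimal standalone sketch of the sigma spacing that the new `_convert_to_beta` helper computes. It uses the same formula as the copies added above; the `sigma_min`/`sigma_max` values are illustrative placeholders and `alpha = beta = 0.6` mirrors the defaults.

```python
import numpy as np
import scipy.stats


def beta_sigmas(sigma_min, sigma_max, num_inference_steps, alpha=0.6, beta=0.6):
    # Walk the timesteps from 1 down to 0, map them through the beta quantile
    # function (ppf), and rescale into [sigma_min, sigma_max]. With
    # alpha = beta = 0.6 the quantile function is steep near 0 and 1, so the
    # resulting sigmas cluster at both ends of the noise schedule.
    timesteps = 1 - np.linspace(0, 1, num_inference_steps)
    ppf = scipy.stats.beta.ppf(timesteps, alpha, beta)
    return sigma_min + ppf * (sigma_max - sigma_min)


# Example: 10 descending sigmas between illustrative bounds.
print(beta_sigmas(0.03, 14.6, 10))
```

This matches the list comprehension added to each scheduler (`sigma_min + (ppf * (sigma_max - sigma_min))`), just vectorized with NumPy for readability.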
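
A hedged usage sketch of the new flag, following the `beta` row added to the schedulers overview table (`timestep_spacing="linspace"`, `use_beta_sigmas=True`). The checkpoint and prompt are placeholders, and scipy must be installed or the scheduler constructor raises the `ImportError` added in this PR.

```python
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

# Any pipeline whose scheduler gained `use_beta_sigmas` in this PR works the same way.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Swap in the beta-sigma schedule via the scheduler config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config,
    use_beta_sigmas=True,
    timestep_spacing="linspace",
)

image = pipe("a photo of an astronaut riding a horse", num_inference_steps=25).images[0]
```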