4 changes: 2 additions & 2 deletions src/diffusers/utils/testing_utils.py
@@ -627,10 +627,10 @@ def load_numpy(arry: Union[str, np.ndarray], local_path: Optional[str] = None) -
return arry


def load_pt(url: str, map_location: str):
def load_pt(url: str, map_location: Optional[str] = None):
response = requests.get(url, timeout=DIFFUSERS_REQUEST_TIMEOUT)
response.raise_for_status()
arry = torch.load(BytesIO(response.content), map_location=map_location)
arry = torch.load(BytesIO(response.content), map_location=map_location, weights_only=False)
yao-matrix (Contributor, Author) commented on May 9, 2025:

  1. The weights_only default changed from False to True in PyTorch 2.6, so it is set explicitly to False here; otherwise tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py::TextToVideoZeroPipelineSlowTests::test_full_model raises an error.
  2. map_location now defaults to None, which aligns with torch.load; otherwise the same test raises an error. The weights_only behavior difference is sketched below.
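
To illustrate point 1, a minimal, self-contained sketch of the behavior difference (the saved class and file name are hypothetical, purely for illustration):

```python
import torch


class VideoResult:
    # Hypothetical container holding non-tensor state, for illustration only.
    def __init__(self, frames):
        self.frames = frames


torch.save(VideoResult(frames=[1, 2, 3]), "sample.pt")

# PyTorch >= 2.6: torch.load defaults to weights_only=True and refuses to
# unpickle arbitrary classes, so this raises an UnpicklingError:
#     torch.load("sample.pt")

# Passing weights_only=False (with map_location defaulting to None, matching
# torch.load) restores the pre-2.6 behavior:
result = torch.load("sample.pt", map_location=None, weights_only=False)
```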

Collaborator commented:

Could we add weights_only=True as an arg to load_pt and pass it through to torch.load? In the test itself we can then set it to False so that it is clear what is happening in the test. I would avoid doing this under the hood because it is a potential security hole.
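
A rough sketch of the suggested signature (hypothetical; the merged version may differ, and the timeout constant is a stand-in for DIFFUSERS_REQUEST_TIMEOUT):

```python
from io import BytesIO
from typing import Optional

import requests
import torch

REQUEST_TIMEOUT = 60  # stand-in for DIFFUSERS_REQUEST_TIMEOUT


def load_pt(url: str, map_location: Optional[str] = None, weights_only: bool = True):
    # Keep the safe default; callers that really need full unpickling opt in explicitly.
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return torch.load(BytesIO(response.content), map_location=map_location, weights_only=weights_only)


# In the test itself the unsafe path is then requested explicitly, which makes
# the intent visible at the call site:
# expected = load_pt(url, map_location="cpu", weights_only=False)
```

This keeps the security-sensitive default in one place while letting individual tests document their opt-out.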

yao-matrix (Contributor, Author) commented on May 19, 2025:

@DN6, done, please help review. Thanks.

return arry


@@ -20,26 +20,32 @@
import torch

from diffusers import StableDiffusionKDiffusionPipeline
from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
require_torch_accelerator,
torch_device,
)


enable_full_determinism()


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_1(self):
sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
@@ -28,7 +28,13 @@
StableDiffusionLDM3DPipeline,
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
require_torch_accelerator,
torch_device,
)

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS

@@ -205,17 +211,17 @@ def test_stable_diffusion_negative_prompt(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionLDM3DPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -256,17 +262,17 @@ def test_ldm3d_stable_diffusion(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
14 changes: 10 additions & 4 deletions tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py
@@ -29,7 +29,13 @@
StableDiffusionSAGPipeline,
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
require_torch_accelerator,
torch_device,
)

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
@@ -162,19 +168,19 @@ def test_encode_prompt_works_in_isolation(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_1(self):
sag_pipe = StableDiffusionSAGPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
26 changes: 18 additions & 8 deletions tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -13,7 +13,17 @@
UNet2DConditionModel,
)
from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
load_numpy,
nightly,
require_torch_accelerator,
torch_device,
)

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
@@ -190,19 +200,19 @@ def test_encode_prompt_works_in_isolation(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_unclip(self):
expected_image = load_numpy(
@@ -226,9 +236,9 @@ def test_stable_unclip(self):
assert_mean_pixel_difference(image, expected_image)

def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16)
pipe.set_progress_bar_config(disable=None)
@@ -242,6 +252,6 @@ def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self):
output_type="np",
)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 7 GB is allocated
assert mem_bytes < 7 * 10**9
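
The backend_* helpers above replace direct torch.cuda calls so the same cache-clearing and memory assertions run on any torch accelerator. A rough sketch of how such dispatchers could be written (an assumption for illustration; the real helpers live in diffusers.utils.testing_utils and may differ):

```python
import torch


def backend_empty_cache_sketch(device: str) -> None:
    # Clear the caching allocator for whichever backend the test device uses.
    if device.startswith("cuda"):
        torch.cuda.empty_cache()
    elif device.startswith("xpu") and hasattr(torch, "xpu"):
        torch.xpu.empty_cache()
    elif device.startswith("mps") and hasattr(torch, "mps"):
        torch.mps.empty_cache()
    # CPU has no device cache to clear.


def backend_max_memory_allocated_sketch(device: str) -> int:
    # Peak bytes allocated on the device since the last reset (0 when unsupported).
    if device.startswith("cuda"):
        return torch.cuda.max_memory_allocated()
    xpu = getattr(torch, "xpu", None)
    if device.startswith("xpu") and xpu is not None and hasattr(xpu, "max_memory_allocated"):
        return xpu.max_memory_allocated()
    return 0
```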
20 changes: 12 additions & 8 deletions tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
@@ -18,12 +18,16 @@
from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
nightly,
require_torch_gpu,
require_torch_accelerator,
skip_mps,
torch_device,
)
@@ -213,19 +217,19 @@ def test_encode_prompt_works_in_isolation(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_unclip_l_img2img(self):
input_image = load_image(
@@ -286,9 +290,9 @@ def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png"
)

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
"fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16
@@ -304,6 +308,6 @@ def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self):
output_type="np",
)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 7 GB is allocated
assert mem_bytes < 7 * 10**9
18 changes: 12 additions & 6 deletions tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py
@@ -19,31 +19,37 @@
import torch

from diffusers import DDIMScheduler, TextToVideoZeroPipeline
from diffusers.utils.testing_utils import load_pt, nightly, require_torch_gpu
from diffusers.utils.testing_utils import (
backend_empty_cache,
load_pt,
nightly,
require_torch_accelerator,
torch_device,
)

from ..test_pipelines_common import assert_mean_pixel_difference


@nightly
@require_torch_gpu
@require_torch_accelerator
class TextToVideoZeroPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_full_model(self):
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
generator = torch.Generator(device="cuda").manual_seed(0)
generator = torch.Generator(device="cpu").manual_seed(0)
yao-matrix (Contributor, Author) commented:

PRNG behavior differs across accelerators, so the generator is set back to "cpu" for cross-device reproducibility, as in other diffusers test cases.
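
A minimal sketch of the pattern, assuming an arbitrary latent shape for illustration: seeding the generator on the CPU makes the sampled noise identical no matter which accelerator runs the pipeline:

```python
import torch

# Device-bound generator: the same seed can produce different streams on
# different accelerators (CUDA vs. XPU vs. MPS), so outputs are not comparable.
# generator = torch.Generator(device=torch_device).manual_seed(0)

# CPU generator: the PRNG stream is identical on every machine, so slow tests
# can check against one set of expected outputs regardless of the backend.
generator = torch.Generator(device="cpu").manual_seed(0)
latents = torch.randn((1, 4, 64, 64), generator=generator)  # sampled on CPU
# The latents can then be moved to the execution device, e.g. latents.to(torch_device).
```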


prompt = "A bear is playing a guitar on Times Square"
result = pipe(prompt=prompt, generator=generator).images
@@ -24,11 +24,11 @@

from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoZeroSDXLPipeline, UNet2DConditionModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
require_accelerate_version_greater,
require_accelerator,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)

@@ -220,7 +220,7 @@ def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4):
self.assertLess(max_diff, expected_max_difference)

@unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
@require_accelerator
@require_torch_accelerator
yao-matrix (Contributor, Author) commented:

Actually this is a torch-specific case, so require_torch_accelerator is used to reflect that fact.

def test_float16_inference(self, expected_max_diff=5e-2):
components = self.get_dummy_components()
for name, module in components.items():
@@ -262,7 +262,7 @@ def test_inference_batch_consistent(self):
def test_inference_batch_single_identical(self):
pass

@require_accelerator
@require_torch_accelerator
@require_accelerate_version_greater("0.17.0")
def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
components = self.get_dummy_components()
@@ -285,7 +285,7 @@ def test_pipeline_call_signature(self):
pass

@unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
@require_accelerator
@require_torch_accelerator
def test_save_load_float16(self, expected_max_diff=1e-2):
components = self.get_dummy_components()
for name, module in components.items():
@@ -337,7 +337,7 @@ def test_save_load_optional_components(self):
def test_sequential_cpu_offload_forward_pass(self):
pass

@require_accelerator
@require_torch_accelerator
def test_to_device(self):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
@@ -365,19 +365,19 @@ def test_xformers_attention_forwardGenerator_pass(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
class TextToVideoZeroSDXLPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_full_model(self):
model_id = "stabilityai/stable-diffusion-xl-base-1.0"