Skip to content

OSError: [Errno 28] No space left on device - Controlnet + Inpainting-SDXL #4957

@andysingal

Description

@andysingal

Describe the bug

  • even after enabling xformers memory-efficient attention, the pipeline cannot be run: `from_pretrained` fails while downloading the checkpoints with `OSError: [Errno 28] No space left on device`

Reproduction

# Reproduction: SDXL inpainting guided by a Zoe-depth ControlNet.
import os

import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetInpaintPipeline
from diffusers.utils import load_image

# fp16 halves both the checkpoint download size and the VRAM footprint.
controlnet = [
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-zoe-depth-sdxl-1.0",
        torch_dtype=torch.float16,
    )
]

# Both checkpoints are SDXL models, so the XL pipeline class is required;
# the SD-1.x StableDiffusionControlNetInpaintPipeline cannot load SDXL
# components. (The ENOSPC in the report happens earlier, during download —
# free disk space or point HF_HOME/cache_dir at a larger volume.)
pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()

init_image = load_image("https://media.vogue.fr/photos/62bf04b69a57673c725432f3/3:2/w_1793,h_1195,c_limit/rev-1-Barbie-InstaVert_High_Res_JPEG.jpeg")
zoe_image = load_image("/workspace/zoe_depth.png")
mask_image = load_image("/workspace/mask.png")

prompt = "beautiful japanese woman with smile, long hair, 8k, RAW photo, best quality, masterpiece, photo-realistic, focus, professional lighting"
negative_prompt = "worst quality, low quality"
strength = 0.95  # how strongly the masked region is re-noised (1.0 = full inpaint)
controlnet_conditioning_scale = 0.45

os.makedirs("results", exist_ok=True)

for i in range(4):
    # Fixed per-iteration seed so each of the 4 samples is reproducible.
    seed = 10000 * (i + 1)
    generator = torch.manual_seed(seed)
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=init_image,
        mask_image=mask_image,
        control_image=[zoe_image],
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        strength=strength,
        generator=generator,
    ).images[0]

    # BUG FIX: os.path.join("results", "/workspace/outputs/...") discarded
    # "results" entirely — an absolute second component replaces everything
    # before it. Save into the directory created above instead.
    image.save(os.path.join("results", f"{seed}.png"))

Logs

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[9], line 13
      3 from diffusers import ControlNetModel, StableDiffusionControlNetInpaintPipeline
      6 controlnet = [
      7     ControlNetModel.from_pretrained(
      8         "diffusers/controlnet-zoe-depth-sdxl-1.0" ,
      9         torch_dtype=torch.float16
     10         )
     11     ]
---> 13 pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
     14     "diffusers/stable-diffusion-xl-1.0-inpainting-0.1" ,
     15     controlnet=controlnet,
     16     torch_dtype=torch.float16
     17     )
     19 pipe.to( "cuda" )
     20 pipe.enable_xformers_memory_efficient_attention()

File /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/pipeline_utils.py:932, in DiffusionPipeline.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    929 # 1. Download the checkpoints and configs
    930 # use snapshot download here to get it working from from_pretrained
    931 if not os.path.isdir(pretrained_model_name_or_path):
--> 932     cached_folder = cls.download(
    933         pretrained_model_name_or_path,
    934         cache_dir=cache_dir,
    935         resume_download=resume_download,
    936         force_download=force_download,
    937         proxies=proxies,
    938         local_files_only=local_files_only,
    939         use_auth_token=use_auth_token,
    940         revision=revision,
    941         from_flax=from_flax,
    942         use_safetensors=use_safetensors,
    943         custom_pipeline=custom_pipeline,
    944         custom_revision=custom_revision,
    945         variant=variant,
    946         load_connected_pipeline=load_connected_pipeline,
    947         **kwargs,
    948     )
    949 else:
    950     cached_folder = pretrained_model_name_or_path

File /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/pipeline_utils.py:1507, in DiffusionPipeline.download(cls, pretrained_model_name, **kwargs)
   1505 # download all allow_patterns - ignore_patterns
   1506 try:
-> 1507     cached_folder = snapshot_download(
   1508         pretrained_model_name,
   1509         cache_dir=cache_dir,
   1510         resume_download=resume_download,
   1511         proxies=proxies,
   1512         local_files_only=local_files_only,
   1513         use_auth_token=use_auth_token,
   1514         revision=revision,
   1515         allow_patterns=allow_patterns,
   1516         ignore_patterns=ignore_patterns,
   1517         user_agent=user_agent,
   1518     )
   1520     # retrieve pipeline class from local file
   1521     cls_name = cls.load_config(os.path.join(cached_folder, "model_index.json")).get("_class_name", None)

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
    115 if check_use_auth_token:
    116     kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/_snapshot_download.py:235, in snapshot_download(repo_id, revision, repo_type, cache_dir, local_dir, local_dir_use_symlinks, library_name, library_version, user_agent, proxies, etag_timeout, resume_download, force_download, token, local_files_only, allow_patterns, ignore_patterns, max_workers, tqdm_class)
    233         _inner_hf_hub_download(file)
    234 else:
--> 235     thread_map(
    236         _inner_hf_hub_download,
    237         filtered_repo_files,
    238         desc=f"Fetching {len(filtered_repo_files)} files",
    239         max_workers=max_workers,
    240         # User can use its own tqdm class or the default one from `huggingface_hub.utils`
    241         tqdm_class=tqdm_class or hf_tqdm,
    242     )
    244 if local_dir is not None:
    245     return str(os.path.realpath(local_dir))

File /usr/local/lib/python3.10/dist-packages/tqdm/contrib/concurrent.py:69, in thread_map(fn, *iterables, **tqdm_kwargs)
     55 """
     56 Equivalent of `list(map(fn, *iterables))`
     57 driven by `concurrent.futures.ThreadPoolExecutor`.
   (...)
     66     [default: max(32, cpu_count() + 4)].
     67 """
     68 from concurrent.futures import ThreadPoolExecutor
---> 69 return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs)

File /usr/local/lib/python3.10/dist-packages/tqdm/contrib/concurrent.py:51, in _executor_map(PoolExecutor, fn, *iterables, **tqdm_kwargs)
     47 with ensure_lock(tqdm_class, lock_name=lock_name) as lk:
     48     # share lock in case workers are already using `tqdm`
     49     with PoolExecutor(max_workers=max_workers, initializer=tqdm_class.set_lock,
     50                       initargs=(lk,)) as ex:
---> 51         return list(tqdm_class(ex.map(fn, *iterables, chunksize=chunksize), **kwargs))

File /usr/local/lib/python3.10/dist-packages/tqdm/notebook.py:254, in tqdm_notebook.__iter__(self)
    252 try:
    253     it = super(tqdm_notebook, self).__iter__()
--> 254     for obj in it:
    255         # return super(tqdm...) will not catch exception
    256         yield obj
    257 # NB: except ... [ as ...] breaks IPython async KeyboardInterrupt

File /usr/local/lib/python3.10/dist-packages/tqdm/std.py:1178, in tqdm.__iter__(self)
   1175 time = self._time
   1177 try:
-> 1178     for obj in iterable:
   1179         yield obj
   1180         # Update and possibly print the progressbar.
   1181         # Note: does not call self.update(1) for speed optimisation.

File /usr/lib/python3.10/concurrent/futures/_base.py:621, in Executor.map.<locals>.result_iterator()
    618 while fs:
    619     # Careful not to keep a reference to the popped future
    620     if timeout is None:
--> 621         yield _result_or_cancel(fs.pop())
    622     else:
    623         yield _result_or_cancel(fs.pop(), end_time - time.monotonic())

File /usr/lib/python3.10/concurrent/futures/_base.py:319, in _result_or_cancel(***failed resolving arguments***)
    317 try:
    318     try:
--> 319         return fut.result(timeout)
    320     finally:
    321         fut.cancel()

File /usr/lib/python3.10/concurrent/futures/_base.py:458, in Future.result(self, timeout)
    456     raise CancelledError()
    457 elif self._state == FINISHED:
--> 458     return self.__get_result()
    459 else:
    460     raise TimeoutError()

File /usr/lib/python3.10/concurrent/futures/_base.py:403, in Future.__get_result(self)
    401 if self._exception:
    402     try:
--> 403         raise self._exception
    404     finally:
    405         # Break a reference cycle with the exception in self._exception
    406         self = None

File /usr/lib/python3.10/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     55     return
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:
     60     self.future.set_exception(exc)

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/_snapshot_download.py:211, in snapshot_download.<locals>._inner_hf_hub_download(repo_file)
    210 def _inner_hf_hub_download(repo_file: str):
--> 211     return hf_hub_download(
    212         repo_id,
    213         filename=repo_file,
    214         repo_type=repo_type,
    215         revision=commit_hash,
    216         cache_dir=cache_dir,
    217         local_dir=local_dir,
    218         local_dir_use_symlinks=local_dir_use_symlinks,
    219         library_name=library_name,
    220         library_version=library_version,
    221         user_agent=user_agent,
    222         proxies=proxies,
    223         etag_timeout=etag_timeout,
    224         resume_download=resume_download,
    225         force_download=force_download,
    226         token=token,
    227     )

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
    115 if check_use_auth_token:
    116     kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1364, in hf_hub_download(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, local_dir_use_symlinks, user_agent, force_download, force_filename, proxies, etag_timeout, resume_download, token, local_files_only, legacy_cache_layout)
   1361 with temp_file_manager() as temp_file:
   1362     logger.info("downloading %s to %s", url, temp_file.name)
-> 1364     http_get(
   1365         url_to_download,
   1366         temp_file,
   1367         proxies=proxies,
   1368         resume_size=resume_size,
   1369         headers=headers,
   1370         expected_size=expected_size,
   1371     )
   1373 if local_dir is None:
   1374     logger.info(f"Storing {url} in cache at {blob_path}")

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:544, in http_get(url, temp_file, proxies, resume_size, headers, timeout, max_retries, expected_size)
    542     if chunk:  # filter out keep-alive new chunks
    543         progress.update(len(chunk))
--> 544         temp_file.write(chunk)
    546 if expected_size is not None and expected_size != temp_file.tell():
    547     raise EnvironmentError(
    548         f"Consistency check failed: file should be of size {expected_size} but has size"
    549         f" {temp_file.tell()} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
    550         " pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
    551         " know by opening an issue on https://github.com/huggingface/huggingface_hub."
    552     )

File /usr/lib/python3.10/tempfile.py:622, in _TemporaryFileWrapper.__getattr__.<locals>.func_wrapper(*args, **kwargs)
    620 @_functools.wraps(func)
    621 def func_wrapper(*args, **kwargs):
--> 622     return func(*args, **kwargs)

OSError: [Errno 28] No space left on device


### System Info

A100 GPU (80 GB VRAM)

### Who can help?

@williamberman @patrickvonplaten @sayakpaul 

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug (Something isn't working), stale (Issues that haven't received updates)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions