-
Notifications
You must be signed in to change notification settings - Fork 56
Open
Description
I have made a little progress implementing this extension in SD Forge. I had opened an issue on IceClear/StableSR/issues/157 but this is probably the better place.
In Forge, modules/sd_hijack_optimizations.py is completely commented out, which is why we get the error:
*** Error loading script: stablesr.py
Traceback (most recent call last):
File "/path-to/stable-diffusion-webui-forge/modules/scripts.py", line 525, in load_scripts
script_module = script_loading.load_module(scriptfile.path)
File "/path-to/stable-diffusion-webui-forge/modules/script_loading.py", line 13, in load_module
module_spec.loader.exec_module(module)
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/path-to/stable-diffusion-webui-forge/extensions/sd-webui-stablesr/scripts/stablesr.py", line 55, in <module>
from srmodule.struct_cond import EncoderUNetModelWT, build_unetwt
File "/path-to/stable-diffusion-webui-forge/extensions/sd-webui-stablesr/srmodule/struct_cond.py", line 20, in <module>
from srmodule.attn import get_attn_func
File "/path-to/stable-diffusion-webui-forge/extensions/sd-webui-stablesr/srmodule/attn.py", line 8, in <module>
from modules.sd_hijack_optimizations import get_available_vram, get_xformers_flash_attention_op, sub_quad_attention
ImportError: cannot import name 'get_available_vram' from 'modules.sd_hijack_optimizations' (/path-to/stable-diffusion-webui-forge/modules/sd_hijack_optimizations.py)

So I uncommented the following in modules/sd_hijack_optimizations.py:
# from __future__ import annotations
# import math
+ import psutil
+ import platform
#
+ import torch
# from torch import einsum
#
# from ldm.util import default
# from einops import rearrange
#
- from modules import shared, errors, devices, sub_quadratic_attention
+ from modules import shared, errors, devices
+ from backend.misc import sub_quadratic_attention
...
+ def get_available_vram():
+ if shared.device.type == 'cuda':
+ stats = torch.cuda.memory_stats(shared.device)
+ mem_active = stats['active_bytes.all.current']
+ mem_reserved = stats['reserved_bytes.all.current']
+ mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())
+ mem_free_torch = mem_reserved - mem_active
+ mem_free_total = mem_free_cuda + mem_free_torch
+ return mem_free_total
+ else:
+ return psutil.virtual_memory().available
...
+ def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_size_min=None, chunk_threshold=None, use_checkpoint=True):
+ bytes_per_token = torch.finfo(q.dtype).bits//8
+ batch_x_heads, q_tokens, _ = q.shape
+ _, k_tokens, _ = k.shape
+ qk_matmul_size_bytes = batch_x_heads * bytes_per_token * q_tokens * k_tokens
+
+ if chunk_threshold is None:
+ if q.device.type == 'mps':
+ chunk_threshold_bytes = 268435456 * (2 if platform.processor() == 'i386' else bytes_per_token)
+ else:
+ chunk_threshold_bytes = int(get_available_vram() * 0.7)
+ elif chunk_threshold == 0:
+ chunk_threshold_bytes = None
+ else:
+ chunk_threshold_bytes = int(0.01 * chunk_threshold * get_available_vram())
+
+ if kv_chunk_size_min is None and chunk_threshold_bytes is not None:
+ kv_chunk_size_min = chunk_threshold_bytes // (batch_x_heads * bytes_per_token * (k.shape[2] + v.shape[2]))
+ elif kv_chunk_size_min == 0:
+ kv_chunk_size_min = None
+
+ if chunk_threshold_bytes is not None and qk_matmul_size_bytes <= chunk_threshold_bytes:
+ # the big matmul fits into our memory limit; do everything in 1 chunk,
+ # i.e. send it down the unchunked fast-path
+ kv_chunk_size = k_tokens
+
+ with devices.without_autocast(disable=q.dtype == v.dtype):
+ return sub_quadratic_attention.efficient_dot_product_attention(
+ q,
+ k,
+ v,
+ query_chunk_size=q_chunk_size,
+ kv_chunk_size=kv_chunk_size,
+ kv_chunk_size_min = kv_chunk_size_min,
+ use_checkpoint=use_checkpoint,
+ )
+
+
+ def get_xformers_flash_attention_op(q, k, v):
+ if not shared.cmd_opts.xformers_flash_attention:
+ return None
+
+ try:
+ flash_attention_op = xformers.ops.MemoryEfficientAttentionFlashAttentionOp
+ fw, bw = flash_attention_op
+ if fw.supports(xformers.ops.fmha.Inputs(query=q, key=k, value=v, attn_bias=None)):
+ return flash_attention_op
+ except Exception as e:
+ errors.display_once(e, "enabling flash attention")
+
+ return None

Also, sub_quadratic_attention is no longer under modules but under backend/misc, so I changed the import for that.
Now the extension gets loaded without any errors. However, when I attempt to run the script in the img2img tab, I get the following error:
Traceback (most recent call last):
File "/path-to/stable-diffusion-webui-forge/modules_forge/main_thread.py", line 30, in work
self.result = self.func(*self.args, **self.kwargs)
File "/path-to/stable-diffusion-webui-forge/modules/img2img.py", line 246, in img2img_function
processed = modules.scripts.scripts_img2img.run(p, *args)
File "/path-to/stable-diffusion-webui-forge/modules/scripts.py", line 792, in run
processed = script.run(p, *script_args)
File "/path-to/stable-diffusion-webui-forge/extensions/sd-webui-stablesr/scripts/stablesr.py", line 205, in run
first_param = shared.sd_model.parameters().__next__()
AttributeError: 'FakeInitialModel' object has no attribute 'parameters'
'FakeInitialModel' object has no attribute 'parameters'

I'm guessing this last error probably has something to do with this discussion: lllyasviel/stable-diffusion-webui-forge/discussions/964 ?
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels