
Commit a2feaa9

Merge pull request #5194 from brkirch/autocast-and-mps-randn-fixes
Use devices.autocast() and fix MPS randn issues
2 parents c7af672 + 0fddb4a commit a2feaa9

File tree: 8 files changed (+29, -31 lines)

modules/devices.py

Lines changed: 3 additions & 12 deletions
@@ -66,24 +66,15 @@ def enable_tf32():
 def randn(seed, shape):
-    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
-    if device.type == 'mps':
-        generator = torch.Generator(device=cpu)
-        generator.manual_seed(seed)
-        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
-        return noise
-
     torch.manual_seed(seed)
+    if device.type == 'mps':
+        return torch.randn(shape, device=cpu).to(device)
     return torch.randn(shape, device=device)


 def randn_without_seed(shape):
-    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
     if device.type == 'mps':
-        generator = torch.Generator(device=cpu)
-        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
-        return noise
-
+        return torch.randn(shape, device=cpu).to(device)
     return torch.randn(shape, device=device)
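
Note on the change above: instead of building a dedicated CPU torch.Generator per call, randn() now seeds the global RNG with torch.manual_seed() and, when targeting MPS, draws the noise on the CPU before moving it to the device, sidestepping the broken seeding of MPS randn; a given seed then reproduces the same noise as the CPU path. A minimal, self-contained sketch of that pattern (the cpu/device names here are stand-ins for the module-level globals in modules/devices.py):

import torch

# Stand-ins for the module-level globals in modules/devices.py.
cpu = torch.device("cpu")
device = torch.device("mps") if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else cpu

def randn(seed, shape):
    # Seed the global RNG, then draw on the CPU when targeting MPS so the
    # same seed reproduces the same noise as on the CPU backend.
    torch.manual_seed(seed)
    if device.type == 'mps':
        return torch.randn(shape, device=cpu).to(device)
    return torch.randn(shape, device=device)

a = randn(1234, (4, 64, 64))
b = randn(1234, (4, 64, 64))
assert torch.equal(a.cpu(), b.cpu())  # same seed, same noise, MPS or not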

modules/hypernetworks/hypernetwork.py

Lines changed: 1 addition & 1 deletion
@@ -495,7 +495,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
                 if shared.state.interrupted:
                     break

-                with torch.autocast("cuda"):
+                with devices.autocast():
                     x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
                     if tag_drop_out != 0 or shuffle_tags:
                         shared.sd_model.cond_stage_model.to(devices.device)
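
This and the remaining call sites below replace the hard-coded torch.autocast("cuda") (and the ad-hoc precision_scope fallbacks removed from modules/interrogate.py and modules/swinir_model.py) with a single devices.autocast() helper. The helper's implementation is not part of this diff; a plausible sketch, assuming it simply falls back to a null context when mixed precision is disabled or everything already runs in float32:

import contextlib
import torch

# Hypothetical stand-ins for shared.cmd_opts.precision and the dtype global
# in modules/devices.py; the real helper is defined there, not in this commit.
precision = "autocast"
dtype = torch.float16

def autocast():
    # No-op context when autocast is disabled or full precision is in use,
    # otherwise the usual CUDA autocast context.
    if precision == "full" or dtype == torch.float32:
        return contextlib.nullcontext()
    return torch.autocast("cuda")

# Usage then mirrors the call sites in this commit, e.g.:
#     with torch.no_grad(), autocast():
#         image_features = clip_model.encode_image(clip_image)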

modules/interrogate.py

Lines changed: 1 addition & 2 deletions
@@ -148,8 +148,7 @@ def interrogate(self, pil_image):
         clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)

-        precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext
-        with torch.no_grad(), precision_scope("cuda"):
+        with torch.no_grad(), devices.autocast():
             image_features = self.clip_model.encode_image(clip_image).type(self.dtype)

             image_features /= image_features.norm(dim=-1, keepdim=True)

modules/sd_hijack.py

Lines changed: 1 addition & 5 deletions
@@ -183,11 +183,7 @@ def register_buffer(self, name, attr):
     if type(attr) == torch.Tensor:
         if attr.device != devices.device:
-
-            if devices.has_mps():
-                attr = attr.to(device="mps", dtype=torch.float32)
-            else:
-                attr = attr.to(devices.device)
+            attr = attr.to(device=devices.device, dtype=(torch.float32 if devices.device.type == 'mps' else None))

     setattr(self, name, attr)
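
The register_buffer hijack above collapses the MPS special case into one .to() call: buffers are cast to float32 only when the target device is MPS (which has no float64 support), and dtype=None leaves the original dtype untouched everywhere else. A small illustrative helper (move_buffer is not part of the codebase) showing the same conditional cast:

import torch

def move_buffer(attr, target):
    # Same conditional as the diff above: downcast to float32 only on MPS,
    # keep the original dtype (dtype=None) on every other device.
    if type(attr) == torch.Tensor and attr.device != target:
        attr = attr.to(device=target, dtype=(torch.float32 if target.type == 'mps' else None))
    return attr

alphas = torch.linspace(0, 1, 10, dtype=torch.float64)
print(move_buffer(alphas, torch.device('cpu')).dtype)   # stays torch.float64
# On torch.device('mps') the same buffer would come back as torch.float32.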

modules/sd_samplers.py

Lines changed: 19 additions & 3 deletions
@@ -6,6 +6,7 @@
 from PIL import Image
 import inspect
 import k_diffusion.sampling
+import torchsde._brownian.brownian_interval
 import ldm.models.diffusion.ddim
 import ldm.models.diffusion.plms
 from modules import prompt_parser, devices, processing, images
@@ -364,7 +365,23 @@ def randn_like(self, x):
         if noise.shape == x.shape:
             return noise

-        return torch.randn_like(x)
+        if x.device.type == 'mps':
+            return torch.randn_like(x, device=devices.cpu).to(x.device)
+        else:
+            return torch.randn_like(x)
+
+
+# MPS fix for randn in torchsde
+def torchsde_randn(size, dtype, device, seed):
+    if device.type == 'mps':
+        generator = torch.Generator(devices.cpu).manual_seed(int(seed))
+        return torch.randn(size, dtype=dtype, device=devices.cpu, generator=generator).to(device)
+    else:
+        generator = torch.Generator(device).manual_seed(int(seed))
+        return torch.randn(size, dtype=dtype, device=device, generator=generator)
+
+
+torchsde._brownian.brownian_interval._randn = torchsde_randn


 class KDiffusionSampler:
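
The second half of this hunk monkey-patches torchsde's private _randn helper (used by the Brownian tree in the SDE samplers) so that, on MPS, each increment is drawn on the CPU with a seeded generator and then moved to the device; the else branch presumably matches torchsde's stock behaviour on other devices. A self-contained sketch of calling the replacement directly (a local cpu device stands in for devices.cpu):

import torch

cpu = torch.device('cpu')

def torchsde_randn(size, dtype, device, seed):
    # Same shape as the function patched in above: per-seed generator,
    # CPU draw plus transfer on MPS, native draw elsewhere.
    if device.type == 'mps':
        generator = torch.Generator(cpu).manual_seed(int(seed))
        return torch.randn(size, dtype=dtype, device=cpu, generator=generator).to(device)
    else:
        generator = torch.Generator(device).manual_seed(int(seed))
        return torch.randn(size, dtype=dtype, device=device, generator=generator)

n1 = torchsde_randn((3, 4), torch.float32, cpu, seed=42)
n2 = torchsde_randn((3, 4), torch.float32, cpu, seed=42)
assert torch.equal(n1, n2)  # same seed, same Brownian increment
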
@@ -415,8 +432,7 @@ def initialize(self, p):
         self.model_wrap.step = 0
         self.eta = p.eta or opts.eta_ancestral

-        if self.sampler_noises is not None:
-            k_diffusion.sampling.torch = TorchHijack(self.sampler_noises)
+        k_diffusion.sampling.torch = TorchHijack(self.sampler_noises if self.sampler_noises is not None else [])

         extra_params_kwargs = {}
         for param_name in self.extra_params:
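
With this change the TorchHijack proxy is installed unconditionally (with an empty noise list as the fallback), so k-diffusion's randn_like calls always go through the MPS-aware override added in the hunk above, not only when sampler noises have been queued. TorchHijack itself is defined elsewhere in this file; a toy sketch of the proxy shape, under the assumption that it forwards everything else to the real torch module:

import torch

class TorchHijackSketch:
    # Illustrative only; the real TorchHijack in modules/sd_samplers.py also
    # checks that a queued noise tensor matches the requested shape.
    def __init__(self, sampler_noises):
        self.sampler_noises = list(sampler_noises)

    def __getattr__(self, item):
        # Anything not overridden here is looked up on the real torch module.
        return getattr(torch, item)

    def randn_like(self, x):
        if self.sampler_noises:
            return self.sampler_noises.pop(0)
        if x.device.type == 'mps':
            return torch.randn_like(x, device=torch.device('cpu')).to(x.device)
        return torch.randn_like(x)

hijack = TorchHijackSketch([])
noise = hijack.randn_like(torch.zeros(2, 3))  # falls through to torch.randn_like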

modules/swinir_model.py

Lines changed: 1 addition & 5 deletions
@@ -13,10 +13,6 @@
 from modules.swinir_model_arch_v2 import Swin2SR as net2
 from modules.upscaler import Upscaler, UpscalerData

-precision_scope = (
-    torch.autocast if cmd_opts.precision == "autocast" else contextlib.nullcontext
-)
-

 class UpscalerSwinIR(Upscaler):
     def __init__(self, dirname):

@@ -112,7 +108,7 @@ def upscale(
         img = np.moveaxis(img, 2, 0) / 255
         img = torch.from_numpy(img).float()
         img = img.unsqueeze(0).to(devices.device_swinir)
-        with torch.no_grad(), precision_scope("cuda"):
+        with torch.no_grad(), devices.autocast():
             _, _, h_old, w_old = img.size()
             h_pad = (h_old // window_size + 1) * window_size - h_old
             w_pad = (w_old // window_size + 1) * window_size - w_old

modules/textual_inversion/dataset.py

Lines changed: 2 additions & 2 deletions
@@ -82,7 +82,7 @@ def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_to
             torchdata = torch.from_numpy(npimage).permute(2, 0, 1).to(device=device, dtype=torch.float32)
             latent_sample = None

-            with torch.autocast("cuda"):
+            with devices.autocast():
                 latent_dist = model.encode_first_stage(torchdata.unsqueeze(dim=0))

             if latent_sampling_method == "once" or (latent_sampling_method == "deterministic" and not isinstance(latent_dist, DiagonalGaussianDistribution)):

@@ -101,7 +101,7 @@ def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_to
             entry.cond_text = self.create_text(filename_text)

             if include_cond and not (self.tag_drop_out != 0 or self.shuffle_tags):
-                with torch.autocast("cuda"):
+                with devices.autocast():
                     entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)

             self.dataset.append(entry)

modules/textual_inversion/textual_inversion.py

Lines changed: 1 addition & 1 deletion
@@ -316,7 +316,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
                 if shared.state.interrupted:
                     break

-                with torch.autocast("cuda"):
+                with devices.autocast():
                     # c = stack_conds(batch.cond).to(devices.device)
                     # mask = torch.tensor(batch.emb_index).to(devices.device, non_blocking=pin_memory)
                     # print(mask)
