Skip to content

Commit 0123112

Browse files
authored
Merge branch 'main' into aobaseconfig
2 parents fd22426 + 4067d6c commit 0123112

File tree

2 files changed

+120
-3
lines changed

2 files changed

+120
-3
lines changed

src/diffusers/quantizers/gguf/utils.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,64 @@ def dequantize_blocks_BF16(blocks, block_size, type_size, dtype=None):
429429
return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32)
430430

431431

432+
# this part from calcuis (gguf.org)
433+
# more info: https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/quant2c.py
434+
435+
436+
def dequantize_blocks_IQ4_NL(blocks, block_size, type_size, dtype=None):
    """Dequantize GGUF IQ4_NL blocks into a floating-point tensor.

    Each block stores a float16 scale ``d`` (first 2 bytes) followed by
    packed 4-bit indices into a fixed non-linear 16-entry codebook.
    Returns ``d * codebook[index]`` per element, cast to ``dtype``.
    """
    n_blocks = blocks.shape[0]
    d, qs = split_block_dims(blocks, 2)
    d = d.view(torch.float16).to(dtype)

    # Unpack two 4-bit indices per byte: low nibble first, then high nibble.
    nibble_shifts = torch.tensor([0, 4], device=blocks.device, dtype=torch.uint8).reshape(1, 1, 2, 1)
    nibbles = (qs.reshape(n_blocks, -1, 1, block_size // 2) >> nibble_shifts) & 15
    indices = nibbles.reshape(n_blocks, -1).to(torch.int64).unsqueeze(-1)

    # Fixed non-linear quantization codebook (16 levels).
    codebook = torch.tensor(
        [-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113],
        dtype=torch.float32,
        device=blocks.device,
    ).view(1, 1, 16)

    # Gather codebook values for every 4-bit index, then apply the block scale.
    values = torch.gather(codebook.expand(indices.shape[0], indices.shape[1], 16), 2, indices)
    return d * values.squeeze(-1).to(dtype)
456+
def dequantize_blocks_IQ4_XS(blocks, block_size, type_size, dtype=None):
    """Dequantize GGUF IQ4_XS blocks into a floating-point tensor.

    Layout per block: a float16 super-scale ``d`` (2 bytes), packed
    high scale bits (2 bytes), packed low scale nibbles (``QK_K // 64``
    bytes), then 4-bit codebook indices. Each group of 32 values shares
    a 6-bit sub-scale reconstructed from the low/high bits (biased by 32).
    """
    n_blocks = blocks.shape[0]
    d, scales_h, scales_l, qs = split_block_dims(blocks, 2, 2, QK_K // 64)
    d = d.view(torch.float16).to(dtype)
    scales_h = scales_h.view(torch.int16)

    # Reassemble the per-group 6-bit scales: 4 low bits + 2 high bits, minus 32 bias.
    low_shifts = torch.tensor([0, 4], device=blocks.device, dtype=torch.uint8).reshape(1, 1, 2)
    high_shifts = torch.tensor(
        [2 * i for i in range(QK_K // 32)], device=blocks.device, dtype=torch.uint8
    ).reshape(1, -1, 1)
    low_bits = (scales_l.reshape(n_blocks, -1, 1) >> low_shifts).reshape(n_blocks, -1) & 0x0F
    high_bits = (scales_h.reshape(n_blocks, 1, -1) >> high_shifts).reshape(n_blocks, -1) & 0x03
    scales = (low_bits | (high_bits << 4)) - 32
    dl = (d * scales.to(dtype)).reshape(n_blocks, -1, 1)

    # Unpack two 4-bit codebook indices per byte (low nibble, then high nibble).
    nibble_shifts = torch.tensor([0, 4], device=blocks.device, dtype=torch.uint8).reshape(1, 1, 2, 1)
    indices = (qs.reshape(n_blocks, -1, 1, 16) >> nibble_shifts) & 15
    indices = indices.reshape(n_blocks, -1, 32).to(torch.int64).unsqueeze(-1)

    # Fixed non-linear quantization codebook (16 levels), shared with IQ4_NL.
    codebook = torch.tensor(
        [-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113],
        dtype=torch.float32,
        device=blocks.device,
    ).view(1, 1, 1, 16)
    values = torch.gather(
        codebook.expand(indices.shape[0], indices.shape[1], indices.shape[2], 16), 3, indices
    )
    values = values.squeeze(-1).to(dtype)

    # Apply per-group scales and flatten back to one row per block.
    return (dl * values).reshape(n_blocks, -1)
432486
GGML_QUANT_SIZES = gguf.GGML_QUANT_SIZES
433487
dequantize_functions = {
488+
gguf.GGMLQuantizationType.IQ4_NL: dequantize_blocks_IQ4_NL,
489+
gguf.GGMLQuantizationType.IQ4_XS: dequantize_blocks_IQ4_XS,
434490
gguf.GGMLQuantizationType.BF16: dequantize_blocks_BF16,
435491
gguf.GGMLQuantizationType.Q8_0: dequantize_blocks_Q8_0,
436492
gguf.GGMLQuantizationType.Q5_1: dequantize_blocks_Q5_1,

tests/pipelines/marigold/test_marigold_intrinsics.py

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
)
3535

3636
from ...testing_utils import (
37+
Expectations,
3738
backend_empty_cache,
3839
enable_full_determinism,
3940
floats_tensor,
@@ -416,7 +417,7 @@ def _test_marigold_intrinsics(
416417
expected_slice: np.ndarray = None,
417418
model_id: str = "prs-eth/marigold-iid-appearance-v1-1",
418419
image_url: str = "https://marigoldmonodepth.github.io/images/einstein.jpg",
419-
atol: float = 1e-4,
420+
atol: float = 1e-3,
420421
**pipe_kwargs,
421422
):
422423
from_pretrained_kwargs = {}
@@ -531,11 +532,41 @@ def test_marigold_intrinsics_einstein_f16_accelerator_G0_S1_P512_E1_B1_M1(self):
531532
)
532533

533534
def test_marigold_intrinsics_einstein_f16_accelerator_G0_S1_P768_E3_B1_M1(self):
535+
expected_slices = Expectations(
536+
{
537+
("xpu", 3): np.array(
538+
[
539+
0.62655,
540+
0.62477,
541+
0.62161,
542+
0.62452,
543+
0.62454,
544+
0.62454,
545+
0.62255,
546+
0.62647,
547+
0.63379,
548+
]
549+
),
550+
("cuda", 7): np.array(
551+
[
552+
0.61572,
553+
0.1377,
554+
0.61182,
555+
0.61426,
556+
0.61377,
557+
0.61426,
558+
0.61279,
559+
0.61572,
560+
0.62354,
561+
]
562+
),
563+
}
564+
)
534565
self._test_marigold_intrinsics(
535566
is_fp16=True,
536567
device=torch_device,
537568
generator_seed=0,
538-
expected_slice=np.array([0.61572, 0.61377, 0.61182, 0.61426, 0.61377, 0.61426, 0.61279, 0.61572, 0.62354]),
569+
expected_slice=expected_slices.get_expectation(),
539570
num_inference_steps=1,
540571
processing_resolution=768,
541572
ensemble_size=3,
@@ -545,11 +576,41 @@ def test_marigold_intrinsics_einstein_f16_accelerator_G0_S1_P768_E3_B1_M1(self):
545576
)
546577

547578
def test_marigold_intrinsics_einstein_f16_accelerator_G0_S1_P768_E4_B2_M1(self):
579+
expected_slices = Expectations(
580+
{
581+
("xpu", 3): np.array(
582+
[
583+
0.62988,
584+
0.62792,
585+
0.62548,
586+
0.62841,
587+
0.62792,
588+
0.62792,
589+
0.62646,
590+
0.62939,
591+
0.63721,
592+
]
593+
),
594+
("cuda", 7): np.array(
595+
[
596+
0.61914,
597+
0.6167,
598+
0.61475,
599+
0.61719,
600+
0.61719,
601+
0.61768,
602+
0.61572,
603+
0.61914,
604+
0.62695,
605+
]
606+
),
607+
}
608+
)
548609
self._test_marigold_intrinsics(
549610
is_fp16=True,
550611
device=torch_device,
551612
generator_seed=0,
552-
expected_slice=np.array([0.61914, 0.6167, 0.61475, 0.61719, 0.61719, 0.61768, 0.61572, 0.61914, 0.62695]),
613+
expected_slice=expected_slices.get_expectation(),
553614
num_inference_steps=1,
554615
processing_resolution=768,
555616
ensemble_size=4,

0 commit comments

Comments
 (0)