Skip to content

Commit 6ab0d64

Browse files
authored
vulkan: enable mmvq for q2_k on NVIDIA (#17675)
1 parent 93bb926 commit 6ab0d64

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_
6948 6948
// Quantization overhead is not worth it for small k
6949 6949
switch (device->vendor_id) {
6950 6950
case VK_VENDOR_ID_NVIDIA:
6951+
if (src0_type == GGML_TYPE_Q2_K) {
6952+
return true;
6953+
}
6954+
6951 6955
if (k <= 4096) {
6952 6956
return false;
6953 6957
}

0 commit comments

Comments
 (0)