Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4916,7 +4916,7 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {
return
tensor->nb[0] == ggml_type_size(tensor->type) &&
tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@0cc4m do you recall where there is a check for dim3 here at all? Based on the function name it seems like it should only care about dims 0,1.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it should. I'm not 100% sure, but it was maybe related to multiple mul_mat calls or broadcasting. When this was written the mul_mat shader handled only the first two dimensions and was called multiple times to do the other dimensions.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remove the last part of the check, there are some failures in mul_mat tests. Maybe worth looking into, but I think this change is OK for now.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably because it falls back to dequant to fp16 + matmul in a few cases due to the third check.

(tensor->ne[3] == 1 || tensor->nb[3] == tensor->nb[2]*tensor->ne[2]);
}

static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) {
Expand Down Expand Up @@ -10350,10 +10350,6 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
// If there's not enough shared memory for row_ids and the result tile, fallback to CPU
return false;
}
// Check against size of shared memory variable
if (op->src[2]->ne[0] > 4096) {
return false;
}
}
switch (src0_type) {
case GGML_TYPE_F32:
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
void main() {
[[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
const uint i = gl_WorkGroupID.x * 256 + wgy;
if (i >= p.M * p.K / QUANT_K) {
if (i >= p.nel / QUANT_K) {
return;
}

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
void main() {
[[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
const uint i = uint(gl_WorkGroupID.x * 256 + wgy);
if (i >= p.M * p.K / QUANT_K) {
if (i >= p.nel / QUANT_K) {
return;
}

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
void main() {
[[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
const uint ib = gl_WorkGroupID.x * 256 + wgy;
if (ib >= p.M * p.K / QUANT_K) {
if (ib >= p.nel / QUANT_K) {
return;
}

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
void main() {
[[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
const uint ib = gl_WorkGroupID.x * 256 + wgy;
if (ib >= p.M * p.K / QUANT_K) {
if (ib >= p.nel / QUANT_K) {
return;
}

Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
void main() {
[[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
const uint i = gl_WorkGroupID.x * 256 + wgy;
if (i >= p.M * p.K / QUANT_K) {
if (i >= p.nel / QUANT_K) {
return;
}
const uint tid = gl_LocalInvocationID.x;
Expand Down
Loading