Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ggml/src/ggml-cann/aclnn_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2790,10 +2790,15 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
(char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,
output_ne_offset);

int64_t antiquantGroupSize = 0;
if (src0->ne[0] > QK8_0) {
antiquantGroupSize = QK8_0;
}

ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
acl_input_tensor, acl_weight_tensor, acl_scale_tensor, nullptr,
nullptr, nullptr, nullptr, QK8_0, acl_output_tensor,
nullptr, nullptr, nullptr, antiquantGroupSize, acl_output_tensor,
&workspaceSize, &executor));
if (workspaceAddr == nullptr) {
workspaceAddr = workspace_allocator.alloc(workspaceSize);
Expand Down Expand Up @@ -2833,7 +2838,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,

ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
acl_input_tensor, acl_weight_tensor, acl_scale_tensor,
nullptr, nullptr, nullptr, nullptr, QK8_0,
nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
acl_output_tensor, &workspaceSize, &executor));
ACL_CHECK(aclnnWeightQuantBatchMatmulV2(
workspaceAddr, workspaceSize, executor, ctx.stream()));
Expand Down
5 changes: 0 additions & 5 deletions ggml/src/ggml-cann/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1689,11 +1689,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
case GGML_OP_MUL_MAT: {
switch (op->src[0]->type) {
case GGML_TYPE_Q8_0:
// Current groupsize should not be greater than k-1 in
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
if (op->src[0]->ne[0] <= QK8_0) {
return false;
}
case GGML_TYPE_F16:
case GGML_TYPE_F32:
case GGML_TYPE_Q4_0:
Expand Down
Loading