Skip to content

Commit 157d59f

Browse files
committed
make the conditions for converting weights to NZ format consistent
1 parent 3d0d03a commit 157d59f

File tree

3 files changed

+33
-22
lines changed

3 files changed

+33
-22
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1791,7 +1791,7 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
17911791
#ifdef ASCEND_310P
17921792
weightToNZ = (getenv("GGML_CANN_WEIGHT_NZ") != nullptr);
17931793
#endif
1794-
if (weightToNZ && n_dims == 2) {
1794+
if (weightToNZ && is_matmul_weight(weight)) {
17951795
int64_t acl_stride[2] = {1, transpose_ne[1]};
17961796

17971797
// Reverse ne.

ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#ifndef CANN_ACLNN_OPS
2424
#define CANN_ACLNN_OPS
2525

26+
#include <unordered_set>
2627
#include <functional>
2728
#include <aclnnop/aclnn_abs.h>
2829
#include <aclnnop/aclnn_neg.h>
@@ -1020,6 +1021,37 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
10201021
*/
10211022
void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
10221023

1024+
/**
1025+
* @brief Check whether a tensor is a weight tensor for matrix multiplication.
1026+
*
1027+
* @details Checks whether the given tensor serves as weight parameters in matrix multiplication operations,
1028+
* typically within neural network layers. The function maintains a static set of canonical weight
1029+
* naming suffixes from Transformer-based architectures. Uses substring matching to identify weight
1030+
* tensors even with hierarchical naming patterns.
1031+
*
1032+
* @param tensor Pointer to the target ggml_tensor object (const-qualified).
1033+
*/
1034+
static bool is_matmul_weight(const ggml_tensor* tensor) {
1035+
std::string name = ggml_get_name(tensor);
1036+
static const std::unordered_set<std::string> weight_suffixes{
1037+
"output.weight",
1038+
"attn_q.weight",
1039+
"attn_k.weight",
1040+
"attn_v.weight",
1041+
"attn_output.weight",
1042+
"ffn_gate.weight",
1043+
"ffn_up.weight",
1044+
"ffn_down.weight"
1045+
};
1046+
1047+
for (const auto& suffix : weight_suffixes) {
1048+
if (name.find(suffix) != std::string::npos) {
1049+
return true;
1050+
}
1051+
}
1052+
return false;
1053+
}
1054+
10231055
/**
10241056
* @brief Applies a element-wise operation to two input tensors using the CANN
10251057
* backend.

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,27 +1116,6 @@ static enum ggml_status ggml_backend_cann_buffer_init_tensor(
11161116
return GGML_STATUS_SUCCESS;
11171117
}
11181118

1119-
static bool is_matmul_weight(const ggml_tensor* tensor) {
1120-
std::string name = ggml_get_name(tensor);
1121-
static const std::unordered_set<std::string> weight_suffixes{
1122-
"output.weight",
1123-
"attn_q.weight",
1124-
"attn_k.weight",
1125-
"attn_v.weight",
1126-
"attn_output.weight",
1127-
"ffn_gate.weight",
1128-
"ffn_up.weight",
1129-
"ffn_down.weight"
1130-
};
1131-
1132-
for (const auto& suffix : weight_suffixes) {
1133-
if (name.find(suffix) != std::string::npos) {
1134-
return true;
1135-
}
1136-
}
1137-
return false;
1138-
}
1139-
11401119
static int CreateAclTensorWeight(const void *hostData, const std::vector<int64_t> &shape, void **deviceAddr,
11411120
aclDataType dataType, aclTensor **tensor)
11421121
{

0 commit comments

Comments
 (0)