@@ -1156,7 +1156,6 @@ namespace {
11561156 * across calls. This reduces overhead from repeated memory allocation and deallocation.
11571157 */
11581158static void weight_format_to_nz (ggml_tensor *tensor, const void *data, size_t offset) {
1159- std::vector<int64_t > weightTransposedShape = {tensor->ne [1 ], tensor->ne [0 ]};
11601159 aclTensor* weightTransposed = ggml_cann_create_tensor (tensor, tensor->ne ,
11611160 tensor->nb , 2 , ACL_FORMAT_ND, offset);
11621161 uint64_t workspaceSize = 0 ;
@@ -1197,11 +1196,11 @@ static void ggml_backend_cann_buffer_set_tensor(
11971196 // Why aclrtSynchronizeDevice?
11981197
11991198 // Only check env once.
1200- static bool wight_to_nz = parse_bool (get_env (" GGML_CANN_WEIGHT_NZ" ).value_or (" " ));
1199+ static bool weight_to_nz = parse_bool (get_env (" GGML_CANN_WEIGHT_NZ" ).value_or (" " ));
12011200 if (!need_transform (tensor->type )) {
12021201 ACL_CHECK (aclrtMemcpy ((char *)tensor->data + offset, size, data, size,
12031202 ACL_MEMCPY_HOST_TO_DEVICE));
1204- if (wight_to_nz && is_matmul_weight ((const ggml_tensor*)tensor)) {
1203+ if (weight_to_nz && is_matmul_weight ((const ggml_tensor*)tensor)) {
12051204 GGML_ASSERT (tensor->ne [2 ] == 1 );
12061205 GGML_ASSERT (tensor->ne [3 ] == 1 );
12071206 weight_format_to_nz (tensor, data, offset);
@@ -1440,7 +1439,7 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(
14401439 int64_t ne0 = tensor->ne [0 ];
14411440
14421441 // Only check env once.
1443- static bool wight_to_nz = parse_bool (get_env (" GGML_CANN_WEIGHT_NZ" ).value_or (" " ));
1442+ static bool weight_to_nz = parse_bool (get_env (" GGML_CANN_WEIGHT_NZ" ).value_or (" " ));
14441443
14451444 // last line must bigger than 32, because every single op deal at
14461445 // least 32 bytes.
@@ -1453,7 +1452,7 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(
14531452 size += ggml_row_size (
14541453 tensor->type , MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
14551454 }
1456- } else if (wight_to_nz && is_matmul_weight ((const ggml_tensor*)tensor)) {
1455+ } else if (weight_to_nz && is_matmul_weight ((const ggml_tensor*)tensor)) {
14571456 // NZ format weights do not support quantization yet.
14581457 // If ND tensor transform to NZ, size may changed.
14591458 int64_t shape[] = {tensor->ne [1 ], tensor->ne [0 ]};
@@ -2091,7 +2090,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(
20912090 (ggml_backend_cann_context*)backend->context ;
20922091
20932092 ggml_cann_set_device (cann_ctx->device );
2094- // release temp buffer create when load model .
2093 // release the temporary buffer created by set_tensor.
20952094 release_nz_workspace ();
20962095
20972096 for (int i = 0 ; i < cgraph->n_nodes ; i++) {
0 commit comments