@@ -1155,7 +1155,7 @@ namespace {
11551155 * @note The workspace buffer used in this function is managed globally and reused
11561156 * across calls. This reduces overhead from repeated memory allocation and deallocation.
11571157 */
1158- static void weight_format_to_nz (ggml_tensor *tensor, const void *data, size_t offset) {
1158+ static void weight_format_to_nz (ggml_tensor *tensor, size_t offset) {
11591159 aclTensor* weightTransposed = ggml_cann_create_tensor (tensor, tensor->ne ,
11601160 tensor->nb , 2 , ACL_FORMAT_ND, offset);
11611161 uint64_t workspaceSize = 0 ;
@@ -1203,7 +1203,7 @@ static void ggml_backend_cann_buffer_set_tensor(
12031203 if (weight_to_nz && is_matmul_weight ((const ggml_tensor*)tensor)) {
12041204 GGML_ASSERT (tensor->ne [2 ] == 1 );
12051205 GGML_ASSERT (tensor->ne [3 ] == 1 );
1206- weight_format_to_nz (tensor, data, offset);
1206+ weight_format_to_nz (tensor, offset);
12071207 }
12081208 } else {
12091209 void *transform_buffer = malloc (size);
@@ -2491,7 +2491,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
24912491 return true ;
24922492 case GGML_OP_SCALE:
24932493 float bias;
2494- memcpy (&bias, (float *) op->op_params + 1 , sizeof (float ));
2494+ memcpy (&bias, (const float *)( op->op_params ) + 1 , sizeof (float ));
24952495 return bias == 0 .0f ; // TODO: support bias != 0.0f
24962496 case GGML_OP_SOFT_MAX:
24972497 // TODO: support attention sinks [TAG_ATTN_SINKS]
@@ -2534,7 +2534,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
25342534 return false ;
25352535 }
25362536 float logitSoftcap = 0 .0f ;
2537- memcpy (&logitSoftcap, ( float *) op->op_params + 2 , sizeof (float ));
2537+ memcpy (&logitSoftcap, ( const float *)( op->op_params ) + 2 , sizeof (float ));
25382538 if (logitSoftcap != 0 .0f ) {
25392539 return false ;
25402540 }
0 commit comments