@@ -1155,7 +1155,7 @@ namespace {
 * @note The workspace buffer used in this function is managed globally and reused
 * across calls. This reduces overhead from repeated memory allocation and deallocation.
 */
-static void weight_format_to_nz(ggml_tensor *tensor, const void *data, size_t offset) {
+static void weight_format_to_nz(ggml_tensor *tensor, size_t offset) {
     aclTensor* weightTransposed = ggml_cann_create_tensor(tensor, tensor->ne,
                                                           tensor->nb, 2, ACL_FORMAT_ND, offset);
     uint64_t workspaceSize = 0;
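
For readers unfamiliar with the reuse pattern the @note describes, here is a minimal standalone sketch of a globally managed workspace buffer that grows only on demand. The names (g_workspace, get_workspace) and the growth policy are assumptions for illustration, not the actual CANN implementation:

#include <cstddef>
#include <cstdlib>

namespace {
    void*  g_workspace      = nullptr;  // reused across calls
    size_t g_workspace_size = 0;        // current capacity in bytes

    // Return a buffer of at least `size` bytes, reallocating only when the
    // request exceeds capacity, so repeated calls avoid per-call malloc/free.
    void* get_workspace(size_t size) {
        if (size > g_workspace_size) {
            free(g_workspace);
            g_workspace      = malloc(size);
            g_workspace_size = (g_workspace != nullptr) ? size : 0;
        }
        return g_workspace;
    }
}
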
@@ -1203,7 +1203,7 @@ static void ggml_backend_cann_buffer_set_tensor(
         if (weight_to_nz && is_matmul_weight((const ggml_tensor*)tensor)) {
             GGML_ASSERT(tensor->ne[2] == 1);
             GGML_ASSERT(tensor->ne[3] == 1);
-            weight_format_to_nz(tensor, data, offset);
+            weight_format_to_nz(tensor, offset);
         }
     } else {
         void *transform_buffer = malloc(size);
@@ -2491,7 +2491,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             return true;
         case GGML_OP_SCALE:
             float bias;
-            memcpy(&bias, (float *) op->op_params + 1, sizeof(float));
+            memcpy(&bias, (const float *)(op->op_params) + 1, sizeof(float));
             return bias == 0.0f; // TODO: support bias != 0.0f
         case GGML_OP_SOFT_MAX:
             // TODO: support attention sinks [TAG_ATTN_SINKS]
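
The memcpy here follows ggml's usual convention: per-op parameters are packed into a raw int32_t array, and float values are read back via memcpy rather than a direct float dereference to sidestep strict-aliasing issues. The cast change also makes the source pointer const-correct, since `op` is const. A minimal sketch of the pattern, where fake_op and read_param_f32 are illustrative stand-ins, not ggml API:

#include <cstdint>
#include <cstring>

struct fake_op {               // stand-in for the tensor's op_params field
    int32_t op_params[16];
};

static float read_param_f32(const fake_op* op, int idx) {
    float v;
    // const-correct cast: op is const, so the source pointer must be too;
    // memcpy avoids type-punning through an incompatible pointer.
    memcpy(&v, (const float*)(op->op_params) + idx, sizeof(float));
    return v;
}
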
@@ -2534,7 +2534,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
                 return false;
             }
             float logitSoftcap = 0.0f;
-            memcpy(&logitSoftcap, (float *) op->op_params + 2, sizeof(float));
+            memcpy(&logitSoftcap, (const float *)(op->op_params) + 2, sizeof(float));
             if (logitSoftcap != 0.0f) {
                 return false;
             }
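
The logitSoftcap read is the same pattern as the GGML_OP_SCALE case above; with the hypothetical read_param_f32 helper from the earlier sketch, this check would reduce to:

// param slot 2 holds the logit softcap in this sketch's layout
float logitSoftcap = read_param_f32(op, 2);
if (logitSoftcap != 0.0f) {
    return false;  // the CANN path reports softcapped attention as unsupported
}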