Skip to content

Commit 78d7023

Browse files
committed
remove quant weight format to nz
1 parent 1d9d543 commit 78d7023

File tree

2 files changed

+2
-39
lines changed

2 files changed

+2
-39
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 2 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1930,37 +1930,14 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
19301930
int64_t output_ne_offset = 0;
19311931
int64_t output_ne[2] = {weight_ne[0], dst->ne[1]};
19321932

1933-
aclTensor* acl_weight_tensor;
1934-
aclTensor* acl_scale_tensor;
1935-
1936-
bool weightToNZ = false;
1937-
#ifdef ASCEND_310P
1938-
weightToNZ = (getenv("GGML_CANN_WEIGHT_NZ") != nullptr);
1939-
#endif
1940-
if (weightToNZ) {
1941-
int64_t acl_weight_stride[] = {weight_ne[1], 1};
1942-
std::vector<int64_t> storageDims = {weight_ne[0], weight_ne[1]};
1943-
acl_weight_tensor = aclCreateTensor(
1944-
weight_ne, 2, ggml_cann_type_mapping(type), acl_weight_stride,
1945-
weight_ne_offset / ggml_element_size(src0), ACL_FORMAT_FRACTAL_NZ, storageDims.data(), 2,
1946-
src0->data);
1947-
1948-
int64_t acl_scale_stride[] = {scale_ne[1], 1};
1949-
std::vector<int64_t> scaleStorageDims = {scale_ne[0], scale_ne[1]};
1950-
acl_scale_tensor = aclCreateTensor(
1951-
scale_ne, 2, ACL_FLOAT16, acl_scale_stride,
1952-
scale_ne_offset, ACL_FORMAT_ND, scaleStorageDims.data(), 2,
1953-
scale_offset + batch0 * scale_stride);
1954-
} else {
1955-
acl_weight_tensor = ggml_cann_create_tensor(
1933+
aclTensor* acl_weight_tensor = ggml_cann_create_tensor(
19561934
(char*)src0->data + batch0 * weight_stride,
19571935
ggml_cann_type_mapping(type), weight_elem_size, weight_ne,
19581936
weight_nb, 2, ACL_FORMAT_ND, weight_ne_offset);
1959-
acl_scale_tensor = ggml_cann_create_tensor(
1937+
aclTensor* acl_scale_tensor = ggml_cann_create_tensor(
19601938
scale_offset + batch0 * scale_stride, ACL_FLOAT16,
19611939
scale_elem_size, scale_ne, scale_nb, 2, ACL_FORMAT_ND,
19621940
scale_ne_offset);
1963-
}
19641941
aclTensor* acl_output_tensor = ggml_cann_create_tensor(
19651942
(char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
19661943
output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1158,8 +1158,6 @@ static int CreateAclTensorWeight(const void *hostData, const std::vector<int64_t
11581158
strides[i] = shape[i + 1] * strides[i + 1];
11591159
}
11601160

1161-
// std::vector<int64_t> storageShape;
1162-
// storageShape.push_back(size);
11631161
*tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND,
11641162
shape.data(), shape.size(), *deviceAddr);
11651163
return 0;
@@ -1169,13 +1167,9 @@ static void weight_format_to_nz(ggml_tensor *tensor, const void *data, size_t of
11691167
aclrtStream stream;
11701168
ACL_CHECK(aclrtCreateStream(&stream));
11711169

1172-
std::vector<int64_t> weightShape = {tensor->ne[0], tensor->ne[1]};
11731170
std::vector<int64_t> weightTransposedShape = {tensor->ne[1], tensor->ne[0]};
1174-
void *weightDeviceAddr = nullptr;
11751171
void *weightTransposedDeviceAddr = nullptr;
1176-
aclTensor *weight = nullptr;
11771172
aclTensor *weightTransposed = nullptr;
1178-
CreateAclTensorWeight(data, weightShape, &weightDeviceAddr, ggml_cann_type_mapping(tensor->type), &weight);
11791173
CreateAclTensorWeight(data, weightTransposedShape, &weightTransposedDeviceAddr,
11801174
ggml_cann_type_mapping(tensor->type), &weightTransposed);
11811175

@@ -1196,13 +1190,8 @@ static void weight_format_to_nz(ggml_tensor *tensor, const void *data, size_t of
11961190

11971191
aclrtMemcpy((char *)tensor->data + offset, size,
11981192
weightTransposedDeviceAddr, size, ACL_MEMCPY_HOST_TO_DEVICE);
1199-
ACL_CHECK(aclDestroyTensor(weight));
12001193
ACL_CHECK(aclDestroyTensor(weightTransposed));
1201-
aclrtFree(weightDeviceAddr);
12021194
aclrtFree(weightTransposedDeviceAddr);
1203-
if (workspaceSize > 0) {
1204-
aclrtFree(workspaceAddr);
1205-
}
12061195
}
12071196

12081197
// TODO: need handle tensor which has paddings.
@@ -1246,9 +1235,6 @@ static void ggml_backend_cann_buffer_set_tensor(
12461235
ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
12471236
transform_buffer, size,
12481237
ACL_MEMCPY_HOST_TO_DEVICE));
1249-
if (weightToNZ && is_matmul_weight((const ggml_tensor*)tensor)) {
1250-
weight_format_to_nz(tensor, transform_buffer, offset);
1251-
}
12521238
free(transform_buffer);
12531239
}
12541240
}

0 commit comments

Comments
 (0)