Skip to content

Commit dc60ede

Browse files
committed
CANN: Fix the bug build fail on Ascend310P under two cases:
1) Manual specify SOC_TYPE 2) Under some unusual compile environment
1 parent b742013 commit dc60ede

File tree

6 files changed

+54
-8
lines changed

6 files changed

+54
-8
lines changed

ggml/src/ggml-cann/CMakeLists.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@ if(NOT SOC_TYPE)
2222
detect_ascend_soc_type(SOC_VERSION)
2323
set(SOC_TYPE "${SOC_VERSION}")
2424
message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
25-
else()
26-
string(TOLOWER ${SOC_TYPE} SOC_VERSION)
2725
endif()
2826

29-
# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND310P.
27+
string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower
28+
29+
# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND_310P.
3030
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
3131
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
32+
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
3233

3334
if (CANN_INSTALL_DIR)
3435
# Only Support Linux.

ggml/src/ggml-cann/kernels/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@ ascendc_library(ascendc_kernels STATIC
2525
${SRC_FILES}
2626
)
2727

28-
message(STATUS "CANN: compile ascend kernels witch SOC_VERSION:${SOC_VERSION}.")
28+
message(STATUS "CANN: compile ascend kernels witch SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
2929
ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
3030
# ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)

ggml/src/ggml-cann/kernels/get_row_q4_0.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
// optimize me. Use template to avoid copy code.
44
using namespace AscendC;
5+
#ifdef ASCEND_310P // 310P not support 4bit get row
6+
extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
7+
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
8+
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
9+
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
10+
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
11+
printf("Ascend310P not support 4bit get row.\n");
12+
}
13+
#else
514

615
#define BUFFER_NUM 2
716

@@ -110,12 +119,9 @@ class GET_ROW_Q4_0 {
110119
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
111120

112121
// TODO: cast more data to speed up.
113-
#ifdef ASCEND_310P
114-
// TODO: 310P support quantification
115-
#else
116122
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
117123
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
118-
#endif
124+
119125
// Only mul need compile by group.
120126
half scale = scale_gm.GetValue(scale_offset);
121127

@@ -194,3 +200,5 @@ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
194200
indices_nb_ub, output_ne_ub, output_nb_ub);
195201
op.calculate();
196202
}
203+
204+
#endif // #ifdef ASCEND_310P

ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
#include "kernel_operator.h"
22

33
using namespace AscendC;
4+
#ifdef ASCEND_310P
5+
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
6+
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
7+
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
8+
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
9+
printf("Ascend310P not support f16->8bit quantization.\n");
10+
}
11+
#else
412

513
#define BUFFER_NUM 2
614
#define QK8_0 32
@@ -206,3 +214,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
206214
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
207215
op.calculate();
208216
}
217+
218+
#endif // #ifdef ASCEND_310P

ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
#include "kernel_operator.h"
22

33
using namespace AscendC;
4+
#ifdef ASCEND_310P // 310P not support f32->8bit quantization
5+
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
6+
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
7+
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
8+
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
9+
printf("Ascend310P not support f32->8bit quantization.\n");
10+
}
11+
#else
412

513
#define BUFFER_NUM 2
614
#define QK8_0 32
@@ -204,3 +212,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
204212
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
205213
op.calculate();
206214
}
215+
216+
#endif // #ifdef ASCEND_310P

ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
#include "kernel_operator.h"
22

33
using namespace AscendC;
4+
#ifdef ASCEND_310P // 310P not support float->4bit quantization
5+
extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
6+
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
7+
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
8+
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
9+
printf("Ascend310P not support f32->4bit quantization.\n");
10+
}
11+
12+
extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
13+
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
14+
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
15+
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
16+
printf("Ascend310P not support f16->4bit quantization.\n");
17+
}
18+
#else
419

520
#define BUFFER_NUM 2
621
#define Group_Size 32
@@ -276,3 +291,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
276291
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
277292
op.calculate();
278293
}
294+
295+
#endif // #ifdef ASCEND_310P

0 commit comments

Comments
 (0)