Skip to content

Commit 858ed79

Browse files
author
zhouwg
committed
ggml-qnn: add build script for libggmlop_skel.so
1 parent e3b4df2 commit 858ed79

File tree

4 files changed

+51
-19
lines changed

4 files changed

+51
-19
lines changed

ggml/src/ggml-qnn/kernels/Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Build script for libggmlop_skel.so.
#
# Usage:
#   make          compile the sources and link ${TARGET}
#   make clean    remove the intermediate object files
#
# HEXAGON_SDK_PATH and HEXAGON_TOOLS_VERSION may be overridden from the
# environment or on the make command line.

# Root of the Hexagon SDK. Assigned with ?= so that a value exported by the
# calling build script takes precedence; the default is the path this file
# originally hard-coded.
HEXAGON_SDK_PATH ?= /opt/qcom/Hexagon_SDK/6.2.0.1

# Toolchain release directory under ${HEXAGON_SDK_PATH}/tools/HEXAGON_Tools.
HEXAGON_TOOLS_VERSION ?= 8.8.06

# Compiler driver, used for both compiling and linking.
HEXAGON_CC = ${HEXAGON_SDK_PATH}/tools/HEXAGON_Tools/${HEXAGON_TOOLS_VERSION}/Tools/bin/hexagon-clang

TARGET=libggmlop_skel.so

INCS=-I${HEXAGON_SDK_PATH}/incs -I${HEXAGON_SDK_PATH}/libs/qprintf/inc -I${HEXAGON_SDK_PATH}/incs/stddef -I${HEXAGON_SDK_PATH}/ipc/fastrpc/incs -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rpcmem/inc -I${HEXAGON_SDK_PATH}/utils/examples -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rtld/ship/inc -I${HEXAGON_SDK_PATH}/libs/atomic/inc -I${HEXAGON_SDK_PATH}/utils/sim_utils/inc

CFLAGS=-mv75 -c -Ofast -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fdata-sections -fpic -D__V_DYNAMIC__ -mhvx -mhvx-length=128B ${INCS}

LDFLAGS=-mv75 -Wl,--defsym=ISDB_TRUSTED_FLAG=2 -Wl,--defsym=ISDB_SECURE_FLAG=2 -Wl,--no-threads -fpic -shared -Wl,-Bsymbolic -Wl,--wrap=malloc -Wl,--wrap=calloc -Wl,--wrap=free -Wl,--wrap=realloc -Wl,--wrap=memalign -lc -Wl,-soname=${TARGET}

SRCS = ggmlop_cdsp.c ggmlop_cdsp_skel.c
OBJS = $(patsubst %.c, %.o, $(SRCS))

# Neither name is a file produced by a recipe, so keep make from being
# confused by a stray file called "ALL" or "clean".
.PHONY: ALL clean

# Default target: make sure the shared library is up to date, then list it.
ALL: ${TARGET}
	@ls -l ${TARGET}

# Link all objects into the shared library.
${TARGET}: ${OBJS}
	${HEXAGON_CC} ${LDFLAGS} -o $@ -Wl,--start-group ${OBJS} -Wl,--end-group

# Compile one source file; __FILENAME__ is defined as the source file name
# (a quoted string literal). make echoes the command itself, followed by a
# blank separator line.
%.o:%.c
	${HEXAGON_CC} ${CFLAGS} -D__FILENAME__=\"$<\" -o $@ -c $<
	@printf '\n'

# Remove intermediate object files (the shared library is left in place).
clean:
	rm -f *.o

ggml/src/ggml-qnn/kernels/ggmlop_cdsp.c

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -349,27 +349,23 @@ static void ggml_compute_forward_add_f32(
349349
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
350350
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
351351

352-
const int ith = 0;
353-
const int nth = 1;
354-
355352
const int nr = ggml_nrows(src0);
356353

357354
GGML_TENSOR_BINARY_OP_LOCALS
358355

359356
GGML_ASSERT( nb0 == sizeof(float));
360357
GGML_ASSERT(nb00 == sizeof(float));
361358

362-
// rows per thread
363-
const int dr = (nr + nth - 1)/nth;
359+
const int dr = nr;
364360

365361
// row range for this thread
366-
const int ir0 = dr*ith;
362+
const int ir0 = 0;
367363
const int ir1 = MIN(ir0 + dr, nr);
368364

369365
ggml_dump_tensor(src0);
370366
ggml_dump_tensor(src1);
371367

372-
#if 1 //naive algorithm, can works with llama-cli
368+
#if 1 //naive algorithm for fp32, can works with llama-cli
373369
float * a = (float*)src0->data;
374370
float * b = (float*)src1->data;
375371
float * c = (float*)dst->data;
@@ -473,9 +469,6 @@ static void ggml_compute_forward_mul_mat_one_chunk(
473469
const int64_t r2 = ne12 / ne02;
474470
const int64_t r3 = ne13 / ne03;
475471

476-
//printf("ir0_start = %6lld, ir0_end = %6lld, ir1_start = %6lld, ir1_end = %6lld\n", ir0_start, ir0_end, ir1_start, ir1_end);
477-
478-
// threads with no work simply yield (not sure if it helps)
479472
if (ir0_start >= ir0_end || ir1_start >= ir1_end) {
480473
return;
481474
}
@@ -514,20 +507,12 @@ static void ggml_compute_forward_mul_mat_one_chunk(
514507

515508
const char * src0_row = (const char*)src0->data + (0 + i02 * nb02 + i03 * nb03);
516509

517-
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
518-
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
519-
// the original src1 data pointer, so we should index using the indices directly
520-
// TODO: this is a bit of a hack, we should probably have a better way to handle this
521510
const char * src1_col = (const char*)wdata +
522511
(src1_cont || src1->type != vec_dot_type
523512
? (i11 + i12 * ne11 + i13 * ne12 * ne11) * row_size
524513
: (i11 * nb11 + i12 * nb12 + i13 * nb13));
525514
float * dst_col = (float*)((char*)dst->data + (i1 * nb1 + i2 * nb2 + i3 * nb3));
526515

527-
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ++ir0) {
528-
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
529-
//}
530-
531516
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ir0 += num_rows_per_vec_dot) {
532517
vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot);
533518
}
@@ -574,7 +559,7 @@ int ggmlop_dsp_mulmat(remote_handle64 h, const ggml_tensor * src0, const ggml_te
574559
int M = src0->ne[1];
575560
int K = src0->ne[0];
576561
int N = src1->ne[1];
577-
float sum = 0;
562+
float sum = 0;
578563
for (int i = 0; i < M; i++) {
579564
for (int j = 0; j < N; j++) {
580565
sum = 0;
-13.4 KB
Binary file not shown.

scripts/build-run-android.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,15 @@ function show_pwd()
3333
}
3434

3535

36+
# Verify that the directory named by HEXAGON_SDK_PATH exists.
# Prints a hint and terminates the script with status 1 when it does not.
function check_hexagon_sdk()
{
    # The expansion must be quoted: unquoted, an empty or unset value
    # collapses the test to `[ ! -d ]`, which evaluates to false, so a
    # missing setting would slip through unnoticed.
    if [ ! -d "${HEXAGON_SDK_PATH}" ]; then
        echo -e "HEXAGON_SDK_PATH ${HEXAGON_SDK_PATH} not exist, pls install it accordingly...\n"
        # Non-zero status so that callers can tell the check failed.
        exit 1
    fi
}
43+
44+
3645
function check_and_download_qnn_sdk()
3746
{
3847
is_qnn_sdk_exist=1
@@ -98,6 +107,16 @@ function check_and_download_ndk()
98107
}
99108

100109

110+
# Rebuild the DSP-side kernels: run `make clean` followed by `make` inside
# ggml/src/ggml-qnn/kernels/ and then switch back to the previous directory.
# Returns 1 when the kernels directory cannot be entered, otherwise the
# exit status of `make`.
function build_dsp
{
    # Bail out before running make in the wrong directory.
    cd ggml/src/ggml-qnn/kernels/ || return 1
    show_pwd
    make clean
    make
    # Remember the build result; `cd -` below would otherwise overwrite it.
    local make_status=$?
    cd -
    return ${make_status}
}
118+
119+
101120
function build_arm64
102121
{
103122
cmake -H. -B./out/android -DCMAKE_BUILD_TYPE=Release -DGGML_OPENMP=OFF -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=latest -DCMAKE_C_FLAGS=-march=armv8.7-a -DGGML_QNN=ON -DQNN_SDK_PATH=${QNN_SDK_PATH} -DHEXAGON_SDK_PATH=${HEXAGON_SDK_PATH}
@@ -106,6 +125,8 @@ function build_arm64
106125
show_pwd
107126

108127
cd -
128+
129+
build_dsp
109130
}
110131

111132

@@ -158,6 +179,7 @@ function build_ggml_qnn()
158179
show_pwd
159180
check_and_download_ndk
160181
check_and_download_qnn_sdk
182+
check_hexagon_sdk
161183
dump_vars
162184
remove_temp_dir
163185
build_arm64
@@ -314,6 +336,7 @@ show_pwd
314336

315337
check_and_download_ndk
316338
check_and_download_qnn_sdk
339+
check_hexagon_sdk
317340

318341
if [ $# == 0 ]; then
319342
show_usage

0 commit comments

Comments
 (0)