Skip to content

Commit 8bb2bba

Browse files
author
zhouwg
committed
project: sync with upstream(PR-14501:remove kompute backend)
1 parent 1245c4e commit 8bb2bba

File tree

7 files changed

+164
-159
lines changed

7 files changed

+164
-159
lines changed

ggml/include/ggml-hexagon.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ GGML_BACKEND_API void ggml_backend_hexagon_set_cfg(int new_hexagon
4444

4545
GGML_BACKEND_API int ggml_backend_hexagon_get_mulmat_algotype(void);
4646

47+
GGML_BACKEND_API void ggml_backend_hexagon_set_mulmat_algotype(int new_mulmat_algotype);
48+
4749
#ifdef __cplusplus
4850
}
4951
#endif

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ endif()
3232
#v69 --- Snapdragon 8 Gen1
3333
#v73 --- Snapdragon 8 Gen2
3434
#v75 --- Snapdragon 8 Gen3
35-
#v79 --- Snapdragon 8 Elite(aka Gen4)
35+
#v79 --- Snapdragon 8 Elite
3636
if(NOT DEFINED HTP_ARCH_VERSION)
3737
message(FATAL_ERROR "HTP_ARCH_VERSION not defined, valid htp arch: v68,v69,v73,v75,v79")
3838
endif()

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 151 additions & 107 deletions
Large diffs are not rendered by default.

ggml/src/ggml-hexagon/kernels/add.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ static void ggml_compute_forward_add_f32(
134134
GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );
135135
}
136136

137-
//FIXME: why failed with test-backend-ops when disable ion rpc mempool
138137
int ggmlop_dsp_add(remote_handle64 h, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
139138
GGMLHEXAGON_LOG_DEBUG("enter %s\n", __func__);
140139
ggml_compute_forward_add_f32(src0, src1, dst);

ggml/src/ggml-hexagon/kernels/mulmat.c

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,5 @@
11
#include "ggml-dsp.h"
22

3-
// 128 byte vectors
4-
#define VSIZE_BYTES 128
5-
#define VSIZE_WORDS VSIZE_BYTES/4
6-
7-
union ui32f { int32_t i; float f; };
8-
9-
// create a vector of floats from a float
10-
static __attribute__((always_inline)) HVX_Vector create_sfv_from_sf(float value) {
11-
union ui32f cvt;
12-
cvt.f = value;
13-
HVX_Vector tmp = Q6_V_vsplat_R(cvt.i);
14-
return tmp;
15-
}
16-
17-
// create a vector of qf32's from a float
18-
static __attribute__((always_inline)) HVX_Vector create_qf32v_from_sf(float value) {
19-
HVX_Vector tmp = Q6_Vqf32_vadd_Vqf32Vsf(Q6_V_vsplat_R(0), create_sfv_from_sf(value));
20-
return tmp;
21-
}
22-
23-
// convert qf32 vector to float vector
24-
static __attribute__((always_inline)) HVX_Vector convert_qf32v_to_fltv(HVX_Vector vect) {
25-
HVX_Vector tmp = Q6_Vsf_equals_Vqf32(vect);
26-
return tmp;
27-
}
28-
29-
// get lowest float from a vector of floats
30-
static __attribute__((always_inline)) float get_flt0_from_fltv(HVX_Vector vect) {
31-
union ui32f cvt;
32-
cvt.i = vect[0];
33-
return cvt.f;
34-
}
35-
36-
// get lowest float from a vector of qf32's
37-
static __attribute__((always_inline)) float get_flt0_from_qf32v(HVX_Vector vect) {
38-
union ui32f cvt;
39-
HVX_Vector tmp = convert_qf32v_to_fltv(vect);
40-
cvt.i = tmp[0];
41-
return cvt.f;
42-
}
43-
443
static void vec_dot_f32(int n, float *GGML_RESTRICT s, size_t bs, const float *GGML_RESTRICT x,
454
size_t bx, const float *GGML_RESTRICT y, size_t by, int nrc) {
465
assert(nrc == 1);
@@ -145,7 +104,6 @@ static void ggml_compute_forward_mul_mat_one_chunk(const ggml_tensor *src0, cons
145104
}
146105
}
147106

148-
//TODO: only support fp32 mulmat on cDSP
149107
static int ggmlop_dsp_mulmat_singlethread(remote_handle64 h, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
150108
GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
151109
ggmlhexagon_dump_tensor(src0, 0);
@@ -274,7 +232,6 @@ static int ggmlop_dsp_mulmat_singlethread(remote_handle64 h, const ggml_tensor *
274232
return 0;
275233
}
276234

277-
//TODO:multithreading mulmat
278235
static int ggmlop_dsp_mulmat_multithread(remote_handle64 h, const struct dsptensor * src0, const struct dsptensor * src1, dsptensor * dst) {
279236
GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
280237
GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );

scripts/build-run-android.sh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ PROJECT_ROOT_PATH=${PROJECT_HOME_PATH}
1414
HOST_CPU_COUNTS=`cat /proc/cpuinfo | grep "processor" | wc | awk '{print int($1)}'`
1515

1616
#running path on Android phone
17-
REMOTE_PATH=/data/local/tmp/
17+
REMOTE_PATH=/data/local/tmp
1818

1919
#Android NDK can be found at:
2020
#https://developer.android.com/ndk/downloads
@@ -31,9 +31,11 @@ QNN_SDK_VERSION=2.32.0.250228
3131
QNN_SDK_VERSION=2.33.0.250327
3232
QNN_SDK_VERSION=2.34.0.250424
3333
QNN_SDK_VERSION=2.35.0.250530
34+
QNN_SDK_VERSION=2.36.0.250627
3435
#fully official QNN SDK, will be downloaded automatically via this script
35-
QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.34.0.250424/
36-
QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.35.0.250530/
36+
QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.34.0.250424
37+
QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.35.0.250530
38+
QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.36.0.250627
3739

3840
#Qualcomm Hexagon SDK can be found at:
3941
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
@@ -42,8 +44,8 @@ HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
4244
#customized/tailored Hexagon SDK, derived from the official Hexagon SDK to simplify the workflow
4345
HEXAGON_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/6.2.0.1
4446

45-
#running_params="- ngl 99 -t 8 -n 256 --no-warmup -fa 1 "
46-
running_params=" -ngl 99 -t 8 -n 256 --no-warmup "
47+
#running_params=" -ngl 99 -t 4 -n 256 --no-warmup -fa 1 "
48+
running_params=" -ngl 99 -t 4 -n 256 --no-warmup "
4749

4850
######## part-2 ########
4951

@@ -52,8 +54,9 @@ PROMPT_STRING="introduce the movie Once Upon a Time in America briefly.\n"
5254
#1.12 GiB, will be downloaded automatically via this script
5355
GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
5456

55-
#ref: https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie
5657
#supported htp arch version:
58+
#v68 --- Snapdragon 888
59+
#v69 --- Snapdragon 8 Gen1
5760
#v73 --- Snapdragon 8 Gen2
5861
#v75 --- Snapdragon 8 Gen3
5962
#v79 --- Snapdragon 8 Elite

scripts/ggml-hexagon.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[general]
22
#version of ggml-hexagon.cpp on ARM-AP side
3-
version = "1.12"
3+
version = "1.13"
44
#version of ggml-dsp.c on cDSP side
55
ggmldsp_version = "0.63"
66

0 commit comments

Comments
 (0)