Skip to content

Commit 6051578

Browse files
committed
ggml-hexagon: add mulmat_algotype for further usage
1 parent 477c0a3 commit 6051578

File tree

11 files changed

+55
-28
lines changed

11 files changed

+55
-28
lines changed

ggml/include/ggml-hexagon.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ GGML_BACKEND_API int ggml_backend_hexagon_get_device_count(void);
3838

3939
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
4040

41-
//attention:the following APIs are only available in the ggml-hexagon backend
4241
GGML_BACKEND_API const char * ggml_backend_hexagon_get_devname(size_t dev_num);
4342

44-
GGML_BACKEND_API void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_approach);
43+
GGML_BACKEND_API void ggml_backend_hexagon_set_cfg(int new_hexagon_backend, int new_hwaccel_approach);
44+
45+
GGML_BACKEND_API int ggml_backend_hexagon_get_mulmat_algotype(void);
4546

4647
#ifdef __cplusplus
4748
}

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ struct hexagon_appcfg_t {
342342
int profiler_duration; // threshold of duration in profiler, per seconds
343343
int profiler_counts; // threshold of counts in profiler
344344
int thread_counts; // thread_counts on cDSP side
345+
int mulmat_algotype; // algorithm type of mulmat on cDSP side
345346
const char * cfgfilename;
346347
const char * runtime_libpath;
347348
char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
@@ -367,6 +368,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
367368
.profiler_duration = 5, //seconds
368369
.profiler_counts = 100,
369370
.thread_counts = 4,
371+
.mulmat_algotype = 0,
370372
.cfgfilename = "ggml-hexagon.cfg",
371373
#if defined(__ANDROID__)
372374
#if defined(STANDARD_ANDROID_APP)
@@ -379,7 +381,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
379381
#elif defined(_WIN32)
380382
.qnn_runtimelib_path = "C:\\",
381383
#endif
382-
.ggml_hexagon_version = {"1.11"},
384+
.ggml_hexagon_version = {"1.12"},
383385
.ggml_dsp_version = {"0.63"},
384386
};
385387

@@ -1322,11 +1324,9 @@ class hexagon_perf {
13221324
// had to expose two public functions in the hexagon_profiler class
13231325
if (g_hexagon_profiler.profiler_get_frame_index() <= g_hexagon_profiler.profiler_get_threshold_count()) {
13241326
const char * devname = ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend);
1327+
//the logic here makes sense because hexagon_backend was already checked in ggml_backend_hexagon_device_init_backend
13251328
if (g_hexagon_appcfg.hexagon_backend != HEXAGON_BACKEND_GGML) {
1326-
//add this check for a special scenario: an invalid value passed from user's program
1327-
if (0 != memcmp(devname, "unknown", strlen("unknown"))) {
1328-
devname += 16;
1329-
}
1329+
devname += 16;
13301330
}
13311331
GGMLHEXAGON_LOG_VERBOSE("inference duration of %s through %s: %lld microseconds",
13321332
_perf_name.c_str(), devname, _duration);
@@ -2006,6 +2006,7 @@ static void ggmlhexagon_load_cfg() {
20062006
hexagoncfg_instance.get_intvalue("cdsp", "enable_rpc_ion_mempool", g_hexagon_appcfg.enable_rpc_ion_mempool, 0);
20072007
hexagoncfg_instance.get_intvalue("cdsp", "enable_all_q_mulmat", g_hexagon_appcfg.enable_all_q_mulmat, 0);
20082008
hexagoncfg_instance.get_intvalue("cdsp", "thread_counts", g_hexagon_appcfg.thread_counts, 4);
2009+
hexagoncfg_instance.get_intvalue("cdsp", "mulmat_algotype", g_hexagon_appcfg.mulmat_algotype, 0);
20092010

20102011
memcpy(g_hexagon_appcfg.ggml_dsp_version, ggmldsp_version.c_str(), strlen(ggmldsp_version.c_str()));
20112012

@@ -2053,7 +2054,7 @@ static void ggmlhexagon_load_cfg() {
20532054
initialized = true;
20542055
}
20552056

2056-
void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_approach) {
2057+
void ggml_backend_hexagon_set_cfg(int new_hexagon_backend, int new_hwaccel_approach) {
20572058
std::string cfg_filename = std::string(g_hexagon_appcfg.runtime_libpath) + std::string(g_hexagon_appcfg.cfgfilename);
20582059
GGMLHEXAGON_LOG_VERBOSE("load hexagon appcfg from %s", cfg_filename.c_str());
20592060
hexagon_appcfg hexagoncfg_instance;
@@ -2063,14 +2064,25 @@ void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_appro
20632064
hexagoncfg_instance.dump([](const std::string & section, const std::string & key, const std::string value) {
20642065
std::ostringstream tmposs;
20652066
tmposs << "section[" << std::setw(10) << std::left << section << "],[" << std::setw(25) << std::left << key << "] = [" << value << "]";
2067+
#if 0
20662068
if (ggmlhexagon_is_llamabench_running()) {
20672069
GGMLHEXAGON_LOG_VERBOSE("%s", tmposs.str().c_str());
20682070
} else {
20692071
GGMLHEXAGON_LOG_INFO("%s", tmposs.str().c_str());
20702072
}
2073+
#endif
2074+
GGMLHEXAGON_LOG_VERBOSE("%s", tmposs.str().c_str());
20712075
});
20722076
}
20732077

2078+
int ggml_backend_hexagon_get_mulmat_algotype() {
2079+
std::string cfg_filename = std::string(g_hexagon_appcfg.runtime_libpath) + std::string(g_hexagon_appcfg.cfgfilename);
2080+
hexagon_appcfg hexagoncfg_instance;
2081+
hexagoncfg_instance.load(cfg_filename);
2082+
hexagoncfg_instance.get_intvalue("cdsp", "mulmat_algotype", g_hexagon_appcfg.mulmat_algotype, 0);
2083+
return g_hexagon_appcfg.mulmat_algotype;
2084+
}
2085+
20742086
static bool ggmlhexagon_check_valid_appcfg() {
20752087
bool is_valid_appcfg = true;
20762088

@@ -5641,7 +5653,9 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
56415653
}
56425654
ggmlhexagon_probe_dspinfo(ctx);
56435655
//FIXME: re-use this function to pass thread_counts info to the code on the cDSP side until the qidl mechanism is fully understood
5644-
ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, 1, g_hexagon_appcfg.thread_counts);
5656+
//ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, 1, g_hexagon_appcfg.thread_counts);
5657+
//backward compatible with previous code on the cDSP side
5658+
ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, g_hexagon_appcfg.mulmat_algotype, g_hexagon_appcfg.thread_counts);
56455659
ggmlhexagon_set_rpc_latency(ctx->ggmlop_handle, RPC_POLL_QOS, 100);
56465660
int result = ggmlhexagon_init_rpcmempool(ctx);
56475661
if (0 != result) {
@@ -6427,7 +6441,10 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
64276441
if (dev_index < 0) {
64286442
GGMLHEXAGON_LOG_VERBOSE("it shouldn't happend\n");
64296443
//test-thread-safety might be running at the moment, or an invalid value was passed from the user's program
6430-
dev_index = 0;
6444+
dev_index = HEXAGON_BACKEND_QNNCPU; //0
6445+
}
6446+
if (dev_index > GGML_HEXAGON_MAX_DEVICES) {
6447+
dev_index = HEXAGON_BACKEND_GGML; //4
64316448
}
64326449
g_hexagon_appcfg.hexagon_backend = dev_index;
64336450
GGMLHEXAGON_LOG_VERBOSE("program specified dev_index %d\n", dev_index);

ggml/src/ggml-hexagon/kernels/skel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_open)(const char* uri, remote_
272272
* @retval, 0 on success, should always succeed
273273
*/
274274
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_close)(remote_handle64 h) __QAIC_HEADER_ATTRIBUTE;
275-
__QAIC_HEADER_EXPORT AEEResult __QAIC_HEADER(ggmlop_dsp_setclocks)(remote_handle64 _h, int32 power_level, int32 latency, int32 dcvs_enable, int32 threads) __QAIC_HEADER_ATTRIBUTE;
275+
__QAIC_HEADER_EXPORT AEEResult __QAIC_HEADER(ggmlop_dsp_setclocks)(remote_handle64 _h, int32 power_level, int32 latency, int32 mulmat_algotype, int32 thread_counts) __QAIC_HEADER_ATTRIBUTE;
276276
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_add)(remote_handle64 _h, const dsptensor* src0, const dsptensor* src1, dsptensor* dst) __QAIC_HEADER_ATTRIBUTE;
277277
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_mulmat)(remote_handle64 _h, const dsptensor* src0, const dsptensor* src1, dsptensor* dst) __QAIC_HEADER_ATTRIBUTE;
278278
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_softmax)(remote_handle64 _h, const dsptensor* src0, const dsptensor* src1, dsptensor* dst) __QAIC_HEADER_ATTRIBUTE;

ggml/src/ggml-hexagon/kernels/stub.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,9 @@ static __inline int _stub_method(remote_handle64 _handle, uint32_t _mid, uint32_
312312
}
313313
return _nErr;
314314
}
315-
__QAIC_STUB_EXPORT AEEResult __QAIC_STUB(ggmlop_dsp_setclocks)(remote_handle64 _handle, int32 power_level, int32 latency, int32 dcvs_enable, int32 threads) __QAIC_STUB_ATTRIBUTE {
315+
__QAIC_STUB_EXPORT AEEResult __QAIC_STUB(ggmlop_dsp_setclocks)(remote_handle64 _handle, int32 power_level, int32 latency, int32 mulmat_algotype, int32 threads) __QAIC_STUB_ATTRIBUTE {
316316
uint32_t _mid = 2;
317-
return _stub_method(_handle, _mid, (uint32_t*)&power_level, (uint32_t*)&latency, (uint32_t*)&dcvs_enable, (uint32_t*)&threads);
317+
return _stub_method(_handle, _mid, (uint32_t*)&power_level, (uint32_t*)&latency, (uint32_t*)&mulmat_algotype, (uint32_t*)&threads);
318318
}
319319
static __inline int _stub_unpack(_ATTRIBUTE_UNUSED remote_arg* _praROutPost, _ATTRIBUTE_UNUSED remote_arg* _ppraROutPost[1], _ATTRIBUTE_UNUSED void* _primROut, _ATTRIBUTE_UNUSED uint32_t _rout0[1], _ATTRIBUTE_UNUSED uint32_t _rout1[4], _ATTRIBUTE_UNUSED uint32_t _rout2[4], _ATTRIBUTE_UNUSED uint32_t _rout3[1], _ATTRIBUTE_UNUSED uint32_t _rout4[16], _ATTRIBUTE_UNUSED uint32_t _rout5[1], _ATTRIBUTE_UNUSED char* _rout6[1], _ATTRIBUTE_UNUSED uint32_t _rout6Len[1]) {
320320
int _nErr = 0;

scripts/build-run-android.sh

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -486,16 +486,15 @@ function run_llamacli()
486486
function run_llamabench()
487487
{
488488
prepare_run_on_phone llama-bench
489-
llamabench_running_params=" -ngl 99 -t 8 -n 256 "
490489

491490
echo "adb shell \"cd ${REMOTE_PATH} \
492491
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
493-
&& ${REMOTE_PATH}/llama-bench ${llamabench_running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}\""
494-
echo "${REMOTE_PATH}/llama-bench ${llamabench_running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}"
492+
&& ${REMOTE_PATH}/llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}\""
493+
echo "${REMOTE_PATH}/llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}"
495494

496495
adb shell "cd ${REMOTE_PATH} \
497496
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
498-
&& ${REMOTE_PATH}/llama-bench ${llamabench_running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}"
497+
&& ${REMOTE_PATH}/llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}"
499498

500499
}
501500

scripts/ggml-hexagon-for-binary-lib.cfg

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[general]
22
#version of ggml-hexagon.cpp on ARM-AP side
3-
version = "1.11"
3+
version = "1.12"
44
#version of ggml-dsp.c on cDSP side
55
ggmldsp_version = "0.97"
66

@@ -90,3 +90,6 @@ enable_all_q_mulmat = 1
9090
# 1 disable multi-threading on cDSP side
9191
# 2-8 thread_counts on cDSP side
9292
thread_counts = 8
93+
94+
#algorithm type of mulmat on cDSP side
95+
mulmat_algotype = 0

scripts/ggml-hexagon.cfg

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[general]
22
#version of ggml-hexagon.cpp on ARM-AP side
3-
version = "1.11"
3+
version = "1.12"
44
#version of ggml-dsp.c on cDSP side
55
ggmldsp_version = "0.63"
66

@@ -89,3 +89,6 @@ enable_all_q_mulmat = 0
8989
# 1 disable multi-threading on cDSP side
9090
# 2-8 thread_counts on cDSP side
9191
thread_counts = 1
92+
93+
#algorithm type of mulmat on cDSP side
94+
mulmat_algotype = 0

tests/ggmlhexagon-benchmark.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,10 +320,10 @@ int main(int argc, char * argv[]) {
320320
#ifdef GGML_USE_HEXAGON
321321
//avoid manually modifying ggml-hexagon.cfg
322322
if (n_backend_type >= HEXAGON_BACKEND_CDSP) {
323-
ggml_backend_set_hexagon_cfg(n_backend_type, HWACCEL_CDSP);
323+
ggml_backend_hexagon_set_cfg(n_backend_type, HWACCEL_CDSP);
324324
}
325325
if (n_backend_type < HEXAGON_BACKEND_CDSP) {
326-
ggml_backend_set_hexagon_cfg(n_backend_type, HWACCEL_QNN);
326+
ggml_backend_hexagon_set_cfg(n_backend_type, HWACCEL_QNN);
327327
}
328328
#endif
329329

@@ -457,7 +457,11 @@ int main(int argc, char * argv[]) {
457457
get_timestring(currenttime_string);
458458

459459
#ifdef GGML_USE_HEXAGON
460-
printf("[%s] duration of ut GGML_OP_%s with backend %s: %ld milliseconds\n", currenttime_string, ggml_op_name((enum ggml_op)n_ggml_op_type), ggml_backend_hexagon_get_devname(n_backend_type), n_duration);
460+
if (n_backend_type == HEXAGON_BACKEND_CDSP) {
461+
printf("[%s] duration of ut GGML_OP_%s with backend %s(algo type:%d): %ld milliseconds\n", currenttime_string, ggml_op_name((enum ggml_op)n_ggml_op_type), ggml_backend_hexagon_get_devname(n_backend_type), ggml_backend_hexagon_get_mulmat_algotype(), n_duration);
462+
} else {
463+
printf("[%s] duration of ut GGML_OP_%s with backend %s: %ld milliseconds\n", currenttime_string, ggml_op_name((enum ggml_op)n_ggml_op_type), ggml_backend_hexagon_get_devname(n_backend_type), n_duration);
464+
}
461465
#else
462466
printf("[%s] duration of ut GGML_OP_%s with the default ggml backend: %ld milliseconds\n", currenttime_string, ggml_op_name((enum ggml_op)n_ggml_op_type), n_duration);
463467
#endif

tests/test-thread-safety.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ int main(int argc, char ** argv) {
2626
}
2727
printf("backend %d\n", backend);
2828
if (backend >= HEXAGON_BACKEND_CDSP) {
29-
ggml_backend_set_hexagon_cfg(backend, HWACCEL_CDSP);
29+
ggml_backend_hexagon_set_cfg(backend, HWACCEL_CDSP);
3030
}
3131
if (backend < HEXAGON_BACKEND_CDSP) {
32-
ggml_backend_set_hexagon_cfg(backend, HWACCEL_QNN);
32+
ggml_backend_hexagon_set_cfg(backend, HWACCEL_QNN);
3333
}
3434
#endif
3535
common_params params;

tools/llama-bench/llama-bench.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,10 +1849,10 @@ int main(int argc, char ** argv) {
18491849
}
18501850
printf("backend %d\n", backend);
18511851
if (backend >= HEXAGON_BACKEND_CDSP) {
1852-
ggml_backend_set_hexagon_cfg(backend, HWACCEL_CDSP);
1852+
ggml_backend_hexagon_set_cfg(backend, HWACCEL_CDSP);
18531853
}
18541854
if (backend < HEXAGON_BACKEND_CDSP) {
1855-
ggml_backend_set_hexagon_cfg(backend, HWACCEL_QNN);
1855+
ggml_backend_hexagon_set_cfg(backend, HWACCEL_QNN);
18561856
}
18571857
#endif
18581858
// try to set locale for unicode characters in markdown

0 commit comments

Comments
 (0)