Commit b32ba64

Author: zhouwg
Commit message: ggml-hexagon: sync with self-build branch
1 parent 657ccee, commit b32ba64

5 files changed: +54 / -44 lines


CMakeLists.txt
Lines changed: 17 additions & 7 deletions

@@ -8,17 +8,27 @@ set(CMAKE_WARN_UNUSED_CLI YES)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    set(CMAKE_VERBOSE_MAKEFILE ON)
     if(DEFINED HTP_ARCH_VERSION)
         if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
-            #works fine on Snapdragon 8Gen3&8Elite with 1.5x - 3x performance gains with the default ggml backend
-            set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -ffp-model=fast -fno-finite-math-only")
-            message("OPT_FLAG:${OPT_FLAG}")
-            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
-            set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
-            set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
+            #works fine on Snapdragon 8Gen3 & 8Elite
+            set(OPT_FLAG " -O3 -march=armv8.7-a+dotprod+fp16+i8mm -mcpu=cortex-x1 -mtune=cortex-x1 -ffp-model=fast -fno-finite-math-only")
+        else()
+            #should be works fine with mainstream mobile SoC
+            set(OPT_FLAG " -O3 -march=armv8.2-a+dotprod+fp16 -ffp-model=fast -fno-finite-math-only")
         endif()
+    else()
+        #should be works fine with mainstream mobile SoC
+        set(OPT_FLAG " -O3 -march=armv8.2-a+dotprod+fp16 -ffp-model=fast -fno-finite-math-only")
     endif()
+
+    message("OPT_FLAG:${OPT_FLAG}")
+    #ensure the same toolchain optimization for ggml-opencl, ggml-vulkan, ggml-hexagon on Android phone
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${DEBUG_FLAG} ${OPT_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${DEBUG_FLAG} ${OPT_FLAG}")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${DEBUG_FLAG} ${OPT_FLAG}")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${DEBUG_FLAG} ${OPT_FLAG}")
+
 endif()
 
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
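A hedged sketch of how these new top-level flags might be driven from the command line when cross-compiling for an Android phone. The NDK toolchain path, API level, build directory and the GGML_HEXAGON option name are placeholders assumed for illustration, not values taken from this commit:

    # configure an arm64 Android build; HTP_ARCH_VERSION steers the -march selection above
    # (v75/v79 -> armv8.7-a+dotprod+fp16+i8mm, anything else -> armv8.2-a+dotprod+fp16)
    # ANDROID_NDK, the API level and GGML_HEXAGON=ON are assumed placeholders
    cmake -S . -B build-android \
        -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
        -DANDROID_ABI=arm64-v8a \
        -DANDROID_PLATFORM=android-34 \
        -DGGML_HEXAGON=ON \
        -DHTP_ARCH_VERSION=v79
    cmake --build build-android -j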

ggml/src/ggml-hexagon/CMakeLists.txt
Lines changed: 0 additions & 5 deletions

@@ -49,11 +49,6 @@ if (${CHECK_HTP_ARCH} STREQUAL "WRONG")
 endif()
 
 #check optimization flags
-set(OPT_FLAG " ")
-if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
-    #works fine on Snapdragon 8Gen3&8Elite with 1.5x - 3x performance gains with the default ggml backend
-    set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -flto -D_GNU_SOURCE -fvectorize -ffp-model=fast -fno-finite-math-only")
-endif()
 message("OPT_FLAG:${OPT_FLAG}")
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Android")
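With the duplicated per-backend flag logic removed, the optimization flags now come only from the top-level CMakeLists.txt, and since CMAKE_EXPORT_COMPILE_COMMANDS is ON there, one quick way to confirm that ggml-opencl, ggml-vulkan and ggml-hexagon translation units really receive the same OPT_FLAG is to inspect the generated compile database; the build directory name below is only an assumption:

    # list the -march levels actually used across the build
    grep -o '\-march=[^" ]*' build-android/compile_commands.json | sort | uniq -c
    # count how many compile commands carry the shared fast-math setting
    grep -c 'ffp-model=fast' build-android/compile_commands.json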

ggml/src/ggml-hexagon/ggml-hexagon.cpp
Lines changed: 22 additions & 13 deletions

@@ -24,6 +24,14 @@
  * - GGML_OP_ADD & GGML_OP_MUL_MAT:
  *   this is a hwaccel skeleton, can expand other ggml ops accordingly
  *
+ * there is a key point in this PR:
+ * when hwaccel_approach is HWACCEL_QNN, there are 4 backends:
+ *     HEXAGON_BACKEND_QNNCPU, HEXAGON_BACKEND_QNNGPU, HEXAGON_BACKEND_QNNNPU, HEXAGON_BACKEND_GGML(the default ggml backend);
+ * when hwaccel_approach is HWACCEL_CDSP, there are 2 backends(which is exactly similar to ggml-opencl or ggml-vulkan):
+ *     HEXAGON_BACKEND_CDSP, HEXAGON_BACKEND_GGML.
+ *
+ * the reason for this is to facilitate the performance comparison between the cDSP approach and the QNN approach.
+ * accordingly, this PR not only support QNN-based approach but also support the cDSP based approach.
  */
 #include <stdio.h>
 #include <stdlib.h>

@@ -80,7 +88,6 @@
 #include "rpcmem.h"
 #include "remote.h"
 #include "os_defines.h"
-#include "domain.h"
 #include "AEEStdErr.h"
 #include "HAP_power.h"
 #include "HAP_farf.h"

@@ -377,7 +384,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
 #elif defined(_WIN32)
         .qnn_runtimelib_path = "C:\\",
 #endif
-        .ggml_hexagon_version = {"1.13"},
+        .ggml_hexagon_version = {"1.14"},
         .ggml_dsp_version = {"0.63"},
 };

@@ -888,29 +895,29 @@ static void ggmlhexagon_print_tensors_info(const char * func_name, const ggml_ba
     }
 
     if (nullptr != func_name && nullptr != ctx) {
-        GGMLHEXAGON_LOG_DEBUG("call %s in dev %s\n", func_name, ctx->name);
+        GGMLHEXAGON_LOG_VERBOSE("call %s in dev %s\n", func_name, ctx->name);
     }
     if (nullptr != src0) {
-        GGMLHEXAGON_LOG_DEBUG(
+        GGMLHEXAGON_LOG_VERBOSE(
                 "%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
                 src0->name,
                 src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
                 src0->ne[3],
                 src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
     }
     if (nullptr != src1) {
-        GGMLHEXAGON_LOG_DEBUG(
+        GGMLHEXAGON_LOG_VERBOSE(
                 "%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
                 src1->name,
                 src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
                 src1->ne[3],
                 src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
     }
-    GGMLHEXAGON_LOG_DEBUG("%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
+    GGMLHEXAGON_LOG_VERBOSE("%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
                 dst->name,
                 dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
                 dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3]);
-    GGMLHEXAGON_LOG_DEBUG("\n");
+    GGMLHEXAGON_LOG_VERBOSE("\n");
 }
 
 static void ggmlhexagon_dump_op_info(const struct ggml_tensor * tensor) {

@@ -921,7 +928,7 @@ static void ggmlhexagon_dump_op_info(const struct ggml_tensor * tensor) {
     const struct ggml_tensor * src0 = tensor->src[0];
     struct ggml_tensor * src1 = tensor->src[1];
     struct ggml_tensor * dst = const_cast<ggml_tensor *>(tensor);
-    GGMLHEXAGON_LOG_DEBUG("op name:%s, tensor type:%s", ggml_op_name(tensor->op), ggml_type_name(tensor->type));
+    GGMLHEXAGON_LOG_VERBOSE("op name:%s, tensor type:%s", ggml_op_name(tensor->op), ggml_type_name(tensor->type));
     ggmlhexagon_print_tensors_info(nullptr, nullptr, src0, src1, dst);
 }

@@ -939,7 +946,7 @@ static void ggmlhexagon_dump_tensor_elements(const ggml_tensor * tensor) {
                        << " ";
             }
             if (strlen(tmposs.str().c_str()) <= (GGMLHEXAGON_LOGBUF_LEN - 96)) {
-                GGMLHEXAGON_LOG_DEBUG("%s\n", tmposs.str().c_str());
+                GGMLHEXAGON_LOG_VERBOSE("%s\n", tmposs.str().c_str());
             }
             tmposs.clear();
             tmposs.str("");

@@ -948,7 +955,7 @@
         }
     }
 
-    GGMLHEXAGON_LOG_DEBUG("\n");
+    GGMLHEXAGON_LOG_VERBOSE("\n");
 }
 
 static void ggmlhexagon_dump_tensor(const ggml_tensor * tensor, const char * name) {

@@ -3375,7 +3382,7 @@ static void ggmlqnn_sdk_logcallback(const char * fmt,
     {
         std::lock_guard<std::mutex> lock(log_mutex);
         memset(s_ggmlqnn_sdk_logbuf, 0, GGMLHEXAGON_LOGBUF_LEN);
-        vsnprintf(reinterpret_cast<char *const>(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
+        vsnprintf(reinterpret_cast<char *>(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
         GGMLHEXAGON_LOG_DEBUG("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggmlqnn_sdk_logbuf);
     }
 #if !GGMLHEXAGON_DEBUG

@@ -4599,7 +4606,7 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_hexagon_context * ctx, ggml_ten
         //retrieve computational resource from cached QNN graph
         qnn_singlenode_res_t & graph_item = ctx->qnn_singlenode_graph_map[graph_name];
         graph_handle = std::get<0>(graph_item);
-        qnn_ptensors_t &tensors = std::get<1>(graph_item);
+        qnn_ptensors_t & tensors = std::get<1>(graph_item);
         p_tensor0 = tensors[0];
         p_tensor1 = tensors[1];
         p_tensor2 = tensors[2];

@@ -5849,6 +5856,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
     switch (op_tensor->op) {
         case GGML_OP_ADD:
         {
+            ggmlhexagon_dump_op_info(op_tensor);
            //TODO:workaround approach to fix HWACCEL_CDSP can't works in ASR inference and LLM inference
            //     with some LLM models in a standard Android APP
            if (ne00 < 1024) {

@@ -5927,8 +5935,9 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
         return false;
     }
 
-    if (ne00 < 32)
+    if (ne00 < 32) {
         return false;
+    }
 
     return ggmlhexagon_same_types(ctx, op_tensor);
 }
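The choice between the four-backend HWACCEL_QNN mode and the two-backend HWACCEL_CDSP mode described in the new header comment is made through the hwaccel_approach item in scripts/ggml-hexagon.cfg rather than at compile time. A rough sketch of switching it from the shell; the assumption that the value 2 corresponds to HWACCEL_CDSP is taken from the cfg defaults shown further below, and the on-device location of the cfg is also an assumption:

    # select the cDSP path (hwaccel_approach = 2 is the value shipped in the cfg,
    # assumed here to correspond to HWACCEL_CDSP)
    sed -i 's/^hwaccel_approach.*/hwaccel_approach = 2/' scripts/ggml-hexagon.cfg
    # push the updated cfg to the phone; the target path is assumed, not taken from this commit
    adb push scripts/ggml-hexagon.cfg /sdcard/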

scripts/build-run-android.sh
Lines changed: 13 additions & 16 deletions

@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # build llama.cpp + ggml-hexagon backend on Linux for Android phone equipped with Qualcomm Snapdragon mobile SoC
 # this script will setup local dev envs automatically

@@ -61,19 +61,7 @@ GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
 #v75 --- Snapdragon 8 Gen3
 #v79 --- Snapdragon 8 Elite
 
-#8Gen2
-#HTP_ARCH_VERSION=v73
-#HTP_ARCH_VERSION_a=V73
-
-#8Gen3
-#HTP_ARCH_VERSION=v75
-#HTP_ARCH_VERSION_a=V75
-
-#8Elite
-#HTP_ARCH_VERSION=v79
-#HTP_ARCH_VERSION_a=V79
-
-#modify the following two lines to adapt to test phone
+#modify the following two lines to suit the test phone
 HTP_ARCH_VERSION=v79
 HTP_ARCH_VERSION_a=V79

@@ -132,7 +120,7 @@ function check_and_download_hexagon_sdk()
         echo -e "minimal-hexagon-sdk-6.2.0.1.xz already exist\n"
     else
         echo -e "begin downloading minimal-hexagon-sdk-6.2.0.1.xz \n"
-        wget --no-config --quiet --show-progress -O ${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/minimal-hexagon-sdk-6.2.0.1.xz https://github.com/kantv-ai/toolchain/raw/refs/heads/main/minimal-hexagon-sdk-6.2.0.1.xz
+        wget --no-config --quiet --show-progress -O ${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/minimal-hexagon-sdk-6.2.0.1.xz https://github.com/zhouwg/toolchain/raw/refs/heads/main/minimal-hexagon-sdk-6.2.0.1.xz
         if [ $? -ne 0 ]; then
             printf "failed to download minimal-hexagon-sdk-6.2.0.1.xz\n"
             exit 1

@@ -168,8 +156,13 @@ function check_and_download_qnn_sdk()
         is_qnn_sdk_exist=0
     fi
 
+    if [ ! -f ${QNN_SDK_PATH}/NOTICE.txt ]; then
+        echo -e "${TEXT_RED}${QNN_SDK_PATH}/NOTICE.txt not exist${TEXT_RESET}\n"
+        is_qnn_sdk_exist=0
+    fi
+
     if [ ${is_qnn_sdk_exist} -eq 0 ]; then
-        if [ ! -f ${PROJECT_ROOT_PATH}/prebuild/v${QNN_SDK_VERSION}.zip ]; then
+        if [ ! -f ${PROJECT_ROOT_PATH}/prebuilts/v${QNN_SDK_VERSION}.zip ]; then
             wget --no-config --quiet --show-progress -O ${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/v${QNN_SDK_VERSION}.zip https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/${QNN_SDK_VERSION}/v${QNN_SDK_VERSION}.zip
         fi
         if [ $? -ne 0 ]; then

@@ -178,6 +171,10 @@ function check_and_download_qnn_sdk()
         fi
         cd ${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/
         unzip v${QNN_SDK_VERSION}.zip
+        if [ $? -ne 0 ]; then
+            printf "failed to decompress Qualcomm QNN SDK to %s \n" "${QNN_SDK_PATH}"
+            exit 1
+        fi
         printf "Qualcomm QNN SDK saved to ${QNN_SDK_PATH} \n\n"
         cd ${PROJECT_ROOT_PATH}
     else
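With the per-SoC presets removed, a phone other than a Snapdragon 8 Elite needs the two remaining variables edited before the script is run; for example, for a Snapdragon 8 Gen3 device (the v73/v75/v79 pairings come from the arch notes in this same script):

    # scripts/build-run-android.sh: adjust for the target SoC
    HTP_ARCH_VERSION=v75       # v73 = 8 Gen2, v75 = 8 Gen3, v79 = 8 Elite
    HTP_ARCH_VERSION_a=V75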

scripts/ggml-hexagon.cfg
Lines changed: 2 additions & 3 deletions

@@ -30,9 +30,8 @@ hwaccel_approach = 2
 # b. we can compare Hexagon NPU performance between HWACCEL_CDSP/QNNNPU/the default ggml backend accordingly
 
 
-#enable/disable offload quantized type mulmat
-#quatized type mulmat works fine through QNNNPU at the moment
-#quatized type mulmat doesn't works fine through HWACCEL_CDSP at the moment
+#enable/disable offload quantized mulmat
+#quantized mulmat works fine through QNNNPU at the moment
 #this item will make mulmat performance comprision easily
 enable_q_mulmat = 0
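Since quantized mulmat is reported to work through QNNNPU, someone benchmarking that path would flip this item to 1; a minimal sketch, assuming the cfg keeps the simple key = value layout shown above:

    # enable offload of quantized mulmat for a QNNNPU benchmark run
    sed -i 's/^enable_q_mulmat.*/enable_q_mulmat = 1/' scripts/ggml-hexagon.cfg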
