Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
201 commits
Select commit Hold shift + click to select a range
f190e4f
ggml-qnn: add Qualcomm QNN backend for GGML
jeffzhou2000 Feb 14, 2025
e3a54bf
ggml-qnn: santiy check
jeffzhou2000 Feb 15, 2025
824ac72
ggml-qnn: update script build-run-android.sh to compare peformance of…
jeffzhou2000 Feb 16, 2025
428a6f4
ggml-qnn: fix minor issue in test-backend-ops.cpp
jeffzhou2000 Feb 17, 2025
1763ec5
ggml-qnn: merge QNN RPC feature from https://github.com/zhouwg/kantv/…
jeffzhou2000 Feb 18, 2025
db2985a
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 18, 2025
0ade459
ggml-qnn: a concise approach to offload mulmat to QNN backend(sync fr…
jeffzhou2000 Feb 19, 2025
27af242
ggml-qnn: remove redundant codes
jeffzhou2000 Feb 20, 2025
3251453
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 20, 2025
1dfb4d6
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 20, 2025
fffb616
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 21, 2025
9b54d7b
ggml-qnn: add Qualcomm QNN backend for GGML
jeffzhou2000 Feb 14, 2025
8cfe5e3
ggml-qnn: merge QNN RPC feature from https://github.com/zhouwg/kantv/…
jeffzhou2000 Feb 18, 2025
496a5fa
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 18, 2025
386010a
ggml-qnn: a concise approach to offload mulmat to QNN backend(sync fr…
jeffzhou2000 Feb 19, 2025
b8b3310
ggml-qnn: remove redundant codes
jeffzhou2000 Feb 20, 2025
6f777fd
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 20, 2025
a6dd130
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 20, 2025
5d8e9b9
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
jeffzhou2000 Feb 21, 2025
27d165e
ggml-qnn: fix a minior typo in internal doc
jeffzhou2000 Feb 23, 2025
f3b8449
ggml-qnn: refine function ggml_qnn_create_general_tensor() to avoid c…
jeffzhou2000 Feb 23, 2025
398fea7
ggml-qnn: fix a minor typo in source code
jeffzhou2000 Feb 24, 2025
c81d308
build: avoid ggml-qnn backend breaking other backend's builds
jeffzhou2000 Feb 24, 2025
122df52
ggml-qnn: remove redundant codes to make PR reviewers happy
jeffzhou2000 Feb 25, 2025
bd9a573
ggml-qnn: refine code format
jeffzhou2000 Feb 25, 2025
1ecc448
ggml-qnn: offload quantized type mulmat to QNN backend
jeffzhou2000 Feb 26, 2025
ba0878d
ggml-qnn: refine source code structure to make code more clearly
jeffzhou2000 Feb 27, 2025
1f9c1e3
ggml-qnn: enable release build with necessary logs to make reviewers …
jeffzhou2000 Feb 27, 2025
a3bf24f
ggml-qnn: enable all quantize type with 2d mulmat
jeffzhou2000 Feb 27, 2025
d2bb13f
ggml-qnn: enable log output of GGMLQNN_LOG_INFO in command line mode …
jeffzhou2000 Feb 28, 2025
cff7b15
ggml-qnn: Windows port --- step2
jeffzhou2000 Feb 28, 2025
400fca5
ggml-qnn: merge UT code and corresponding script from local dev branc…
jeffzhou2000 Mar 2, 2025
d9bdb1b
ggml-qnn: merge ggml_qnn_mul_mat_4d from local dev branch to make wor…
jeffzhou2000 Mar 2, 2025
0b58e18
ggml-qnn: submit AI-assisted ggml_qnn_mul_mat_4d(not worked currently…
jeffzhou2000 Mar 2, 2025
8270d2a
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step2
jeffzhou2000 Mar 2, 2025
5c83e24
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step3
jeffzhou2000 Mar 2, 2025
3a403e2
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step4
jeffzhou2000 Mar 2, 2025
1290704
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step5
jeffzhou2000 Mar 2, 2025
bc9205e
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step6
jeffzhou2000 Mar 2, 2025
5fd1301
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step7
jeffzhou2000 Mar 2, 2025
d39fd59
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step8
jeffzhou2000 Mar 2, 2025
4201db1
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- good in step9
jeffzhou2000 Mar 2, 2025
a0dda86
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- narrow down t…
jeffzhou2000 Mar 2, 2025
be1a168
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step10
jeffzhou2000 Mar 2, 2025
d9dd12b
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- narrow down t…
jeffzhou2000 Mar 2, 2025
ef7aea8
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- step11
jeffzhou2000 Mar 2, 2025
5cc37e9
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 --- both ok in st…
jeffzhou2000 Mar 2, 2025
b343a2b
ggml-qnn: AI-assisted ggml_qnn_mul_mat_4d by Grok 3 ---finalizing ver…
jeffzhou2000 Mar 2, 2025
d012bd0
ggml-qnn: refine ggml_qnn_mul_mat and ggml_qnn_general_node according…
jeffzhou2000 Mar 2, 2025
bf94c3d
ggml-qnn: remove no-needed comments
jeffzhou2000 Mar 2, 2025
5b9ae53
ggml-qnn: Windows port --- step3
jeffzhou2000 Mar 3, 2025
4e2f9c1
ggml-qnn: remove un-needed function
jeffzhou2000 Mar 4, 2025
a682c87
ggml-qnn:rebase to upstream
jeffzhou2000 Mar 4, 2025
b19e1da
ggml-qnn: fix a minior issue during rebase to upstream
jeffzhou2000 Mar 4, 2025
3739278
ggml-qnn: update script according to https://github.com/ggml-org/llam…
jeffzhou2000 Mar 4, 2025
2a756b7
ggml-qnn: fix a minior issue in ggmlqnn_create_general_tensor()
jeffzhou2000 Mar 4, 2025
50cad73
ggml-qnn: active member variable _device_id in class qnn_instance
jeffzhou2000 Mar 4, 2025
46a6401
ggml-qnn: refine ggml_qnn_general_node and ggml_qnn_mul_mat to make c…
jeffzhou2000 Mar 4, 2025
e5bdf7f
ggml-qnn: Windows port --- step4
jeffzhou2000 Mar 6, 2025
9a62f25
ggml-qnn: Windows port -- step5
jeffzhou2000 Mar 7, 2025
d4792e3
ggml-qnn: WoA(Windows on ARM) -- step6
jeffzhou2000 Mar 8, 2025
b34012d
ggml-qnn: rebase to upstream
jeffzhou2000 Mar 9, 2025
018aa94
ggml-qnn: pr to upstream
jeffzhou2000 Mar 11, 2025
b1fbc80
ggml-qnn: rebase to upstream
jeffzhou2000 Mar 18, 2025
4e7cfab
ggml-qnn: self code-review
jeffzhou2000 Mar 18, 2025
017311a
ggml-qnn: rebase upstream
jeffzhou2000 Mar 19, 2025
c12303b
ggml-qnn: add approach through Hexagon cDSP
jeffzhou2000 Mar 22, 2025
b47816b
ggml-qnn: refine general approach through Hexagon cDSP
jeffzhou2000 Mar 23, 2025
825b7e0
ggml-qnn: refine the entire ggml-qnn.cpp to make code more clear
jeffzhou2000 Mar 24, 2025
97a87db
ggml-qnn: refine the entire ggml-qnn.cpp to make code more clear
jeffzhou2000 Mar 24, 2025
5000e9b
ggml-qnn: add build script for libggmlop_skel.so
jeffzhou2000 Mar 24, 2025
9df1b78
ggml-qnn: remove redundant functions in this PR and make codes more c…
jeffzhou2000 Mar 25, 2025
f0ca5f3
ggml-qnn: original ggml_compute_forward_add and ggml_compute_forward_…
jeffzhou2000 Mar 25, 2025
afe6df5
ggml-qnn: modify build-run-android.sh to verify mulmat and validate m…
jeffzhou2000 Mar 25, 2025
0545297
ggml-qnn: make host code(ggml-qnn.cpp) more clear and more stable
jeffzhou2000 Mar 26, 2025
b10b07c
ggml-qnn: refine code according to self code-review and make code mor…
jeffzhou2000 Mar 26, 2025
934b6c0
ggml-qnn: offload more ggml op to Hexagon cDSP
jeffzhou2000 Mar 27, 2025
afeaa5f
ggml-hexagon: code on AP(arm-cpu) side is stable now
jeffzhou2000 Mar 28, 2025
6ac5531
ggml-hexagon: optimize GGML_OP_ADD on cDSP side
jeffzhou2000 Mar 28, 2025
b0ddd56
ggml-hexagon: simplify hexagon-kernel build logic in CMakeLists.txt
jeffzhou2000 Mar 29, 2025
cfa29bf
ggml-hexagon: release ggml-hexagon v0.98
jeffzhou2000 Mar 29, 2025
100b7cc
ggml-hexagon: release ggml-hexagon v0.99
jeffzhou2000 Mar 29, 2025
4bc9841
ggml-hexagon: try to offload q6_k mulmat to cDSP
jeffzhou2000 Mar 29, 2025
5bf4eb3
ggml-hexagon: fix minior issue in ggml-hexagon.cpp after self code-re…
jeffzhou2000 Mar 29, 2025
fcc4ae5
ggml-hexagon: check validation of ggml-hexagon.cfg before create appr…
jeffzhou2000 Mar 30, 2025
05e26c3
ggml-hexagon: fix all compiler warnings in ggml-hexagon.cpp
jeffzhou2000 Mar 30, 2025
e3891d7
ggml-hexagon: enable only one backend device for HWACCEL_CDSP and ena…
jeffzhou2000 Mar 31, 2025
6823e37
ggml-hexagon: rpc ion memory pool and test-backend-ops works fine in …
jeffzhou2000 Mar 31, 2025
5fb5653
ggml-hexagon: make comprision of mulmat performance between HWACCEL_Q…
jeffzhou2000 Mar 31, 2025
7c461a1
ggml-hexagon: release ggml-hexagon v1.00
jeffzhou2000 Mar 31, 2025
2a400b2
ggml-hexagon: rebase to upstream
jeffzhou2000 Apr 1, 2025
61ac739
ggml-hexagon: check configuration of enable_rpc_dma_mempool in functi…
jeffzhou2000 Apr 1, 2025
0d0f42d
ggml-hexagon: uniform rpc_ion_memsize and rpc_ion_usage between HWACC…
jeffzhou2000 Apr 1, 2025
bc3291f
ggml-hexagon: make buffer mechanism more clear in HWACCEL_CDSP approach
jeffzhou2000 Apr 1, 2025
24449f9
ggml-hexagon: add perf function in hexagon kernerls on cDSP side
jeffzhou2000 Apr 2, 2025
32fd1d1
ggml-hexagon: fix a stupid issue of why set rpc latency failure and i…
jeffzhou2000 Apr 2, 2025
1aac91e
ggml-hexagon: make helper function ggmlhexagon_get_timestring() threa…
jeffzhou2000 Apr 2, 2025
e5e85f4
ggml-hexagon: fix a typo in ggml-hexagon.cpp
jeffzhou2000 Apr 2, 2025
6410ca1
ggml-hexagon: list all known todo and fixme tasks in ggml-hexagon.cpp
jeffzhou2000 Apr 2, 2025
2b7ba2a
ggml-hexagon: fix units MB -> MiB
jeffzhou2000 Apr 2, 2025
1525e3c
ggml-hexagon: try to make ggml-hexagon backend works fine in a standa…
jeffzhou2000 Apr 3, 2025
47182e6
ggml-hexagon: remove reduament code and make debug log more clear
jeffzhou2000 Apr 3, 2025
82d97ba
ggml-hexagon: add gemma-3-4b-it-Q8_0.gguf to verify q8_0 mulmat on cDSP
jeffzhou2000 Apr 3, 2025
9911718
ggml-hexagon:add skeleton code of offload GGML_OP_SOFT_MAX/GGML_OP_RM…
jeffzhou2000 Apr 3, 2025
3650e8e
ggml-hexagon: release ggml-dsp v0.60 on cDSP side
jeffzhou2000 Apr 4, 2025
8af502d
ggml-hexagon: merge build logic in kernels/Makefile to ggml-hexagon/C…
jeffzhou2000 Apr 5, 2025
5c62b4a
ggml-hexagon: fix a typo in ggml-hexagon.cpp
jeffzhou2000 Apr 5, 2025
7b7aa34
ggml-hexagon: uniform NDEBUG usage in ggml-hexagon.cpp and ggml-dsp.c
jeffzhou2000 Apr 6, 2025
a6c1d70
ggml-hexagon: add profiler feature for purpose of visualize NPU perfo…
jeffzhou2000 Apr 7, 2025
1601870
ggml-hexagon: remove so-called dma memory pool to avoid confusion and…
jeffzhou2000 Apr 8, 2025
3067d01
ggml-hexagon: make function ggmlhexagon_init_rpcmempool in ggml-hexag…
jeffzhou2000 Apr 8, 2025
437d0f5
ggml-hexagon: fix potential resource leak in class hexagon_profiler
jeffzhou2000 Apr 8, 2025
91809e3
ggml-hexagon: enable multi-threading feature on cDSP side
jeffzhou2000 Apr 8, 2025
af890f5
ggml-hexagon: upgrade QNN SDK to v2.33.0.250327
jeffzhou2000 Apr 9, 2025
bbc9c5d
ggml-hexagon: fix typo in ggml-hexagon.cpp
jeffzhou2000 Apr 9, 2025
1774c48
ggml-dsp: probe QuRT RTOS information in function ggmlop_dsp_open
jeffzhou2000 Apr 9, 2025
e9be755
ggml-hexagon: setting enable_rpc_ion_mempool to 1 and make test-backe…
jeffzhou2000 Apr 10, 2025
d591b1a
ggml-hexagon: check whether user's specified htp arch is valid in CMa…
jeffzhou2000 Apr 10, 2025
cf3c263
ggml-hexagon: sync with upstream
jeffzhou2000 Apr 11, 2025
9553af2
ggml-hexagon: refine pinned-memory feature
jeffzhou2000 Apr 11, 2025
d8c0d91
ggml-hexagon: refine build system in ggml-hexagon
jeffzhou2000 Apr 11, 2025
bc21f63
ggml-hexagon: remove redundant code in struct ggml_backend_hexagon_bu…
jeffzhou2000 Apr 11, 2025
bcc5875
ggml-hexagon: upgrade Android NDK to android-ndk-r28
jeffzhou2000 Apr 11, 2025
4250d99
ggml-dsp: split ggml-dsp.c into multiple files and cleanup
jeffzhou2000 Apr 11, 2025
36b56b5
ggml-dsp: refine ggml-dsp and make ggml-dsp more clear
jeffzhou2000 Apr 12, 2025
35fce70
ggml-hexagon: fix a minior issue in dev ops
jeffzhou2000 Apr 12, 2025
9e4398d
ggml-hexagon: fix a build issue in CI
jeffzhou2000 Apr 12, 2025
771ed7c
ggml-dsp: cleanup code
jeffzhou2000 Apr 15, 2025
cc1b7b0
ggml-hexagon: sync with upstream
jeffzhou2000 Apr 15, 2025
41e0ac8
ggml-dsp: cleanup code
jeffzhou2000 Apr 16, 2025
878d8b0
ggml-dsp:refine ggmlhexagon_dsp_add_f32
jeffzhou2000 Apr 16, 2025
84b64a1
ggml-dsp: refine logic of thread_counts
jeffzhou2000 Apr 17, 2025
9ce5af4
ggml-hexagon: release v1.06 and ready for code review
jeffzhou2000 Apr 17, 2025
07332e8
ggml-dsp: make GGML_OP_ADD more faster on cDSP side
jeffzhou2000 Apr 19, 2025
0d76478
ggml-hexagon: sync from project kantv(make ggml-hexagon backend can w…
jeffzhou2000 Apr 24, 2025
f3e6720
sync with upstream llama.cpp and sync ggml-hexagon.cpp from project k…
jeffzhou2000 Apr 29, 2025
9e079a6
sync with upstream
jeffzhou2000 May 7, 2025
302b4ab
sync with upstream
jeffzhou2000 May 10, 2025
c156d3c
ggml-hexagon: upgrade QNN SDK to v2.34.0.250424
jeffzhou2000 May 11, 2025
2dc9656
sync with upstream
jeffzhou2000 May 16, 2025
490bf8b
ggml-hexagon: sync from project kantv(fix a long-term issue which int…
jeffzhou2000 May 17, 2025
417bc96
ggml-hexagon: sync with upstream llama.cpp
jeffzhou2000 May 23, 2025
ef6858a
build: enable self-contained-build to simplify workflow
jeffzhou2000 May 23, 2025
c377836
sync with upstream
jeffzhou2000 May 23, 2025
92ea30b
add prebuilt binary libggmlop-skel.so
jeffzhou2000 May 31, 2025
0a4cfcd
refine ggml-hexagon.cfg for the prebuilt binary libggmlop-skel.so
jeffzhou2000 May 31, 2025
e6d5450
refine scripts to avoid confusion
jeffzhou2000 Jun 1, 2025
2114445
ggml-hexagon: add set_hexagon_cfg(int new_hexagon_backend, int new_hw…
jeffzhou2000 Jun 3, 2025
799ef77
project: rename libggmlop-skel.so to libggmldsp-skel.so and add ggmlh…
jeffzhou2000 Jun 7, 2025
079e24a
project: release libggmldsp-skel.so v0.97
jeffzhou2000 Jun 9, 2025
0644c40
ggml-hexagon: upgrade QNN SDK to v2.35.0.250530
jeffzhou2000 Jun 10, 2025
5527019
project: fix typo and build issue
jeffzhou2000 Jun 10, 2025
568fa38
ggmlhexagon-benchmark: add running timestamp and enable ggmlhexagon-b…
jeffzhou2000 Jun 10, 2025
842e133
ggml-hexagon: update ggml-hexagon.cpp to v1.11 and refine related cod…
jeffzhou2000 Jun 12, 2025
c156ec7
llama-bench: add running timestamp to analysis regression issue in ll…
jeffzhou2000 Jun 13, 2025
4e36b39
project: add prebuilt LLM models for compare inference peformance bet…
jeffzhou2000 Jun 14, 2025
be0c856
script: refine scripts/build-run-android.sh
jeffzhou2000 Jun 14, 2025
e970841
project: sync with upstream
jeffzhou2000 Jun 16, 2025
c4dddf9
troubleshooting: add ggml-20250531 to troubleshooting performance reg…
jeffzhou2000 Jun 16, 2025
e0b2bbe
script: simplify workflow
jeffzhou2000 Jun 16, 2025
d9e59d1
project: sync with upstream
jeffzhou2000 Jun 16, 2025
e81f961
project: sync with upstream
jeffzhou2000 Jun 16, 2025
4a7317d
project: sync with upstream
jeffzhou2000 Jun 17, 2025
68166cc
project: add prebuilt LLM model t5-277M-F32.gguf for compare inferenc…
jeffzhou2000 Jun 18, 2025
4dca244
script: refine scripts/build-run-android.sh
jeffzhou2000 Jun 18, 2025
315f9d5
project: adapt to thread safety test in upstream
jeffzhou2000 Jun 18, 2025
80ce75f
project: remove unused ggml-20250531 which added for troubleshooting …
jeffzhou2000 Jun 18, 2025
48b8a70
ggml-hexagon: fix issue which introduced by test-thread-safety in the…
jeffzhou2000 Jun 18, 2025
8ac710f
project: add codes for developers/experts's effort on cDSP side
jeffzhou2000 Jun 19, 2025
c24f854
build: refine script for developers/experts's effort on cDSP side
jeffzhou2000 Jun 19, 2025
201d5ea
script: fix a minor issue in scripts/build-run-android.sh
jeffzhou2000 Jun 19, 2025
d682082
script: refine script according to https://github.com/quic/ai-hub-app…
jeffzhou2000 Jun 19, 2025
f43cbbf
ggml-hexagon: add mulmat_algotype for further usage
jeffzhou2000 Jun 20, 2025
ff5bbc3
project: sync with upstream
jeffzhou2000 Jun 23, 2025
b9d66ce
ggml-dsp: fix typo
jeffzhou2000 Jun 23, 2025
358c720
project: release libggmldsp-skel.so v0.98
jeffzhou2000 Jun 25, 2025
d2c4e97
project: sync with upstream
jeffzhou2000 Jun 26, 2025
9c0dd2f
project: sync with upstream
jeffzhou2000 Jun 26, 2025
6028dd7
project: sync with upstream
jeffzhou2000 Jun 27, 2025
7c9b099
test: verify Google gemma-3n on Android phone
jeffzhou2000 Jun 27, 2025
4be1358
project: release libggmldsp-skel.so v0.98.8
jeffzhou2000 Jun 27, 2025
5eb8d90
project: sync with upstream(adapt to PR-14158 in the upstream)
jeffzhou2000 Jun 30, 2025
ce1ad4c
ggml-hexagon: add ggmlhexagon-testops.cpp
jeffzhou2000 Jul 1, 2025
a3026b4
script: update copyright info of scripts/build-run-android.sh
jeffzhou2000 Jul 1, 2025
3f8aa70
ggml-hexagon: upgrade QNN SDK to v2.36.0.250627
jeffzhou2000 Jul 2, 2025
6a67b64
ggml-hexagon: modify default mulmat_algotype from 0 to 32
jeffzhou2000 Jul 2, 2025
b8a47ee
ggml-hexagon: forward compatible with Qualcomm's new SDK
jeffzhou2000 Jul 2, 2025
bcecdfd
project: sync with upstream
jeffzhou2000 Jul 3, 2025
d14a9a0
project: sync with upstream(PR-14501:remove kompute backend)
jeffzhou2000 Jul 3, 2025
a4ca5f6
script: add MiniCPM4-0.5B-F32.gguf in build-run-android.sh
jeffzhou2000 Jul 4, 2025
b299aa3
ggml:fix minior issue during rebase upstream PR-14501: remove kompute…
jeffzhou2000 Jul 4, 2025
6bdef2e
project: sync with upstream
jeffzhou2000 Jul 5, 2025
d081d22
script: add build-run-ggmlopencl-android.sh
jeffzhou2000 Jul 6, 2025
584bc48
build: refine build-run-ggmlopencl-android.sh and build-run-ggmlhexag…
jeffzhou2000 Jul 6, 2025
753844b
script: update copyright info in build-run-ggmlopencl-android.sh
jeffzhou2000 Jul 6, 2025
30b5589
script: add build-run-ggmlvulkan-android.sh
jeffzhou2000 Jul 6, 2025
cc82661
script: update copyright info in build-run-ggmlvulkan-android.sh
jeffzhou2000 Jul 6, 2025
c41f7c1
script: fix a minior regression in build-run-ggmlhexagon-android.sh
jeffzhou2000 Jul 6, 2025
973948b
ggml-hexagon: apply similar logic as in ggml-vulkan
jeffzhou2000 Jul 7, 2025
00ca48b
script: refine build-run-ggmlhexagon-android.sh
jeffzhou2000 Jul 7, 2025
9cedcc6
ggml-hexagon: refine format
jeffzhou2000 Jul 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,12 @@ poetry.toml
# Local scripts
/run-vim.sh
/run-chat.sh

HEXAGON_Tools/
prebuilts/QNN_SDK/qairt/2.35.0.250530/
prebuilts/QNN_SDK/qairt/2.36.0.250627/
prebuilts/QNN_SDK/v2.35.0.250530.zip
prebuilts/QNN_SDK/v2.36.0.250627.zip
prebuilts/Hexagon_SDK/minimal-hexagon-sdk-6.2.0.1.xz
prebuilts/OpenCL_SDK/
prebuilts/Vulkan_SDK/
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@ set(CMAKE_WARN_UNUSED_CLI YES)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Android builds: enable verbose makefiles and, when the caller selected one of
# the HTP architectures handled here (v75 or v79), append the Hexagon/llamafile
# defines together with the CPU tuning flags to the C and C++ flag variables.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    set(CMAKE_VERBOSE_MAKEFILE ON)
    if(DEFINED HTP_ARCH_VERSION)
        # The expansion is quoted on purpose: unquoted, an empty HTP_ARCH_VERSION
        # turns the condition into `if( STREQUAL "v75" ...)` (a configure error),
        # and a value that happens to name a variable would be dereferenced again.
        if("${HTP_ARCH_VERSION}" STREQUAL "v75" OR "${HTP_ARCH_VERSION}" STREQUAL "v79")
            #works fine on Snapdragon 8Gen3&8Elite with 1.5x - 3x performance gains with the default ggml backend
            set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -ffp-model=fast -fno-finite-math-only")
            message("OPT_FLAG:${OPT_FLAG}")
            # NOTE(review): DEBUG_FLAG is not assigned in the visible part of this
            # file, so it expands to nothing here unless it is provided from
            # outside (e.g. -DDEBUG_FLAG=...) - confirm this is intended.
            foreach(flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELEASE)
                set(${flags_var} "${${flags_var}} -DGGML_USE_HEXAGON -DGGML_USE_LLAMAFILE ${DEBUG_FLAG} ${OPT_FLAG}")
            endforeach()
        endif()
    endif()
endif()

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
Expand Down Expand Up @@ -127,6 +142,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
llama_option_depr(WARNING LLAMA_HEXAGON GGML_HEXAGON)

if (NOT MSVC)
if (LLAMA_SANITIZE_THREAD)
Expand Down
2 changes: 2 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels"
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
"gmml: OpenCL API version to target")
option(GGML_HEXAGON "ggml: use HEXAGON" OFF)

# toolchain for vulkan-shaders-gen
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
Expand Down Expand Up @@ -270,6 +271,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-rpc.h
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-hexagon.h
include/gguf.h)

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
Expand Down
51 changes: 51 additions & 0 deletions ggml/include/ggml-hexagon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2024-2025 The ggml authors
*/
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): presumably the upper bound on the number of backend devices this
// backend exposes - confirm against the implementation in ggml-hexagon.cpp.
#define GGML_HEXAGON_MAX_DEVICES 4
// Name string for this backend.
// NOTE(review): the same string "hexagon" is passed to ggml_backend_load_best()
// in ggml-backend-reg.cpp; confirm the two are meant to stay in sync.
#define GGML_HEXAGON_BACKEND_NAME "hexagon"

// Identifiers of the compute targets selectable by the ggml-hexagon backend.
// The first three values are the QNN targets (CPU, GPU, NPU), CDSP is the
// Hexagon cDSP, and GGML is a placeholder that is not a real Hexagon target.
enum HEXAGONBackend {
HEXAGON_BACKEND_QNNCPU = 0,
HEXAGON_BACKEND_QNNGPU = 1,
HEXAGON_BACKEND_QNNNPU = 2,
HEXAGON_BACKEND_CDSP = 3,
HEXAGON_BACKEND_GGML = 4, // "fake" Hexagon backend, used to compare performance between the Hexagon backend and the default ggml backend
};

// Hardware-acceleration approach used by the ggml-hexagon backend:
//   0 (HWACCEL_QNN):             general approach through QNN - offload individual
//                                ggml ops to QNN (QNNCPU, QNNGPU, QNNNPU)
//   1 (HWACCEL_QNN_SINGLEGRAPH): special approach through QNN - map the entire
//                                ggml compute graph to a single QNN graph
//   2 (HWACCEL_CDSP):            general approach through the Hexagon cDSP -
//                                offload ggml ops to the Hexagon cDSP directly
enum hwaccel_approach_type {
HWACCEL_QNN = 0,
HWACCEL_QNN_SINGLEGRAPH= 1,
HWACCEL_CDSP = 2,
};

// Public entry points of the ggml-hexagon backend. Only the declarations are
// visible here; the notes below are assumptions to confirm against ggml-hexagon.cpp.

// NOTE(review): presumably creates a backend instance for device index dev_num;
// qnn_lib_path presumably names the directory searched for the QNN runtime
// libraries - confirm.
GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(size_t dev_num, const char * qnn_lib_path);

// NOTE(review): presumably reports whether `backend` was created by this backend - confirm.
GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);

// NOTE(review): presumably the number of devices this backend exposes - confirm.
GGML_BACKEND_API int ggml_backend_hexagon_get_device_count(void);

// Returns the registration object for this backend; ggml-backend-reg.cpp passes
// the result to register_backend() when GGML_USE_HEXAGON is defined.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);

// NOTE(review): presumably the human-readable name of device dev_num - confirm.
GGML_BACKEND_API const char * ggml_backend_hexagon_get_devname(size_t dev_num);

// NOTE(review): the two ints presumably take values of enum HEXAGONBackend and
// enum hwaccel_approach_type respectively - confirm.
GGML_BACKEND_API void ggml_backend_hexagon_set_cfg(int new_hexagon_backend, int new_hwaccel_approach);

// Getter/setter pair for the "mulmat algotype" setting.
// NOTE(review): the set of valid values is not visible in this header - confirm.
GGML_BACKEND_API int ggml_backend_hexagon_get_mulmat_algotype(void);

GGML_BACKEND_API void ggml_backend_hexagon_set_mulmat_algotype(int new_mulmat_algotype);

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(OpenCL)
ggml_add_backend(HEXAGON)

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
#include "ggml-cann.h"
#endif

#ifdef GGML_USE_HEXAGON
#include "ggml-hexagon.h"
#endif

// disable C++17 deprecation warning for std::codecvt_utf8
#if defined(__clang__)
# pragma clang diagnostic push
Expand Down Expand Up @@ -185,6 +189,9 @@ struct ggml_backend_registry {
#ifdef GGML_USE_RPC
register_backend(ggml_backend_rpc_reg());
#endif
#ifdef GGML_USE_HEXAGON
register_backend(ggml_backend_hexagon_reg());
#endif
#ifdef GGML_USE_CPU
register_backend(ggml_backend_cpu_reg());
#endif
Expand Down Expand Up @@ -574,6 +581,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path);
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
const char * backend_path = std::getenv("GGML_BACKEND_PATH");
Expand Down
139 changes: 139 additions & 0 deletions ggml/src/ggml-hexagon/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Build script of the ggml-hexagon backend.
project(ggml-hexagon)
message(STATUS "Using HEXAGON backend")
message("CMAKE_SYSTEM_NAME : ${CMAKE_SYSTEM_NAME}")

# The backend sources are compiled as C++20, and the standard is mandatory.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 20)

# Both SDK locations must be supplied by the caller; stop at the first one missing.
foreach(required_sdk_var QNN_SDK_PATH HEXAGON_SDK_PATH)
    if(NOT DEFINED ${required_sdk_var})
        message(FATAL_ERROR "${required_sdk_var} not defined")
    endif()
endforeach()

# Echo the effective configuration to the configure log.
message("QNN_SDK_PATH : ${QNN_SDK_PATH}")
message("HEXAGON_SDK_PATH: ${HEXAGON_SDK_PATH}")
message("HTP_ARCH_VERSION: ${HTP_ARCH_VERSION}")

# Choose DEBUG_FLAG from the build type: every build type other than "Debug"
# is treated as a release build.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
    set(DEBUG_FLAG "-DNDEBUG -Wall")
    # To disable all verbose logs (which makes NPU performance comparisons
    # through llama-bench clearer), use this value instead:
    #   set(DEBUG_FLAG "-DNDEBUG -Wall -DDISABLE_ALL_LOG")
    message("Release mode:${DEBUG_FLAG}")
else()
    set(DEBUG_FLAG "-DDEBUG -Wall")
    message("Debug mode:${DEBUG_FLAG}")
endif()

# HTP architecture version -> SoC:
#   v68 --- Snapdragon 888
#   v69 --- Snapdragon 8 Gen1
#   v73 --- Snapdragon 8 Gen2
#   v75 --- Snapdragon 8 Gen3
#   v79 --- Snapdragon 8 Elite(aka Gen4)
#
# To simplify the workflow only v75 and v79 (8Gen3 and 8Elite) are accepted;
# ref: https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie
if(NOT DEFINED HTP_ARCH_VERSION)
    # The message lists only the values the check below actually accepts.
    message(FATAL_ERROR "HTP_ARCH_VERSION not defined, valid htp arch: v75,v79")
endif()

# Check whether the user-specified htp arch is valid. CHECK_HTP_ARCH ends up as
# "GOOD" when HTP_ARCH_VERSION matches one of the accepted values, else "WRONG".
set(CHECK_HTP_ARCH "WRONG")
foreach(feat v75 v79)
    # Both sides are quoted so that an empty or unusual HTP_ARCH_VERSION reaches
    # the explicit FATAL_ERROR below instead of producing a malformed if().
    if("${feat}" STREQUAL "${HTP_ARCH_VERSION}")
        set(CHECK_HTP_ARCH "GOOD")
    endif()
endforeach()
if("${CHECK_HTP_ARCH}" STREQUAL "WRONG")
    message(FATAL_ERROR "ggml-hexagon backend only support htp arch v75,v79")
endif()

# Select the optimization flags for the chosen HTP architecture. OPT_FLAG stays
# a single blank for any architecture other than v75/v79.
set(OPT_FLAG " ")
# Quoted expansions: the comparison is then always string-vs-string and cannot
# be broken by an empty value or by a value that names another variable.
if("${HTP_ARCH_VERSION}" STREQUAL "v75" OR "${HTP_ARCH_VERSION}" STREQUAL "v79")
    #works fine on Snapdragon 8Gen3&8Elite with 1.5x - 3x performance gains with the default ggml backend
    set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -flto -D_GNU_SOURCE -fvectorize -ffp-model=fast -fno-finite-math-only")
endif()
message("OPT_FLAG:${OPT_FLAG}")

# Per-platform setup: link libraries, default QNN library search path and the
# Hexagon SDK include directories. Only Android and Windows are accepted.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    find_library(LOG_LIB log)

    # libcdsprpc.so ships prebuilt inside the Hexagon SDK; expose it as an
    # imported target so it can be linked like any other library.
    add_library(cdsprpc SHARED IMPORTED)
    set_target_properties(cdsprpc PROPERTIES
        IMPORTED_LOCATION "${HEXAGON_SDK_PATH}/ipc/fastrpc/remote/ship/android_aarch64/libcdsprpc.so")

    set(QNN_LINK_LIBRARIES ${LOG_LIB} cdsprpc)
    set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")

    # Paths are quoted so an SDK location containing spaces stays one argument.
    include_directories(
        "${HEXAGON_SDK_PATH}/incs"
        "${HEXAGON_SDK_PATH}/incs/stddef"
        "${HEXAGON_SDK_PATH}/ipc/fastrpc/incs"
        "${HEXAGON_SDK_PATH}/ipc/fastrpc/rpcmem/inc"
        "${HEXAGON_SDK_PATH}/ipc/fastrpc/remote/ship/android_Debug_aarch64"
        "${HEXAGON_SDK_PATH}/utils/examples"
        "${HEXAGON_SDK_PATH}/ipc/fastrpc/rtld/ship/android_aarch64"
        "${HEXAGON_SDK_PATH}/libs/atomic/inc"
        "${HEXAGON_SDK_PATH}/libs/atomic/android_Debug_aarch64/ship"
        # This directory and its kernels/ subdirectory, addressed relative to this
        # file rather than CMAKE_SOURCE_DIR, so the paths stay valid when the
        # project is not the top-level build.
        "${CMAKE_CURRENT_LIST_DIR}"
        "${CMAKE_CURRENT_LIST_DIR}/kernels"
    )
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
    set(QNN_DEFAULT_LIB_SEARCH_PATH "C:\\" CACHE STRING "customized library search path for QNN backend")
else()
    message(FATAL_ERROR "ggml-hexagon now only available on Android and Windows(Windows on ARM)")
endif()

# Append the Hexagon define plus the debug and optimization flags to the
# generic and the Release-specific flag variables of both languages.
foreach(flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELEASE)
    set(${flags_var} "${${flags_var}} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
endforeach()

# Backend sources: every .cpp file next to this script plus the kernels stub.
file(GLOB HEXAGON_SOURCES
    "${CMAKE_CURRENT_LIST_DIR}/*.cpp"
    "${CMAKE_CURRENT_LIST_DIR}/kernels/stub.c"
)
ggml_add_backend_library(ggml-hexagon ${HEXAGON_SOURCES})

target_include_directories(ggml-hexagon PRIVATE
    ${QNN_SDK_PATH}/include/QNN
    ${HEXAGON_SDK_PATH}
    ${CMAKE_CURRENT_LIST_DIR}
)
target_link_libraries(ggml-hexagon PRIVATE ${QNN_LINK_LIBRARIES})

# Strip a single trailing '/' from the search path so that appending '/' in the
# compile definition below does not double it.
string(REGEX REPLACE "/$" "" QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
target_compile_definitions(ggml-hexagon PRIVATE QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}/")

# Cross-compile the Hexagon kernel sources that run on the cDSP side.
#
# Registers a POST_BUILD step on the target named ${PROJECT_NAME} which runs
# `make clean` and then `make` in the kernels/ directory next to this file,
# forwarding HEXAGON_SDK_PATH, HTP_ARCH_VERSION and DEBUG_FLAG to that Makefile,
# and finally lists ../../../bin/libggmldsp-skel.so.
#
# Arguments:
#   KNAME - label used only in the configure-time status message.
#
# NOTE(review): no WORKING_DIRECTORY is given, so the relative path in the `ls`
# command is resolved against whatever directory the build tool runs the
# command in - confirm it matches the intended build-tree layout.
# NOTE(review): the commands are not marked VERBATIM and contain backticks and a
# DEBUG_FLAG value with spaces, so their exact expansion is generator/shell
# dependent - confirm before changing the quoting.
function(ggml_hexagon_build_kernel KNAME)
message(STATUS "ggml_hexagon: build hexagon-kernel ${KNAME}")

add_custom_command(
TARGET ${PROJECT_NAME}
POST_BUILD
COMMAND echo "current working path:`pwd`\n"
COMMAND echo "${CMAKE_CURRENT_LIST_DIR}/kernels"
COMMAND make -C ${CMAKE_CURRENT_LIST_DIR}/kernels/ clean
COMMAND make -C ${CMAKE_CURRENT_LIST_DIR}/kernels/ HEXAGON_SDK_PATH=${HEXAGON_SDK_PATH} HTP_ARCH_VERSION=${HTP_ARCH_VERSION} DEBUG_FLAG=${DEBUG_FLAG}
COMMAND echo "current working path:`pwd`\n"
COMMAND ls -l ../../../bin/libggmldsp-skel.so
COMMENT "build hexagon-kernel"
)
endfunction()

# Register a POST_BUILD step on the target named ${PROJECT_NAME} that copies the
# runtime configuration file scripts/<KNAME> into ../../../bin/.
#
# Arguments:
#   KNAME - file name of the configuration file inside the scripts/ directory.
#
# The copy is done with `cmake -E copy` rather than /bin/cp, so the step does
# not depend on a tool installed at a fixed absolute path.
# NOTE(review): no WORKING_DIRECTORY is given, so both relative paths are
# resolved against the directory the build tool runs the command in; they
# presumably assume a build tree located two levels below the source root -
# confirm.
function(ggml_hexagon_setup_cfg KNAME)
    message(STATUS "ggml_hexagon: setup runtime configuration file ${KNAME}")
    add_custom_command(
        TARGET ${PROJECT_NAME}
        POST_BUILD
        COMMAND echo "current working path:`pwd`\n"
        COMMAND ${CMAKE_COMMAND} -E copy ../../../../../scripts/${KNAME} ../../../bin/
        COMMENT "setup runtime configuration file"
    )
endfunction()

# Attach the two post-build steps to the backend target: build the cDSP kernels
# ("cdsp" is only a label for the status message), then copy the runtime
# configuration file ggml-hexagon.cfg from the scripts/ directory.
ggml_hexagon_build_kernel("cdsp")
ggml_hexagon_setup_cfg("ggml-hexagon.cfg")
Loading
Loading