Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
273 commits
Select commit Hold shift + click to select a range
9456bba
rename
chraac Jun 17, 2024
65a14d9
fix todo
chraac Jun 18, 2024
aeef0c6
make the constant condition first
chraac Jun 19, 2024
dfe159f
remove TODO
chraac Jun 19, 2024
9932062
split logger function, tensors and backend from main qnn source
chraac Jun 19, 2024
3c491a3
remove reference of g_qnn_mgr in qnn_instance
chraac Jun 19, 2024
3fe07eb
fix compiling error
chraac Jun 19, 2024
37a1585
rename
chraac Jun 19, 2024
ff0359d
move qnn helper function into utility files
chraac Jun 19, 2024
e1056da
fix op handle checker
chraac Jun 24, 2024
c9e99bd
split qnn ops into file
chraac Jun 24, 2024
3808a4c
Merge branch 'master' into dev-refactoring
chraac Jul 1, 2024
8b677d1
move qnn backend into sub folder
chraac Jul 2, 2024
38f88d5
fix compiling error after merge latest master
chraac Jul 2, 2024
000240c
add clang format file and reformating
chraac Jul 4, 2024
ca0d999
add ggml_qnn_graph
chraac Jul 4, 2024
4b2ee61
move graph map to backend object
chraac Jul 5, 2024
a688ed3
add op param to add_nodes
chraac Jul 5, 2024
13dc3a0
use qnn graph inside add and mul ops
chraac Jul 5, 2024
58cec14
reformat
chraac Jul 5, 2024
0f2e687
move tensor related function to utils
chraac Jul 5, 2024
4b0f6b0
add helper function to get Qnn_TensorType_t from ggml_tensor
chraac Jul 5, 2024
263ffa9
small opt of the qnn graph config init
chraac Jul 5, 2024
874216b
remove unused members
chraac Jul 7, 2024
5f2e391
refactoring ggml_qnn_tensor
chraac Jul 7, 2024
af869fd
fix compiling error in debug build
chraac Jul 9, 2024
a7be069
add log
chraac Jul 9, 2024
9add256
use helper function instead
chraac Jul 9, 2024
dc7d83e
add log
chraac Jul 9, 2024
e97d3a6
fix tensor buffer allocation
chraac Jul 10, 2024
3feb574
merge register_rpc_mem into alloc_rpc_mem
chraac Jul 10, 2024
b49b501
fix sprintf type
chraac Jul 10, 2024
80051cf
remove unused variables
chraac Jul 10, 2024
b6f2927
add function to get graph from cache
chraac Jul 10, 2024
7ea28a6
add helper function for binary op
chraac Jul 10, 2024
8932135
add sqrt and mul ops
chraac Jul 10, 2024
be3aa96
use template function directly
chraac Jul 10, 2024
f0894d8
wip
chraac Jul 12, 2024
0eb595c
use table to simpilify the op mapping
chraac Jul 12, 2024
e3aa43a
suppress warning
chraac Jul 12, 2024
7cbc4fb
add mul
chraac Jul 12, 2024
100ccd5
add unary op template and more ops
chraac Jul 12, 2024
c1e2283
expose op at unit test
chraac Jul 13, 2024
148ceab
add log op
chraac Jul 14, 2024
30b4000
remove unused declarations
chraac Jul 14, 2024
c46b4de
[unit test] init all tensor by one function
chraac Jul 15, 2024
4410fd6
format with clang-format
chraac Jul 15, 2024
cd5a733
add cpu backend as cross reference
chraac Jul 15, 2024
f32327e
remove multiply declearation of log in unit test
chraac Jul 15, 2024
ff601ab
add todo
chraac Jul 15, 2024
0301b50
refactoring: prevent leak the QNN_INTERFACE_VER_TYPE and QNN_SYSTEM_I…
chraac Jul 16, 2024
b1ef302
refactoring: remove depend of dlsym at utils.hpp
chraac Jul 17, 2024
63dc587
refactoring: make the buffer alloc and free stay in same class
chraac Jul 17, 2024
bb13795
refactoring: remove unused functions and variables
chraac Jul 17, 2024
861bb9c
Merge tag 'b3405' into dev-refactoring
chraac Jul 17, 2024
eed9605
add build step of QNN backend at ggml
chraac Jul 17, 2024
454deef
register qnn backend
chraac Jul 17, 2024
2502b57
fix warnings
chraac Jul 17, 2024
b7d781e
remove qnn dedicated unit tests since we're now using the `test-backe…
chraac Jul 17, 2024
6457a68
disable qnn profiling in release build
chraac Jul 17, 2024
c76fc9a
fix warnings
chraac Jul 17, 2024
ce199b2
refactoring: downgrade some log to debug level
chraac Jul 17, 2024
d82b3a0
feat: add GGML_UNARY_OP_GELU
chraac Jul 18, 2024
15f5cc4
bug: fix allocation size overflow at log
chraac Jul 18, 2024
665f823
fix op checker
chraac Jul 18, 2024
ce3d09e
tried fix the add node error 6005
chraac Jul 19, 2024
f45fbec
Revert "tried fix the add node error 6005"
chraac Jul 19, 2024
0153a23
fix support ops
chraac Jul 19, 2024
a607995
Reapply "tried fix the add node error 6005"
chraac Jul 19, 2024
b1b5cc1
add function to convert qnn error into string
chraac Jul 19, 2024
1679dcf
fix: check all dimentions in `can offload`
chraac Jul 19, 2024
28a00e5
fix: try fix QNN_GRAPH_ERROR_INVALID_OP_CONFIG
chraac Jul 20, 2024
2729946
fix: try fix tensor type error
chraac Jul 20, 2024
51f95d6
fix: dimension could be wrong for tensor liked 1x1x8
chraac Jul 20, 2024
5f3b1ae
fix: try fix graph cache with append the tensors name
chraac Jul 20, 2024
b173c4e
feat: update tensor name when bind to graph
chraac Jul 20, 2024
3b47056
refactoring: change the tensor binding mode between qnn tensor and gg…
chraac Jul 22, 2024
706793f
fix: back to qnn tensor v1 to fix the create tensor error
chraac Jul 22, 2024
f843e5a
fix: 1.free up rpc memory at destruct
chraac Jul 22, 2024
ee305cc
refactoring: split qnn rpc buffer into dedicated class
chraac Jul 26, 2024
47735cb
fix: try fix error in 2nd run by appending dimension into graph key
chraac Jul 26, 2024
be9a8c7
fix: suppress warning
chraac Jul 26, 2024
18aa665
refactoring: opt graph key gen
chraac Jul 27, 2024
2c73791
refactoring: remove dup code
chraac Jul 27, 2024
ccfec70
refactoring: remove unused get_rpcmem_from_memhandle func
chraac Jul 27, 2024
867c91b
feat: add error string for QnnOpPackage_Error_t
chraac Jul 27, 2024
5da73f8
refactoring: move forward and supports_op into ops file
chraac Jul 27, 2024
e0c9b34
feat: check if dims equal for add
chraac Jul 27, 2024
8ab1f15
refactoring: remove internal functions, use op table directly
chraac Jul 27, 2024
e33b5c9
refactoring: print the name of unsupport op
chraac Jul 27, 2024
1f9d2a7
refactoring: improve tensor print
chraac Jul 28, 2024
5ecbeb5
Merge branch 'master' into dev-refactoring
chraac Jul 29, 2024
6da8294
refactoring: set the default qnn lib search path at CMakeLists.txt by…
chraac Jul 29, 2024
9a5f802
refactoring: add convient macro to disable copy and move of class
chraac Jul 29, 2024
74eb05a
feat: add ggml_qnn_op_config for handle different op
chraac Jul 29, 2024
6cc7432
Merge remote-tracking branch 'origin/master' into dev-refactoring
chraac Jul 31, 2024
47f6e02
fix: try fix the tensor rank of mul mat
chraac Jul 31, 2024
5ea980d
Merge branch 'master' into dev-refactoring
chraac Aug 5, 2024
dedadf2
Fixed a bug where debug code was included in the release, resulting i…
myan-o Aug 20, 2024
6bee798
Merge branch 'master' into dev-refactoring
chraac Aug 20, 2024
c9be2ba
Merge branch 'master' into dev-refactoring
chraac Aug 30, 2024
67e8af7
Merge branch 'master' into dev-refactoring
chraac Sep 7, 2024
481cb3a
fix compiling error
chraac Sep 7, 2024
b0b75d4
Merge branch 'master' into dev-refactoring
chraac Sep 10, 2024
8e7807e
Merge tag 'b3779' into dev-refactoring
chraac Sep 18, 2024
b7aea04
fix compiling error
chraac Sep 18, 2024
a1ceaae
fix compiling error at older ndk (r23c)
chraac Sep 28, 2024
2ef0904
Merge branch 'master' into dev-refactoring
chraac Sep 30, 2024
1da8a3e
fix compiling error after merge
chraac Sep 30, 2024
8e30038
Merge branch 'master' into dev-refactoring
chraac Oct 7, 2024
181cf52
adapt new register backend interface and fix missing ops
chraac Oct 10, 2024
17cc17e
Merge branch 'master' into dev-refactoring
chraac Oct 11, 2024
f260498
remove unused function
chraac Oct 11, 2024
4abaf7d
feat: fix mulmat (#2)
chraac Oct 28, 2024
c42433c
Merge branch 'master' into dev-refactoring
chraac Oct 28, 2024
5c1e6d4
disable gelu in NPU
chraac Oct 28, 2024
fe565cf
fix compiling error in release
chraac Oct 29, 2024
d963250
Merge branch 'master' into dev-refactoring
chraac Nov 4, 2024
0fec56f
fix compiling error
chraac Nov 4, 2024
8ad86dc
feat: add QNN_OP_TRANSPOSE (#6)
chraac Nov 4, 2024
e6dbdac
feat: fix llama-bench (#7)
chraac Nov 13, 2024
9f62fc9
Merge branch 'master' into dev-refactoring
chraac Nov 13, 2024
a2df09b
[WIP] feat: perf opt (#10)
chraac Nov 28, 2024
5103b16
bugfix: block large tensor calc in npu
chraac Nov 29, 2024
67b183c
Merge branch 'master' into dev-refactoring
chraac Nov 29, 2024
6d4feae
redo conflict changes
chraac Nov 29, 2024
09efaa3
define compile flag as module private
chraac Nov 29, 2024
c5e6549
fix: fix assertion
chraac Nov 29, 2024
cf91253
Merge branch 'master' into dev-refactoring
chraac Dec 3, 2024
0d02ee0
fix int overflow and remove view op to pass unit test
chraac Dec 3, 2024
e36ad89
bugfix: error pre-allocated tensor (k_cache_view-0) (#12)
chraac Dec 11, 2024
6d3267a
Merge branch 'master' into dev-refactoring
chraac Dec 14, 2024
79f124a
add missing op
chraac Dec 14, 2024
8f07b3e
Merge branch 'master' into dev-refactoring
chraac Dec 26, 2024
f2d8d01
[feat] Port ggml graph to QNN graph (#16)
chraac Jan 10, 2025
c410717
Merge branch 'master' into dev-refactoring
chraac Jan 10, 2025
5f93376
fix compiling error after merged
chraac Jan 10, 2025
10bd671
[feat]add more op support (#18)
chraac Jan 18, 2025
3ed9f5b
Merge branch 'master' into dev-refactoring
chraac Jan 18, 2025
34d9b38
Merge branch 'master' into dev-refactoring
chraac Feb 1, 2025
ba324b0
Merge branch 'master' into dev-refactoring
chraac Feb 12, 2025
12c75f1
Merge branch 'master' into dev-refactoring
chraac Feb 13, 2025
a822d00
feat: run on win (#24)
chraac Feb 24, 2025
84328ff
Merge branch 'master' into dev-refactoring
chraac Feb 24, 2025
ff033e1
opt mulmat base on official doc (#25)
chraac Feb 25, 2025
c867641
feat: fix some TODO item in upstream PR #26 (#27)
chraac Feb 27, 2025
f289752
[bugfix]make sure single node op will have the same type (#29)
chraac Feb 28, 2025
8b652dd
bug: fix benchmark debug warning (#31)
chraac Feb 28, 2025
27cec63
Merge branch 'master' into dev-refactoring
chraac Mar 5, 2025
31847c8
fix compiling error after merge
chraac Mar 5, 2025
525cd2d
Merge branch 'master' into dev-refactoring
chraac Mar 14, 2025
a1ab674
[feat] add more op (#35)
chraac Mar 22, 2025
c2887f0
Merge branch 'master' into dev-refactoring
chraac Mar 22, 2025
1caca62
fix compiling error after merge
chraac Mar 22, 2025
e4fcdd4
Merge branch 'master' into dev-refactoring
chraac Apr 3, 2025
a004951
Merge branch 'master' into dev-refactoring
chraac Apr 15, 2025
9e41f79
fix compiling error after merge master
chraac Apr 16, 2025
beff5c4
feat: op perf opt (#38)
chraac Apr 21, 2025
a0e54cf
Merge branch 'master' into dev-refactoring
chraac Apr 24, 2025
c2b6fec
feat: perf opt part2 (#39)
chraac Apr 27, 2025
161c4ee
fix typo
chraac May 6, 2025
aca7069
Merge branch 'master' into dev-refactoring
chraac May 8, 2025
039f835
fix compiling error
chraac May 8, 2025
0ce53ce
fix linking error
chraac May 8, 2025
02af8ff
fix qnn only build flag
chraac May 8, 2025
db2a125
fix GGML_QNN_ENABLE_PERFORMANCE_TRACKING option
chraac May 13, 2025
295f7f5
feat: perf opt part3 (#42)
chraac May 16, 2025
54b3021
Merge branch 'master' into dev-refactoring
chraac May 27, 2025
2306f82
fix compiling error
chraac May 27, 2025
c23ab46
feat: perf opt part4 (#43)
chraac May 27, 2025
da5dc57
Merge branch 'master' into dev-refactoring
chraac Jun 9, 2025
af620a1
feat: flash attention support for hexagon-npu (#45)
chraac Jun 18, 2025
5f442ee
feat: add mixed precision dot product implementation and function dec…
chraac Jun 13, 2025
d80fd56
feat: implement mixed precision vector dot product and conversion fun…
chraac Jun 14, 2025
b9a3b3b
fix: update data type handling in matrix multiplication implementation
chraac Jun 14, 2025
093ad6e
fix: adjust row count handling in matrix multiplication implementatio…
chraac Jun 14, 2025
3382faf
fix: optimize matrix multiplication implementation by unroll loop
chraac Jun 14, 2025
54d6911
update performance tracking for matrix multiplication implementation
chraac Jun 14, 2025
77f66f9
add fetching
chraac Jun 14, 2025
48f9f52
wip
chraac Jun 14, 2025
dbedd6a
fix: support F16 * F32 multiplication in is_mul_mat_supported function
chraac Jun 15, 2025
34e1a6a
fix: improve src0 fetching logic in vec_dot_product_mixed_impl for be…
chraac Jun 15, 2025
a22957b
fix test failure for row width 67
chraac Jun 16, 2025
3651e89
try fix failed test
chraac Jun 17, 2025
680504d
fix: rename aligned_address to align_down for clarity in vector align…
chraac Jun 18, 2025
842812c
wip
chraac Jun 19, 2025
c156560
qnn fix: update device capabilities for quantized types in qnn-lib to…
chraac Jun 20, 2025
813672d
fix test failure at width == 193
chraac Jun 20, 2025
e502ecd
fix: replace zero vector initialization with previous vector in mixed…
chraac Jun 22, 2025
e0925fd
wip
chraac Jun 22, 2025
e449f06
fix: improve handling of last vector in mixed dot product implementation
chraac Jun 22, 2025
3c68259
wip
chraac Jun 22, 2025
2158da9
wip
chraac Jun 22, 2025
0876805
wip
chraac Jun 23, 2025
624b7f7
wip
chraac Jun 23, 2025
332514c
qnn fix: update device capabilities for quantized types in qnn-lib to…
chraac Jun 20, 2025
eb6901f
Merge branch 'dev-refactoring' into dev-perf-opt-quant
chraac Jun 23, 2025
9ce984a
Enhance mul_mat_f32 function to support quantized types and improve s…
chraac Jun 23, 2025
3c70ca0
rename
chraac Jun 23, 2025
039232d
Refactor dequantization functions to use npu_device_fp16_t and improv…
chraac Jun 23, 2025
fbd013d
Optimize dequantization in dequantize_row_q8_0 by replacing qf32 mult…
chraac Jun 24, 2025
641d30e
Optimize dequantization in dequantize_row_q4_0 by replacing qf32 mult…
chraac Jun 24, 2025
246c860
Add hvx_vsf_convert_vhf function for improved vector conversion
chraac Jun 25, 2025
da88536
add perf logs
chraac Jun 26, 2025
5a438b6
Refactor dequantize_row_q4_0 for alignment
chraac Jun 26, 2025
976bdb2
Update logging in supports_op_impl and supports_op to use ggml_op_des…
chraac Jun 26, 2025
a0243fa
Add support for ROPE operation in NPU capabilities and related functions
chraac Jun 27, 2025
15b89ea
Implement ROPE operation in tensor and op_rope, including cache initi…
chraac Jun 27, 2025
f154fc6
enable ROPE by adding operation validation
chraac Jun 28, 2025
a3812db
add support to freq is null case
chraac Jun 28, 2025
6db24a4
wip
chraac Jun 28, 2025
666d03e
Refactor rope_f32 to improve indexing by introducing total_planes cal…
chraac Jun 28, 2025
5701193
reformat
chraac Jun 28, 2025
77f0c92
Refactor rope_f32 to optimize data access patterns by introducing row…
chraac Jun 28, 2025
41c633a
Add performance tracking to rope_f32 function for enhanced profiling
chraac Jun 28, 2025
7e730ec
Refactor rope_f32 to use a templated implementation
chraac Jun 28, 2025
cb94f3c
Refactor rope_impl to replace loop with memcpy for improved performance
chraac Jun 28, 2025
9f66ffa
Refactor mul_mat_impl to support quantization as a template parameter
chraac Jun 29, 2025
aca826a
wip
chraac Jun 29, 2025
e8e8f0a
wip
chraac Jun 29, 2025
7995cc9
Refactor rope_impl to optimize plane indexing in the processing loop
chraac Jun 29, 2025
1cafeb2
Add aligned vector dot product implementation for mixed precision types
chraac Jun 30, 2025
9a0093b
Merge branch 'master' into dev-refactoring
chraac Jun 30, 2025
989772c
fix compiling error
chraac Jun 30, 2025
333aeaf
Merge branch 'dev-refactoring' into dev-perf-opt-quant
chraac Jun 30, 2025
482ef7f
wip
chraac Jun 30, 2025
ef52220
Enhance matrix multiplication for F32 and F16 types with alignment ch…
chraac Jun 30, 2025
228dbd3
Optimize vec_dot_product_mix_aligned_impl for improved performance wi…
chraac Jun 30, 2025
d670c1e
Add alignment checks for matrix multiplication and vector dot products
chraac Jun 30, 2025
ec35125
Refactor matrix multiplication to use function pointers for improved …
chraac Jun 30, 2025
4d39eba
Fix alignment check in is_dot_product_aligned to ensure correct vecto…
chraac Jun 30, 2025
f932d7e
Remove unused f16_to_f32_table parameter from quantization and dequan…
chraac Jul 1, 2025
e359081
wip
chraac Jul 1, 2025
73ce562
Add L2 fetch for src1 plane rows in matrix multiplication implementation
chraac Jul 1, 2025
1238b85
wip
chraac Jul 1, 2025
c87de31
Refactor hvx_vsf_convert_vhf to accept an additional parameter for fl…
chraac Jul 1, 2025
c08d7d1
Refactor vec_dot_product_mix_aligned_impl to improve variable naming …
chraac Jul 1, 2025
12305a1
Refactor load_dual_block_generic and dequantize_row_q4_0 to improve p…
chraac Jul 2, 2025
4eaaa5d
Refactor vector operation functions to improve clarity and consistenc…
chraac Jul 2, 2025
ca889d3
wip
chraac Jul 2, 2025
47fbbf2
wip
chraac Jul 2, 2025
6ab41f3
Refactor dequantize_row_q4_0_impl for improved clarity and performanc…
chraac Jul 2, 2025
06a6723
wip
chraac Jul 2, 2025
8a1c8af
Update load_dual_block_generic to use intrinsics
chraac Jul 2, 2025
1ae3726
Refactor load_dual_block_generic and load_qual_block_generic for impr…
chraac Jul 2, 2025
1e5be35
wip
chraac Jul 3, 2025
ec48c42
wip
chraac Jul 3, 2025
61c6b89
Optimize dequantize_row_q8_0 for improved performance by unrolling fo…
chraac Jul 3, 2025
c18d4c1
wip
chraac Jul 3, 2025
51c53ae
wip
chraac Jul 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
llama_option_depr(WARNING LLAMA_QNN GGML_QNN)

if (NOT MSVC)
if (LLAMA_SANITIZE_THREAD)
Expand Down
2 changes: 2 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ set (GGML_SYCL_TARGET "INTEL" CACHE STRING
"ggml: sycl target device")
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
"ggml: sycl device architecture")
option(GGML_QNN "ggml: use QNN" OFF)

option(GGML_OPENCL "ggml: use OpenCL" OFF)
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
Expand Down Expand Up @@ -272,6 +273,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-rpc.h
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-qnn.h
include/gguf.h)

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
Expand Down
13 changes: 13 additions & 0 deletions ggml/include/ggml-qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

// Public header of the ggml QNN backend. It exposes a single entry point that
// hands the backend's registration object to the ggml backend registry.

#include "ggml-backend.h"

// Use C linkage so the symbol is callable from both C and C++ translation units.
#ifdef __cplusplus
extern "C" {
#endif

// Returns the ggml_backend_reg_t describing the QNN backend.
// NOTE(review): ownership/lifetime of the returned object is not visible from
// this header; presumably a process-wide singleton like other backends - confirm.
GGML_API ggml_backend_reg_t ggml_backend_qnn_reg(void);

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(OpenCL)
ggml_add_backend(QNN)

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
Expand Down
7 changes: 7 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@
#include "ggml-kompute.h"
#endif

#ifdef GGML_USE_QNN
#include "ggml-qnn.h"
#endif

// disable C++17 deprecation warning for std::codecvt_utf8
#if defined(__clang__)
# pragma clang diagnostic push
Expand Down Expand Up @@ -192,6 +196,9 @@ struct ggml_backend_registry {
#ifdef GGML_USE_KOMPUTE
register_backend(ggml_backend_kompute_reg());
#endif
#ifdef GGML_USE_QNN
register_backend(ggml_backend_qnn_reg());
#endif
#ifdef GGML_USE_CPU
register_backend(ggml_backend_cpu_reg());
#endif
Expand Down
141 changes: 141 additions & 0 deletions ggml/src/ggml-qnn/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
message(STATUS "Using QNN backend")

# User-facing switches for this backend.
# GGML_QNN_ENABLE_HEXAGON_BACKEND defaults to whatever GGML_HEXAGON_NPU_ONLY is,
# so requesting an NPU-only build turns the Hexagon package on by default.
option(GGML_HEXAGON_NPU_ONLY "ggml-qnn: Only use Hexagon NPU" OFF)
option(GGML_QNN_ENABLE_HEXAGON_BACKEND "ggml-qnn: Enable Hexagon custom package" ${GGML_HEXAGON_NPU_ONLY})
option(GGML_HEXAGON_ENABLE_QUANTIZED_TENSORS "ggml-qnn: Enable quantized tensors support" OFF)
option(GGML_HEXAGON_ENABLE_PERFORMANCE_TRACKING "ggml-qnn: Enable performance tracking" OFF)

# Bail out early on any platform other than Android, Windows or Linux.
if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Android" OR
        CMAKE_SYSTEM_NAME STREQUAL "Windows" OR
        CMAKE_SYSTEM_NAME STREQUAL "Linux"))
    message(FATAL_ERROR "QNN now only available on Android, Windows and Linux")
endif()

if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    # Android builds additionally link the system "log" library.
    find_library(LOG_LIB log)
    set(COMMON_LINK_LIBRARIES ${LOG_LIB})
else()
    message("Building for Linux or Windows")
endif()

# Resolve the QNN SDK location.
# Precedence: an explicitly defined GGML_QNN_SDK_PATH, then the QNN_SDK_PATH
# environment variable, then the QNN_SDK_ROOT environment variable. Configuration
# fails when none of them is available. Environment variables are read at
# configure time only.
if(NOT DEFINED GGML_QNN_SDK_PATH)
    # TODO: create a function to search for the SDK path
    if(DEFINED ENV{QNN_SDK_PATH})
        set(GGML_QNN_SDK_PATH "$ENV{QNN_SDK_PATH}")
    elseif(DEFINED ENV{QNN_SDK_ROOT})
        # Log the environment value that was actually found; the plain CMake
        # variable QNN_SDK_ROOT is never set by this script.
        message("found QNN_SDK_ROOT: $ENV{QNN_SDK_ROOT}")
        set(GGML_QNN_SDK_PATH "$ENV{QNN_SDK_ROOT}")
    else()
        message(FATAL_ERROR "GGML_QNN_SDK_PATH not defined")
    endif()
endif()

# Configure-time diagnostics: echo the build settings that influence this backend.
# Each line has the form "<NAME>: <value>".
foreach(qnn_diag_var CMAKE_BUILD_TYPE CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE)
    message("${qnn_diag_var}: ${${qnn_diag_var}}")
endforeach()

# The SDK path is reported under a shorter label than its variable name.
message("QNN_SDK_PATH: ${GGML_QNN_SDK_PATH}")

foreach(qnn_diag_var
        GGML_QNN
        GGML_QNN_ENABLE_HEXAGON_BACKEND
        GGML_HEXAGON_NPU_ONLY
        GGML_HEXAGON_ENABLE_QUANTIZED_TENSORS
        GGML_HEXAGON_ENABLE_PERFORMANCE_TRACKING)
    message("${qnn_diag_var}: ${${qnn_diag_var}}")
endforeach()
unset(qnn_diag_var)

# Create the backend library target; its only directly listed source is the
# public header, the implementation comes from the sub-directories linked below.
ggml_add_backend_library(ggml-qnn
    ../../include/ggml-qnn.h
)
target_link_libraries(ggml-qnn PRIVATE ${COMMON_LINK_LIBRARIES})

# NOTE(review): the runtime-common target used below is presumably defined by
# this sub-directory - confirm against shared/CMakeLists.txt.
add_subdirectory(shared)

# Unless the build is restricted to the Hexagon NPU, build the QNN code and
# attach it to runtime-common; an NPU-only build forces the Hexagon backend on.
if(NOT GGML_HEXAGON_NPU_ONLY)
    message("GGML_HEXAGON_NPU_ONLY is disabled")
    add_subdirectory(qnn)
    target_link_libraries(runtime-common PUBLIC qnn-backend)
else()
    message("GGML_HEXAGON_NPU_ONLY is enabled")
    set(GGML_QNN_ENABLE_HEXAGON_BACKEND ON)
endif()

# ggml-qnn links runtime-common directly, or through hexagon-npu-host when the
# Hexagon backend is enabled.
if(NOT GGML_QNN_ENABLE_HEXAGON_BACKEND)
    message("GGML_QNN_ENABLE_HEXAGON_BACKEND is disabled")
    target_link_libraries(ggml-qnn PRIVATE runtime-common)
else()
    message("GGML_QNN_ENABLE_HEXAGON_BACKEND is enabled")
    add_subdirectory(npu)
    target_link_libraries(hexagon-npu-host runtime-common)
    target_link_libraries(ggml-qnn PRIVATE hexagon-npu-host)
endif()

# Copy dynamic libraries
# Collect into BACKEND_RUNTIME_LIBS every file from the QNN SDK (and, on Android,
# from the NDK) that should sit next to the built binaries at run time.
set(BACKEND_RUNTIME_LIBS "")

if(CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
    # Pick the SDK library directory matching the target platform.
    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
        # Android
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-android")
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
        # Linux x86_64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/x86_64-linux-clang")
    else()
        # Linux aarch64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-oe-linux-gcc11.2")
    endif()

    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/libQnnSystem.so")
    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/libQnnCpu.so")
    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/libQnnGpu.so")
    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/libQnnHtp.so")
    # The glob also matches libQnnHtp.so itself; duplicates are removed below.
    file(GLOB HTP_STUB_LIBS "${QNN_SDK_LIB_PATH}/libQnnHtp*.so")
    list(APPEND BACKEND_RUNTIME_LIBS ${HTP_STUB_LIBS})

    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
        # Skel libraries from the SDK's hexagon-*/unsigned directories.
        file(GLOB HTP_SKEL_LIBS "${GGML_QNN_SDK_PATH}/lib/hexagon-*/unsigned/libQnnHtp*Skel.so")
        list(APPEND BACKEND_RUNTIME_LIBS ${HTP_SKEL_LIBS})

        if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
            # Ship a debugger server: gdbserver where the NDK still provides it,
            # otherwise lldb-server from the clang resource directory.
            if(EXISTS "${CMAKE_ANDROID_NDK}/prebuilt/android-arm64/gdbserver/gdbserver")
                list(APPEND BACKEND_RUNTIME_LIBS "${CMAKE_ANDROID_NDK}/prebuilt/android-arm64/gdbserver/gdbserver")
                message("old ndk, copy gdbserver")
            else()
                # NOTE(review): the NDK host tag is hard-coded to linux-x86_64;
                # on other build hosts these globs match nothing.
                file(GLOB LLDB_SERVER "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/aarch64/lldb-server")
                list(APPEND BACKEND_RUNTIME_LIBS ${LLDB_SERVER})
                message("new ndk, copy lldb-server")
            endif()

            file(GLOB OMP_LIBS "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/aarch64/libomp.so")
            file(GLOB ASAN_LIBS "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/libclang_rt.asan-aarch64-android.so")
            list(APPEND BACKEND_RUNTIME_LIBS ${OMP_LIBS})
            list(APPEND BACKEND_RUNTIME_LIBS ${ASAN_LIBS})
        endif()
    else()
        # Linux
        list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/libHtpPrepare.so")
    endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
    # Native Windows x64 builds report the processor as "AMD64" rather than
    # "x86_64", so both spellings must select the x86_64 SDK directory.
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$")
        set(QNN_WINDOWS_IS_X64 ON)
    else()
        set(QNN_WINDOWS_IS_X64 OFF)
    endif()

    if(QNN_WINDOWS_IS_X64)
        # x86_64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/x86_64-windows-msvc")
    else()
        # aarch64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-windows-msvc")
    endif()

    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/QnnSystem.dll")
    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/QnnCpu.dll")
    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/QnnGpu.dll")
    list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/QnnHtp.dll")
    # The glob also matches QnnHtp.dll itself; duplicates are removed below.
    file(GLOB HTP_STUB_LIBS "${QNN_SDK_LIB_PATH}/QnnHtp*.dll")

    if(QNN_WINDOWS_IS_X64)
        list(APPEND BACKEND_RUNTIME_LIBS "${QNN_SDK_LIB_PATH}/HtpPrepare.dll")
    endif()

    list(APPEND BACKEND_RUNTIME_LIBS ${HTP_STUB_LIBS})
endif()

# Explicit entries and glob results overlap; keep one entry per file so that
# each file gets a single copy command.
if(BACKEND_RUNTIME_LIBS)
    list(REMOVE_DUPLICATES BACKEND_RUNTIME_LIBS)
endif()

# After ggml-qnn is built, copy every collected runtime library next to the
# binaries. The destination is CMAKE_RUNTIME_OUTPUT_DIRECTORY; when that is not
# set, fall back to the directory of the ggml-qnn artifact so the copy command
# always receives a destination argument.
set(QNN_RUNTIME_COPY_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
if("${QNN_RUNTIME_COPY_DIR}" STREQUAL "")
    set(QNN_RUNTIME_COPY_DIR "$<TARGET_FILE_DIR:ggml-qnn>")
endif()

foreach(RUNTIME_LIB ${BACKEND_RUNTIME_LIBS})
    message("Copy: ${RUNTIME_LIB} -> ${QNN_RUNTIME_COPY_DIR}")
    add_custom_command(
        TARGET ggml-qnn POST_BUILD
        # Create the destination first: copying a single file to a path that
        # does not exist would create a file with the directory's name.
        COMMAND ${CMAKE_COMMAND} -E make_directory "${QNN_RUNTIME_COPY_DIR}"
        # copy_if_different avoids rewriting unchanged files on every build.
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
                "${RUNTIME_LIB}"
                "${QNN_RUNTIME_COPY_DIR}"
        VERBATIM)
endforeach()
Loading