Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
127 commits
Select commit Hold shift + click to select a range
b0c3013
ggml: add Qualcomm QNN(Qualcomm Neural Network,aka Qualcomm AI Engine…
jeffzhou2000 Apr 24, 2024
d325088
ggml: add Qualcomm QNN(Qualcomm Neural Network,aka Qualcomm AI Engine…
jeffzhou2000 Apr 24, 2024
c75817b
rebase
jeffzhou2000 Jun 5, 2024
9c872cb
refine ggml-qnn-ut program and script to make reviewers happy
jeffzhou2000 Jun 5, 2024
926a866
review: replace external declaration with NDK header file
jeffzhou2000 Jun 5, 2024
dd29834
add supportive of quantize data type Q8_0
jeffzhou2000 Jun 6, 2024
f4c5303
review: remove unused QNN helper functions
jeffzhou2000 Jun 6, 2024
2fab33d
ggml-qnn: remove static global vars to support multi-instance simulta…
jeffzhou2000 Jun 7, 2024
94ee775
review: remove static global vars to support multi-instance simultane…
jeffzhou2000 Jun 7, 2024
5d691c6
review: put qnn's internal log inside preprocessor diretive
jeffzhou2000 Jun 8, 2024
fdf0272
review: code format using clang-format + manually modification accord…
jeffzhou2000 Jun 8, 2024
3e8b61f
review: fix a memory leak introduced by review modification which exp…
jeffzhou2000 Jun 9, 2024
d38d4a6
npu: probe htp info and capacity of rpc ion memory
jeffzhou2000 Jun 9, 2024
5f8cfe4
ggml-qnn: refine source code of ggml-qnn.cpp to make reviewer more happy
jeffzhou2000 Jun 10, 2024
5269e08
ggml-qnn: refine ggml inference using QNN NPU
jeffzhou2000 Jun 11, 2024
faaa86b
ggml-qnn: refine ggml inference using QNN NPU
jeffzhou2000 Jun 12, 2024
5598fbd
review: make a MVP(Minimum Viable PR) style PR in upstream
jeffzhou2000 Jun 13, 2024
5e18cdc
init the test array with const values
chraac Jun 15, 2024
6c68adc
add ggml_qnn_tensor_binder
chraac Jun 14, 2024
37bb926
use tensor wrapper in add
chraac Jun 15, 2024
36e41a1
use tensor wrapper in matmul
chraac Jun 16, 2024
a5679dd
use ggml_qnn_tensor_reader for output tensor
chraac Jun 16, 2024
5fe7b87
use ggml_qnn_tensor_writer for all parameters
chraac Jun 16, 2024
9456bba
rename
chraac Jun 17, 2024
65a14d9
fix todo
chraac Jun 18, 2024
aeef0c6
make the constant condition first
chraac Jun 19, 2024
dfe159f
remove TODO
chraac Jun 19, 2024
9932062
split logger function, tensors and backend from main qnn source
chraac Jun 19, 2024
3c491a3
remove reference of g_qnn_mgr in qnn_instance
chraac Jun 19, 2024
3fe07eb
fix compiling error
chraac Jun 19, 2024
37a1585
rename
chraac Jun 19, 2024
ff0359d
move qnn helper function into utility files
chraac Jun 19, 2024
e1056da
fix op handle checker
chraac Jun 24, 2024
c9e99bd
split qnn ops into file
chraac Jun 24, 2024
3808a4c
Merge branch 'master' into dev-refactoring
chraac Jul 1, 2024
8b677d1
move qnn backend into sub folder
chraac Jul 2, 2024
38f88d5
fix compiling error after merge latest master
chraac Jul 2, 2024
000240c
add clang format file and reformating
chraac Jul 4, 2024
ca0d999
add ggml_qnn_graph
chraac Jul 4, 2024
4b2ee61
move graph map to backend object
chraac Jul 5, 2024
a688ed3
add op param to add_nodes
chraac Jul 5, 2024
13dc3a0
use qnn graph inside add and mul ops
chraac Jul 5, 2024
58cec14
reformat
chraac Jul 5, 2024
0f2e687
move tensor related function to utils
chraac Jul 5, 2024
4b0f6b0
add helper function to get Qnn_TensorType_t from ggml_tensor
chraac Jul 5, 2024
263ffa9
small opt of the qnn graph config init
chraac Jul 5, 2024
874216b
remove unused members
chraac Jul 7, 2024
5f2e391
refactoring ggml_qnn_tensor
chraac Jul 7, 2024
af869fd
fix compiling error in debug build
chraac Jul 9, 2024
a7be069
add log
chraac Jul 9, 2024
9add256
use helper function instead
chraac Jul 9, 2024
dc7d83e
add log
chraac Jul 9, 2024
e97d3a6
fix tensor buffer allocation
chraac Jul 10, 2024
3feb574
merge register_rpc_mem into alloc_rpc_mem
chraac Jul 10, 2024
b49b501
fix sprintf type
chraac Jul 10, 2024
80051cf
remove unused variables
chraac Jul 10, 2024
b6f2927
add function to get graph from cache
chraac Jul 10, 2024
7ea28a6
add helper function for binary op
chraac Jul 10, 2024
8932135
add sqrt and mul ops
chraac Jul 10, 2024
be3aa96
use template function directly
chraac Jul 10, 2024
f0894d8
wip
chraac Jul 12, 2024
0eb595c
use table to simpilify the op mapping
chraac Jul 12, 2024
e3aa43a
suppress warning
chraac Jul 12, 2024
7cbc4fb
add mul
chraac Jul 12, 2024
100ccd5
add unary op template and more ops
chraac Jul 12, 2024
c1e2283
expose op at unit test
chraac Jul 13, 2024
148ceab
add log op
chraac Jul 14, 2024
30b4000
remove unused declarations
chraac Jul 14, 2024
c46b4de
[unit test] init all tensor by one function
chraac Jul 15, 2024
4410fd6
format with clang-format
chraac Jul 15, 2024
cd5a733
add cpu backend as cross reference
chraac Jul 15, 2024
f32327e
remove multiply declearation of log in unit test
chraac Jul 15, 2024
ff601ab
add todo
chraac Jul 15, 2024
0301b50
refactoring: prevent leak the QNN_INTERFACE_VER_TYPE and QNN_SYSTEM_I…
chraac Jul 16, 2024
b1ef302
refactoring: remove depend of dlsym at utils.hpp
chraac Jul 17, 2024
63dc587
refactoring: make the buffer alloc and free stay in same class
chraac Jul 17, 2024
bb13795
refactoring: remove unused functions and variables
chraac Jul 17, 2024
861bb9c
Merge tag 'b3405' into dev-refactoring
chraac Jul 17, 2024
eed9605
add build step of QNN backend at ggml
chraac Jul 17, 2024
454deef
register qnn backend
chraac Jul 17, 2024
2502b57
fix warnings
chraac Jul 17, 2024
b7d781e
remove qnn dedicated unit tests since we're now using the `test-backe…
chraac Jul 17, 2024
6457a68
disable qnn profiling in release build
chraac Jul 17, 2024
c76fc9a
fix warnings
chraac Jul 17, 2024
ce199b2
refactoring: downgrade some log to debug level
chraac Jul 17, 2024
d82b3a0
feat: add GGML_UNARY_OP_GELU
chraac Jul 18, 2024
15f5cc4
bug: fix allocation size overflow at log
chraac Jul 18, 2024
665f823
fix op checker
chraac Jul 18, 2024
ce3d09e
tried fix the add node error 6005
chraac Jul 19, 2024
f45fbec
Revert "tried fix the add node error 6005"
chraac Jul 19, 2024
0153a23
fix support ops
chraac Jul 19, 2024
a607995
Reapply "tried fix the add node error 6005"
chraac Jul 19, 2024
b1b5cc1
add function to convert qnn error into string
chraac Jul 19, 2024
1679dcf
fix: check all dimentions in `can offload`
chraac Jul 19, 2024
28a00e5
fix: try fix QNN_GRAPH_ERROR_INVALID_OP_CONFIG
chraac Jul 20, 2024
2729946
fix: try fix tensor type error
chraac Jul 20, 2024
51f95d6
fix: dimension could be wrong for tensor liked 1x1x8
chraac Jul 20, 2024
5f3b1ae
fix: try fix graph cache with append the tensors name
chraac Jul 20, 2024
b173c4e
feat: update tensor name when bind to graph
chraac Jul 20, 2024
3b47056
refactoring: change the tensor binding mode between qnn tensor and gg…
chraac Jul 22, 2024
706793f
fix: back to qnn tensor v1 to fix the create tensor error
chraac Jul 22, 2024
f843e5a
fix: 1.free up rpc memory at destruct
chraac Jul 22, 2024
ee305cc
refactoring: split qnn rpc buffer into dedicated class
chraac Jul 26, 2024
47735cb
fix: try fix error in 2nd run by appending dimension into graph key
chraac Jul 26, 2024
be9a8c7
fix: suppress warning
chraac Jul 26, 2024
18aa665
refactoring: opt graph key gen
chraac Jul 27, 2024
2c73791
refactoring: remove dup code
chraac Jul 27, 2024
ccfec70
refactoring: remove unused get_rpcmem_from_memhandle func
chraac Jul 27, 2024
867c91b
feat: add error string for QnnOpPackage_Error_t
chraac Jul 27, 2024
5da73f8
refactoring: move forward and supports_op into ops file
chraac Jul 27, 2024
e0c9b34
feat: check if dims equal for add
chraac Jul 27, 2024
8ab1f15
refactoring: remove internal functions, use op table directly
chraac Jul 27, 2024
e33b5c9
refactoring: print the name of unsupport op
chraac Jul 27, 2024
1f9d2a7
refactoring: improve tensor print
chraac Jul 28, 2024
5ecbeb5
Merge branch 'master' into dev-refactoring
chraac Jul 29, 2024
6da8294
refactoring: set the default qnn lib search path at CMakeLists.txt by…
chraac Jul 29, 2024
9a5f802
refactoring: add convient macro to disable copy and move of class
chraac Jul 29, 2024
74eb05a
feat: add ggml_qnn_op_config for handle different op
chraac Jul 29, 2024
6cc7432
Merge remote-tracking branch 'origin/master' into dev-refactoring
chraac Jul 31, 2024
47f6e02
fix: try fix the tensor rank of mul mat
chraac Jul 31, 2024
5ea980d
Merge branch 'master' into dev-refactoring
chraac Aug 5, 2024
dedadf2
Fixed a bug where debug code was included in the release, resulting i…
myan-o Aug 20, 2024
6bee798
Merge branch 'master' into dev-refactoring
chraac Aug 20, 2024
c9be2ba
Merge branch 'master' into dev-refactoring
chraac Aug 30, 2024
67e8af7
Merge branch 'master' into dev-refactoring
chraac Sep 7, 2024
481cb3a
fix compiling error
chraac Sep 7, 2024
b0b75d4
Merge branch 'master' into dev-refactoring
chraac Sep 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
llama_option_depr(WARNING LLAMA_QNN GGML_QNN)

#
# build the library
Expand Down
3 changes: 2 additions & 1 deletion ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ option(GGML_SYCL "ggml: use SYCL"
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
"ggml: sycl target device")
option(GGML_QNN "ggml: use QNN" OFF)

# extra artifacts
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
Expand All @@ -165,7 +166,7 @@ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)

if (GGML_SYCL)
if (GGML_SYCL OR GGML_QNN)
set(CMAKE_CXX_STANDARD 17)
else()
set(CMAKE_CXX_STANDARD 11)
Expand Down
41 changes: 41 additions & 0 deletions ggml/include/ggml-qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#pragma once

// Public C interface of the ggml QNN backend.

#include "ggml.h"

#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): presumably the number of real QNN devices (CPU, GPU, NPU),
// i.e. excluding QNN_BACKEND_GGML below -- confirm against the implementation.
#define GGML_QNN_MAX_DEVICES 3

// Identifiers of the QNN backends; the numeric values start at 0 and follow
// declaration order (CPU = 0, GPU = 1, NPU = 2, GGML = 3).
enum QNNBackend {
QNN_BACKEND_CPU = 0,
QNN_BACKEND_GPU,
QNN_BACKEND_NPU,
QNN_BACKEND_GGML, // "fake" QNN backend, used to compare performance between
// QNN and the original GGML
};

/**
 * Initialize a QNN backend instance.
 *
 * @param dev_num                 device index: 0: QNN_BACKEND_CPU 1: QNN_BACKEND_GPU 2: QNN_BACKEND_NPU
 * @param extend_lib_search_path  extended lib search path for searching QNN backend dynamic libs
 * @return the backend handle (NOTE(review): the failure value is not visible in
 *         this header -- presumably NULL, confirm against the implementation)
 */
GGML_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char *extend_lib_search_path);

// Query whether `backend` is a QNN backend.
GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend);

// Set the thread count for `backend`.
// NOTE(review): how (or whether) the QNN backend uses this value is not visible here -- confirm.
GGML_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);

// Get the number of QNN devices.
// NOTE(review): relation to GGML_QNN_MAX_DEVICES is not visible here -- confirm.
GGML_API int ggml_backend_qnn_get_device_count(void);

// Write a description of device `dev_num` into the caller-provided buffer
// `description`, whose capacity in bytes is `description_size`.
// NOTE(review): truncation / NUL-termination behaviour is not visible here -- confirm.
GGML_API void ggml_backend_qnn_get_device_description(size_t dev_num, char *description, size_t description_size);

// Get the backend buffer type associated with device `dev_num`.
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num);

#ifdef __cplusplus
}
#endif
31 changes: 31 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,36 @@ if (GGML_CANN)
endif()
endif()

# Qualcomm QNN backend.
#
# Inputs:
#   GGML_QNN_DEFAULT_LIB_SEARCH_PATH  (cache) directory baked into the build as the
#                                     default location of the QNN dynamic libraries
#   GGML_QNN_SDK_PATH                 root of the QNN SDK; falls back to the
#                                     QNN_SDK_PATH environment variable
# Outputs (consumed further down when the ggml target is assembled):
#   GGML_SOURCES_QNN, GGML_HEADERS_QNN, GGML_EXTRA_LIBS, GGML_EXTRA_INCLUDES,
#   GGML_CDEF_PUBLIC (gains GGML_USE_QNN)
if (GGML_QNN)
    # Fail early: the backend is only supported when targeting Android.
    if (NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
        message(FATAL_ERROR "QNN now only available on Android")
    endif()

    find_library(LOG_LIB log)
    find_library(ANDROID_LIB android)
    list(APPEND GGML_EXTRA_LIBS ${LOG_LIB} ${ANDROID_LIB})
    set(GGML_QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")

    # Normalize the search path so the compiled-in value always ends with exactly
    # one trailing '/', whether or not the user supplied one.
    string(REGEX REPLACE "/$" "" GGML_QNN_DEFAULT_LIB_SEARCH_PATH "${GGML_QNN_DEFAULT_LIB_SEARCH_PATH}")
    add_compile_definitions(GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${GGML_QNN_DEFAULT_LIB_SEARCH_PATH}/")

    if (NOT DEFINED GGML_QNN_SDK_PATH)
        # Try to read the SDK location from the environment. The expansion is quoted
        # and an empty value is rejected: an unquoted empty expansion would turn the
        # set() below into set(VAR), which unsets the variable and silently produces
        # an include path of "/include/QNN".
        if (DEFINED ENV{QNN_SDK_PATH} AND NOT "$ENV{QNN_SDK_PATH}" STREQUAL "")
            set(GGML_QNN_SDK_PATH "$ENV{QNN_SDK_PATH}")
        else()
            message(FATAL_ERROR "GGML_QNN_SDK_PATH not defined")
        endif()
    endif()

    message(STATUS "QNN_SDK_PATH: ${GGML_QNN_SDK_PATH}")

    # CONFIGURE_DEPENDS makes the build re-run the glob, so sources added to
    # ggml-qnn/ are picked up without a manual re-configure.
    file(GLOB GGML_SOURCES_QNN CONFIGURE_DEPENDS "ggml-qnn/*.cpp")
    list(APPEND GGML_SOURCES_QNN "ggml-qnn.cpp")
    set(GGML_HEADERS_QNN ../include/ggml-qnn.h)

    set(QNN_INC_PATH "${GGML_QNN_SDK_PATH}/include/QNN")
    list(APPEND GGML_EXTRA_INCLUDES "${QNN_INC_PATH}" "ggml-qnn")
    list(APPEND GGML_CDEF_PUBLIC GGML_USE_QNN)
endif()

function(get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")
Expand Down Expand Up @@ -1315,6 +1345,7 @@ add_library(ggml
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
${GGML_SOURCES_QNN} ${GGML_HEADERS_QNN}
ggml-aarch64.c ggml-aarch64.h
)

Expand Down
5 changes: 5 additions & 0 deletions ggml/src/ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
extern GGML_CALL int ggml_backend_cann_reg_devices(void);
ggml_backend_cann_reg_devices();
#endif

#ifdef GGML_USE_QNN
extern GGML_CALL void ggml_backend_qnn_reg_devices(void);
ggml_backend_qnn_reg_devices();
#endif
}

GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
Expand Down
Loading
Loading