@@ -1885,7 +1885,7 @@ class qnn_instance {
18851885 return 0 ;
18861886 }
18871887
1888- std::string &get_qnn_graph_name () { return _graph_name; }
1888+ std::string & get_qnn_graph_name () { return _graph_name; }
18891889
18901890 bool is_rpcmem_initialized () {
18911891 return _rpcmem_initialized;
@@ -1906,8 +1906,10 @@ class qnn_instance {
19061906 void unregister_rpcmem (Qnn_MemHandle_t mem_handle);
19071907
19081908 void * alloc_rpcmem (size_t bytes, size_t alignment);
1909+ void * get_rpcmem_from_memhandle (Qnn_MemHandle_t mem_handle);
19091910
19101911 void free_rpcmem (void * buf);
1912+ void free_rpcmem ();
19111913
19121914 bool is_rpcmem_allocated (void * buf);
19131915
@@ -1975,15 +1977,16 @@ class qnn_instance {
19751977 QNN_INTERFACE_VER_TYPE _qnn_raw_interface;
19761978 QNN_SYSTEM_INTERFACE_VER_TYPE _qnn_raw_system_interface;
19771979
1978- std::unordered_set< Qnn_MemHandle_t> _qnn_mem_set;
1980+ std::unordered_map< void *, Qnn_MemHandle_t> _qnn_mem_set;
19791981 std::unordered_map<void *, Qnn_MemHandle_t> _qnn_rpc_buffer_to_handles;
19801982
1983+
19811984 static std::mutex _init_mutex;
19821985 static std::unordered_map<BackendIdType, void *> _loaded_lib_handle;
19831986 static std::unordered_map<std::string, BackendIdType> _lib_path_to_backend_id;
19841987 static std::unordered_map<BackendIdType, const QnnInterface_t *> _loaded_backend;
19851988
1986- void *_rpc_lib_handle = nullptr ;
1989+ void * _rpc_lib_handle = nullptr ;
19871990 std::atomic_bool _rpcmem_initialized{false };
19881991 pfn_rpc_mem_alloc _pfn_rpc_mem_alloc;
19891992 pfn_rpc_mem_free _pfn_rpc_mem_free;
@@ -2032,11 +2035,30 @@ void qnn_instance::free_rpcmem(void * buf) {
20322035 } else if (0 == _rpcmem_store_map.count (buf)) {
20332036 GGMLQNN_LOG_WARN (" no allocated tensor\n " );
20342037 } else {
2038+ GGMLQNN_LOG_DEBUG (" free rpc mem %p" , _rpcmem_store_map[buf]);
20352039 _pfn_rpc_mem_free (_rpcmem_store_map[buf]);
20362040 _rpcmem_store_map.erase (buf);
20372041 }
20382042}
20392043
2044+ void qnn_instance::free_rpcmem () {
2045+ Qnn_ErrorHandle_t error = QNN_SUCCESS;
2046+
2047+ if (_rpcmem_store_map.empty ()) {
2048+ GGMLQNN_LOG_WARN (" no rpcmem allocated\n " );
2049+ return ;
2050+ }
2051+
2052+ for (std::unordered_map<void *, void *>::iterator it = _rpcmem_store_map.begin ();
2053+ it != _qnn_mem_set.end ();
2054+ it++) {
2055+ void * rpcbuffer = it->second ;
2056+ GGMLQNN_LOG_DEBUG (" free rpc buffer %p" , rpcbuffer);
2057+ _pfn_rpc_mem_free (rpcbuffer);
2058+ }
2059+ _rpcmem_store_map.clear ();
2060+ }
2061+
20402062int32_t qnn_instance::rpcmem_to_fd (void * buf) {
20412063 int32_t mem_fd = -1 ;
20422064 if (!is_rpcmem_initialized ()) {
@@ -2059,10 +2081,6 @@ int qnn_instance::register_rpcmem(void * p_data, Qnn_Tensor_t * p_tensor) {
20592081 return 2 ;
20602082 }
20612083
2062- if (is_rpcmem_allocated (p_data)) {
2063- GGMLQNN_LOG_WARN (" rpc memory already allocated\n " );
2064- // return 3;
2065- }
20662084 if (is_rpcmem_registered ((QNN_VER_PTR (*p_tensor)->memHandle ))) {
20672085 GGMLQNN_LOG_WARN (" tensor %s has been registered shared memory\n " , (QNN_VER_PTR (*p_tensor)->name ));
20682086 return 4 ;
@@ -2094,7 +2112,7 @@ int qnn_instance::register_rpcmem(void * p_data, Qnn_Tensor_t * p_tensor) {
20942112 GGMLQNN_LOG_INFO (" tensor %s successfully register shared memory\n " , (QNN_VER_PTR (*p_tensor)->name ));
20952113 }
20962114 QNN_VER_PTR (*p_tensor)->memHandle = handle;
2097- _qnn_mem_set.insert (handle);
2115+ _qnn_mem_set.insert ((std::pair< void *, Qnn_MemHandle_t>(p_data, handle)) );
20982116
20992117 return 0 ;
21002118}
@@ -2136,17 +2154,36 @@ Qnn_MemHandle_t qnn_instance::register_rpcmem(void * p_data, const uint32_t ran
21362154 return handle;
21372155}
21382156
2157+ void * qnn_instance::get_rpcmem_from_memhandle (Qnn_MemHandle_t mem_handle) {
2158+ for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin ();
2159+ it != _qnn_mem_set.end ();
2160+ it++) {
2161+ Qnn_MemHandle_t mem_handle = it->second ;
2162+ if (it->second == mem_handle) {
2163+ return it->first ;
2164+ }
2165+ }
2166+ GGMLQNN_LOG_WARN (" can't find rpcmem from qnn mem handle %p" , mem_handle);
2167+ return nullptr ;
2168+ }
2169+
21392170void qnn_instance::unregister_rpcmem () {
21402171 Qnn_ErrorHandle_t error = QNN_SUCCESS;
21412172
21422173 if (_qnn_mem_set.empty ()) {
21432174 GGMLQNN_LOG_WARN (" no rpcmem registered\n " );
21442175 }
21452176
2146- for (auto &mem_handle : _qnn_mem_set) {
2177+ for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin ();
2178+ it != _qnn_mem_set.end ();
2179+ it++) {
2180+ Qnn_MemHandle_t mem_handle = it->second ;
21472181 error = _qnn_interface.qnn_mem_de_register (&mem_handle, 1 );
21482182 if (error != QNN_SUCCESS) {
2149- GGMLQNN_LOG_WARN (" failed to unregister shared memory, error %d\n " , QNN_GET_ERROR_CODE (error));
2183+ GGMLQNN_LOG_WARN (" failed to unregister shared memory, error %d\n " ,
2184+ QNN_GET_ERROR_CODE (error));
2185+ } else {
2186+ GGMLQNN_LOG_DEBUG (" unregister shared memory ok" );
21502187 }
21512188 }
21522189 _qnn_mem_set.clear ();
@@ -2158,14 +2195,14 @@ void qnn_instance::unregister_rpcmem(Qnn_MemHandle_t mem_handle) {
21582195 GGMLQNN_LOG_WARN (" failed to unregister shared memory, error %d" , QNN_GET_ERROR_CODE (error));
21592196 }
21602197
2161- auto it = std::find_if (_qnn_rpc_buffer_to_handles .begin (), _qnn_rpc_buffer_to_handles .end (),
2198+ auto it = std::find_if (_qnn_mem_set .begin (), _qnn_mem_set .end (),
21622199 [mem_handle](const auto &kv) { return kv.second == mem_handle; });
2163- if (it == _qnn_rpc_buffer_to_handles .end ()) {
2200+ if (it == _qnn_mem_set .end ()) {
21642201 GGMLQNN_LOG_WARN (" failed to find shared memory handler: %p" , mem_handle);
21652202 return ;
21662203 }
21672204
2168- _qnn_rpc_buffer_to_handles .erase (it);
2205+ _qnn_mem_set .erase (it);
21692206}
21702207
21712208bool qnn_instance::is_rpcmem_allocated (void * buf) {
@@ -2562,7 +2599,7 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
25622599 temp_context_config.empty () ? nullptr : temp_context_config.data (),
25632600 &_qnn_context_handle);
25642601 if (nullptr == _qnn_context_handle) {
2565- GGMLQNN_LOG_WARN (" why failed to initialize qnn context\n " );
2602+ GGMLQNN_LOG_WARN (" why failed to initialize qnn context, error:%s \n " , strerror (errno) );
25662603 return 8 ;
25672604 } else {
25682605 GGMLQNN_LOG_DEBUG (" initialize qnn context successfully\n " );
@@ -2636,9 +2673,13 @@ int qnn_instance::qnn_finalize() {
26362673 int ret_status = 0 ;
26372674 Qnn_ErrorHandle_t error = QNN_SUCCESS;
26382675
2676+ GGMLQNN_LOG_DEBUG (" enter %s\n " , __func__);
26392677 // FIXME:should be removed in the future
26402678 reset_idx ();
26412679
2680+ free_rpcmem ();
2681+ unregister_rpcmem ();
2682+
26422683 if (nullptr != _pfn_rpc_mem_deinit)
26432684 _pfn_rpc_mem_deinit ();
26442685
@@ -2700,6 +2741,7 @@ int qnn_instance::qnn_finalize() {
27002741 unload_backend ();
27012742
27022743 unload_system ();
2744+ GGMLQNN_LOG_DEBUG (" leave %s\n " , __func__);
27032745
27042746 return ret_status;
27052747}
@@ -2954,6 +2996,10 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
29542996 const ggml_tensor * src1 = op->src [1 ];
29552997 ggml_tensor * dst = op;
29562998
2999+ uint8_t * qnn_rpcbuffer_0 = nullptr ;
3000+ uint8_t * qnn_rpcbuffer_1 = nullptr ;
3001+ uint8_t * qnn_rpcbuffer_2 = nullptr ;
3002+
29573003 GGMLQNN_CHECK_PARAMS (ctx, src0, src1, dst);
29583004
29593005 instance = ctx->instance ;
@@ -3067,6 +3113,19 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
30673113 GGMLQNN_LOG_INFO (" can't create qnn graph handle with graph name %s, error = %d\n " , graph_name.c_str (), error);
30683114 return ;
30693115 }
3116+
3117+ if (ctx->device == QNN_BACKEND_NPU) {
3118+ QNN_VER_PTR (*tensor_0)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3119+ QNN_VER_PTR (*tensor_0)->clientBuf = {.data =nullptr , .dataSize =0 };
3120+
3121+ QNN_VER_PTR (*tensor_1)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3122+ QNN_VER_PTR (*tensor_1)->clientBuf = {.data =nullptr , .dataSize =0 };
3123+
3124+ QNN_VER_PTR (*tensor_2)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3125+ QNN_VER_PTR (*tensor_2)->clientBuf = {.data =nullptr , .dataSize =0 };
3126+ }
3127+
3128+
30703129 error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_0);
30713130 if (QNN_SUCCESS != error) {
30723131 GGMLQNN_LOG_INFO (" error = %d\n " , error);
@@ -3080,9 +3139,43 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
30803139 GGMLQNN_LOG_INFO (" error = %d\n " , error);
30813140 }
30823141
3083- QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
3084- QNN_VER_PTR (*tensor_1)->clientBuf = {src1->data , ggml_get_tensor_data_size (src1)};
3085- QNN_VER_PTR (*tensor_2)->clientBuf = {dst->data , ggml_get_tensor_data_size (dst)};
3142+ if (ctx->device != QNN_BACKEND_NPU) {
3143+ QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
3144+ QNN_VER_PTR (*tensor_1)->clientBuf = {src1->data , ggml_get_tensor_data_size (src1)};
3145+ QNN_VER_PTR (*tensor_2)->clientBuf = {dst->data , ggml_get_tensor_data_size (dst)};
3146+ } else {
3147+ qnn_rpcbuffer_0 = static_cast <uint8_t *>(instance->alloc_rpcmem (
3148+ ggml_nbytes (src0), 4 ));
3149+ if (nullptr == qnn_rpcbuffer_0) {
3150+ GGMLQNN_LOG_WARN (" alloc rpcmem failure, %s\n " , strerror (errno));
3151+ goto failure;
3152+ } else {
3153+ GGMLQNN_LOG_DEBUG (" alloc rpcmem %p successfully\n " , qnn_rpcbuffer_0);
3154+ }
3155+ instance->register_rpcmem (qnn_rpcbuffer_0, tensor_0);
3156+ memcpy (qnn_rpcbuffer_0, src0->data , ggml_nbytes (src0));
3157+
3158+ qnn_rpcbuffer_1 = static_cast <uint8_t *>(instance->alloc_rpcmem (
3159+ ggml_nbytes (src1), 4 ));
3160+ if (nullptr == qnn_rpcbuffer_1) {
3161+ GGMLQNN_LOG_WARN (" alloc rpcmem failure, %s\n " , strerror (errno));
3162+ goto failure;
3163+ } else {
3164+ GGMLQNN_LOG_DEBUG (" alloc rpcmem %p successfully\n " , qnn_rpcbuffer_1);
3165+ }
3166+ instance->register_rpcmem (qnn_rpcbuffer_1, tensor_1);
3167+ memcpy (qnn_rpcbuffer_1, src1->data , ggml_nbytes (src1));
3168+
3169+ qnn_rpcbuffer_2 = static_cast <uint8_t *>(instance->alloc_rpcmem (
3170+ ggml_nbytes (dst), 4 ));
3171+ if (nullptr == qnn_rpcbuffer_2) {
3172+ GGMLQNN_LOG_WARN (" alloc rpcmem failure, %s\n " , strerror (errno));
3173+ goto failure;
3174+ } else {
3175+ GGMLQNN_LOG_DEBUG (" alloc rpcmem %p successfully\n " , qnn_rpcbuffer_2);
3176+ }
3177+ instance->register_rpcmem (qnn_rpcbuffer_2, tensor_2);
3178+ }
30863179
30873180 Qnn_Tensor_t tensor_inputs[] = {
30883181 *tensor_0,
@@ -3119,6 +3212,15 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
31193212 if (QNN_SUCCESS != error) {
31203213 GGMLQNN_LOG_INFO (" error = %d\n " , error);
31213214 }
3215+
3216+ if (ctx->device == QNN_BACKEND_NPU) {
3217+ uint8_t * qnn_rpcbuffer = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (
3218+ QNN_VER_PTR (*tensor_2)->memHandle ));
3219+ GGMLQNN_LOG_INFO (" qnn_rpcbuffer = %p\n " , qnn_rpcbuffer);
3220+ if (nullptr != qnn_rpcbuffer)
3221+ memcpy (dst->data , qnn_rpcbuffer, ggml_nbytes (dst));
3222+ }
3223+
31223224 auto graph_item = std::make_tuple (graph_handle, tensor_0, tensor_1, tensor_2);
31233225 instance->_qnn_graph_map [map_entry] = graph_item;
31243226 } else {
@@ -3138,9 +3240,21 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
31383240 QNN_VER_PTR (*tensor_2)->rank = ggml_get_tensor_rank (dst);
31393241 QNN_VER_PTR (*tensor_2)->dataType = dst_qnn_type;
31403242
3141- QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
3142- QNN_VER_PTR (*tensor_1)->clientBuf = {src1->data , ggml_get_tensor_data_size (src1)};
3143- QNN_VER_PTR (*tensor_2)->clientBuf = {dst->data , ggml_get_tensor_data_size (dst)};
3243+ if (ctx->device != QNN_BACKEND_NPU) {
3244+ QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
3245+ QNN_VER_PTR (*tensor_1)->clientBuf = {src1->data , ggml_get_tensor_data_size (src1)};
3246+ QNN_VER_PTR (*tensor_2)->clientBuf = {dst->data , ggml_get_tensor_data_size (dst)};
3247+ } else {
3248+ uint8_t * qnn_buffer_0 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (
3249+ QNN_VER_PTR (*tensor_0)->memHandle ));
3250+ if (nullptr != qnn_buffer_0)
3251+ memcpy (qnn_buffer_0, src0->data , ggml_nbytes (src0));
3252+
3253+ uint8_t * qnn_buffer_1 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (
3254+ QNN_VER_PTR (*tensor_1)->memHandle ));
3255+ if (nullptr != qnn_buffer_1)
3256+ memcpy (qnn_buffer_1, src1->data , ggml_nbytes (src1));
3257+ }
31443258
31453259 Qnn_Tensor_t tensor_inputs[] = {
31463260 *tensor_0,
@@ -3156,6 +3270,35 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
31563270 if (QNN_SUCCESS != error) {
31573271 GGMLQNN_LOG_INFO (" error = %d\n " , error);
31583272 }
3273+
3274+ if (ctx->device == QNN_BACKEND_NPU) {
3275+ uint8_t * qnn_buffer_2 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (
3276+ QNN_VER_PTR (*tensor_2)->memHandle ));
3277+ if (nullptr != qnn_buffer_2)
3278+ memcpy (dst->data , qnn_buffer_2, ggml_nbytes (dst));
3279+ }
3280+ }
3281+
3282+ failure:
3283+ if (QNN_SUCCESS != error) {
3284+ GGMLQNN_LOG_DEBUG (" tensor0 name %s" , QNN_TENSOR_GET_NAME (*tensor_0));
3285+ GGMLQNN_LOG_DEBUG (" tensor1 name %s" , QNN_TENSOR_GET_NAME (*tensor_1));
3286+ GGMLQNN_LOG_DEBUG (" tensor2 name %s" , QNN_TENSOR_GET_NAME (*tensor_2));
3287+ GGMLQNN_LOG_DEBUG (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
3288+ " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3289+ src0->name , src0->type , ggml_type_name (src0->type ),
3290+ src0->ne [0 ], src0->ne [1 ], src0->ne [2 ], src0->nb [0 ],
3291+ src0->nb [1 ], src0->nb [2 ]);
3292+ GGMLQNN_LOG_DEBUG (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
3293+ " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3294+ src1->name , src1->type , ggml_type_name (src1->type ),
3295+ src1->ne [0 ], src1->ne [1 ], src1->ne [2 ], src1->nb [0 ],
3296+ src1->nb [1 ], src1->nb [2 ]);
3297+ GGMLQNN_LOG_DEBUG (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
3298+ " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3299+ dst->name , dst->type , ggml_type_name (dst->type ),
3300+ dst->ne [0 ], dst->ne [1 ], dst->ne [2 ], dst->nb [0 ],
3301+ dst->nb [1 ], dst->nb [2 ]);
31593302 }
31603303
31613304 // avoid memory leak in func free_qnn_tensor
0 commit comments