11
22#include " backend-ops.hpp"
33
4- #include < memory>
5-
64#include " ggml-impl.h"
75#include " graph.hpp"
86#include " logger.hpp"
97#include " op-config.hpp"
108#include " tensor.hpp"
119#include " utils.hpp"
1210
11+ #include < memory>
12+
1313namespace {
1414
1515qnn::qnn_graph * get_qnn_graph_from_cache (qnn::ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) {
1616 auto & graph_cache = ctx->qnn_graph_cache ;
1717 std::string graph_key;
1818 auto op_data_type = qnn::qnn_graph::get_graph_key_from_cgraph (cgraph, graph_key);
1919 if (graph_key.empty ()) {
20- QNN_LOG_DEBUG (" [%s]empty graph key for cgraph: %p, size: %d\n " , qnn::get_backend_name (ctx->device ),
21- (const void *) cgraph, (int ) cgraph->n_nodes );
20+ QNN_LOG_DEBUG (" [%s]empty graph key for cgraph: %p, size: %d\n " ,
21+ qnn::get_backend_name (ctx->device ),
22+ (const void *) cgraph,
23+ (int ) cgraph->n_nodes );
2224 return nullptr ;
2325 }
2426
2527 auto it = graph_cache.find (graph_key);
2628 qnn::qnn_graph * graph_ptr = nullptr ;
2729 if (it != graph_cache.end ()) {
2830 auto it = graph_cache.find (graph_key);
29- QNN_LOG_DEBUG (" [%s]found graph %s in cache, cache size: %d\n " , qnn::get_backend_name (ctx->device ),
30- graph_key.c_str (), (int ) graph_cache.size ());
31+ QNN_LOG_DEBUG (" [%s]found graph %s in cache, cache size: %d\n " ,
32+ qnn::get_backend_name (ctx->device ),
33+ graph_key.c_str (),
34+ (int ) graph_cache.size ());
3135 graph_ptr = it->second .get ();
3236 } else {
3337 auto precision = qnn::qnn_graph::kHtpDefault ;
3438 if (op_data_type == GGML_TYPE_F16) {
35- QNN_LOG_DEBUG (" [%s][%s]set graph precision to FP16 \n " , qnn::get_backend_name (ctx-> device ),
36- graph_key.c_str ());
39+ QNN_LOG_DEBUG (
40+ " [%s][%s]set graph precision to FP16 \n " , qnn::get_backend_name (ctx-> device ), graph_key.c_str ());
3741 precision = qnn::qnn_graph::kHtpFp16 ;
3842 }
3943
40- auto graph = std::make_unique<qnn::qnn_graph>(graph_key, ctx-> device , ctx-> instance , precision,
41- ctx->socinfo .vtcm_size_in_mb );
44+ auto graph = std::make_unique<qnn::qnn_graph>(
45+ graph_key, ctx-> device , ctx-> instance , precision, ctx->socinfo .vtcm_size_in_mb );
4246 if (!graph->is_valid ()) {
4347 return nullptr ;
4448 }
@@ -50,8 +54,10 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
5054
5155 graph_ptr = graph.get ();
5256 graph_cache[graph_key] = std::move (graph);
53- QNN_LOG_DEBUG (" [%s]add graph %s to cache, cache size: %d\n " , qnn::get_backend_name (ctx->device ),
54- graph_key.c_str (), (int ) graph_cache.size ());
57+ QNN_LOG_DEBUG (" [%s]add graph %s to cache, cache size: %d\n " ,
58+ qnn::get_backend_name (ctx->device ),
59+ graph_key.c_str (),
60+ (int ) graph_cache.size ());
5561 }
5662
5763 return graph_ptr;
@@ -62,6 +68,7 @@ constexpr const bool kQnnSupportedOps[] = {
6268 true , // GGML_OP_NONE
6369 false , // GGML_OP_DUP
6470 true , // GGML_OP_ADD
71+ false , // GGML_OP_ADD_ID
6572 false , // GGML_OP_ADD1
6673 false , // GGML_OP_ACC
6774 true , // GGML_OP_SUB
@@ -197,8 +204,13 @@ inline bool is_tensor_size_valid(qnn::ggml_backend_qnn_device_context * ctx, con
197204 const auto tensor_size = get_tensor_size_in_bytes (tensor, type);
198205 if (ctx->max_tensor_size_in_bytes && tensor_size >= ctx->max_tensor_size_in_bytes ) {
199206 QNN_LOG_DEBUG (" [%s]tensor(%s_%dx%dx%dx%d) size(%lld) exceeds the limit(%lld)\n " ,
200- qnn::get_backend_name (ctx->device ), ggml_get_name (tensor), (int ) tensor->ne [0 ],
201- (int ) tensor->ne [1 ], (int ) tensor->ne [2 ], (int ) tensor->ne [3 ], (long long int ) tensor_size,
207+ qnn::get_backend_name (ctx->device ),
208+ ggml_get_name (tensor),
209+ (int ) tensor->ne [0 ],
210+ (int ) tensor->ne [1 ],
211+ (int ) tensor->ne [2 ],
212+ (int ) tensor->ne [3 ],
213+ (long long int ) tensor_size,
202214 (long long int ) ctx->max_tensor_size_in_bytes );
203215 return false ;
204216 }
@@ -215,10 +227,18 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml
215227#ifndef NDEBUG
216228 if (tensor->view_src ) {
217229 auto * src_tensor = tensor->view_src ;
218- QNN_LOG_DEBUG (" [%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n " , qnn::get_backend_name (ctx->device ),
219- ggml_get_name (tensor), (int ) tensor->ne [0 ], (int ) tensor->ne [1 ], (int ) tensor->ne [2 ],
220- (int ) tensor->ne [3 ], ggml_get_name (src_tensor), (int ) src_tensor->ne [0 ], (int ) src_tensor->ne [1 ],
221- (int ) src_tensor->ne [2 ], (int ) src_tensor->ne [3 ]);
230+ QNN_LOG_DEBUG (" [%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n " ,
231+ qnn::get_backend_name (ctx->device ),
232+ ggml_get_name (tensor),
233+ (int ) tensor->ne [0 ],
234+ (int ) tensor->ne [1 ],
235+ (int ) tensor->ne [2 ],
236+ (int ) tensor->ne [3 ],
237+ ggml_get_name (src_tensor),
238+ (int ) src_tensor->ne [0 ],
239+ (int ) src_tensor->ne [1 ],
240+ (int ) src_tensor->ne [2 ],
241+ (int ) src_tensor->ne [3 ]);
222242 }
223243#endif
224244
@@ -227,14 +247,15 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml
227247 case GGML_TYPE_F16:
228248 if (!is_type_bit_enabled (ctx->supported_types , tensor->type )) {
229249 QNN_LOG_DEBUG (" [%s]unsupported data type %s, supported_types: 0x%x\n " ,
230- qnn::get_backend_name (ctx->device ), ggml_type_name (tensor->type ),
250+ qnn::get_backend_name (ctx->device ),
251+ ggml_type_name (tensor->type ),
231252 (unsigned int ) ctx->supported_types );
232253 return false ;
233254 }
234255 break ;
235256 default :
236- QNN_LOG_DEBUG (" [%s]unsupported data type %s \n " , qnn::get_backend_name (ctx-> device ),
237- ggml_type_name (tensor->type ));
257+ QNN_LOG_DEBUG (
258+ " [%s]unsupported data type %s \n " , qnn::get_backend_name (ctx-> device ), ggml_type_name (tensor->type ));
238259 return false ;
239260 }
240261
@@ -277,14 +298,20 @@ bool ggml_qnn_have_same_tensor_types(qnn::ggml_backend_qnn_device_context * ctx,
277298 if (src1) {
278299 if (src0->type != op->type || src1->type != op->type ) {
279300 QNN_LOG_DEBUG (" [%s][%s]type src0(%s), src1(%s) and op(%s) are not equal\n " ,
280- qnn::get_backend_name (ctx->device ), ggml_op_name (op->op ), ggml_type_name (src0->type ),
281- ggml_type_name (src1->type ), ggml_type_name (op->type ));
301+ qnn::get_backend_name (ctx->device ),
302+ ggml_op_name (op->op ),
303+ ggml_type_name (src0->type ),
304+ ggml_type_name (src1->type ),
305+ ggml_type_name (op->type ));
282306 return false ;
283307 }
284308 } else {
285309 if (src0->type != op->type ) {
286- QNN_LOG_DEBUG (" [%s][%s]type src0(%s) and op(%s) are not equal\n " , qnn::get_backend_name (ctx->device ),
287- ggml_op_name (op->op ), ggml_type_name (src0->type ), ggml_type_name (op->type ));
310+ QNN_LOG_DEBUG (" [%s][%s]type src0(%s) and op(%s) are not equal\n " ,
311+ qnn::get_backend_name (ctx->device ),
312+ ggml_op_name (op->op ),
313+ ggml_type_name (src0->type ),
314+ ggml_type_name (op->type ));
288315 return false ;
289316 }
290317 }
@@ -303,7 +330,9 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con
303330 if (is_data_reinterpretation_op (src0->op ) || is_data_reinterpretation_op (src1->op )) {
304331 // TODO: remove the blocker here when we support permute op
305332 QNN_LOG_DEBUG (" [%s][MUL_MAT]data reorganization op is not supported, (%s, %s)\n " ,
306- qnn::get_backend_name (ctx->device ), ggml_op_name (src0->op ), ggml_op_name (src1->op ));
333+ qnn::get_backend_name (ctx->device ),
334+ ggml_op_name (src0->op ),
335+ ggml_op_name (src1->op ));
307336 return false ;
308337 }
309338
@@ -330,7 +359,8 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con
330359 !is_type_bit_enabled (ctx->cpu_preprocess_types , src0->type )) {
331360 // for such cases that src0 is quantized and op is float32, check if the quant type is enabled
332361 QNN_LOG_DEBUG (" [%s][MUL_MAT]quantized src0 type %s is not enabled\n " ,
333- qnn::get_backend_name (ctx->device ), ggml_type_name (src0->type ));
362+ qnn::get_backend_name (ctx->device ),
363+ ggml_type_name (src0->type ));
334364 return false ;
335365 }
336366 break ;
@@ -354,8 +384,12 @@ void print_tensor_info(qnn::ggml_backend_qnn_device_context * ctx, const ggml_te
354384 std::string op_key;
355385 qnn::get_qnn_op_desc (op, true , GGML_TYPE_COUNT, op_key);
356386
357- QNN_LOG_DEBUG (" [%s][%s]op was %s, support/unsupported: %d/%d\n " , qnn::get_backend_name (ctx->device ), op_key.c_str (),
358- supported, ctx->supported_op_count .load (), ctx->unsupported_op_count .load ());
387+ QNN_LOG_DEBUG (" [%s][%s]op was %s, support/unsupported: %d/%d\n " ,
388+ qnn::get_backend_name (ctx->device ),
389+ op_key.c_str (),
390+ supported,
391+ ctx->supported_op_count .load (),
392+ ctx->unsupported_op_count .load ());
359393}
360394
361395#endif
@@ -402,7 +436,9 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
402436 // TODO: fix this when we have the support for mul with rms_norm
403437 if (ctx->enable_cpu_dequantize && (src0->op == GGML_OP_RMS_NORM || src1->op == GGML_OP_RMS_NORM)) {
404438 QNN_LOG_DEBUG (" [%s][%s]skip unsupported mul with rms norm, (%s, %s)\n " ,
405- qnn::get_backend_name (ctx->device ), ggml_op_desc (op), ggml_op_desc (src0),
439+ qnn::get_backend_name (ctx->device ),
440+ ggml_op_desc (op),
441+ ggml_op_desc (src0),
406442 ggml_op_desc (src1));
407443 is_op_supported = false ;
408444 break ;
@@ -414,7 +450,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
414450 // TODO: move to op caps array?
415451 if (!ggml_are_same_shape (src0, src1)) {
416452 QNN_LOG_DEBUG (" [%s][%s] src0 and src1 dimensions are not equal\n " ,
417- qnn::get_backend_name (ctx->device ), ggml_op_desc (op));
453+ qnn::get_backend_name (ctx->device ),
454+ ggml_op_desc (op));
418455 is_op_supported = false ;
419456 }
420457 break ;
@@ -442,8 +479,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
442479}
443480
444481bool device_compute_graph (qnn::ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) {
445- QNN_LOG_DEBUG (" [%s]compute graph start, nodes count: %d \n " , qnn::get_backend_name (ctx-> device ),
446- (int ) cgraph->n_nodes );
482+ QNN_LOG_DEBUG (
483+ " [%s]compute graph start, nodes count: %d \n " , qnn::get_backend_name (ctx-> device ), (int ) cgraph->n_nodes );
447484
448485 auto qnn_graph = get_qnn_graph_from_cache (ctx, cgraph);
449486 bool success = qnn_graph && qnn_graph->execute (cgraph, ctx->convert_context );
0 commit comments