
Commit aed9b4f

fix compiling error
1 parent ee6326a commit aed9b4f

2 files changed (+110, -52 lines)

ggml/src/ggml-qnn/qnn/backend-ops.cpp

Lines changed: 70 additions & 33 deletions
@@ -1,44 +1,48 @@
 
 #include "backend-ops.hpp"
 
-#include <memory>
-
 #include "ggml-impl.h"
 #include "graph.hpp"
 #include "logger.hpp"
 #include "op-config.hpp"
 #include "tensor.hpp"
 #include "utils.hpp"
 
+#include <memory>
+
 namespace {
 
 qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) {
     auto & graph_cache = ctx->qnn_graph_cache;
     std::string graph_key;
     auto op_data_type = qnn::qnn_graph::get_graph_key_from_cgraph(cgraph, graph_key);
     if (graph_key.empty()) {
-        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n", qnn::get_backend_name(ctx->device),
-                      (const void *) cgraph, (int) cgraph->n_nodes);
+        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n",
+                      qnn::get_backend_name(ctx->device),
+                      (const void *) cgraph,
+                      (int) cgraph->n_nodes);
         return nullptr;
     }
 
     auto it = graph_cache.find(graph_key);
     qnn::qnn_graph * graph_ptr = nullptr;
     if (it != graph_cache.end()) {
         auto it = graph_cache.find(graph_key);
-        QNN_LOG_DEBUG("[%s]found graph %s in cache, cache size: %d\n", qnn::get_backend_name(ctx->device),
-                      graph_key.c_str(), (int) graph_cache.size());
+        QNN_LOG_DEBUG("[%s]found graph %s in cache, cache size: %d\n",
+                      qnn::get_backend_name(ctx->device),
+                      graph_key.c_str(),
+                      (int) graph_cache.size());
         graph_ptr = it->second.get();
     } else {
         auto precision = qnn::qnn_graph::kHtpDefault;
         if (op_data_type == GGML_TYPE_F16) {
-            QNN_LOG_DEBUG("[%s][%s]set graph precision to FP16\n", qnn::get_backend_name(ctx->device),
-                          graph_key.c_str());
+            QNN_LOG_DEBUG(
+                "[%s][%s]set graph precision to FP16\n", qnn::get_backend_name(ctx->device), graph_key.c_str());
             precision = qnn::qnn_graph::kHtpFp16;
         }
 
-        auto graph = std::make_unique<qnn::qnn_graph>(graph_key, ctx->device, ctx->instance, precision,
-                                                      ctx->socinfo.vtcm_size_in_mb);
+        auto graph = std::make_unique<qnn::qnn_graph>(
+            graph_key, ctx->device, ctx->instance, precision, ctx->socinfo.vtcm_size_in_mb);
         if (!graph->is_valid()) {
             return nullptr;
         }
@@ -50,8 +54,10 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
 
         graph_ptr = graph.get();
         graph_cache[graph_key] = std::move(graph);
-        QNN_LOG_DEBUG("[%s]add graph %s to cache, cache size: %d\n", qnn::get_backend_name(ctx->device),
-                      graph_key.c_str(), (int) graph_cache.size());
+        QNN_LOG_DEBUG("[%s]add graph %s to cache, cache size: %d\n",
+                      qnn::get_backend_name(ctx->device),
+                      graph_key.c_str(),
+                      (int) graph_cache.size());
     }
 
     return graph_ptr;
@@ -62,6 +68,7 @@ constexpr const bool kQnnSupportedOps[] = {
     true,   // GGML_OP_NONE
     false,  // GGML_OP_DUP
     true,   // GGML_OP_ADD
+    false,  // GGML_OP_ADD_ID
     false,  // GGML_OP_ADD1
     false,  // GGML_OP_ACC
     true,   // GGML_OP_SUB
@@ -197,8 +204,13 @@ inline bool is_tensor_size_valid(qnn::ggml_backend_qnn_device_context * ctx, con
     const auto tensor_size = get_tensor_size_in_bytes(tensor, type);
     if (ctx->max_tensor_size_in_bytes && tensor_size >= ctx->max_tensor_size_in_bytes) {
         QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) size(%lld) exceeds the limit(%lld)\n",
-                      qnn::get_backend_name(ctx->device), ggml_get_name(tensor), (int) tensor->ne[0],
-                      (int) tensor->ne[1], (int) tensor->ne[2], (int) tensor->ne[3], (long long int) tensor_size,
+                      qnn::get_backend_name(ctx->device),
+                      ggml_get_name(tensor),
+                      (int) tensor->ne[0],
+                      (int) tensor->ne[1],
+                      (int) tensor->ne[2],
+                      (int) tensor->ne[3],
+                      (long long int) tensor_size,
                       (long long int) ctx->max_tensor_size_in_bytes);
         return false;
     }
@@ -215,10 +227,18 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml
 #ifndef NDEBUG
     if (tensor->view_src) {
         auto * src_tensor = tensor->view_src;
-        QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", qnn::get_backend_name(ctx->device),
-                      ggml_get_name(tensor), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2],
-                      (int) tensor->ne[3], ggml_get_name(src_tensor), (int) src_tensor->ne[0], (int) src_tensor->ne[1],
-                      (int) src_tensor->ne[2], (int) src_tensor->ne[3]);
+        QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n",
+                      qnn::get_backend_name(ctx->device),
+                      ggml_get_name(tensor),
+                      (int) tensor->ne[0],
+                      (int) tensor->ne[1],
+                      (int) tensor->ne[2],
+                      (int) tensor->ne[3],
+                      ggml_get_name(src_tensor),
+                      (int) src_tensor->ne[0],
+                      (int) src_tensor->ne[1],
+                      (int) src_tensor->ne[2],
+                      (int) src_tensor->ne[3]);
     }
 #endif
 
@@ -227,14 +247,15 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml
         case GGML_TYPE_F16:
            if (!is_type_bit_enabled(ctx->supported_types, tensor->type)) {
                 QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n",
-                              qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type),
+                              qnn::get_backend_name(ctx->device),
+                              ggml_type_name(tensor->type),
                               (unsigned int) ctx->supported_types);
                 return false;
             }
             break;
         default:
-            QNN_LOG_DEBUG("[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device),
-                          ggml_type_name(tensor->type));
+            QNN_LOG_DEBUG(
+                "[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type));
             return false;
     }
 
@@ -277,14 +298,20 @@ bool ggml_qnn_have_same_tensor_types(qnn::ggml_backend_qnn_device_context * ctx,
     if (src1) {
         if (src0->type != op->type || src1->type != op->type) {
             QNN_LOG_DEBUG("[%s][%s]type src0(%s), src1(%s) and op(%s) are not equal\n",
-                          qnn::get_backend_name(ctx->device), ggml_op_name(op->op), ggml_type_name(src0->type),
-                          ggml_type_name(src1->type), ggml_type_name(op->type));
+                          qnn::get_backend_name(ctx->device),
+                          ggml_op_name(op->op),
+                          ggml_type_name(src0->type),
+                          ggml_type_name(src1->type),
+                          ggml_type_name(op->type));
             return false;
         }
     } else {
         if (src0->type != op->type) {
-            QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n", qnn::get_backend_name(ctx->device),
-                          ggml_op_name(op->op), ggml_type_name(src0->type), ggml_type_name(op->type));
+            QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n",
+                          qnn::get_backend_name(ctx->device),
+                          ggml_op_name(op->op),
+                          ggml_type_name(src0->type),
+                          ggml_type_name(op->type));
             return false;
         }
     }
@@ -303,7 +330,9 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con
     if (is_data_reinterpretation_op(src0->op) || is_data_reinterpretation_op(src1->op)) {
         // TODO: remove the blocker here when we support permute op
         QNN_LOG_DEBUG("[%s][MUL_MAT]data reorganization op is not supported, (%s, %s)\n",
-                      qnn::get_backend_name(ctx->device), ggml_op_name(src0->op), ggml_op_name(src1->op));
+                      qnn::get_backend_name(ctx->device),
+                      ggml_op_name(src0->op),
+                      ggml_op_name(src1->op));
         return false;
     }
 
@@ -330,7 +359,8 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con
                 !is_type_bit_enabled(ctx->cpu_preprocess_types, src0->type)) {
                 // for such cases that src0 is quantized and op is float32, check if the quant type is enabled
                 QNN_LOG_DEBUG("[%s][MUL_MAT]quantized src0 type %s is not enabled\n",
-                              qnn::get_backend_name(ctx->device), ggml_type_name(src0->type));
+                              qnn::get_backend_name(ctx->device),
+                              ggml_type_name(src0->type));
                 return false;
             }
             break;
@@ -354,8 +384,12 @@ void print_tensor_info(qnn::ggml_backend_qnn_device_context * ctx, const ggml_te
     std::string op_key;
     qnn::get_qnn_op_desc(op, true, GGML_TYPE_COUNT, op_key);
 
-    QNN_LOG_DEBUG("[%s][%s]op was %s, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device), op_key.c_str(),
-                  supported, ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
+    QNN_LOG_DEBUG("[%s][%s]op was %s, support/unsupported: %d/%d\n",
+                  qnn::get_backend_name(ctx->device),
+                  op_key.c_str(),
+                  supported,
+                  ctx->supported_op_count.load(),
+                  ctx->unsupported_op_count.load());
 }
 
 #endif
@@ -402,7 +436,9 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
             // TODO: fix this when we have the support for mul with rms_norm
             if (ctx->enable_cpu_dequantize && (src0->op == GGML_OP_RMS_NORM || src1->op == GGML_OP_RMS_NORM)) {
                 QNN_LOG_DEBUG("[%s][%s]skip unsupported mul with rms norm, (%s, %s)\n",
-                              qnn::get_backend_name(ctx->device), ggml_op_desc(op), ggml_op_desc(src0),
+                              qnn::get_backend_name(ctx->device),
+                              ggml_op_desc(op),
+                              ggml_op_desc(src0),
                               ggml_op_desc(src1));
                 is_op_supported = false;
                 break;
@@ -414,7 +450,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
             // TODO: move to op caps array?
             if (!ggml_are_same_shape(src0, src1)) {
                 QNN_LOG_DEBUG("[%s][%s] src0 and src1 dimensions are not equal\n",
-                              qnn::get_backend_name(ctx->device), ggml_op_desc(op));
+                              qnn::get_backend_name(ctx->device),
+                              ggml_op_desc(op));
                 is_op_supported = false;
             }
             break;
@@ -442,8 +479,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
 }
 
 bool device_compute_graph(qnn::ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) {
-    QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device),
-                  (int) cgraph->n_nodes);
+    QNN_LOG_DEBUG(
+        "[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device), (int) cgraph->n_nodes);
 
     auto qnn_graph = get_qnn_graph_from_cache(ctx, cgraph);
     bool success = qnn_graph && qnn_graph->execute(cgraph, ctx->convert_context);

ggml/src/ggml-qnn/qnn/op-config-caps.cpp

Lines changed: 40 additions & 19 deletions
@@ -3,11 +3,14 @@
 
 namespace {
 
-using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
+using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *,
+                                                                      const std::string &,
                                                                       std::shared_ptr<qnn::qnn_instance>);
 
-using op_description_generator_t = void (*)(const ggml_tensor * op, bool append_dimensions,
-                                            ggml_type override_data_type, std::string & output);
+using op_description_generator_t = void (*)(const ggml_tensor * op,
+                                            bool append_dimensions,
+                                            ggml_type override_data_type,
+                                            std::string & output);
 
 void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type override_data_type, std::string & output) {
     char buffer[256] = {};
@@ -21,13 +24,24 @@ void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type ove
             len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name);
             break;
         case 3:
-            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
-                           (long) tensor->ne[2], type_name);
+            len = snprintf(buffer,
+                           sizeof(buffer),
+                           "%ldx%ldx%ld%s",
+                           (long) tensor->ne[0],
+                           (long) tensor->ne[1],
+                           (long) tensor->ne[2],
+                           type_name);
             break;
         case 4:
         default:
-            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
-                           (long) tensor->ne[2], (long) tensor->ne[3], type_name);
+            len = snprintf(buffer,
+                           sizeof(buffer),
+                           "%ldx%ldx%ldx%ld%s",
+                           (long) tensor->ne[0],
+                           (long) tensor->ne[1],
+                           (long) tensor->ne[2],
+                           (long) tensor->ne[3],
+                           type_name);
             break;
     }
     GGML_ASSERT(len > 0 && len < (int) sizeof(buffer));
@@ -61,8 +75,10 @@ void get_op_key_with_src_op_desc(const ggml_tensor * op, std::string & output) {
     output += ')';
 }
 
-void generic_get_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type,
-                         std::string & output) {
+void generic_get_op_desc(const ggml_tensor * op,
+                         bool append_dimensions,
+                         ggml_type override_data_type,
+                         std::string & output) {
     if (append_dimensions) {
         get_graph_key_from_op(op, override_data_type, output);
     } else {
@@ -83,6 +99,7 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
         // GGML_OP_ADD
        QNN_OP_ELEMENT_WISE_ADD,  // qnn_op_name
     },
+    {},  // GGML_OP_ADD_ID
     {},  // GGML_OP_ADD1
     {},  // GGML_OP_ACC
     {
@@ -235,8 +252,8 @@ std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tenso
                                                                 qnn::qnn_instance_ptr qnn_instance) {
     if (qnn_instance->has_custom_op_package() && ggml_n_dims(op) == 2) {
         QNN_LOG_DEBUG("create GgmlMulMat, name %s, use GgmlOpPackage\n", instance_name.c_str());
-        return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, "GgmlOpPackage", "GgmlMulMat",
-                                                                qnn_instance);
+        return std::make_shared<qnn::ggml_qnn_single_op_config>(
+            instance_name, "GgmlOpPackage", "GgmlMulMat", qnn_instance);
     }
 
     QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
@@ -250,8 +267,8 @@ std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tenso
     GGML_UNUSED(op);
     static_assert(_op < std::size(kOpCaps));
     static_assert(kOpCaps[_op].qnn_op_name != nullptr);
-    return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                            kOpCaps[_op].qnn_op_name, qnn_instance);
+    return std::make_shared<qnn::ggml_qnn_single_op_config>(
+        instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, kOpCaps[_op].qnn_op_name, qnn_instance);
 }
 
 void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char * name, float value) {
@@ -273,8 +290,8 @@ std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(const gg
 
     _ggml_op_param_type op_param;
     memcpy(&op_param, op->op_params, sizeof(op_param));
-    auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
-                                                      qnn_instance);
+    auto qnn_op = std::make_shared<_qnn_op_type_name>(
+        instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name, qnn_instance);
     if (op_caps.qnn_param_name) {
         add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
     }
@@ -285,6 +302,7 @@ constexpr const op_constructor_t kOpConstructors[] = {
     nullptr,                              // GGML_OP_NONE
     nullptr,                              // GGML_OP_DUP
     generic_op_constructor<GGML_OP_ADD>,  // GGML_OP_ADD
+    nullptr,                              // GGML_OP_ADD_ID
     nullptr,                              // GGML_OP_ADD1
     nullptr,                              // GGML_OP_ACC
     generic_op_constructor<GGML_OP_SUB>,  // GGML_OP_SUB
@@ -425,8 +443,10 @@ const char * get_qnn_op_name(const ggml_tensor * op) {
     return kOpCaps[op_index].qnn_op_name;
 }
 
-void get_qnn_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type,
-                     std::string & output) {
+void get_qnn_op_desc(const ggml_tensor * op,
+                     bool append_dimensions,
+                     ggml_type override_data_type,
+                     std::string & output) {
     auto op_index = get_qnn_op_index(op);
     GGML_ASSERT(op_index < std::size(kOpCaps));
     auto get_desc = kOpCaps[op_index].get_desc;
@@ -437,8 +457,9 @@ void get_qnn_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type o
     }
 }
 
-std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op, const std::string & name,
-                                              qnn_instance_ptr qnn_instance) {
+std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op,
+                                              const std::string & name,
+                                              qnn_instance_ptr qnn_instance) {
     auto op_index = get_qnn_op_index(op);
     GGML_ASSERT(op_index < std::size(kOpCaps));
     auto op_constructor = kOpConstructors[op_index];

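Note: the substantive change behind "fix compiling error" appears to be the three new GGML_OP_ADD_ID entries; the rest of the diff is formatting. The backend keeps parallel tables (kQnnSupportedOps, kOpCaps, kOpConstructors) indexed directly by the ggml_op enum, so any op added upstream needs a slot in each table, in enum order. Below is a minimal sketch of that pattern with hypothetical names (kSupported, the ggml_op_min enum, GGML_OP_COUNT_X) and a size static_assert that the real tables may implement differently.

#include <iterator>

// Stand-in for the real ggml_op enum; GGML_OP_ADD_ID_X plays the role of the
// newly introduced op that broke the build.
enum ggml_op_min {
    GGML_OP_NONE_X,
    GGML_OP_ADD_X,
    GGML_OP_ADD_ID_X,
    GGML_OP_COUNT_X,
};

// One entry per op, in enum order, because the table is indexed by the enum value.
constexpr bool kSupported[] = {
    true,   // GGML_OP_NONE_X
    true,   // GGML_OP_ADD_X
    false,  // GGML_OP_ADD_ID_X  <- the kind of entry this commit adds
};

// Compile-time guard: a new op without a matching table entry becomes a build
// error instead of an out-of-bounds lookup at runtime.
static_assert(std::size(kSupported) == GGML_OP_COUNT_X, "op table out of sync with ggml_op");

constexpr bool op_is_supported(ggml_op_min op) {
    return kSupported[op];
}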