Skip to content

Commit a053c0e

Browse files
author
zhouwg
committed
ggml-hexagon: release ggml-hexagon v0.99
1 parent 8432d07 commit a053c0e

File tree

1 file changed

+79
-54
lines changed

1 file changed

+79
-54
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 79 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -751,8 +751,10 @@ static void ggmlhexagon_log_internal(ggml_log_level level, const char * file, co
751751
static void ggmlhexagon_print_tensors_info(const char * func_name, const ggml_backend_hexagon_context * ctx,
752752
const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * dst) {
753753
//skip sanity check of params because of performance concern
754-
if (0 == g_hexagon_appcfg.print_tensors_info)
755-
return;
754+
if (0 == g_hexagon_appcfg.dump_op_info) {
755+
if (0 == g_hexagon_appcfg.print_tensors_info)
756+
return;
757+
}
756758

757759
if (nullptr != func_name && nullptr != ctx) {
758760
GGMLHEXAGON_LOG_DEBUG("call %s in dev %s\n", func_name, ctx->name);
@@ -862,13 +864,17 @@ static void ggmlhexagon_get_timestring(char * p_currenttime) {
862864
}
863865

864866
static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * ctx) {
865-
GGMLHEXAGON_LOG_INFO("hwaccel approach is %d(%s)", g_hexagon_appcfg.hwaccel_approach,
866-
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
867867
char timestamp[GGMLHEXAGON_TMPBUF_LEN];
868868
memset(timestamp, 0, GGMLHEXAGON_TMPBUF_LEN);
869+
870+
GGMLHEXAGON_LOG_INFO("hwaccel approach is %d(%s)", g_hexagon_appcfg.hwaccel_approach,
871+
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
872+
GGMLHEXAGON_LOG_INFO("hexagon_backend=%d(%s)", g_hexagon_appcfg.hexagon_backend,
873+
ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend));
869874
ggmlhexagon_get_timestring(timestamp);
870875
if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
871-
GGMLHEXAGON_LOG_INFO("only offload GGML_OP_ADD : %s", g_hexagon_appcfg.enable_q_mulmat ? "NO" : "YES");
876+
GGMLHEXAGON_LOG_INFO("only offload GGML_OP_ADD: %s", g_hexagon_appcfg.enable_mulmat_cdsp ? "NO" : "YES");
877+
GGMLHEXAGON_LOG_INFO("offload quantize GGML_OP_MUL_MAT: %s", g_hexagon_appcfg.enable_q_mulmat ? "YES" : "NO");
872878
} else {
873879
GGMLHEXAGON_LOG_INFO("only offload GGML_OP_ADD: NO");
874880
}
@@ -1437,7 +1443,7 @@ static void ggmlhexagon_load_cfg() {
14371443
qnncfg_instance.load(cfg_filename);
14381444
qnncfg_instance.dump([](const std::string & section, const std::string & key, const std::string value) {
14391445
std::ostringstream tmposs;
1440-
tmposs << "section[" << std::setw(10) << std::left << section << "],[" << std::setw(25) << std::left << key << "] = [" << value << "]" << std::endl;
1446+
tmposs << "section[" << std::setw(10) << std::left << section << "],[" << std::setw(25) << std::left << key << "] = [" << value << "]";
14411447
GGMLHEXAGON_LOG_INFO("%s", tmposs.str().c_str());
14421448
});
14431449
std::string precision_mode;
@@ -1453,11 +1459,10 @@ static void ggmlhexagon_load_cfg() {
14531459
qnncfg_instance.get_stringvalue("qnn", "precision_mode", precision_mode, "fp32");
14541460
qnncfg_instance.get_intvalue("cdsp", "enable_mulmat_cdsp", g_hexagon_appcfg.enable_mulmat_cdsp, 0);
14551461
qnncfg_instance.get_intvalue("cdsp", "enable_q_mulmat", g_hexagon_appcfg.enable_q_mulmat, 0);
1456-
GGMLHEXAGON_LOG_INFO("print_qnn_internal_log=%d", g_hexagon_appcfg.print_qnn_internal_log);
14571462
GGMLHEXAGON_LOG_INFO("hwaccel_approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
14581463
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
1459-
GGMLHEXAGON_LOG_INFO("hexagon_backend=%d", g_hexagon_appcfg.hexagon_backend);
1460-
GGMLHEXAGON_LOG_INFO("npu inference precision mode=%s", precision_mode.c_str());
1464+
GGMLHEXAGON_LOG_INFO("hexagon_backend=%d(%s)", g_hexagon_appcfg.hexagon_backend,
1465+
ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend));
14611466
GGMLHEXAGON_LOG_INFO("qnn runtime lib path=%s", g_hexagon_appcfg.runtimelib_path);
14621467
if (precision_mode.find("fp16") != std::string::npos) {
14631468
g_hexagon_appcfg.precision_mode = 1;
@@ -4853,7 +4858,7 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
48534858
ggmlop_domain_uri_len = strlen(ggmlop_URI) + MAX_DOMAIN_NAMELEN;
48544859
ggmlop_domain_uri = (char *)malloc(ggmlop_domain_uri_len);
48554860
snprintf(ggmlop_domain_uri, ggmlop_domain_uri_len, "%s%s", ggmlop_URI, uri);
4856-
GGMLHEXAGON_LOG_INFO("ggmlop domain uri:%s", ggmlop_domain_uri);
4861+
GGMLHEXAGON_LOG_DEBUG("ggmlop domain uri:%s", ggmlop_domain_uri);
48574862
hexagon_error = ggmlop_dsp_open(ggmlop_domain_uri, &ctx->ggmlop_handle);
48584863
if (AEE_SUCCESS == hexagon_error) {
48594864
GGMLHEXAGON_LOG_INFO("succeed to open domain %d(%s)", domain_id, ggmlhexagon_get_dsp_name(domain_id));
@@ -4976,9 +4981,6 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49764981
dsptensor_2.nb[2] = dst->nb[2];
49774982
dsptensor_2.nb[3] = dst->nb[3];
49784983

4979-
//GGMLQNN_DUMP_DSPTENSOR(&dsptensor_0);
4980-
//GGMLQNN_DUMP_DSPTENSOR(&dsptensor_1);
4981-
//GGMLQNN_DUMP_DSPTENSOR(&dsptensor_2);
49824984
hexagon_error = op_func(ctx->ggmlop_handle, &dsptensor_0, &dsptensor_1, &dsptensor_2);
49834985
if (AEE_SUCCESS != hexagon_error) {
49844986
GGMLHEXAGON_LOG_WARN("ggmlop %s computation fail on cdsp", ggml_op_name(op->op));
@@ -4991,49 +4993,56 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49914993
// =================================================================================================
49924994
// section-8: implementation of ggml-hexagon backend according to specification in ggml backend subsystem
49934995
// =================================================================================================
4994-
//hwaccel through cDSP
4995-
static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
4996-
ggmlhexagon_dump_op_info(op_tensor);
4996+
static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
4997+
ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context;
4998+
GGML_UNUSED(ctx);
4999+
if (op_tensor->op == GGML_OP_NONE) {
5000+
return true;
5001+
}
5002+
49975003
if (!ggmlhexagon_k_op_caps[ggmlhexagon_get_op_index(op_tensor)].supported) {
49985004
return false;
49995005
}
50005006

5001-
struct ggml_tensor * src0 = op_tensor->src[0];
5002-
struct ggml_tensor * src1 = op_tensor->src[1];
5003-
const int64_t ne00 = op_tensor->src[0]->ne[0];
5004-
uint32_t src0_rank = ggml_n_dims(src0);
5007+
const struct ggml_tensor * src0 = op_tensor->src[0];
5008+
const struct ggml_tensor * src1 = op_tensor->src[1];
5009+
int64_t ne00 = 0;
5010+
uint32_t src0_rank = 0;
50055011
uint32_t src1_rank = 0;
5012+
if (nullptr != src0) {
5013+
src0_rank = ggml_n_dims(src0);
5014+
ne00 = src0->ne[0];
5015+
}
50065016
if (nullptr != src1) {
50075017
src1_rank = ggml_n_dims(src1);
50085018
}
50095019

5010-
//available in the early stage, should be removed in the product stage
5011-
bool support = false;
5012-
if (g_hexagon_appcfg.enable_mulmat_cdsp)
5013-
support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
5014-
else
5015-
support = (op_tensor->op == GGML_OP_ADD);
5016-
if (!support) {
5017-
return false;
5018-
}
5019-
50205020
switch (op_tensor->op) {
50215021
case GGML_OP_ADD:
50225022
case GGML_OP_SUB:
50235023
{
50245024
if (!ggml_are_same_shape(src0, src1)) {
50255025
return false;
50265026
}
5027-
break;
5027+
5028+
//FIXME:remove this filter
5029+
if (ne00 < 32)
5030+
return false;
5031+
5032+
ggmlhexagon_dump_op_info(op_tensor);
5033+
//FIXME:remove this filter
5034+
return ggmlhexagon_same_types(ctx, op_tensor);
50285035
}
50295036
case GGML_OP_MUL_MAT:
50305037
{
5038+
GGMLHEXAGON_LOG_DEBUG("mulmat");
50315039
ggmlhexagon_dump_op_info(op_tensor);
50325040

50335041
//TODO:3d&4d matrix mulmat on cDSP
50345042
if (src0_rank != 2)
50355043
return false;
50365044

5045+
ggmlhexagon_dump_op_info(op_tensor);
50375046
if (g_hexagon_appcfg.enable_q_mulmat)
50385047
return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type))
50395048
&& (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
@@ -5043,27 +5052,28 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx,
50435052
default:
50445053
break;
50455054
}
5046-
return (src0->type == GGML_TYPE_F32) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
5055+
return false;
50475056
}
50485057

5049-
static bool ggmlbackend_can_handle_op(const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
5058+
static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
5059+
ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context;
50505060
if (op_tensor->op == GGML_OP_NONE) {
50515061
return true;
50525062
}
50535063

5054-
if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
5055-
return ggmlhexagon_can_handle_op(ctx, op_tensor);
5056-
}
5057-
50585064
if (!ggmlqnn_k_op_caps[ggmlhexagon_get_op_index(op_tensor)].supported) {
50595065
return false;
50605066
}
50615067

50625068
struct ggml_tensor * src0 = op_tensor->src[0];
50635069
struct ggml_tensor * src1 = op_tensor->src[1];
5064-
const int64_t ne00 = op_tensor->src[0]->ne[0];
5065-
uint32_t src0_rank = ggml_n_dims(src0);
5070+
int64_t ne00 = 0;
5071+
uint32_t src0_rank = 0;
50665072
uint32_t src1_rank = 0;
5073+
if (nullptr != src0) {
5074+
src0_rank = ggml_n_dims(src0);
5075+
ne00 = src0->ne[0];
5076+
}
50675077
if (nullptr != src1) {
50685078
src1_rank = ggml_n_dims(src1);
50695079
}
@@ -5542,6 +5552,11 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
55425552
GGMLHEXAGON_LOG_DEBUG("user's specified hexagon_backend in cfgfile = %d", g_hexagon_appcfg.hexagon_backend);
55435553
GGMLHEXAGON_LOG_DEBUG("user's sepcified qnn runtime lib path in cfgfile = %s", g_hexagon_appcfg.runtimelib_path);
55445554

5555+
if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach) {
5556+
GGMLHEXAGON_LOG_INFO("HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend");
5557+
return nullptr;
5558+
}
5559+
55455560
if (nullptr == params) {
55465561
GGMLHEXAGON_LOG_DEBUG("program specified param is nullptr");
55475562
dev_index = (g_hexagon_appcfg.hexagon_backend > 0) ? g_hexagon_appcfg.hexagon_backend : 0;
@@ -5600,11 +5615,6 @@ static ggml_backend_buffer_t ggml_backend_hexagon_device_buffer_from_host_ptr(gg
56005615
GGML_UNUSED(max_tensor_size);
56015616
}
56025617

5603-
static bool ggml_backend_hexagon_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
5604-
ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *) dev->context;
5605-
return (ggmlbackend_can_handle_op(ctx,op));
5606-
}
5607-
56085618
static bool ggml_backend_hexagon_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
56095619
GGML_UNUSED(dev);
56105620
return ggml_backend_buft_is_host(buft);
@@ -5620,7 +5630,7 @@ static struct ggml_backend_device_i ggml_backend_hexagon_device_interface = {
56205630
/* .get_buffer_type = */ ggml_backend_hexagon_device_get_buffer_type,
56215631
/* .get_host_buffer_type = */ nullptr,
56225632
/* .buffer_from_host_ptr = */ ggml_backend_hexagon_device_buffer_from_host_ptr,
5623-
/* .supports_op = */ ggml_backend_hexagon_device_supports_op,
5633+
/* .supports_op = */ nullptr,
56245634
/* .supports_buft = */ ggml_backend_hexagon_device_supports_buft,
56255635
/* .offload_op = */ nullptr,
56265636
/* .event_new = */ nullptr,
@@ -5719,23 +5729,33 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
57195729

57205730
//case-2: normal scenario, such as llama-cli or UI application
57215731
ggmlhexagon_load_cfg();
5722-
GGMLHEXAGON_LOG_INFO("hwaccel approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
5732+
GGMLHEXAGON_LOG_DEBUG("hwaccel approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
57235733
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
5724-
GGMLHEXAGON_LOG_INFO("user's specified hexagon_backend=%d", g_hexagon_appcfg.hexagon_backend);
5725-
GGMLHEXAGON_LOG_INFO("user's specified runtime lib path=%s", g_hexagon_appcfg.runtimelib_path);
5734+
GGMLHEXAGON_LOG_DEBUG("user's specified hexagon_backend=%d", g_hexagon_appcfg.hexagon_backend);
5735+
GGMLHEXAGON_LOG_DEBUG("user's specified runtime lib path=%s", g_hexagon_appcfg.runtimelib_path);
57265736
if (g_hexagon_appcfg.hexagon_backend >= GGML_HEXAGON_MAX_DEVICES) {
5727-
GGMLHEXAGON_LOG_INFO("assume default ggml backend");
5737+
GGMLHEXAGON_LOG_INFO("using default ggml backend");
57285738
GGMLHEXAGON_LOG_DEBUG("leave ggml_backend_hexagon_reg");
57295739
return nullptr;
57305740
}
57315741

5742+
if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach) {
5743+
GGMLHEXAGON_LOG_INFO("HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend");
5744+
return nullptr;
5745+
}
5746+
57325747
{
57335748
static std::mutex mutex;
57345749
std::lock_guard<std::mutex> lock(mutex);
57355750
if (!initialized) {
57365751
ggml_backend_hexagon_reg_context * ctx = new ggml_backend_hexagon_reg_context;
57375752

57385753
for (int i = 0; i < ggml_backend_hexagon_get_device_count(); i++) {
5754+
if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
5755+
ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_cdsp;
5756+
} else {
5757+
ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
5758+
}
57395759
ggml_backend_dev_t dev = new ggml_backend_device {
57405760
/* .iface = */ ggml_backend_hexagon_device_interface,
57415761
/* .reg = */ &reg,
@@ -5763,18 +5783,18 @@ const char * ggml_backend_hexagon_get_devname(size_t dev_num) {
57635783
if (dev_num == HEXAGON_BACKEND_GGML)
57645784
return "ggml";
57655785
else
5766-
return "ggml-hexagon";
5786+
return "HEXAGON_BACKEND_CDSP";
57675787
}
57685788

57695789
switch (dev_num) {
57705790
case HEXAGON_BACKEND_QNNCPU:
5771-
return "QNN-CPU";
5791+
return "HEXAGON_BACKEND_QNN_CPU";
57725792
case HEXAGON_BACKEND_QNNGPU:
5773-
return "QNN-GPU";
5793+
return "HEXAGON_BACKEND_QNN_GPU";
57745794
case HEXAGON_BACKEND_QNNNPU:
5775-
return "QNN-NPU";
5795+
return "HEXAGON_BACKEND_QNN_NPU";
57765796
case HEXAGON_BACKEND_GGML:
5777-
return "ggml"; //"fake" QNN backend, used for compare performance between QNN backend and original GGML
5797+
return "ggml"; //"fake" QNN backend, used to compare performance between the hexagon backend and the default ggml backend
57785798
default:
57795799
return "unknown";
57805800
}
@@ -5826,6 +5846,11 @@ ggml_backend_t ggml_backend_hexagon_init(size_t device, const char * qnn_lib_pat
58265846
if (nullptr == qnn_lib_path)
58275847
return nullptr;
58285848

5849+
if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach) {
5850+
GGMLHEXAGON_LOG_INFO("HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend");
5851+
return nullptr;
5852+
}
5853+
58295854
GGMLHEXAGON_LOG_DEBUG("device %d", device);
58305855
GGMLHEXAGON_LOG_DEBUG("qnn_lib_path %s", qnn_lib_path);
58315856
if (device >= GGML_HEXAGON_MAX_DEVICES) {

0 commit comments

Comments
 (0)