Skip to content

Commit ac25732

Browse files
author
zhouwg
committed
ggml-hexagon: add perf function in hexagon kernels on cDSP side
1 parent 2e0af66 commit ac25732

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5867,8 +5867,11 @@ static ggml_backend_dev_t ggml_backend_hexagon_reg_get_device(ggml_backend_reg_t
58675867

58685868
GGMLHEXAGON_LOG_DEBUG("index %d", index);
58695869
ggml_backend_hexagon_reg_context * ctx = (ggml_backend_hexagon_reg_context *)reg->context;
5870-
if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
5870+
if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
58715871
GGML_ASSERT(g_hexagon_appcfg.hexagon_backend == HEXAGON_BACKEND_CDSP);
5872+
//here is the trick:
5873+
//there is only 1 backend_device when g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP
5874+
//so return ctx->devices[0]
58725875
return ctx->devices[0];
58735876
} else {
58745877
GGML_ASSERT(index < ctx->devices.size());

ggml/src/ggml-hexagon/kernels/ggmlop_cdsp.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,25 @@ static void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, co
11491149

11501150
}
11511151

1152+
static inline uint64 hexagon_perf_get_time_us(void)
1153+
{
1154+
unsigned long long count;
1155+
asm volatile (" %0 = c31:30 " : "=r"(count));
1156+
return (uint64)(count) * 10ull / 192ull;
1157+
}
1158+
1159+
/*
 * Timer initialisation hook.
 *
 * Intentionally a no-op: the body performs no work.
 */
static void ggml_time_init(void)
{
    /* nothing to initialise */
}
1162+
1163+
/*
 * Return the current timestamp in milliseconds.
 *
 * The value is derived from hexagon_perf_get_time_us(), which reports
 * microseconds, so it is divided by 1000 (the previous code multiplied by
 * 1000, yielding a value a million times too large for a millisecond clock).
 * The result is truncated toward zero.
 */
static int64_t ggml_time_ms(void) {
    return (int64_t)(hexagon_perf_get_time_us() / 1000);
}
1166+
1167+
/*
 * Return the current timestamp in microseconds.
 *
 * Thin wrapper that forwards to hexagon_perf_get_time_us() and converts its
 * unsigned result to the signed int64_t this function returns.
 */
int64_t ggml_time_us(void)
{
    return (int64_t) hexagon_perf_get_time_us();
}
1170+
11521171
// =================================================================================================
11531172
// section-4: ggml-hexagon kernel helper function
11541173
// =================================================================================================
@@ -1266,6 +1285,8 @@ static void ggml_compute_forward_add_f32(
12661285
const struct ggml_tensor * src1,
12671286
struct ggml_tensor * dst) {
12681287
GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
1288+
uint64_t start_time = ggml_time_us();
1289+
12691290
memcpy(dst->ne, src1->ne, 16);
12701291
memcpy(dst->nb, src1->nb, 16);
12711292
ggmlhexagon_dump_tensor(src0, 1);
@@ -1328,6 +1349,11 @@ static void ggml_compute_forward_add_f32(
13281349
}
13291350
}
13301351
}
1352+
1353+
uint64_t end_time = ggml_time_us();
1354+
uint64_t duration = (end_time - start_time);
1355+
GGMLHEXAGON_LOG_DEBUG("duration %llu us", duration);
1356+
13311357
GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );
13321358
}
13331359

0 commit comments

Comments
 (0)