Skip to content

Commit ac7a334

Browse files
hexagon: there is no need to ref/deref the buffers at this point
We're not going to release the buffers without flushing the session queue, so there is no need to increment/decrement the refcounts for every request. We also don't need to include those buffers in the response.
1 parent 2b86354 commit ac7a334

File tree

2 files changed

+71
-200
lines changed

2 files changed

+71
-200
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 21 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -2226,7 +2226,7 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
22262226
bufs[0].ptr = src0->data;
22272227
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
22282228
bufs[0].size = ggml_nbytes(src0);
2229-
bufs[0].flags = DSPQUEUE_BUFFER_FLAG_REF;
2229+
bufs[0].flags = 0;
22302230

22312231
// Second buffer Input Activations. This is a buffer that the CPU
22322232
// writes and the DSP reads, so we'll need to flush CPU caches and
@@ -2236,8 +2236,7 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
22362236
bufs[1].ptr = src1->data;
22372237
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
22382238
bufs[1].size = ggml_nbytes(src1);
2239-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2240-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2239+
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
22412240
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
22422241

22432242
// Third buffer Output Activations. We'll handle DSP
@@ -2248,7 +2247,7 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
22482247
bufs[2].ptr = dst->data;
22492248
bufs[2].offset = (uint8_t *) dst->data - dst_buf->base;
22502249
bufs[2].size = ggml_nbytes(dst);
2251-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_REF | DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
2250+
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
22522251

22532252
// Primary DSP session from the src0 (normally weight) tensor
22542253
auto sess = src0_buf->sess;
@@ -2332,7 +2331,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
23322331
bufs[0].ptr = src0->data;
23332332
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
23342333
bufs[0].size = ggml_nbytes(src0);
2335-
bufs[0].flags = DSPQUEUE_BUFFER_FLAG_REF;
2334+
bufs[0].flags = 0;
23362335

23372336
// Second buffer Input Activations. This is a buffer that the CPU
23382337
// writes and the DSP reads, so we'll need to flush CPU caches and
@@ -2342,8 +2341,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
23422341
bufs[1].ptr = src1->data;
23432342
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
23442343
bufs[1].size = ggml_nbytes(src1);
2345-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2346-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2344+
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
23472345
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
23482346

23492347
// Third buffer expert IDs. This is a buffer that the CPU
@@ -2354,8 +2352,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
23542352
bufs[2].ptr = src2->data;
23552353
bufs[2].offset = (uint8_t *) src2->data - src2_buf->base;
23562354
bufs[2].size = ggml_nbytes(src2);
2357-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2358-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2355+
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
23592356
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
23602357

23612358
// Fourth buffer Output Activations. We'll handle DSP
@@ -2366,7 +2363,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
23662363
bufs[3].ptr = dst->data;
23672364
bufs[3].offset = (uint8_t *) dst->data - dst_buf->base;
23682365
bufs[3].size = ggml_nbytes(dst);
2369-
bufs[3].flags = (DSPQUEUE_BUFFER_FLAG_REF | DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
2366+
bufs[3].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
23702367

23712368
// Primary DSP session from the src0 (normally weight) tensor
23722369
auto sess = src0_buf->sess;
@@ -2468,8 +2465,7 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
24682465
bufs[0].ptr = src0->data;
24692466
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
24702467
bufs[0].size = ggml_nbytes(src0);
2471-
bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2472-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2468+
bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
24732469
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
24742470

24752471
// Second buffer = Second Operand of Binary op
@@ -2481,8 +2477,7 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
24812477
bufs[1].ptr = src1->data;
24822478
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
24832479
bufs[1].size = ggml_nbytes(src1);
2484-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2485-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2480+
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
24862481
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
24872482

24882483
// Third buffer = Output Activations. We'll handle DSP
@@ -2493,7 +2488,7 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
24932488
bufs[2].ptr = dst->data;
24942489
bufs[2].offset = (uint8_t *) dst->data - dst_buf->base;
24952490
bufs[2].size = ggml_nbytes(dst);
2496-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_REF | DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
2491+
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
24972492

24982493
// Primary DSP session from the src0 tensor
24992494
ggml_hexagon_session * sess = src0_buf->sess;
@@ -2586,34 +2581,31 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
25862581
bufs[0].ptr = src0->data;
25872582
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
25882583
bufs[0].size = ggml_nbytes(src0);
2589-
bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2590-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2584+
bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
25912585
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
25922586

25932587
// Second buffer = experts bias
25942588
bufs[1].fd = src1_buf->fd;
25952589
bufs[1].ptr = src1->data;
25962590
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
25972591
bufs[1].size = ggml_nbytes(src1);
2598-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2599-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2592+
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
26002593
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
26012594

26022595
// Third buffer = activated experts
26032596
bufs[2].fd = src2_buf->fd;
26042597
bufs[2].ptr = src2->data;
26052598
bufs[2].offset = (uint8_t *) src2->data - src2_buf->base;
26062599
bufs[2].size = ggml_nbytes(src2);
2607-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2608-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2600+
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
26092601
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
26102602

26112603
// Fourth buffer = output activations
26122604
bufs[3].fd = dst_buf->fd;
26132605
bufs[3].ptr = dst->data;
26142606
bufs[3].offset = (uint8_t *) dst->data - dst_buf->base;
26152607
bufs[3].size = ggml_nbytes(dst);
2616-
bufs[3].flags = (DSPQUEUE_BUFFER_FLAG_REF | DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
2608+
bufs[3].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
26172609

26182610
// Primary DSP session from the src0 tensor
26192611
ggml_hexagon_session * sess = src0_buf->sess;
@@ -2741,8 +2733,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
27412733
bufs[n_bufs].ptr = src0->data;
27422734
bufs[n_bufs].offset = (uint8_t *) src0->data - src0_buf->base;
27432735
bufs[n_bufs].size = ggml_nbytes(src0);
2744-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2745-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2736+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
27462737
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
27472738
++n_bufs;
27482739

@@ -2757,8 +2748,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
27572748
bufs[n_bufs].ptr = src1->data;
27582749
bufs[n_bufs].offset = (uint8_t *) src1->data - src1_buf->base;
27592750
bufs[n_bufs].size = ggml_nbytes(src1);
2760-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2761-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2751+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
27622752
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
27632753
++n_bufs;
27642754
}
@@ -2773,7 +2763,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
27732763
bufs[n_bufs].ptr = dst->data;
27742764
bufs[n_bufs].offset = (uint8_t *) dst->data - dst_buf->base;
27752765
bufs[n_bufs].size = ggml_nbytes(dst);
2776-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
2766+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
27772767
++n_bufs;
27782768

27792769
// Primary DSP session from the src0 tensor
@@ -2880,8 +2870,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
28802870
bufs[n_bufs].ptr = src0->data;
28812871
bufs[n_bufs].offset = (uint8_t *) src0->data - src0_buf->base;
28822872
bufs[n_bufs].size = ggml_nbytes(src0);
2883-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2884-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2873+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
28852874
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
28862875
++n_bufs;
28872876

@@ -2895,8 +2884,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
28952884
bufs[n_bufs].ptr = src1->data;
28962885
bufs[n_bufs].offset = (uint8_t *) src1->data - src1_buf->base;
28972886
bufs[n_bufs].size = ggml_nbytes(src1);
2898-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2899-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2887+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
29002888
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
29012889
++n_bufs;
29022890

@@ -2911,8 +2899,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
29112899
bufs[n_bufs].ptr = src2->data;
29122900
bufs[n_bufs].offset = (uint8_t *) src2->data - src2_buf->base;
29132901
bufs[n_bufs].size = ggml_nbytes(src2);
2914-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | // Take a reference
2915-
DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
2902+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
29162903
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
29172904
++n_bufs;
29182905
}
@@ -2927,7 +2914,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
29272914
bufs[n_bufs].ptr = dst->data;
29282915
bufs[n_bufs].offset = (uint8_t *) dst->data - dst_buf->base;
29292916
bufs[n_bufs].size = ggml_nbytes(dst);
2930-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_REF | DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
2917+
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
29312918
++n_bufs;
29322919

29332920
// Primary DSP session from the src0 tensor

0 commit comments

Comments
 (0)