@@ -217,6 +217,9 @@ struct ggml_hexagon_session {
217217    void  allocate (int  dev_id) noexcept (false );
218218    void  release () noexcept (true );
219219
220+     void  enqueue (struct  htp_general_req  &req, struct  dspqueue_buffer  *bufs, uint32_t  n_bufs, bool  sync = false );
221+     void  flush ();
222+ 
220223    ggml_backend_buffer_type buffer_type;
221224    ggml_backend_buffer_type repack_buffer_type;
222225
@@ -237,38 +240,61 @@ struct ggml_hexagon_session {
237240    uint32_t          prof_pkts;
238241};
239242
240- //  Packet callback
241- static  void  htp_packet_callback (dspqueue_t  queue, AEEResult error, void  * context) {
242-     auto  sess = static_cast <ggml_hexagon_session *>(context);
243+ void  ggml_hexagon_session::enqueue (struct  htp_general_req  &req, struct  dspqueue_buffer  *bufs, uint32_t  n_bufs, bool  sync) {
244+     //  Bump pending flag (cleared in the session::flush once we get the responce)
245+     this ->op_pending ++;  //  atomic inc
246+ 
247+     int  err = dspqueue_write (this ->queue ,
248+                              0 ,                       //  flags - the framework will autoset this
249+                              n_bufs,                  //  number of buffers
250+                              bufs,                    //  buffer references
251+                              sizeof (req),
252+                              (const  uint8_t  *) &req,  //  Message
253+                              1000000                   //  Timeout
254+     );
255+ 
256+     if  (err != 0 ) {
257+         GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " this ->name .c_str (), (unsigned ) err);
258+     }
259+ 
260+     if  (sync) {
261+         flush ();
262+     }
263+ }
264+ 
265+ //  Flush HTP response queue, i.e. wait for all outstanding requests to complete
266+ void  ggml_hexagon_session::flush () {
267+     dspqueue_t  q = this ->queue ;
243268
244269    //  Repeatedly read packets from the queue until it's empty. We don't
245270    //  necessarily get a separate callback for each packet, and new packets
246271    //  may arrive while we're processing the previous one.
247272
248-     while  (1 ) {
273+     while  (this -> op_pending ) {
249274        struct  htp_general_rsp  rsp;
250275        uint32_t                rsp_size;
251276        uint32_t                flags;
252277
253278        struct  dspqueue_buffer  bufs[HTP_MAX_PACKET_BUFFERS];
254279        uint32_t                n_bufs;
255280
256-         //  Read packet from queue
257-         int  err = dspqueue_read_noblock (queue, &flags,
258-                                         HTP_MAX_PACKET_BUFFERS,  //  Maximum number of buffer references
259-                                         &n_bufs,                 //  Number of buffer references
260-                                         bufs,                    //  Buffer references
261-                                         sizeof (rsp),             //  Max message length
262-                                         &rsp_size,               //  Message length
263-                                         (uint8_t  *) &rsp);
264- 
265-         if  (err == AEE_EWOULDBLOCK) {
266-             //  Consumed all packets available for now
267-             return ;
281+         //  Read response packet from queue
282+         int  err = dspqueue_read (q, &flags,
283+                                    HTP_MAX_PACKET_BUFFERS,  //  Maximum number of buffer references
284+                                    &n_bufs,                 //  Number of buffer references
285+                                    bufs,                    //  Buffer references
286+                                    sizeof (rsp),             //  Max message length
287+                                    &rsp_size,               //  Message length
288+                                    (uint8_t  *) &rsp,
289+                                    1000000 );                //  Timeout
290+ 
291+         if  (err == AEE_EEXPIRED) {
292+             //  TODO: might need to bail out if the HTP is stuck on something
293+             continue ;
268294        }
269295
270296        if  (err != 0 ) {
271-             GGML_ABORT (" ggml-hex: dspqueue_read_noblock  failed: 0x%08x\n " unsigned ) err);
297+             GGML_ABORT (" ggml-hex: dspqueue_read  failed: 0x%08x\n " unsigned ) err);
272298        }
273299
274300        //  Basic sanity checks
@@ -281,21 +307,15 @@ static void htp_packet_callback(dspqueue_t queue, AEEResult error, void * contex
281307            //  TODO: handle errors
282308        }
283309
284-         //  FIXME : update profiling implementation
285-         sess ->prof_usecs   = rsp.prof_usecs ;
286-         sess ->prof_cycles  = rsp.prof_cycles ;
287-         sess ->prof_pkts    = rsp.prof_pkts ;
310+         //  TODO : update profiling implementation, currently only works for opt_opsync mode 
311+         this ->prof_usecs   = rsp.prof_usecs ;
312+         this ->prof_cycles  = rsp.prof_cycles ;
313+         this ->prof_pkts    = rsp.prof_pkts ;
288314
289-         sess ->op_pending --;  //  atomic dec
315+         this ->op_pending --;  //  atomic dec
290316    }
291317}
292318
293- //  Error callback - simply terminates with an error. Used where we don't
294- //  expect errors.
295- [[noreturn]] static  void  htp_error_callback (dspqueue_t  queue, AEEResult error, void  * context) {
296-     GGML_ABORT (" ggml-hex: dspcall general error 0x%x: for queue %p\n " void  *) queue);
297- }
298- 
299319//  ** backend buffers
300320
301321struct  ggml_backend_hexagon_buffer_type_context  {
@@ -1564,7 +1584,8 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
15641584                          0 ,              //  Flags
15651585                          128  * 1024 ,     //  Request  queue size (in bytes)
15661586                          64  * 1024 ,      //  Response queue size (in bytes)
1567-                           htp_packet_callback, htp_error_callback,
1587+                           nullptr ,        //  Read packet callback (we handle reads explicitly)
1588+                           nullptr ,        //  Error callback (we handle errors during reads)
15681589                          (void  *) this ,  //  Callback context
15691590                          &queue);
15701591    if  (err != 0 ) {
@@ -2255,27 +2276,7 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
22552276    }
22562277
22572278    if  ((opt_opmask & HTP_OPMASK_QUEUE)) {
2258-         //  Bump pending flag (cleared in the callback once we get the responce)
2259-         sess->op_pending ++;  //  atomic inc
2260- 
2261-         int  err = dspqueue_write (sess->queue ,
2262-                                  0 ,                       //  flags - the framework will autoset this
2263-                                  3 ,                       //  number of buffers
2264-                                  bufs,                    //  buffer references
2265-                                  sizeof (req),
2266-                                  (const  uint8_t  *) &req,  //  Message
2267-                                  1000000                   //  Timeout
2268-         );
2269- 
2270-         if  (err != 0 ) {
2271-             GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " name .c_str (), (unsigned ) err);
2272-         }
2273-     }
2274- 
2275-     if  (opt_opsync) {
2276-         while  (sess->op_pending ) {
2277-             ;
2278-         }
2279+         sess->enqueue (req, bufs, 3 , opt_opsync);
22792280    }
22802281
22812282    t2 = ggml_time_us ();
@@ -2394,27 +2395,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
23942395    }
23952396
23962397    if  ((opt_opmask & HTP_OPMASK_QUEUE)) {
2397-         //  Bump pending flag (cleared in the callback once we get the responce)
2398-         sess->op_pending ++;  //  atomic inc
2399- 
2400-         int  err = dspqueue_write (sess->queue ,
2401-                                  0 ,                       //  flags - the framework will autoset this
2402-                                  4 ,                       //  number of buffers
2403-                                  bufs,                    //  buffer references
2404-                                  sizeof (req),
2405-                                  (const  uint8_t  *) &req,  //  Message
2406-                                  1000000                   //  Timeout
2407-         );
2408- 
2409-         if  (err != 0 ) {
2410-             GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " name .c_str (), (unsigned ) err);
2411-         }
2412-     }
2413- 
2414-     if  (opt_opsync) {
2415-         while  (sess->op_pending ) {
2416-             ;
2417-         }
2398+         sess->enqueue (req, bufs, 4 , opt_opsync);
24182399    }
24192400
24202401    t2 = ggml_time_us ();
@@ -2540,26 +2521,7 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
25402521    }
25412522
25422523    if  ((opt_opmask & HTP_OPMASK_QUEUE)) {
2543-         //  Bump pending flag (cleared in the callback once we get the responce)
2544-         sess->op_pending ++;  //  atomic inc
2545- 
2546-         int  err = dspqueue_write (sess->queue ,
2547-                                  0 ,                       //  flags - the framework will autoset this
2548-                                  3 ,                       //  number of buffers
2549-                                  bufs,                    //  buffer references
2550-                                  sizeof (req),
2551-                                  (const  uint8_t  *) &req,  //  Message
2552-                                  1000000 );                //  Timeout
2553- 
2554-         if  (0  != err) {
2555-             GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " name .c_str (), (unsigned ) err);
2556-         }
2557-     }
2558- 
2559-     if  (opt_opsync) {
2560-         while  (sess->op_pending ) {
2561-             ;
2562-         }
2524+         sess->enqueue (req, bufs, 3 , opt_opsync);
25632525    }
25642526
25652527    t2 = ggml_time_us ();
@@ -2681,26 +2643,7 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
26812643    }
26822644
26832645    if  ((opt_opmask & HTP_OPMASK_QUEUE)) {
2684-         //  Bump pending flag (cleared in the callback once we get the responce)
2685-         sess->op_pending ++;  //  atomic inc
2686- 
2687-         int  err = dspqueue_write (sess->queue ,
2688-                                  0 ,                       //  flags - the framework will autoset this
2689-                                  4 ,                       //  number of buffers
2690-                                  bufs,                    //  buffer references
2691-                                  sizeof (req),
2692-                                  (const  uint8_t  *) &req,  //  Message
2693-                                  1000000 );                //  Timeout
2694- 
2695-         if  (0  != err) {
2696-             GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " name .c_str (), (unsigned ) err);
2697-         }
2698-     }
2699- 
2700-     if  (opt_opsync) {
2701-         while  (sess->op_pending ) {
2702-             ;
2703-         }
2646+         sess->enqueue (req, bufs, 4 , opt_opsync);
27042647    }
27052648
27062649    t2 = ggml_time_us ();
@@ -2863,26 +2806,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
28632806    }
28642807
28652808    if  ((opt_opmask & HTP_OPMASK_QUEUE)) {
2866-         //  Bump pending flag (cleared in the callback once we get the responce)
2867-         sess->op_pending ++;  //  atomic inc
2868- 
2869-         int  err = dspqueue_write (sess->queue ,
2870-                                  0 ,                       //  flags - the framework will autoset this
2871-                                  n_bufs,                  //  number of buffers
2872-                                  bufs,                    //  buffer references
2873-                                  sizeof (req),
2874-                                  (const  uint8_t  *) &req,  //  Message
2875-                                  1000000 );                //  Timeout
2876- 
2877-         if  (0  != err) {
2878-             GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " name .c_str (), (unsigned ) err);
2879-         }
2880-     }
2881- 
2882-     if  (opt_opsync) {
2883-         while  (sess->op_pending ) {
2884-             ;
2885-         }
2809+         sess->enqueue (req, bufs, n_bufs, opt_opsync);
28862810    }
28872811
28882812    t2 = ggml_time_us ();
@@ -3036,26 +2960,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
30362960    }
30372961
30382962    if  ((opt_opmask & HTP_OPMASK_QUEUE)) {
3039-         //  Bump pending flag (cleared in the callback once we get the responce)
3040-         sess->op_pending ++;  //  atomic inc
3041- 
3042-         int  err = dspqueue_write (sess->queue ,
3043-                                  0 ,                       //  flags - the framework will autoset this
3044-                                  n_bufs,                  //  number of buffers
3045-                                  bufs,                    //  buffer references
3046-                                  sizeof (req),
3047-                                  (const  uint8_t  *) &req,  //  Message
3048-                                  1000000 );                //  Timeout
3049- 
3050-         if  (0  != err) {
3051-             GGML_ABORT (" ggml-hex: %s dspqueue_write failed: 0x%08x\n " name .c_str (), (unsigned ) err);
3052-         }
3053-     }
3054- 
3055-     if  (opt_opsync) {
3056-         while  (sess->op_pending ) {
3057-             ;
3058-         }
2963+         sess->enqueue (req, bufs, n_bufs, opt_opsync);
30592964    }
30602965
30612966    t2 = ggml_time_us ();
@@ -3200,9 +3105,7 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
32003105    }
32013106
32023107    //  Wait until all pending ops complete
3203-     while  (sess->op_pending ) {
3204-         ;
3205-     }
3108+     sess->flush ();
32063109
32073110    return  GGML_STATUS_SUCCESS;
32083111}
@@ -3213,9 +3116,7 @@ static void ggml_backend_hexagon_synchronize(ggml_backend_t backend) {
32133116    HEX_VERBOSE (" ggml-hex: %s synchronize\n " name .c_str ());
32143117
32153118    //  Wait until all pending ops complete
3216-     while  (sess->op_pending ) {
3217-         ;
3218-     }
3119+     sess->flush ();
32193120}
32203121
32213122struct  node_info  {
0 commit comments