Skip to content

Commit 2b86354

Browse files
hexagon: remove dspqueue callbacks and do all read processing inplace
1 parent 8284efc commit 2b86354

File tree

1 file changed

+58
-157
lines changed

1 file changed

+58
-157
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 58 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ struct ggml_hexagon_session {
217217
void allocate(int dev_id) noexcept(false);
218218
void release() noexcept(true);
219219

220+
void enqueue(struct htp_general_req &req, struct dspqueue_buffer *bufs, uint32_t n_bufs, bool sync = false);
221+
void flush();
222+
220223
ggml_backend_buffer_type buffer_type;
221224
ggml_backend_buffer_type repack_buffer_type;
222225

@@ -237,38 +240,61 @@ struct ggml_hexagon_session {
237240
uint32_t prof_pkts;
238241
};
239242

240-
// Packet callback
241-
static void htp_packet_callback(dspqueue_t queue, AEEResult error, void * context) {
242-
auto sess = static_cast<ggml_hexagon_session *>(context);
243+
void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_buffer *bufs, uint32_t n_bufs, bool sync) {
244+
// Bump pending flag (cleared in session::flush once we get the response)
245+
this->op_pending++; // atomic inc
246+
247+
int err = dspqueue_write(this->queue,
248+
0, // flags - the framework will autoset this
249+
n_bufs, // number of buffers
250+
bufs, // buffer references
251+
sizeof(req),
252+
(const uint8_t *) &req, // Message
253+
1000000 // Timeout
254+
);
255+
256+
if (err != 0) {
257+
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", this->name.c_str(), (unsigned) err);
258+
}
259+
260+
if (sync) {
261+
flush();
262+
}
263+
}
264+
265+
// Flush HTP response queue, i.e. wait for all outstanding requests to complete
266+
void ggml_hexagon_session::flush() {
267+
dspqueue_t q = this->queue;
243268

244269
// Repeatedly read packets from the queue until it's empty. We don't
245270
// necessarily get a separate callback for each packet, and new packets
246271
// may arrive while we're processing the previous one.
247272

248-
while (1) {
273+
while (this->op_pending) {
249274
struct htp_general_rsp rsp;
250275
uint32_t rsp_size;
251276
uint32_t flags;
252277

253278
struct dspqueue_buffer bufs[HTP_MAX_PACKET_BUFFERS];
254279
uint32_t n_bufs;
255280

256-
// Read packet from queue
257-
int err = dspqueue_read_noblock(queue, &flags,
258-
HTP_MAX_PACKET_BUFFERS, // Maximum number of buffer references
259-
&n_bufs, // Number of buffer references
260-
bufs, // Buffer references
261-
sizeof(rsp), // Max message length
262-
&rsp_size, // Message length
263-
(uint8_t *) &rsp);
264-
265-
if (err == AEE_EWOULDBLOCK) {
266-
// Consumed all packets available for now
267-
return;
281+
// Read response packet from queue
282+
int err = dspqueue_read(q, &flags,
283+
HTP_MAX_PACKET_BUFFERS, // Maximum number of buffer references
284+
&n_bufs, // Number of buffer references
285+
bufs, // Buffer references
286+
sizeof(rsp), // Max message length
287+
&rsp_size, // Message length
288+
(uint8_t *) &rsp,
289+
1000000); // Timeout
290+
291+
if (err == AEE_EEXPIRED) {
292+
// TODO: might need to bail out if the HTP is stuck on something
293+
continue;
268294
}
269295

270296
if (err != 0) {
271-
GGML_ABORT("ggml-hex: dspqueue_read_noblock failed: 0x%08x\n", (unsigned) err);
297+
GGML_ABORT("ggml-hex: dspqueue_read failed: 0x%08x\n", (unsigned) err);
272298
}
273299

274300
// Basic sanity checks
@@ -281,21 +307,15 @@ static void htp_packet_callback(dspqueue_t queue, AEEResult error, void * contex
281307
// TODO: handle errors
282308
}
283309

284-
// FIXME: update profiling implementation
285-
sess->prof_usecs = rsp.prof_usecs;
286-
sess->prof_cycles = rsp.prof_cycles;
287-
sess->prof_pkts = rsp.prof_pkts;
310+
// TODO: update profiling implementation; it currently only works for opt_opsync mode
311+
this->prof_usecs = rsp.prof_usecs;
312+
this->prof_cycles = rsp.prof_cycles;
313+
this->prof_pkts = rsp.prof_pkts;
288314

289-
sess->op_pending--; // atomic dec
315+
this->op_pending--; // atomic dec
290316
}
291317
}
292318

293-
// Error callback - simply terminates with an error. Used where we don't
294-
// expect errors.
295-
[[noreturn]] static void htp_error_callback(dspqueue_t queue, AEEResult error, void * context) {
296-
GGML_ABORT("ggml-hex: dspcall general error 0x%x: for queue %p\n", error, (void *) queue);
297-
}
298-
299319
// ** backend buffers
300320

301321
struct ggml_backend_hexagon_buffer_type_context {
@@ -1564,7 +1584,8 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
15641584
0, // Flags
15651585
128 * 1024, // Request queue size (in bytes)
15661586
64 * 1024, // Response queue size (in bytes)
1567-
htp_packet_callback, htp_error_callback,
1587+
nullptr, // Read packet callback (we handle reads explicitly)
1588+
nullptr, // Error callback (we handle errors during reads)
15681589
(void *) this, // Callback context
15691590
&queue);
15701591
if (err != 0) {
@@ -2255,27 +2276,7 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
22552276
}
22562277

22572278
if ((opt_opmask & HTP_OPMASK_QUEUE)) {
2258-
// Bump pending flag (cleared in the callback once we get the responce)
2259-
sess->op_pending++; // atomic inc
2260-
2261-
int err = dspqueue_write(sess->queue,
2262-
0, // flags - the framework will autoset this
2263-
3, // number of buffers
2264-
bufs, // buffer references
2265-
sizeof(req),
2266-
(const uint8_t *) &req, // Message
2267-
1000000 // Timeout
2268-
);
2269-
2270-
if (err != 0) {
2271-
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", sess->name.c_str(), (unsigned) err);
2272-
}
2273-
}
2274-
2275-
if (opt_opsync) {
2276-
while (sess->op_pending) {
2277-
;
2278-
}
2279+
sess->enqueue(req, bufs, 3, opt_opsync);
22792280
}
22802281

22812282
t2 = ggml_time_us();
@@ -2394,27 +2395,7 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
23942395
}
23952396

23962397
if ((opt_opmask & HTP_OPMASK_QUEUE)) {
2397-
// Bump pending flag (cleared in the callback once we get the responce)
2398-
sess->op_pending++; // atomic inc
2399-
2400-
int err = dspqueue_write(sess->queue,
2401-
0, // flags - the framework will autoset this
2402-
4, // number of buffers
2403-
bufs, // buffer references
2404-
sizeof(req),
2405-
(const uint8_t *) &req, // Message
2406-
1000000 // Timeout
2407-
);
2408-
2409-
if (err != 0) {
2410-
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", sess->name.c_str(), (unsigned) err);
2411-
}
2412-
}
2413-
2414-
if (opt_opsync) {
2415-
while (sess->op_pending) {
2416-
;
2417-
}
2398+
sess->enqueue(req, bufs, 4, opt_opsync);
24182399
}
24192400

24202401
t2 = ggml_time_us();
@@ -2540,26 +2521,7 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
25402521
}
25412522

25422523
if ((opt_opmask & HTP_OPMASK_QUEUE)) {
2543-
// Bump pending flag (cleared in the callback once we get the responce)
2544-
sess->op_pending++; // atomic inc
2545-
2546-
int err = dspqueue_write(sess->queue,
2547-
0, // flags - the framework will autoset this
2548-
3, // number of buffers
2549-
bufs, // buffer references
2550-
sizeof(req),
2551-
(const uint8_t *) &req, // Message
2552-
1000000); // Timeout
2553-
2554-
if (0 != err) {
2555-
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", sess->name.c_str(), (unsigned) err);
2556-
}
2557-
}
2558-
2559-
if (opt_opsync) {
2560-
while (sess->op_pending) {
2561-
;
2562-
}
2524+
sess->enqueue(req, bufs, 3, opt_opsync);
25632525
}
25642526

25652527
t2 = ggml_time_us();
@@ -2681,26 +2643,7 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
26812643
}
26822644

26832645
if ((opt_opmask & HTP_OPMASK_QUEUE)) {
2684-
// Bump pending flag (cleared in the callback once we get the responce)
2685-
sess->op_pending++; // atomic inc
2686-
2687-
int err = dspqueue_write(sess->queue,
2688-
0, // flags - the framework will autoset this
2689-
4, // number of buffers
2690-
bufs, // buffer references
2691-
sizeof(req),
2692-
(const uint8_t *) &req, // Message
2693-
1000000); // Timeout
2694-
2695-
if (0 != err) {
2696-
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", sess->name.c_str(), (unsigned) err);
2697-
}
2698-
}
2699-
2700-
if (opt_opsync) {
2701-
while (sess->op_pending) {
2702-
;
2703-
}
2646+
sess->enqueue(req, bufs, 4, opt_opsync);
27042647
}
27052648

27062649
t2 = ggml_time_us();
@@ -2863,26 +2806,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
28632806
}
28642807

28652808
if ((opt_opmask & HTP_OPMASK_QUEUE)) {
2866-
// Bump pending flag (cleared in the callback once we get the responce)
2867-
sess->op_pending++; // atomic inc
2868-
2869-
int err = dspqueue_write(sess->queue,
2870-
0, // flags - the framework will autoset this
2871-
n_bufs, // number of buffers
2872-
bufs, // buffer references
2873-
sizeof(req),
2874-
(const uint8_t *) &req, // Message
2875-
1000000); // Timeout
2876-
2877-
if (0 != err) {
2878-
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", sess->name.c_str(), (unsigned) err);
2879-
}
2880-
}
2881-
2882-
if (opt_opsync) {
2883-
while (sess->op_pending) {
2884-
;
2885-
}
2809+
sess->enqueue(req, bufs, n_bufs, opt_opsync);
28862810
}
28872811

28882812
t2 = ggml_time_us();
@@ -3036,26 +2960,7 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
30362960
}
30372961

30382962
if ((opt_opmask & HTP_OPMASK_QUEUE)) {
3039-
// Bump pending flag (cleared in the callback once we get the responce)
3040-
sess->op_pending++; // atomic inc
3041-
3042-
int err = dspqueue_write(sess->queue,
3043-
0, // flags - the framework will autoset this
3044-
n_bufs, // number of buffers
3045-
bufs, // buffer references
3046-
sizeof(req),
3047-
(const uint8_t *) &req, // Message
3048-
1000000); // Timeout
3049-
3050-
if (0 != err) {
3051-
GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", sess->name.c_str(), (unsigned) err);
3052-
}
3053-
}
3054-
3055-
if (opt_opsync) {
3056-
while (sess->op_pending) {
3057-
;
3058-
}
2963+
sess->enqueue(req, bufs, n_bufs, opt_opsync);
30592964
}
30602965

30612966
t2 = ggml_time_us();
@@ -3200,9 +3105,7 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
32003105
}
32013106

32023107
// Wait until all pending ops complete
3203-
while (sess->op_pending) {
3204-
;
3205-
}
3108+
sess->flush();
32063109

32073110
return GGML_STATUS_SUCCESS;
32083111
}
@@ -3213,9 +3116,7 @@ static void ggml_backend_hexagon_synchronize(ggml_backend_t backend) {
32133116
HEX_VERBOSE("ggml-hex: %s synchronize\n", sess->name.c_str());
32143117

32153118
// Wait until all pending ops complete
3216-
while (sess->op_pending) {
3217-
;
3218-
}
3119+
sess->flush();
32193120
}
32203121

32213122
struct node_info {

0 commit comments

Comments
 (0)