Skip to content

Commit 4c16180

Browse files
hexagon: overhaul error handling in the session/device allocation
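This should handle all failure paths in session allocation: failures are now logged and thrown as std::runtime_error instead of aborting, a partially initialized session is released before the exception propagates, and a device whose session could not be created is left with a null context.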
1 parent 4187523 commit 4c16180

File tree

1 file changed

+90
-41
lines changed

1 file changed

+90
-41
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 90 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -216,20 +216,26 @@ static inline void hex_format_op_names(char * str, const struct ggml_tensor * t)
216216
// ** backend sessions
217217

218218
struct ggml_hexagon_session {
219-
ggml_hexagon_session(int dev_id);
220-
~ggml_hexagon_session();
219+
ggml_hexagon_session(int dev_id) noexcept(false);
220+
~ggml_hexagon_session() noexcept(true);
221+
222+
void allocate(int dev_id) noexcept(false);
223+
void release() noexcept(true);
221224

222225
ggml_backend_buffer_type buffer_type;
223226
ggml_backend_buffer_type repack_buffer_type;
224227

225-
std::string name;
226-
remote_handle64 handle;
227-
dspqueue_t queue;
228-
uint32_t session_id;
229-
uint32_t domain_id;
230-
uint64_t queue_id;
231-
int dev_id;
232-
228+
std::string name;
229+
remote_handle64 handle;
230+
dspqueue_t queue;
231+
uint32_t session_id;
232+
uint32_t domain_id;
233+
uint64_t queue_id;
234+
int dev_id;
235+
bool valid_session;
236+
bool valid_handle;
237+
bool valid_queue;
238+
bool valid_iface;
233239
std::atomic<int> op_pending;
234240
uint32_t prof_usecs;
235241
uint32_t prof_cycles;
@@ -349,15 +355,15 @@ struct ggml_backend_hexagon_buffer_context {
349355
this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
350356
if (!this->base) {
351357
GGML_LOG_ERROR("ggml-hex: %s failed to allocate buffer : size %zu\n", sess->name.c_str(), size);
352-
return;
358+
throw std::runtime_error("ggml-hex: rpcmem_alloc failed (see log for details)");
353359
}
354360

355361
this->fd = rpcmem_to_fd(this->base);
356362
if (this->fd < 0) {
357363
GGML_LOG_ERROR("ggml-hex: %s failed to get FD for buffer %p\n", sess->name.c_str(), (void *) this->base);
358364
rpcmem_free(this->base);
359365
this->base = NULL;
360-
return;
366+
throw std::runtime_error("ggml-hex: rpcmem_to_fd failed (see log for details)");
361367
}
362368

363369
HEX_VERBOSE("ggml-hex: %s allocated buffer: base %p size %zu fd %d repack %d\n", sess->name.c_str(),
@@ -1457,7 +1463,12 @@ static ggml_backend_buffer_type_i ggml_backend_hexagon_repack_buffer_type_interf
14571463
/* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host,
14581464
};
14591465

1460-
ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
1466+
void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
1467+
this->valid_session = false;
1468+
this->valid_handle = false;
1469+
this->valid_queue = false;
1470+
this->valid_iface = false;
1471+
14611472
this->domain_id = 3; // Default for CDSP, updated after the session is created
14621473
this->session_id = 0; // Default for CDSP, updated after the session is created
14631474
this->dev_id = dev_id;
@@ -1472,7 +1483,8 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
14721483

14731484
domain * my_domain = get_domain(this->domain_id);
14741485
if (my_domain == NULL) {
1475-
GGML_ABORT("ggml-hex: unable to get domain struct for CDSP\n");
1486+
GGML_LOG_ERROR("ggml-hex: unable to get domain struct for CDSP\n");
1487+
throw std::runtime_error("ggml-hex: failed to get CDSP domain (see log for details)");
14761488
}
14771489

14781490
// Create new session
@@ -1485,12 +1497,14 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
14851497

14861498
int err = remote_session_control(FASTRPC_RESERVE_NEW_SESSION, (void *) &n, sizeof(n));
14871499
if (err != AEE_SUCCESS) {
1488-
GGML_ABORT("ggml-hex: remote_session_control failed to reserve new session %d : error 0x%x\n", dev_id, err);
1500+
GGML_LOG_ERROR("ggml-hex: failed to reserve new session %d : error 0x%x\n", dev_id, err);
1501+
throw std::runtime_error("ggml-hex: remote_session_control(new-sess) failed (see log for details)");
14891502
}
14901503

14911504
// Save the IDs
14921505
this->session_id = n.session_id;
14931506
this->domain_id = n.effective_domain_id;
1507+
this->valid_session = true;
14941508
}
14951509

14961510
// Get session URI
@@ -1510,7 +1524,8 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
15101524

15111525
int err = remote_session_control(FASTRPC_GET_URI, (void *) &u, sizeof(u));
15121526
if (err != AEE_SUCCESS) {
1513-
GGML_ABORT("ggml-hex: remote_session_control failed to get URI for session %d : error 0x%x\n", dev_id, err);
1527+
GGML_LOG_ERROR("ggml-hex: failed to get URI for session %d : error 0x%x\n", dev_id, err);
1528+
throw std::runtime_error("ggml-hex: remote_session_control(get-uri) failed (see log for details)");
15141529
}
15151530
}
15161531

@@ -1521,17 +1536,20 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
15211536
u.enable = 1;
15221537
int err = remote_session_control(DSPRPC_CONTROL_UNSIGNED_MODULE, (void *) &u, sizeof(u));
15231538
if (err != AEE_SUCCESS) {
1524-
GGML_ABORT("ggml-hex: remote_session_control failed to enable unsigned PD for session %d : error 0x%x\n",
1525-
dev_id, err);
1539+
GGML_LOG_ERROR("ggml-hex: failed to enable unsigned PD for session %d : error 0x%x\n", dev_id, err);
1540+
throw std::runtime_error("ggml-hex: remote_session_control(unsign) failed (see log for details)");
15261541
}
15271542
}
15281543

15291544
// Open session
15301545
int err = htp_iface_open(session_uri, &this->handle);
15311546
if (err != AEE_SUCCESS) {
1532-
GGML_ABORT("ggml-hex: failed to open session %d : error 0x%x\n", dev_id, err);
1547+
GGML_LOG_ERROR("ggml-hex: failed to open session %d : error 0x%x\n", dev_id, err);
1548+
throw std::runtime_error("ggml-hex: failed to open session (see log for details)");
15331549
}
15341550

1551+
this->valid_handle = true;
1552+
15351553
GGML_LOG_INFO("ggml-hex: new session: %s : session-id %d domain-id %d uri %s handle 0x%lx\n", this->name.c_str(),
15361554
this->session_id, this->domain_id, session_uri, (unsigned long) this->handle);
15371555

@@ -1542,7 +1560,7 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
15421560

15431561
int err = remote_handle64_control(this->handle, DSPRPC_CONTROL_LATENCY, (void *) &l, sizeof(l));
15441562
if (err != 0) {
1545-
GGML_LOG_ERROR("ggml-hex: failed to enable fastrpc QOS mode: 0x%08x\n", (unsigned) err);
1563+
GGML_LOG_WARN("ggml-hex: failed to enable fastrpc QOS mode: 0x%08x\n", (unsigned) err);
15461564
}
15471565
}
15481566

@@ -1554,15 +1572,18 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
15541572
htp_packet_callback, htp_error_callback,
15551573
(void *) this, // Callback context
15561574
&queue);
1557-
15581575
if (err != 0) {
1559-
GGML_ABORT("ggml-hex: dspqueue_create failed: 0x%08x\n", (unsigned) err);
1576+
GGML_LOG_ERROR("ggml-hex: %s dspqueue_create failed: 0x%08x\n", this->name.c_str(), (unsigned) err);
1577+
throw std::runtime_error("ggml-hex: failed to create dspqueue (see log for details)");
15601578
}
15611579

1580+
this->valid_queue = true;
1581+
15621582
// Export queue for use on the DSP
15631583
err = dspqueue_export(queue, &this->queue_id);
15641584
if (err != 0) {
1565-
GGML_ABORT("ggml-hex: dspqueue_export failed: 0x%08x\n", (unsigned) err);
1585+
GGML_LOG_ERROR("ggml-hex: dspqueue_export failed: 0x%08x\n", (unsigned) err);
1586+
throw std::runtime_error("ggml-hex: dspqueue export failed (see log for details)");
15661587
}
15671588

15681589
if (opt_etm) {
@@ -1577,23 +1598,23 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id) {
15771598
// listening for packets in a callback.
15781599
err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx);
15791600
if (err != 0) {
1580-
GGML_ABORT("ggml-hex: htp_iface_start failed: 0x%08x\n", (unsigned) err);
1601+
GGML_LOG_ERROR("ggml-hex: failed to start session: 0x%08x\n", (unsigned) err);
1602+
throw std::runtime_error("ggml-hex: iface start failed (see log for details)");
15811603
}
1582-
1583-
buffer_type.iface = ggml_backend_hexagon_buffer_type_interface;
1584-
buffer_type.context = new ggml_backend_hexagon_buffer_type_context(this->name, this);
1585-
1586-
repack_buffer_type.iface = ggml_backend_hexagon_repack_buffer_type_interface;
1587-
repack_buffer_type.context = new ggml_backend_hexagon_buffer_type_context(this->name + "-REPACK", this);
1604+
this->valid_iface = true;
15881605
}
15891606

1590-
ggml_hexagon_session::~ggml_hexagon_session() {
1607+
void ggml_hexagon_session::release() noexcept(true) {
15911608
GGML_LOG_INFO("ggml-hex: releasing session: %s\n", this->name.c_str());
15921609

1610+
int err;
1611+
15931612
// Stop the DSP-side service and close the queue
1594-
int err = htp_iface_stop(this->handle);
1595-
if (err != 0) {
1596-
GGML_ABORT("ggml-hex: htp_iface_stop failed: 0x%08x\n", (unsigned) err);
1613+
if (this->valid_iface) {
1614+
err = htp_iface_stop(this->handle);
1615+
if (err != 0) {
1616+
GGML_ABORT("ggml-hex: htp_iface_stop failed: 0x%08x\n", (unsigned) err);
1617+
}
15971618
}
15981619

15991620
if (opt_etm) {
@@ -1603,12 +1624,35 @@ ggml_hexagon_session::~ggml_hexagon_session() {
16031624
}
16041625
}
16051626

1606-
err = dspqueue_close(queue);
1607-
if (err != 0) {
1608-
GGML_ABORT("ggml-hex: dspqueue_close failed: 0x%08x\n", (unsigned) err);
1627+
if (this->valid_queue) {
1628+
err = dspqueue_close(queue);
1629+
if (err != 0) {
1630+
GGML_ABORT("ggml-hex: dspqueue_close failed: 0x%08x\n", (unsigned) err);
1631+
}
1632+
}
1633+
1634+
if (this->valid_handle) {
1635+
htp_iface_close(this->handle);
1636+
}
1637+
}
1638+
1639+
ggml_hexagon_session::ggml_hexagon_session(int dev_id) noexcept(false) {
1640+
try {
1641+
allocate(dev_id);
1642+
1643+
buffer_type.iface = ggml_backend_hexagon_buffer_type_interface;
1644+
buffer_type.context = new ggml_backend_hexagon_buffer_type_context(this->name, this);
1645+
1646+
repack_buffer_type.iface = ggml_backend_hexagon_repack_buffer_type_interface;
1647+
repack_buffer_type.context = new ggml_backend_hexagon_buffer_type_context(this->name + "-REPACK", this);
1648+
} catch (std::exception const &exc) {
1649+
release();
1650+
throw;
16091651
}
1652+
}
16101653

1611-
htp_iface_close(this->handle);
1654+
ggml_hexagon_session::~ggml_hexagon_session() noexcept(true) {
1655+
release();
16121656
}
16131657

16141658
// ** backend interface
@@ -3599,7 +3643,12 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
35993643
for (size_t i = 0; i < opt_ndev; i++) {
36003644
devices[i].iface = ggml_backend_hexagon_device_i;
36013645
devices[i].reg = reg;
3602-
devices[i].context = new ggml_hexagon_session(i);
3646+
try {
3647+
devices[i].context = new ggml_hexagon_session(i);
3648+
} catch (std::exception const &exc) {
3649+
GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
3650+
devices[i].context = nullptr;
3651+
}
36033652
}
36043653
}
36053654

@@ -3626,8 +3675,8 @@ static size_t ggml_backend_hexagon_reg_get_device_count(ggml_backend_reg_t reg)
36263675
static ggml_backend_dev_t ggml_backend_hexagon_reg_get_device(ggml_backend_reg_t reg, size_t index) {
36273676
auto hreg = static_cast<ggml_hexagon_registry *>(reg->context);
36283677

3629-
if (index >= opt_ndev) {
3630-
return NULL;
3678+
if (index >= opt_ndev || !hreg->devices[index].context) {
3679+
return nullptr;
36313680
}
36323681

36333682
return &hreg->devices[index];

0 commit comments

Comments
 (0)