Skip to content

Commit efe67fd

Browse files
committed
server : improve error reporting
1 parent 3398305 commit efe67fd

File tree

1 file changed

+22
-7
lines changed

1 file changed

+22
-7
lines changed

tools/server/server.cpp

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3361,14 +3361,29 @@ struct server_context {
33613361
metrics.on_decoded(slots);
33623362

33633363
if (ret != 0) {
3364-
if (n_batch == 1 || ret < 0) {
3365-
// if you get here, it means the KV cache is full - try increasing it via the context size
3366-
SRV_ERR("failed to decode the batch: KV cache is full - try increasing it via the context size, i = %d, n_batch = %d, ret = %d\n", i, n_batch, ret);
3367-
for (auto & slot : slots) {
3368-
slot.release();
3369-
send_error(slot, "Input prompt is too big compared to KV size. Please try increasing KV size.");
3364+
{
3365+
std::string err;
3366+
3367+
if (n_batch == 1 && ret == 1) {
3368+
err = "Context size has been exceeded.";
3369+
}
3370+
3371+
if (ret == -1) {
3372+
err = "Invalid input batch.";
3373+
}
3374+
3375+
if (ret < -1) {
3376+
err = "Compute error.";
3377+
}
3378+
3379+
if (!err.empty()) {
3380+
SRV_ERR("%s, i = %d, n_batch = %d, ret = %d\n", err.c_str(), i, n_batch, ret);
3381+
for (auto & slot : slots) {
3382+
slot.release();
3383+
send_error(slot, err);
3384+
}
3385+
break;
33703386
}
3371-
break; // break loop of n_batch
33723387
}
33733388

33743389
// retry with half the batch size to try to find a free slot in the KV cache

0 commit comments

Comments
 (0)