
Commit 1948ae8

Cleaned up and improved type/error handling.
1 parent c47dc70 commit 1948ae8

File tree

1 file changed: +21 -34 lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 21 additions & 34 deletions
@@ -110,9 +110,9 @@ struct rpc_msg_init_tensor_req {
     rpc_tensor tensor;
 };
 
-struct rpc_msg_init_tensor_rsp {
-    uint8_t result; // success/failure
-};
+//struct rpc_msg_init_tensor_rsp {
+//    uint8_t result; // success/failure
+//};
 
 struct rpc_msg_alloc_buffer_req {
     uint64_t size;
@@ -479,16 +479,15 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
 }
 
 static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
-    //UNUSED(buffer);
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
 
+    // The CUDA backend on the server pads all quantized tensors to a multiple of 512 elements.
+    // To save bandwidth, only call the server-side init-tensor function when that padding is needed.
     if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0)) {
-        // TODO: this check is due to MATRIX_ROW_PADDING in CUDA and should be generalized
-        //GGML_ASSERT(tensor->ne[0] % 512 == 0 && "unsupported quantized tensor");
         rpc_msg_init_tensor_req request;
+
         request.tensor = serialize_tensor(tensor);
 
-        //rpc_msg_init_tensor_rsp response;
         bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
         GGML_ASSERT(status);
     }
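Aside: the gate the client applies in this hunk can be read as a standalone predicate. This is a minimal sketch, not code from the commit; the helper name rpc_needs_server_init is hypothetical, and only the ggml calls visible above are assumed.

    // Hypothetical helper: only tensors the server may pad (quantized, with a
    // row length that is not a multiple of 512 elements) are worth the extra
    // RPC_CMD_INIT_TENSOR round-trip.
    static bool rpc_needs_server_init(const ggml_tensor * tensor) {
        return ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0);
    }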
@@ -603,11 +602,13 @@ static size_t ggml_backend_rpc_get_max_size(ggml_backend_buffer_type_t buft) {
 }
 
 static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
+    // See the comments in ggml_backend_rpc_buffer_init_tensor above.
     if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0)) {
         ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context;
         auto sock = get_socket(buft_ctx->endpoint);
 
         rpc_msg_get_alloc_size_req request;
+
         request.tensor = serialize_tensor(tensor);
 
         rpc_msg_get_alloc_size_rsp response;
@@ -812,41 +813,30 @@ class rpc_server {
 };
 
 bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) {
-    ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
+    ggml_backend_buffer_type_t buft;
     struct ggml_init_params params {
         /*.mem_size   =*/ ggml_tensor_overhead(),
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
+
     struct ggml_context * ctx = ggml_init(params);
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
+
     if (tensor == nullptr) {
-        printf("Got nullptr\n");
+        fprintf(stderr, "Null tensor pointer passed to server get_alloc_size function.\n");
         ggml_free(ctx);
         return false;
     }
 
-    printf("Getting buft\n");
-
-    //ggml_backend_buffer_get_alloc_size(tensor->buffer,tensor)
-
-    //if (tensor->buffer == nullptr) {
-    //    printf("Got null buffer\n");
-    //    response.alloc_size = 0;
-    //    ggml_free(ctx);
-    //    return true;
-    //}
+    if (tensor->buffer == nullptr) {
+        // No buffer allocated yet; fall back to the backend's default buffer type.
+        buft = ggml_backend_get_default_buffer_type(backend);
+    } else {
+        buft = tensor->buffer->buft;
+    }
 
     response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);
-    // Call the backend's buffer_type_get_alloc_size function
-    //ggml_backend_buffer_type_t buft = tensor->buffer->buft;
-    //if (buft && buft->iface.get_alloc_size) {
-    //    printf("Called buffer type get alloc size\n");
-    //    response.alloc_size = buft->iface.get_alloc_size(buft, tensor);
-    //} else {
-    //    printf("Called ggml_nbytes");
-    //    response.alloc_size = ggml_nbytes(tensor);
-    //}
 
     ggml_free(ctx);
     return true;
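Aside: the buffer-type fallback introduced in this hunk reduces to a single expression. A minimal sketch under the same assumptions as the hunk; pick_buft is a hypothetical name, not code from the commit.

    // Hypothetical helper: prefer the buffer type the tensor was allocated
    // with, and fall back to the backend's default only when no buffer has
    // been attached yet.
    static ggml_backend_buffer_type_t pick_buft(ggml_backend_t backend, const ggml_tensor * tensor) {
        return tensor->buffer != nullptr ? tensor->buffer->buft
                                         : ggml_backend_get_default_buffer_type(backend);
    }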
@@ -996,20 +986,17 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
     struct ggml_context * ctx = ggml_init(params);
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
-        printf("Null tensor\n");
+        fprintf(stderr, "Null tensor pointer passed to server init_tensor function.\n");
         ggml_free(ctx);
         return false;
     }
 
-    printf("about to call buffer\n");
-
-    //ggml_backend_init_tensor
-
     // Call the backend's buffer_init_tensor function
     ggml_backend_buffer_t buffer = tensor->buffer;
     if (buffer && buffer->iface.init_tensor) {
-        printf("Calling buffer iface function\n");
         buffer->iface.init_tensor(buffer, tensor);
+    } else {
+        fprintf(stderr, "Null buffer for tensor passed to init_tensor function\n");
     }
 
     ggml_free(ctx);
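Aside: init_tensor is an optional entry in the buffer interface, so the server must check both the buffer pointer and the function pointer before dispatching. A sketch of that guard in isolation; dispatch_init_tensor is a hypothetical name.

    // Hypothetical helper: dispatch to the buffer's init_tensor hook when one
    // exists; report a missing buffer or hook instead of treating it as fatal.
    static void dispatch_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
        if (buffer && buffer->iface.init_tensor) {
            buffer->iface.init_tensor(buffer, tensor);
        } else {
            fprintf(stderr, "init_tensor: tensor has no buffer or no init_tensor hook\n");
        }
    }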
