@@ -1,6 +1,7 @@
 #include "ggml-rpc.h"
 #include "ggml-impl.h"
 #include "ggml-backend-impl.h"
+#include "xxhash.h"
 
 #include <cinttypes>
 #include <string>
@@ -80,6 +81,7 @@ enum rpc_cmd {
     RPC_CMD_FREE_BUFFER,
     RPC_CMD_BUFFER_CLEAR,
     RPC_CMD_SET_TENSOR,
+    RPC_CMD_SET_TENSOR_HASH,
     RPC_CMD_GET_TENSOR,
     RPC_CMD_COPY_TENSOR,
     RPC_CMD_GRAPH_COMPUTE,
@@ -89,6 +91,9 @@ enum rpc_cmd {
     RPC_CMD_COUNT,
 };
 
+// Try RPC_CMD_SET_TENSOR_HASH first when data size is larger than this threshold
+const size_t HASH_THRESHOLD = 1024 * 1024;
+
 struct rpc_msg_get_alloc_size_req {
     rpc_tensor tensor;
 };
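
For reference, the hash used by this new command is xxHash's one-shot 64-bit API from the xxhash.h header included above. The following is a minimal standalone illustration of that call; the buffer contents and size are arbitrary stand-ins and are not part of this commit (only the XXH64(data, size, 0) usage matches the client code further down):

    #include <cstdint>
    #include <cstdio>
    #include <vector>
    #include "xxhash.h"

    int main() {
        // arbitrary stand-in for a tensor payload larger than HASH_THRESHOLD
        std::vector<uint8_t> data(2 * 1024 * 1024, 0xAB);
        // one-shot 64-bit hash; seed 0 matches the RPC client's usage
        XXH64_hash_t hash = XXH64(data.data(), data.size(), 0);
        printf("%zu bytes -> %016llx\n", data.size(), (unsigned long long)hash);
        return 0;
    }
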
@@ -135,6 +140,10 @@ struct rpc_msg_buffer_clear_req {
     uint8_t value;
 };
 
+struct rpc_msg_set_tensor_hash_rsp {
+    uint8_t result;
+};
+
 struct rpc_msg_get_tensor_req {
     rpc_tensor tensor;
     uint64_t offset;
@@ -483,12 +492,23 @@ static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_
 
 static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
-    // input serialization format: | rpc_tensor | offset (8 bytes) | data (size bytes) |
+    // input serialization format: | rpc_tensor | offset (8 bytes) | data (size bytes) or hash (8 bytes) |
     size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + size;
     std::vector<uint8_t> input(input_size, 0);
     rpc_tensor rpc_tensor = serialize_tensor(tensor);
     memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
     memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
+    if (size > HASH_THRESHOLD) {
+        XXH64_hash_t hash = XXH64(data, size, 0);
+        memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
+        rpc_msg_set_tensor_hash_rsp response;
+        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), sizeof(rpc_tensor) + sizeof(offset) + sizeof(hash), &response, sizeof(response));
+        GGML_ASSERT(status);
+        if (response.result) {
+            // the server has the same data, no need to send it
+            return;
+        }
+    }
     memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), data, size);
     bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR, input.data(), input.size(), nullptr, 0);
     GGML_ASSERT(status);
@@ -782,6 +802,7 @@ class rpc_server {
     bool free_buffer(const rpc_msg_free_buffer_req & request);
     bool buffer_clear(const rpc_msg_buffer_clear_req & request);
     bool set_tensor(const std::vector<uint8_t> & input);
+    bool set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response);
     bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector<uint8_t> & response);
     bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
     bool graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response);
@@ -965,6 +986,15 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
     return true;
 }
 
+bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response) {
+    GGML_UNUSED(input);
+    GGML_PRINT_DEBUG("[%s]\n", __func__);
+    // TODO: deserialize the request and report whether the server already has
+    // data matching the received hash; for now always report a miss
+    response.result = 0;
+    return true;
+}
+
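
The handler above is a stub: it always reports a miss, so the client falls back to a full RPC_CMD_SET_TENSOR. As one possible direction for the TODO (a sketch only, not what this commit implements), the server could keep a cache of previously received payloads keyed by their XXH64 hash and answer from it. The cached_buffers member below is hypothetical and would have to be populated by set_tensor for large payloads; the request layout is assumed to be | rpc_tensor | offset (8 bytes) | hash (8 bytes) |, mirroring the client side, and the tensor handling follows the existing set_tensor pattern:

    // Sketch only -- assumes a hypothetical rpc_server member:
    //   std::unordered_map<uint64_t, std::vector<uint8_t>> cached_buffers;
    // populated by set_tensor() whenever it receives a payload larger than HASH_THRESHOLD.
    bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response) {
        // expected serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
        if (input.size() != sizeof(rpc_tensor) + 2 * sizeof(uint64_t)) {
            return false;
        }
        const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
        uint64_t offset;
        memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
        uint64_t hash;
        memcpy(&hash, input.data() + sizeof(rpc_tensor) + sizeof(offset), sizeof(hash));
        auto it = cached_buffers.find(hash);
        if (it == cached_buffers.end()) {
            // miss: the client will follow up with a regular RPC_CMD_SET_TENSOR
            response.result = 0;
            return true;
        }
        // hit: write the cached bytes into the target tensor, mirroring set_tensor()
        struct ggml_init_params params {
            /*.mem_size   =*/ ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,
        };
        struct ggml_context * ctx = ggml_init(params);
        ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
        if (tensor == nullptr) {
            ggml_free(ctx);
            return false;
        }
        // a production version should also validate offset and size against the
        // tensor's buffer, as the existing set_tensor handler does
        ggml_backend_tensor_set(tensor, it->second.data(), offset, it->second.size());
        ggml_free(ctx);
        response.result = 1;
        return true;
    }

An on-disk cache shared with the client would work just as well; the in-memory map above is merely the simplest variant for illustration.
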
 bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
     struct ggml_init_params params {
         /*.mem_size   =*/ ggml_tensor_overhead(),
@@ -1260,6 +1290,20 @@ static void rpc_serve_client(ggml_backend_t backend, sockfd_t sockfd, size_t fre
                 }
                 break;
             }
+            case RPC_CMD_SET_TENSOR_HASH: {
+                std::vector<uint8_t> input;
+                if (!recv_msg(sockfd, input)) {
+                    return;
+                }
+                rpc_msg_set_tensor_hash_rsp response;
+                if (!server.set_tensor_hash(input, response)) {
+                    return;
+                }
+                if (!send_msg(sockfd, &response, sizeof(response))) {
+                    return;
+                }
+                break;
+            }
             case RPC_CMD_INIT_TENSOR: {
                 rpc_msg_init_tensor_req request;
                 if (!recv_msg(sockfd, &request, sizeof(request))) {