@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
151151 uint8_t value;
152152};
153153
154+ struct rpc_msg_set_tensor_hash_req { // request payload for RPC_CMD_SET_TENSOR_HASH; NOTE(review): sent as raw bytes with sizeof(request), so the wire layout depends on the struct packing in effect here - confirm
155+ rpc_tensor tensor; // destination tensor as produced by serialize_tensor()
156+ uint64_t offset; // offset the server forwards to ggml_backend_tensor_set()
157+ uint64_t hash; // key the server passes to get_cached_file() to look up the tensor data
158+ };
159+
154160struct rpc_msg_set_tensor_hash_rsp { // reply to RPC_CMD_SET_TENSOR_HASH
155161 uint8_t result; // set to 1 by the server after it wrote cached data into the tensor, 0 when get_cached_file() found nothing for the hash
156162};
@@ -543,15 +549,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
543549 ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context ;
544550 rpc_tensor rpc_tensor = serialize_tensor (tensor);
545551 if (size > HASH_THRESHOLD) {
546- // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
547- size_t input_size = sizeof (rpc_tensor) + sizeof (uint64_t ) + sizeof (uint64_t );
548- std::vector<uint8_t > input (input_size, 0 );
549- uint64_t hash = fnv_hash ((const uint8_t *)data, size);
550- memcpy (input.data (), &rpc_tensor, sizeof (rpc_tensor));
551- memcpy (input.data () + sizeof (rpc_tensor), &offset, sizeof (offset));
552- memcpy (input.data () + sizeof (rpc_tensor) + sizeof (offset), &hash, sizeof (hash));
552+ rpc_msg_set_tensor_hash_req request;
553+ request.tensor = serialize_tensor (tensor);
554+ request.offset = offset;
555+ request.hash = fnv_hash ((const uint8_t *)data, size);
553556 rpc_msg_set_tensor_hash_rsp response;
554- bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, input. data (), input. size ( ), &response, sizeof (response));
557+ bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, &request, sizeof (request ), &response, sizeof (response));
555558 GGML_ASSERT (status);
556559 if (response.result ) {
557560 // the server has the same data, no need to send it
@@ -599,15 +602,12 @@ static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
599602
600603bool ggml_backend_rpc_buffer_load_tensor (ggml_backend_buffer_t buffer, ggml_tensor * tensor, size_t offset, uint64_t hash) { // sends RPC_CMD_SET_TENSOR_HASH for `tensor` with a caller-supplied hash (no tensor data is transmitted) and returns the server's result flag
601604 ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context ;
602- rpc_tensor rpc_tensor = serialize_tensor (tensor);
603- // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
604- size_t input_size = sizeof (rpc_tensor) + sizeof (uint64_t ) + sizeof (uint64_t );
605- std::vector<uint8_t > input (input_size, 0 );
606- memcpy (input.data (), &rpc_tensor, sizeof (rpc_tensor));
607- memcpy (input.data () + sizeof (rpc_tensor), &offset, sizeof (offset));
608- memcpy (input.data () + sizeof (rpc_tensor) + sizeof (offset), &hash, sizeof (hash));
605+ rpc_msg_set_tensor_hash_req request; // typed request struct replaces the hand-packed byte vector removed above
606+ request.tensor = serialize_tensor (tensor);
607+ request.offset = offset;
608+ request.hash = hash;
609609 rpc_msg_set_tensor_hash_rsp response;
610- bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, input. data (), input. size ( ), &response, sizeof (response));
610+ bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, &request, sizeof (request ), &response, sizeof (response)); // the struct is sent as-is, sizeof(request) bytes
611611 GGML_ASSERT (status); // asserts that send_rpc_cmd reported success
612612 return response.result ; // non-zero result converts to true
613613}
@@ -874,7 +874,7 @@ class rpc_server {
874874 bool free_buffer (const rpc_msg_free_buffer_req & request);
875875 bool buffer_clear (const rpc_msg_buffer_clear_req & request);
876876 bool set_tensor (const std::vector<uint8_t > & input);
877- bool set_tensor_hash (const std::vector< uint8_t > & input , rpc_msg_set_tensor_hash_rsp & response);
877+ bool set_tensor_hash (const rpc_msg_set_tensor_hash_req & request , rpc_msg_set_tensor_hash_rsp & response);
878878 bool get_tensor (const rpc_msg_get_tensor_req & request, std::vector<uint8_t > & response);
879879 bool copy_tensor (const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
880880 bool graph_compute (const std::vector<uint8_t > & input, rpc_msg_graph_compute_rsp & response);
@@ -1111,18 +1111,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
11111111 return true ;
11121112}
11131113
1114- bool rpc_server::set_tensor_hash (const std::vector< uint8_t > & input , rpc_msg_set_tensor_hash_rsp & response)
1114+ bool rpc_server::set_tensor_hash (const rpc_msg_set_tensor_hash_req & request , rpc_msg_set_tensor_hash_rsp & response) // handles RPC_CMD_SET_TENSOR_HASH: copies the cached data for request.hash into request.tensor; returns false if the tensor cannot be deserialized or the target region is out of bounds, true otherwise (response.result tells hit from miss)
11151115{
1116- // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
1117- if (input.size () != sizeof (rpc_tensor) + 16 ) {
1118- return false ;
1119- }
1120- const rpc_tensor * in_tensor = (const rpc_tensor *)input.data ();
1121- uint64_t offset;
1122- memcpy (&offset, input.data () + sizeof (rpc_tensor), sizeof (offset));
1123- const uint64_t * hash = (const uint64_t *)(input.data () + sizeof (rpc_tensor) + sizeof (offset));
11241116 std::vector<uint8_t > cached_file;
1125- if (!get_cached_file (* hash, cached_file)) {
1117+ if (!get_cached_file (request. hash , cached_file)) {
11261118 response.result = 0 ; // cache miss is not an error: report 0 and keep the connection alive
11271119 return true ;
11281120 }
@@ -1135,7 +1127,7 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
11351127 ggml_context_ptr ctx_ptr { ggml_init (params) };
11361128 GGML_ASSERT (ctx_ptr != nullptr );
11371129 ggml_context * ctx = ctx_ptr.get ();
1138- ggml_tensor * tensor = deserialize_tensor (ctx, in_tensor );
1130+ ggml_tensor * tensor = deserialize_tensor (ctx, &request. tensor );
11391131 if (tensor == nullptr ) {
11401132 GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
11411133 return false ;
@@ -1147,13 +1139,15 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
11471139 const size_t p0 = (size_t ) ggml_backend_buffer_get_base (tensor->buffer );
11481140 const size_t p1 = p0 + ggml_backend_buffer_get_size (tensor->buffer );
11491141
1150- if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
1142+ if (request.tensor .data + request.offset < p0
1143+ || request.tensor .data + request.offset >= p1
1144+ || size > (p1 - request.tensor .data - request.offset )) { // reject a write whose start or extent falls outside the buffer range [p0, p1)
11511145 GGML_LOG_ERROR (" [%s] tensor data region (data=0x%" PRIx64 " , offset=%" PRIu64 " , size=%zu, hash=0x%" PRIx64 " ) out of buffer bounds [0x%zx, 0x%zx)\n " ,
1152- __func__, in_tensor->data , offset, size, *hash, p0, p1);
1146+ __func__, request.tensor.data, request.offset, size, request.hash, p0, p1); // the locals in_tensor/offset/hash no longer exist; log the request fields instead
11531147 return false ;
11541148 }
11551149 }
1156- ggml_backend_tensor_set (tensor, cached_file.data (), offset, size);
1150+ ggml_backend_tensor_set (tensor, cached_file.data (), request. offset , size);
11571151 response.result = 1 ; // cached data was written into the tensor
11581152 return true ;
11591153}
@@ -1513,12 +1507,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
15131507 break ;
15141508 }
15151509 case RPC_CMD_SET_TENSOR_HASH: {
1516- std::vector< uint8_t > input ;
1517- if (!recv_msg (sockfd, input )) {
1510+ rpc_msg_set_tensor_hash_req request ;
1511+ if (!recv_msg (sockfd, &request, sizeof (request) )) {
15181512 return ;
15191513 }
15201514 rpc_msg_set_tensor_hash_rsp response;
1521- if (!server.set_tensor_hash (input , response)) {
1515+ if (!server.set_tensor_hash (request , response)) {
15221516 return ;
15231517 }
15241518 if (!send_msg (sockfd, &response, sizeof (response))) {
0 commit comments