@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
     uint8_t value;
 };
 
+struct rpc_msg_set_tensor_hash_req {
+    rpc_tensor tensor;
+    uint64_t offset;
+    uint64_t hash;
+};
+
 struct rpc_msg_set_tensor_hash_rsp {
     uint8_t result;
 };
@@ -534,15 +540,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
     rpc_tensor rpc_tensor = serialize_tensor(tensor);
     if (size > HASH_THRESHOLD) {
-        // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
-        size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
-        std::vector<uint8_t> input(input_size, 0);
-        uint64_t hash = fnv_hash((const uint8_t *)data, size);
-        memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
-        memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
-        memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
+        rpc_msg_set_tensor_hash_req request;
+        request.tensor = serialize_tensor(tensor);
+        request.offset = offset;
+        request.hash = fnv_hash((const uint8_t *)data, size);
         rpc_msg_set_tensor_hash_rsp response;
-        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
+        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
         GGML_ASSERT(status);
         if (response.result) {
             // the server has the same data, no need to send it
@@ -590,15 +593,12 @@ static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
 
 bool ggml_backend_rpc_buffer_load_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, size_t offset, uint64_t hash) {
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
-    rpc_tensor rpc_tensor = serialize_tensor(tensor);
-    // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
-    size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
-    std::vector<uint8_t> input(input_size, 0);
-    memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
-    memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
-    memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
+    rpc_msg_set_tensor_hash_req request;
+    request.tensor = serialize_tensor(tensor);
+    request.offset = offset;
+    request.hash = hash;
     rpc_msg_set_tensor_hash_rsp response;
-    bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
+    bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
     GGML_ASSERT(status);
     return response.result;
 }
@@ -865,7 +865,7 @@ class rpc_server {
     bool free_buffer(const rpc_msg_free_buffer_req & request);
     bool buffer_clear(const rpc_msg_buffer_clear_req & request);
     bool set_tensor(const std::vector<uint8_t> & input);
-    bool set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response);
+    bool set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response);
     bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector<uint8_t> & response);
     bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
     bool graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response);
@@ -1087,18 +1087,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
     return true;
 }
 
-bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response)
+bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response)
 {
-    // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
-    if (input.size() != sizeof(rpc_tensor) + 16) {
-        return false;
-    }
-    const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
-    uint64_t offset;
-    memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
-    const uint64_t * hash = (const uint64_t *)(input.data() + sizeof(rpc_tensor) + sizeof(offset));
     std::vector<uint8_t> cached_file;
-    if (!get_cached_file(*hash, cached_file)) {
+    if (!get_cached_file(request.hash, cached_file)) {
         response.result = 0;
         return true;
     }
@@ -1111,7 +1103,7 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
     ggml_context_ptr ctx_ptr { ggml_init(params) };
     GGML_ASSERT(ctx_ptr != nullptr);
     ggml_context * ctx = ctx_ptr.get();
-    ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
+    ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
         return false;
@@ -1123,11 +1115,13 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
         const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 
-        if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
+        if (request.tensor.data + request.offset < p0
+            || request.tensor.data + request.offset >= p1
+            || size > (p1 - request.tensor.data - request.offset)) {
             GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
         }
     }
-    ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
+    ggml_backend_tensor_set(tensor, cached_file.data(), request.offset, size);
     response.result = 1;
     return true;
 }
@@ -1449,12 +1443,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
                 break;
             }
             case RPC_CMD_SET_TENSOR_HASH: {
-                std::vector<uint8_t> input;
-                if (!recv_msg(sockfd, input)) {
+                rpc_msg_set_tensor_hash_req request;
+                if (!recv_msg(sockfd, &request, sizeof(request))) {
                     return;
                 }
                 rpc_msg_set_tensor_hash_rsp response;
-                if (!server.set_tensor_hash(input, response)) {
+                if (!server.set_tensor_hash(request, response)) {
                     return;
                 }
                 if (!send_msg(sockfd, &response, sizeof(response))) {
0 commit comments