From b54628062c9dffbd9490d04de75ca5b318777793 Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Sat, 19 Apr 2025 14:06:55 +0300 Subject: [PATCH 1/8] fix(rpc): Improve input validation and error handling The `rpc-server` was vulnerable to Denial of Service attacks via several RPC commands (`SET_TENSOR`, `GRAPH_COMPUTE`, etc.). Malformed messages could trigger failed assertions (e.g., invalid `ggml_type`) or out-of-bounds reads/writes leading to `GGML_ABORT` calls, crashing the server process. This PR introduces robust input validation and replaces `abort()` calls with graceful error handling: - **Type Validation:** `deserialize_tensor` now checks if the `tensor->type` is within the valid `GGML_TYPE_COUNT` range *before* calling `ggml_new_tensor_4d`. Returns `nullptr` on invalid type. - **Bounds Checks:** Replaced `GGML_ABORT` in `set_tensor`, `set_tensor_hash`, and `get_tensor` handlers with error logging and returning `false` when data/offset parameters are out of buffer bounds. - **Size Checks:** Added safe arithmetic checks (for overflow) in `graph_compute` when calculating required message sizes based on client-provided `n_nodes` and `n_tensors`. Returns early if the reported sizes conflict with the actual message size or would lead to overflow. - **Error Propagation:** - `create_node` now checks for `nullptr` return values from `deserialize_tensor` and its recursive calls, propagating `nullptr` upwards on failure. Uses `find` instead of `at` for safer map access. - `copy_tensor` now checks for `nullptr` from `deserialize_tensor` and sets the response status to failure if deserialization or bounds checks fail. - `graph_compute` now checks for `nullptr` return from `create_node` and returns failure status correctly. The final return value now reflects the actual computation status. These changes improve the RPC server's resilience against malformed client requests, preventing crashes and ensuring errors are handled more gracefully. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 138 +++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 15 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 9023eb0919690..ad69e38efa49e 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -982,8 +982,21 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) { } ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rpc_tensor * tensor) { + // Validate tensor type before using it + if (tensor->type < 0 || tensor->type >= GGML_TYPE_COUNT) { + GGML_LOG_ERROR("[%s] invalid tensor type received: %u\n", __func__, tensor->type); + return nullptr; + } + ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); + + // ggml_new_tensor_4d might fail if dimensions are invalid, although less likely to crash than invalid type + if (result == nullptr) { + GGML_LOG_ERROR("[%s] ggml_new_tensor_4d failed for type %u\\n", __func__, tensor->type); + return nullptr; + } + for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) { result->nb[i] = tensor->nb[i]; } @@ -1043,7 +1056,10 @@ bool rpc_server::set_tensor(const std::vector & input) { const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer); if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) { - GGML_ABORT("[%s] tensor->data out of bounds\n", __func__); + // Replace GGML_ABORT with logging and error return + GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu) out of buffer bounds [0x%zx, 0x%zx)\n", + __func__, in_tensor->data, offset, size, p0, p1); + return false; } } @@ -1118,7 +1134,11 @@ bool rpc_server::set_tensor_hash(const std::vector & input, rpc_msg_set const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer); if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) { - GGML_ABORT("[%s] tensor->data out of bounds\n", __func__); + // Replace GGML_ABORT with logging and error return + GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n", + __func__, in_tensor->data, offset, size, *hash, p0, p1); + response.result = 0; + return false; } } ggml_backend_tensor_set(tensor, cached_file.data(), offset, size); @@ -1183,7 +1203,10 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector< if (request.tensor.data + request.offset < p0 || request.tensor.data + request.offset >= p1 || request.size > (p1 - request.tensor.data - request.offset)) { - GGML_ABORT("[%s] tensor->data out of bounds\n", __func__); + // Replace GGML_ABORT with logging and error return + GGML_LOG_ERROR("[%s] requested tensor region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%" PRIu64 ") out of buffer bounds [0x%zx, 0x%zx)\n", + __func__, request.tensor.data, request.offset, request.size, p0, p1); + return false; } } @@ -1206,6 +1229,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co ggml_tensor * dst = deserialize_tensor(ctx, &request.dst); if (src == nullptr || dst == nullptr) { GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__); + response.result = 0; return false; } @@ -1223,6 +1247,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co dst_data + src_size, dst_base, dst_base + dst_buf_sz); + response.result = 0; return false; } @@ -1243,7 +1268,14 @@ ggml_tensor * rpc_server::create_node(uint64_t id, if (tensor_map.find(id) != tensor_map.end()) { return tensor_map[id]; } - const rpc_tensor * tensor = tensor_ptrs.at(id); + // Safely find the tensor pointer + auto it_ptr = tensor_ptrs.find(id); + if (it_ptr == tensor_ptrs.end()) { + GGML_LOG_ERROR("[%s] tensor id %" PRIu64 " not found in provided tensors\n", __func__, id); + return nullptr; + } + const rpc_tensor * tensor = it_ptr->second; + struct ggml_tensor * result = deserialize_tensor(ctx, tensor); if (result == nullptr) { return nullptr; @@ -1251,35 +1283,103 @@ ggml_tensor * rpc_server::create_node(uint64_t id, tensor_map[id] = result; for (int i = 0; i < GGML_MAX_SRC; i++) { result->src[i] = create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map); + // Check if recursive call failed + if (tensor->src[i] != 0 && result->src[i] == nullptr) { + GGML_LOG_ERROR("[%s] failed to create source node %d (src_id=%" PRIu64 ") for node id %" PRIu64 "\n", + __func__, i, tensor->src[i], id); + // Must return nullptr to signal failure up the call stack + return nullptr; + } } result->view_src = create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map); + // Check if recursive call failed + if (tensor->view_src != 0 && result->view_src == nullptr) { + GGML_LOG_ERROR("[%s] failed to create view_src node (view_src_id=%" PRIu64 ") for node id %" PRIu64 "\n", + __func__, tensor->view_src, id); + // Must return nullptr to signal failure up the call stack + return nullptr; + } result->view_offs = tensor->view_offs; return result; } bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph_compute_rsp & response) { // serialization format: - // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | - if (input.size() < sizeof(uint32_t)) { + // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | + + // Perform robust size checks with overflow protection + const size_t min_header_size = sizeof(uint32_t); + if (input.size() < min_header_size) { + GGML_LOG_ERROR("[%s] input message too small for n_nodes header: %zu bytes\n", __func__, input.size()); + response.result = GGML_STATUS_FAILED; return false; } uint32_t n_nodes; memcpy(&n_nodes, input.data(), sizeof(n_nodes)); - if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) { + + // Calculate required size for nodes array + size_t nodes_array_bytes = n_nodes * sizeof(uint64_t); + + // Calculate required size up to n_tensors field safely + size_t required_size_before_tensors = min_header_size; + + // Check for overflow before adding nodes_array_bytes + if (SIZE_MAX - required_size_before_tensors < nodes_array_bytes) { + GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 1: n_nodes=%u\n", __func__, n_nodes); + response.result = GGML_STATUS_FAILED; // Use correct status + return false; + } + required_size_before_tensors += nodes_array_bytes; + + const size_t n_tensors_field_size = sizeof(uint32_t); + // Check for overflow before adding n_tensors_field_size + if (SIZE_MAX - required_size_before_tensors < n_tensors_field_size) { + GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 2: n_nodes=%u\n", __func__, n_nodes); + response.result = GGML_STATUS_FAILED; // Use correct status return false; } - const uint64_t * nodes = (const uint64_t *)(input.data() + sizeof(n_nodes)); + required_size_before_tensors += n_tensors_field_size; + + if (input.size() < required_size_before_tensors) { + GGML_LOG_ERROR("[%s] input message too small for nodes array or n_tensors header: %zu bytes, needed %zu\n", __func__, input.size(), required_size_before_tensors); + response.result = GGML_STATUS_FAILED; // Use correct status + return false; + } + + // Read n_tensors + const uint64_t * nodes_ptr = (const uint64_t *)(input.data() + sizeof(n_nodes)); uint32_t n_tensors; - memcpy(&n_tensors, input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t), sizeof(n_tensors)); - if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) { + memcpy(&n_tensors, input.data() + min_header_size + nodes_array_bytes, sizeof(n_tensors)); + + // Calculate required size for tensors array + size_t tensors_array_bytes = n_tensors * sizeof(rpc_tensor); + + // Calculate total required size safely + size_t required_total_size = required_size_before_tensors; + + // Check for overflow before adding tensors_array_bytes + if (SIZE_MAX - required_total_size < tensors_array_bytes) { + GGML_LOG_ERROR("[%s] integer overflow calculating total required size: n_nodes=%u, n_tensors=%u\n", __func__, n_nodes, n_tensors); + response.result = GGML_STATUS_FAILED; + return false; + } + required_total_size += tensors_array_bytes; + + if (input.size() < required_total_size) { + GGML_LOG_ERROR("[%s] input message too small for tensors array: %zu bytes, needed %zu\n", __func__, input.size(), required_total_size); + response.result = GGML_STATUS_FAILED; return false; } - const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors)); + + // Pointers are now safe to use based on size checks + const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + required_size_before_tensors); GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors); - size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); + // Estimate buffer size for context + size_t ctx_buf_size = ggml_tensor_overhead()*((size_t)n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); + struct ggml_init_params params = { - /*.mem_size =*/ buf_size, + /*.mem_size =*/ ctx_buf_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; @@ -1295,12 +1395,20 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph std::unordered_map tensor_map; for (uint32_t i = 0; i < n_nodes; i++) { int64_t id; - memcpy(&id, &nodes[i], sizeof(id)); + memcpy(&id, &nodes_ptr[i], sizeof(id)); graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map); + // Check if create_node failed (e.g., due to invalid type or missing ID) + if (graph->nodes[i] == nullptr) { + GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id); + response.result = GGML_STATUS_FAILED; + // No need to free ctx, ggml_context_ptr handles it. + return false; + } } ggml_status status = ggml_backend_graph_compute(backend, graph); response.result = status; - return true; + // Return true only if computation succeeded + return status == GGML_STATUS_SUCCESS; } rpc_server::~rpc_server() { From cd054aa27111bf132b88c1a2710d66608c339261 Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Wed, 23 Apr 2025 18:58:33 +0300 Subject: [PATCH 2/8] refactor(rpc): address pr comments removed comments and unnecessary returns Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index ad69e38efa49e..954c51be328f8 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -1056,7 +1056,6 @@ bool rpc_server::set_tensor(const std::vector & input) { const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer); if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) { - // Replace GGML_ABORT with logging and error return GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu) out of buffer bounds [0x%zx, 0x%zx)\n", __func__, in_tensor->data, offset, size, p0, p1); return false; @@ -1134,10 +1133,8 @@ bool rpc_server::set_tensor_hash(const std::vector & input, rpc_msg_set const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer); if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) { - // Replace GGML_ABORT with logging and error return GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n", __func__, in_tensor->data, offset, size, *hash, p0, p1); - response.result = 0; return false; } } @@ -1203,7 +1200,6 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector< if (request.tensor.data + request.offset < p0 || request.tensor.data + request.offset >= p1 || request.size > (p1 - request.tensor.data - request.offset)) { - // Replace GGML_ABORT with logging and error return GGML_LOG_ERROR("[%s] requested tensor region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%" PRIu64 ") out of buffer bounds [0x%zx, 0x%zx)\n", __func__, request.tensor.data, request.offset, request.size, p0, p1); return false; @@ -1229,7 +1225,6 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co ggml_tensor * dst = deserialize_tensor(ctx, &request.dst); if (src == nullptr || dst == nullptr) { GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__); - response.result = 0; return false; } @@ -1247,7 +1242,6 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co dst_data + src_size, dst_base, dst_base + dst_buf_sz); - response.result = 0; return false; } @@ -1407,8 +1401,7 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph } ggml_status status = ggml_backend_graph_compute(backend, graph); response.result = status; - // Return true only if computation succeeded - return status == GGML_STATUS_SUCCESS; + return true; } rpc_server::~rpc_server() { From c064ffbfecf9117ca7fcb97c96256f7be3a1459a Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Wed, 23 Apr 2025 20:29:50 +0300 Subject: [PATCH 3/8] refactor(rpc): ambiguous nullptr from create_node rpc_server::create_node could previously return nullptr if the input ID was 0 (valid) or if an internal error (deserialization, recursion failure) occurred (invalid). This ambiguity made error handling difficult for the caller (`graph_compute`). This commit clarifies the meaning of nullptr: - `graph_compute` now checks if the input 'id' was non-zero when `create_node` returns nullptr, correctly identifying failures versus intentional null links. - `create_node` avoids recursive calls for zero IDs and propagates nullptr unambiguously on failure during recursion. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 44 ++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 954c51be328f8..b10cb2bb56d60 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -1276,23 +1276,34 @@ ggml_tensor * rpc_server::create_node(uint64_t id, } tensor_map[id] = result; for (int i = 0; i < GGML_MAX_SRC; i++) { - result->src[i] = create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map); - // Check if recursive call failed - if (tensor->src[i] != 0 && result->src[i] == nullptr) { - GGML_LOG_ERROR("[%s] failed to create source node %d (src_id=%" PRIu64 ") for node id %" PRIu64 "\n", - __func__, i, tensor->src[i], id); + // Check if the source ID is 0 before calling create_node recursively + if (tensor->src[i] == 0) { + result->src[i] = nullptr; + } else { + result->src[i] = create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map); + // If the recursive call failed for a non-zero ID, propagate the error + if (result->src[i] == nullptr) { + GGML_LOG_ERROR("[%s] failed to create source node %d (src_id=%" PRIu64 ") for node id %" PRIu64 "\n", + __func__, i, tensor->src[i], id); + // Must return nullptr to signal failure up the call stack + return nullptr; + } + } + } + + // Handle view_src similarly + if (tensor->view_src == 0) { + result->view_src = nullptr; + } else { + result->view_src = create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map); + // If the recursive call failed for a non-zero ID, propagate the error + if (result->view_src == nullptr) { + GGML_LOG_ERROR("[%s] failed to create view_src node (view_src_id=%" PRIu64 ") for node id %" PRIu64 "\n", + __func__, tensor->view_src, id); // Must return nullptr to signal failure up the call stack return nullptr; } } - result->view_src = create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map); - // Check if recursive call failed - if (tensor->view_src != 0 && result->view_src == nullptr) { - GGML_LOG_ERROR("[%s] failed to create view_src node (view_src_id=%" PRIu64 ") for node id %" PRIu64 "\n", - __func__, tensor->view_src, id); - // Must return nullptr to signal failure up the call stack - return nullptr; - } result->view_offs = tensor->view_offs; return result; } @@ -1391,8 +1402,11 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph int64_t id; memcpy(&id, &nodes_ptr[i], sizeof(id)); graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map); - // Check if create_node failed (e.g., due to invalid type or missing ID) - if (graph->nodes[i] == nullptr) { + + // Check if create_node failed for a *non-zero* ID. + // If id was 0, create_node returning nullptr is expected. + // If id was non-zero and create_node returned nullptr, it indicates a deserialization error. + if (graph->nodes[i] == nullptr && id != 0) { GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id); response.result = GGML_STATUS_FAILED; // No need to free ctx, ggml_context_ptr handles it. From 92306a8169e489102077b05fd0fdf5cfcc359014 Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Thu, 24 Apr 2025 20:44:56 +0300 Subject: [PATCH 4/8] refactor(rpc): initial zero check in create_node The caller (`graph_compute`) already checks `id != 0` when handling a `nullptr` return from `create_node`, correctly distinguishing intentional null links from actual errors. This makes the initial `if (id == 0)` check redundant. Also removes the log message when a tensor ID is not found in the provided map which was added in this branch. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index b10cb2bb56d60..ea2a984e97325 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -1256,16 +1256,12 @@ ggml_tensor * rpc_server::create_node(uint64_t id, struct ggml_context * ctx, const std::unordered_map & tensor_ptrs, std::unordered_map & tensor_map) { - if (id == 0) { - return nullptr; - } if (tensor_map.find(id) != tensor_map.end()) { return tensor_map[id]; } // Safely find the tensor pointer auto it_ptr = tensor_ptrs.find(id); if (it_ptr == tensor_ptrs.end()) { - GGML_LOG_ERROR("[%s] tensor id %" PRIu64 " not found in provided tensors\n", __func__, id); return nullptr; } const rpc_tensor * tensor = it_ptr->second; From e38c4d724f88df9e647019c69b92288d6bdac3e3 Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Thu, 24 Apr 2025 20:48:34 +0300 Subject: [PATCH 5/8] fix(rpc): Handle get_alloc_size failure in server Check the return value of `server.get_alloc_size` in the RPC server loop. If the call fails, return early to close the connection. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index ea2a984e97325..9cce72141b977 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -1472,7 +1472,9 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, return; } rpc_msg_get_alloc_size_rsp response; - server.get_alloc_size(request, response); + if (!server.get_alloc_size(request, response)) { + return; + } if (!send_msg(sockfd, &response, sizeof(response))) { return; } From 72c447a5a3c12a3d133953a6f7f4ed8e51a7f6fc Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Sat, 26 Apr 2025 08:57:50 +0300 Subject: [PATCH 6/8] refactor(rpc): input size validation in graph_compute Removes detailed, step-by-step size calculations and overflow checks in favor of simpler direct comparisons, assuming 64-bit overflow is unlikely. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 74 +++++----------------------------- 1 file changed, 10 insertions(+), 64 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 9cce72141b977..fd45330750d1e 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -1306,81 +1306,28 @@ ggml_tensor * rpc_server::create_node(uint64_t id, bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph_compute_rsp & response) { // serialization format: - // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | - - // Perform robust size checks with overflow protection - const size_t min_header_size = sizeof(uint32_t); - if (input.size() < min_header_size) { - GGML_LOG_ERROR("[%s] input message too small for n_nodes header: %zu bytes\n", __func__, input.size()); - response.result = GGML_STATUS_FAILED; + // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | + if (input.size() < sizeof(uint32_t)) { return false; } uint32_t n_nodes; memcpy(&n_nodes, input.data(), sizeof(n_nodes)); - - // Calculate required size for nodes array - size_t nodes_array_bytes = n_nodes * sizeof(uint64_t); - - // Calculate required size up to n_tensors field safely - size_t required_size_before_tensors = min_header_size; - - // Check for overflow before adding nodes_array_bytes - if (SIZE_MAX - required_size_before_tensors < nodes_array_bytes) { - GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 1: n_nodes=%u\n", __func__, n_nodes); - response.result = GGML_STATUS_FAILED; // Use correct status + if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) { return false; } - required_size_before_tensors += nodes_array_bytes; - - const size_t n_tensors_field_size = sizeof(uint32_t); - // Check for overflow before adding n_tensors_field_size - if (SIZE_MAX - required_size_before_tensors < n_tensors_field_size) { - GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 2: n_nodes=%u\n", __func__, n_nodes); - response.result = GGML_STATUS_FAILED; // Use correct status - return false; - } - required_size_before_tensors += n_tensors_field_size; - - if (input.size() < required_size_before_tensors) { - GGML_LOG_ERROR("[%s] input message too small for nodes array or n_tensors header: %zu bytes, needed %zu\n", __func__, input.size(), required_size_before_tensors); - response.result = GGML_STATUS_FAILED; // Use correct status - return false; - } - - // Read n_tensors - const uint64_t * nodes_ptr = (const uint64_t *)(input.data() + sizeof(n_nodes)); + const uint64_t * nodes = (const uint64_t *)(input.data() + sizeof(n_nodes)); uint32_t n_tensors; - memcpy(&n_tensors, input.data() + min_header_size + nodes_array_bytes, sizeof(n_tensors)); - - // Calculate required size for tensors array - size_t tensors_array_bytes = n_tensors * sizeof(rpc_tensor); - - // Calculate total required size safely - size_t required_total_size = required_size_before_tensors; - - // Check for overflow before adding tensors_array_bytes - if (SIZE_MAX - required_total_size < tensors_array_bytes) { - GGML_LOG_ERROR("[%s] integer overflow calculating total required size: n_nodes=%u, n_tensors=%u\n", __func__, n_nodes, n_tensors); - response.result = GGML_STATUS_FAILED; + memcpy(&n_tensors, input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t), sizeof(n_tensors)); + if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) { return false; } - required_total_size += tensors_array_bytes; - - if (input.size() < required_total_size) { - GGML_LOG_ERROR("[%s] input message too small for tensors array: %zu bytes, needed %zu\n", __func__, input.size(), required_total_size); - response.result = GGML_STATUS_FAILED; - return false; - } - - // Pointers are now safe to use based on size checks - const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + required_size_before_tensors); + const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors)); GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors); - // Estimate buffer size for context - size_t ctx_buf_size = ggml_tensor_overhead()*((size_t)n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); + size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); struct ggml_init_params params = { - /*.mem_size =*/ ctx_buf_size, + /*.mem_size =*/ buf_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; @@ -1396,7 +1343,7 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph std::unordered_map tensor_map; for (uint32_t i = 0; i < n_nodes; i++) { int64_t id; - memcpy(&id, &nodes_ptr[i], sizeof(id)); + memcpy(&id, &nodes[i], sizeof(id)); graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map); // Check if create_node failed for a *non-zero* ID. @@ -1405,7 +1352,6 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph if (graph->nodes[i] == nullptr && id != 0) { GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id); response.result = GGML_STATUS_FAILED; - // No need to free ctx, ggml_context_ptr handles it. return false; } } From eef59bcd630115498a04f4dad24e2921b8a8b1c1 Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Sun, 27 Apr 2025 22:05:36 +0300 Subject: [PATCH 7/8] refactor(rpc): remove extra status code setting Removes the explicit setting of `response.result = GGML_STATUS_FAILED` when `create_node` returns `nullptr` within `graph_compute`. Primary signal is the `false` return value in case of failure. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index fd45330750d1e..4a7eb96ae0853 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -1351,7 +1351,6 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph // If id was non-zero and create_node returned nullptr, it indicates a deserialization error. if (graph->nodes[i] == nullptr && id != 0) { GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id); - response.result = GGML_STATUS_FAILED; return false; } } From 099b8359bc5ce29398812f9509768837375c5f84 Mon Sep 17 00:00:00 2001 From: Ville Vesilehto Date: Mon, 28 Apr 2025 12:27:31 +0300 Subject: [PATCH 8/8] refactor(rpc): remove redundant check for tensor->type Breaks CI on ubuntu-cpu-make. Tensor type is uint32_t, thus the check is not needed. Signed-off-by: Ville Vesilehto --- ggml/src/ggml-rpc/ggml-rpc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 4a7eb96ae0853..140a775f9806f 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -983,7 +983,7 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) { ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rpc_tensor * tensor) { // Validate tensor type before using it - if (tensor->type < 0 || tensor->type >= GGML_TYPE_COUNT) { + if (tensor->type >= GGML_TYPE_COUNT) { GGML_LOG_ERROR("[%s] invalid tensor type received: %u\n", __func__, tensor->type); return nullptr; }