
Commit b546280

fix(rpc): Improve input validation and error handling
The `rpc-server` was vulnerable to Denial of Service attacks via several RPC commands (`SET_TENSOR`, `GRAPH_COMPUTE`, etc.). Malformed messages could trigger failed assertions (e.g., invalid `ggml_type`) or out-of-bounds reads/writes leading to `GGML_ABORT` calls, crashing the server process.

This PR introduces robust input validation and replaces `abort()` calls with graceful error handling:

- **Type Validation:** `deserialize_tensor` now checks that `tensor->type` is within the valid `GGML_TYPE_COUNT` range *before* calling `ggml_new_tensor_4d`, and returns `nullptr` on an invalid type.
- **Bounds Checks:** Replaced `GGML_ABORT` in the `set_tensor`, `set_tensor_hash`, and `get_tensor` handlers with error logging and a `false` return when data/offset parameters fall outside the buffer bounds.
- **Size Checks:** Added overflow-safe arithmetic in `graph_compute` when calculating required message sizes from the client-provided `n_nodes` and `n_tensors`, returning early if the reported sizes conflict with the actual message size or would overflow (see the sketch after this list).
- **Error Propagation:**
  - `create_node` now checks for `nullptr` return values from `deserialize_tensor` and from its recursive calls, propagating `nullptr` upwards on failure. It uses `find` instead of `at` for safer map access.
  - `copy_tensor` now checks for `nullptr` from `deserialize_tensor` and sets the response status to failure if deserialization or the bounds checks fail.
  - `graph_compute` now checks for a `nullptr` return from `create_node` and returns a failure status correctly. The final return value now reflects the actual computation status.

These changes improve the RPC server's resilience against malformed client requests, preventing crashes and ensuring errors are handled more gracefully.

Signed-off-by: Ville Vesilehto <[email protected]>
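
To make the size-check approach concrete, here is a minimal, standalone sketch of validating the `| n_nodes | nodes | n_tensors | tensors |` layout with the same `SIZE_MAX` overflow guards. It is illustrative only: `validate_graph_msg_size`, `rpc_tensor_stub`, and the `main` driver are invented for this note and are not part of the patch, which performs these checks inline in `rpc_server::graph_compute` before deserializing tensors and building the graph.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Stand-in for the real rpc_tensor struct; only its size matters for the layout check.
struct rpc_tensor_stub { uint8_t bytes[256]; };

// Returns true if `input` is large enough for the n_nodes/n_tensors counts it advertises,
// accumulating the required size with explicit SIZE_MAX guards against overflow.
static bool validate_graph_msg_size(const std::vector<uint8_t> & input) {
    size_t required = sizeof(uint32_t);                // n_nodes field
    if (input.size() < required) {
        return false;
    }
    uint32_t n_nodes = 0;
    std::memcpy(&n_nodes, input.data(), sizeof(n_nodes));

    const size_t nodes_bytes = (size_t) n_nodes * sizeof(uint64_t);
    if (SIZE_MAX - required < nodes_bytes) {           // nodes array would overflow size_t
        return false;
    }
    required += nodes_bytes;

    if (SIZE_MAX - required < sizeof(uint32_t)) {      // n_tensors field
        return false;
    }
    required += sizeof(uint32_t);
    if (input.size() < required) {
        return false;
    }

    uint32_t n_tensors = 0;
    std::memcpy(&n_tensors, input.data() + sizeof(uint32_t) + nodes_bytes, sizeof(n_tensors));

    const size_t tensors_bytes = (size_t) n_tensors * sizeof(rpc_tensor_stub);
    if (SIZE_MAX - required < tensors_bytes) {         // tensors array would overflow size_t
        return false;
    }
    required += tensors_bytes;

    return input.size() >= required;
}

int main() {
    // A 4-byte message claiming ~4 billion nodes is rejected instead of read out of bounds.
    std::vector<uint8_t> msg(4, 0xFF);
    std::printf("valid: %s\n", validate_graph_msg_size(msg) ? "yes" : "no");
    return 0;
}
```

The key idiom is checking `SIZE_MAX - required < next_chunk` before each addition, so a hostile `n_nodes` or `n_tensors` cannot wrap the computed size and slip past the `input.size()` comparison.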
1 parent 295354e commit b546280

1 file changed: +123, -15 lines changed


ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 123 additions & 15 deletions
@@ -982,8 +982,21 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
 }
 
 ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rpc_tensor * tensor) {
+    // Validate tensor type before using it
+    if (tensor->type < 0 || tensor->type >= GGML_TYPE_COUNT) {
+        GGML_LOG_ERROR("[%s] invalid tensor type received: %u\n", __func__, tensor->type);
+        return nullptr;
+    }
+
     ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type,
         tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+
+    // ggml_new_tensor_4d might fail if dimensions are invalid, although less likely to crash than invalid type
+    if (result == nullptr) {
+        GGML_LOG_ERROR("[%s] ggml_new_tensor_4d failed for type %u\n", __func__, tensor->type);
+        return nullptr;
+    }
+
     for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
         result->nb[i] = tensor->nb[i];
     }
@@ -1043,7 +1056,10 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 
         if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
-            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+            // Replace GGML_ABORT with logging and error return
+            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu) out of buffer bounds [0x%zx, 0x%zx)\n",
+                           __func__, in_tensor->data, offset, size, p0, p1);
+            return false;
         }
     }
 
@@ -1118,7 +1134,11 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 
         if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
-            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+            // Replace GGML_ABORT with logging and error return
+            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
+                           __func__, in_tensor->data, offset, size, *hash, p0, p1);
+            response.result = 0;
+            return false;
         }
     }
     ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
@@ -1183,7 +1203,10 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
         if (request.tensor.data + request.offset < p0 ||
             request.tensor.data + request.offset >= p1 ||
             request.size > (p1 - request.tensor.data - request.offset)) {
-            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+            // Replace GGML_ABORT with logging and error return
+            GGML_LOG_ERROR("[%s] requested tensor region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%" PRIu64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
+                           __func__, request.tensor.data, request.offset, request.size, p0, p1);
+            return false;
         }
     }
 
@@ -1206,6 +1229,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
     ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
     if (src == nullptr || dst == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
+        response.result = 0;
         return false;
     }
 
@@ -1223,6 +1247,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
                        dst_data + src_size,
                        dst_base,
                        dst_base + dst_buf_sz);
+        response.result = 0;
         return false;
     }
 
@@ -1243,43 +1268,118 @@ ggml_tensor * rpc_server::create_node(uint64_t id,
     if (tensor_map.find(id) != tensor_map.end()) {
         return tensor_map[id];
     }
-    const rpc_tensor * tensor = tensor_ptrs.at(id);
+    // Safely find the tensor pointer
+    auto it_ptr = tensor_ptrs.find(id);
+    if (it_ptr == tensor_ptrs.end()) {
+        GGML_LOG_ERROR("[%s] tensor id %" PRIu64 " not found in provided tensors\n", __func__, id);
+        return nullptr;
+    }
+    const rpc_tensor * tensor = it_ptr->second;
+
     struct ggml_tensor * result = deserialize_tensor(ctx, tensor);
     if (result == nullptr) {
         return nullptr;
     }
     tensor_map[id] = result;
     for (int i = 0; i < GGML_MAX_SRC; i++) {
         result->src[i] = create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map);
+        // Check if recursive call failed
+        if (tensor->src[i] != 0 && result->src[i] == nullptr) {
+            GGML_LOG_ERROR("[%s] failed to create source node %d (src_id=%" PRIu64 ") for node id %" PRIu64 "\n",
+                           __func__, i, tensor->src[i], id);
+            // Must return nullptr to signal failure up the call stack
+            return nullptr;
+        }
     }
     result->view_src = create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map);
+    // Check if recursive call failed
+    if (tensor->view_src != 0 && result->view_src == nullptr) {
+        GGML_LOG_ERROR("[%s] failed to create view_src node (view_src_id=%" PRIu64 ") for node id %" PRIu64 "\n",
+                       __func__, tensor->view_src, id);
+        // Must return nullptr to signal failure up the call stack
+        return nullptr;
+    }
     result->view_offs = tensor->view_offs;
     return result;
 }
 
 bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response) {
     // serialization format:
-    // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) |
-    if (input.size() < sizeof(uint32_t)) {
+    // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) |
+
+    // Perform robust size checks with overflow protection
+    const size_t min_header_size = sizeof(uint32_t);
+    if (input.size() < min_header_size) {
+        GGML_LOG_ERROR("[%s] input message too small for n_nodes header: %zu bytes\n", __func__, input.size());
+        response.result = GGML_STATUS_FAILED;
         return false;
     }
     uint32_t n_nodes;
     memcpy(&n_nodes, input.data(), sizeof(n_nodes));
-    if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) {
+
+    // Calculate required size for nodes array
+    size_t nodes_array_bytes = n_nodes * sizeof(uint64_t);
+
+    // Calculate required size up to n_tensors field safely
+    size_t required_size_before_tensors = min_header_size;
+
+    // Check for overflow before adding nodes_array_bytes
+    if (SIZE_MAX - required_size_before_tensors < nodes_array_bytes) {
+        GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 1: n_nodes=%u\n", __func__, n_nodes);
+        response.result = GGML_STATUS_FAILED; // Use correct status
+        return false;
+    }
+    required_size_before_tensors += nodes_array_bytes;
+
+    const size_t n_tensors_field_size = sizeof(uint32_t);
+    // Check for overflow before adding n_tensors_field_size
+    if (SIZE_MAX - required_size_before_tensors < n_tensors_field_size) {
+        GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 2: n_nodes=%u\n", __func__, n_nodes);
+        response.result = GGML_STATUS_FAILED; // Use correct status
         return false;
     }
-    const uint64_t * nodes = (const uint64_t *)(input.data() + sizeof(n_nodes));
+    required_size_before_tensors += n_tensors_field_size;
+
+    if (input.size() < required_size_before_tensors) {
+        GGML_LOG_ERROR("[%s] input message too small for nodes array or n_tensors header: %zu bytes, needed %zu\n", __func__, input.size(), required_size_before_tensors);
+        response.result = GGML_STATUS_FAILED; // Use correct status
+        return false;
+    }
+
+    // Read n_tensors
+    const uint64_t * nodes_ptr = (const uint64_t *)(input.data() + sizeof(n_nodes));
     uint32_t n_tensors;
-    memcpy(&n_tensors, input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t), sizeof(n_tensors));
-    if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) {
+    memcpy(&n_tensors, input.data() + min_header_size + nodes_array_bytes, sizeof(n_tensors));
+
+    // Calculate required size for tensors array
+    size_t tensors_array_bytes = n_tensors * sizeof(rpc_tensor);
+
+    // Calculate total required size safely
+    size_t required_total_size = required_size_before_tensors;
+
+    // Check for overflow before adding tensors_array_bytes
+    if (SIZE_MAX - required_total_size < tensors_array_bytes) {
+        GGML_LOG_ERROR("[%s] integer overflow calculating total required size: n_nodes=%u, n_tensors=%u\n", __func__, n_nodes, n_tensors);
+        response.result = GGML_STATUS_FAILED;
+        return false;
+    }
+    required_total_size += tensors_array_bytes;
+
+    if (input.size() < required_total_size) {
+        GGML_LOG_ERROR("[%s] input message too small for tensors array: %zu bytes, needed %zu\n", __func__, input.size(), required_total_size);
+        response.result = GGML_STATUS_FAILED;
         return false;
     }
-    const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
+
+    // Pointers are now safe to use based on size checks
+    const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + required_size_before_tensors);
     GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
 
-    size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
+    // Estimate buffer size for context
+    size_t ctx_buf_size = ggml_tensor_overhead()*((size_t)n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
+
     struct ggml_init_params params = {
-        /*.mem_size   =*/ buf_size,
+        /*.mem_size   =*/ ctx_buf_size,
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
@@ -1295,12 +1395,20 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
     std::unordered_map<uint64_t, ggml_tensor*> tensor_map;
     for (uint32_t i = 0; i < n_nodes; i++) {
         int64_t id;
-        memcpy(&id, &nodes[i], sizeof(id));
+        memcpy(&id, &nodes_ptr[i], sizeof(id));
         graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map);
+        // Check if create_node failed (e.g., due to invalid type or missing ID)
+        if (graph->nodes[i] == nullptr) {
+            GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id);
+            response.result = GGML_STATUS_FAILED;
+            // No need to free ctx, ggml_context_ptr handles it.
+            return false;
+        }
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     response.result = status;
-    return true;
+    // Return true only if computation succeeded
+    return status == GGML_STATUS_SUCCESS;
 }
 
 rpc_server::~rpc_server() {
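
As a footnote to the bounds checks above, the three-way predicate shared by `set_tensor`, `set_tensor_hash`, and `get_tensor` can be exercised on its own. The sketch below is illustrative only; `region_in_bounds` and the sample values are invented for this note and do not appear in the patch.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Mirrors the handlers' check: reject if the region starts before the buffer base,
// starts at or past the buffer end, or is larger than the space remaining.
static bool region_in_bounds(uint64_t data, uint64_t offset, size_t size,
                             size_t p0, size_t p1) {
    if (data + offset < p0 || data + offset >= p1) {
        return false;
    }
    return size <= (p1 - data - offset);
}

int main() {
    const size_t p0 = 0x1000, p1 = 0x2000;                                  // 4 KiB buffer
    std::printf("%d\n", region_in_bounds(0x1000, 0x0800, 0x0800, p0, p1));  // 1: exactly fits
    std::printf("%d\n", region_in_bounds(0x1000, 0x0800, 0x0801, p0, p1));  // 0: one byte too long
    std::printf("%d\n", region_in_bounds(0x0FFF, 0x0000, 0x0001, p0, p1));  // 0: starts below the buffer
    return 0;
}
```

When the check fails, the handlers now log the offending region and return `false`, so a single malformed request degrades into an RPC error instead of a `GGML_ABORT` that kills the `rpc-server` process.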
