
Commit 6cf3a31

fix(rpc): Prevent DoS from invalid tensor type
Validate tensor type in deserialize_tensor before use. Replace GGML_ABORT in bounds checks with error returns. Add overflow checks in graph_compute and propagate errors. Fixes crash from crafted SET_TENSOR/GRAPH_COMPUTE messages.

Signed-off-by: Ville Vesilehto <[email protected]>
1 parent ab47dec commit 6cf3a31
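
The graph_compute changes below follow a check-before-add pattern: confirm that adding the next field's byte count to the running total cannot overflow size_t, then confirm the message actually contains that many bytes. A minimal standalone sketch of that pattern follows; parse_counts and its two-field layout are illustrative only, not part of the ggml-rpc API.

// Sketch of the overflow-guarded size validation applied in graph_compute.
// parse_counts() is a hypothetical helper for illustration.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Reads the n_nodes header and checks that the node-id array fits in `input`,
// returning false (instead of aborting) on truncated or oversized counts.
static bool parse_counts(const std::vector<uint8_t> & input, uint32_t & n_nodes) {
    if (input.size() < sizeof(uint32_t)) {
        return false;                                  // too small for the header
    }
    std::memcpy(&n_nodes, input.data(), sizeof(n_nodes));

    size_t required = sizeof(uint32_t);                // header bytes
    const size_t nodes_bytes = (size_t) n_nodes * sizeof(uint64_t);
    if (SIZE_MAX - required < nodes_bytes) {
        return false;                                  // adding would overflow size_t
    }
    required += nodes_bytes;
    return input.size() >= required;                   // enough bytes for the id array?
}

int main() {
    std::vector<uint8_t> msg(4, 0xFF);                 // crafted header: n_nodes = 0xFFFFFFFF
    uint32_t n_nodes = 0;
    std::printf("accepted: %s\n", parse_counts(msg, n_nodes) ? "yes" : "no");  // accepted: no
    return 0;
}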

File tree

1 file changed: +124 -15 lines

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 124 additions & 15 deletions
@@ -973,8 +973,21 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
 }
 
 ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rpc_tensor * tensor) {
+    // Validate tensor type before using it
+    if (tensor->type < 0 || tensor->type >= GGML_TYPE_COUNT) {
+        GGML_LOG_ERROR("[%s] invalid tensor type received: %u\n", __func__, tensor->type);
+        return nullptr;
+    }
+
     ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type,
         tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+
+    // ggml_new_tensor_4d might fail if dimensions are invalid, although less likely to crash than invalid type
+    if (result == nullptr) {
+        GGML_LOG_ERROR("[%s] ggml_new_tensor_4d failed for type %u\n", __func__, tensor->type);
+        return nullptr;
+    }
+
     for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
         result->nb[i] = tensor->nb[i];
     }
@@ -1034,7 +1047,10 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 
         if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
-            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+            // Replace GGML_ABORT with logging and error return
+            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu) out of buffer bounds [0x%zx, 0x%zx)\n",
+                           __func__, in_tensor->data, offset, size, p0, p1);
+            return false;
         }
     }
 
@@ -1109,7 +1125,11 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 
         if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
-            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+            // Replace GGML_ABORT with logging and error return
+            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
+                           __func__, in_tensor->data, offset, size, *hash, p0, p1);
+            response.result = 0;
+            return false;
         }
     }
     ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
@@ -1174,7 +1194,10 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
         if (request.tensor.data + request.offset < p0 ||
             request.tensor.data + request.offset >= p1 ||
             request.size > (p1 - request.tensor.data - request.offset)) {
-            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+            // Replace GGML_ABORT with logging and error return
+            GGML_LOG_ERROR("[%s] requested tensor region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%" PRIu64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
+                           __func__, request.tensor.data, request.offset, request.size, p0, p1);
+            return false;
         }
     }
 
@@ -1197,6 +1220,8 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
     ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
     if (src == nullptr || dst == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
+        // Propagate error from deserialize_tensor
+        response.result = 0;
         return false;
     }
 
@@ -1214,6 +1239,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
                          dst_data + src_size,
                          dst_base,
                          dst_base + dst_buf_sz);
+        response.result = 0;
         return false;
     }
 
@@ -1234,43 +1260,118 @@ ggml_tensor * rpc_server::create_node(uint64_t id,
     if (tensor_map.find(id) != tensor_map.end()) {
         return tensor_map[id];
     }
-    const rpc_tensor * tensor = tensor_ptrs.at(id);
+    // Safely find the tensor pointer
+    auto it_ptr = tensor_ptrs.find(id);
+    if (it_ptr == tensor_ptrs.end()) {
+        GGML_LOG_ERROR("[%s] tensor id %" PRIu64 " not found in provided tensors\n", __func__, id);
+        return nullptr;
+    }
+    const rpc_tensor * tensor = it_ptr->second;
+
     struct ggml_tensor * result = deserialize_tensor(ctx, tensor);
     if (result == nullptr) {
         return nullptr;
     }
     tensor_map[id] = result;
     for (int i = 0; i < GGML_MAX_SRC; i++) {
         result->src[i] = create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map);
+        // Check if recursive call failed
+        if (tensor->src[i] != 0 && result->src[i] == nullptr) {
+            GGML_LOG_ERROR("[%s] failed to create source node %d (src_id=%" PRIu64 ") for node id %" PRIu64 "\n",
+                           __func__, i, tensor->src[i], id);
+            // Must return nullptr to signal failure up the call stack
+            return nullptr;
+        }
     }
     result->view_src = create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map);
+    // Check if recursive call failed
+    if (tensor->view_src != 0 && result->view_src == nullptr) {
+        GGML_LOG_ERROR("[%s] failed to create view_src node (view_src_id=%" PRIu64 ") for node id %" PRIu64 "\n",
+                       __func__, tensor->view_src, id);
+        // Must return nullptr to signal failure up the call stack
+        return nullptr;
+    }
     result->view_offs = tensor->view_offs;
     return result;
 }
 
 bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response) {
     // serialization format:
-    // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) |
-    if (input.size() < sizeof(uint32_t)) {
+    // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) |
+
+    // Perform robust size checks with overflow protection
+    const size_t min_header_size = sizeof(uint32_t);
+    if (input.size() < min_header_size) {
+        GGML_LOG_ERROR("[%s] input message too small for n_nodes header: %zu bytes\n", __func__, input.size());
+        response.result = GGML_STATUS_FAILED;
         return false;
     }
     uint32_t n_nodes;
     memcpy(&n_nodes, input.data(), sizeof(n_nodes));
-    if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) {
+
+    // Calculate required size for nodes array
+    size_t nodes_array_bytes = n_nodes * sizeof(uint64_t);
+
+    // Calculate required size up to n_tensors field safely
+    size_t required_size_before_tensors = min_header_size;
+
+    // Check for overflow before adding nodes_array_bytes
+    if (SIZE_MAX - required_size_before_tensors < nodes_array_bytes) {
+        GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 1: n_nodes=%u\n", __func__, n_nodes);
+        response.result = GGML_STATUS_FAILED; // Use correct status
+        return false;
+    }
+    required_size_before_tensors += nodes_array_bytes;
+
+    const size_t n_tensors_field_size = sizeof(uint32_t);
+    // Check for overflow before adding n_tensors_field_size
+    if (SIZE_MAX - required_size_before_tensors < n_tensors_field_size) {
+        GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 2: n_nodes=%u\n", __func__, n_nodes);
+        response.result = GGML_STATUS_FAILED; // Use correct status
         return false;
     }
-    const uint64_t * nodes = (const uint64_t *)(input.data() + sizeof(n_nodes));
+    required_size_before_tensors += n_tensors_field_size;
+
+    if (input.size() < required_size_before_tensors) {
+        GGML_LOG_ERROR("[%s] input message too small for nodes array or n_tensors header: %zu bytes, needed %zu\n", __func__, input.size(), required_size_before_tensors);
+        response.result = GGML_STATUS_FAILED; // Use correct status
+        return false;
+    }
+
+    // Read n_tensors
+    const uint64_t * nodes_ptr = (const uint64_t *)(input.data() + sizeof(n_nodes));
     uint32_t n_tensors;
-    memcpy(&n_tensors, input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t), sizeof(n_tensors));
-    if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) {
+    memcpy(&n_tensors, input.data() + min_header_size + nodes_array_bytes, sizeof(n_tensors));
+
+    // Calculate required size for tensors array
+    size_t tensors_array_bytes = n_tensors * sizeof(rpc_tensor);
+
+    // Calculate total required size safely
+    size_t required_total_size = required_size_before_tensors;
+
+    // Check for overflow before adding tensors_array_bytes
+    if (SIZE_MAX - required_total_size < tensors_array_bytes) {
+        GGML_LOG_ERROR("[%s] integer overflow calculating total required size: n_nodes=%u, n_tensors=%u\n", __func__, n_nodes, n_tensors);
+        response.result = GGML_STATUS_FAILED;
+        return false;
+    }
+    required_total_size += tensors_array_bytes;
+
+    if (input.size() < required_total_size) {
+        GGML_LOG_ERROR("[%s] input message too small for tensors array: %zu bytes, needed %zu\n", __func__, input.size(), required_total_size);
+        response.result = GGML_STATUS_FAILED;
         return false;
     }
-    const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
+
+    // Pointers are now safe to use based on size checks
+    const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + required_size_before_tensors);
     GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
 
-    size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
+    // Estimate buffer size for context (original calculation seems reasonable)
+    size_t ctx_buf_size = ggml_tensor_overhead()*((size_t)n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
+
     struct ggml_init_params params = {
-        /*.mem_size   =*/ buf_size,
+        /*.mem_size   =*/ ctx_buf_size,
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
@@ -1286,12 +1387,20 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
     std::unordered_map<uint64_t, ggml_tensor*> tensor_map;
     for (uint32_t i = 0; i < n_nodes; i++) {
         int64_t id;
-        memcpy(&id, &nodes[i], sizeof(id));
+        memcpy(&id, &nodes_ptr[i], sizeof(id));
         graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map);
+        // Check if create_node failed (e.g., due to invalid type or missing ID)
+        if (graph->nodes[i] == nullptr) {
+            GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id);
+            response.result = GGML_STATUS_FAILED;
+            // No need to free ctx, ggml_context_ptr handles it.
+            return false;
+        }
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     response.result = status;
-    return true;
+    // Return true only if computation succeeded
+    return status == GGML_STATUS_SUCCESS;
 }
 
 rpc_server::~rpc_server() {
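
The bounds checks that replace GGML_ABORT in set_tensor, set_tensor_hash, and get_tensor all evaluate the same condition: the requested region [data + offset, data + offset + size) must lie within the buffer [p0, p1). Below is a small sketch of that containment test with concrete values; region_in_buffer is a hypothetical helper, not server code, and the real checks in ggml-rpc.cpp log and return false when the equivalent condition fails.

// Illustrative containment test mirroring the bounds checks above.
// region_in_buffer() is hypothetical; the real checks are inlined in ggml-rpc.cpp.
#include <cstddef>
#include <cstdint>
#include <cstdio>

// True when [data + offset, data + offset + size) lies inside [p0, p1).
static bool region_in_buffer(uint64_t data, uint64_t offset, size_t size,
                             size_t p0, size_t p1) {
    if (data + offset < p0 || data + offset >= p1) {
        return false;                                // region starts outside the buffer
    }
    return size <= (p1 - data - offset);             // region ends inside the buffer
}

int main() {
    const size_t p0 = 0x1000, p1 = 0x2000;           // a 4 KiB buffer
    std::printf("%d\n", region_in_buffer(0x1000, 0x800, 0x800, p0, p1));  // 1: fits exactly
    std::printf("%d\n", region_in_buffer(0x1000, 0x800, 0x801, p0, p1));  // 0: one byte too long
    return 0;
}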

0 commit comments
