Skip to content

Commit e71d48e

Browse files
Tak-RS and Shinnosuke Takagi authored
ggml-rpc: chunk send()/recv() to avoid EINVAL for very large tensors over RPC (macOS & others) (#15188)
* ggml-rpc: chunk send()/recv() to avoid EINVAL for very large tensors over RPC (macOS & others). Fixes #15055
* ggml-rpc: rename RPC_IO_CHUNK->MAX_CHUNK_SIZE, use std::min() for cap, switch to GGML_LOG_ERROR, handle 0-length send/recv
* rpc: drop n==0 special case in send_data(); retry in loop per review
* rpc: remove trailing whitespace in send_data()

---------

Co-authored-by: Shinnosuke Takagi <[email protected]>
1 parent b049315 commit e71d48e

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,12 @@
2929
#include <cstring>
3030
#include <fstream>
3131
#include <filesystem>
32+
#include <algorithm>
3233

3334
namespace fs = std::filesystem;
3435

36+
static constexpr size_t MAX_CHUNK_SIZE = 1024ull * 1024ull * 1024ull; // 1 GiB
37+
3538
#ifdef _WIN32
3639
typedef SOCKET sockfd_t;
3740
using ssize_t = __int64;
@@ -323,23 +326,33 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
323326
static bool send_data(sockfd_t sockfd, const void * data, size_t size) {
324327
size_t bytes_sent = 0;
325328
while (bytes_sent < size) {
326-
ssize_t n = send(sockfd, (const char *)data + bytes_sent, size - bytes_sent, 0);
329+
size_t size_to_send = std::min(size - bytes_sent, MAX_CHUNK_SIZE);
330+
ssize_t n = send(sockfd, (const char *)data + bytes_sent, size_to_send, 0);
327331
if (n < 0) {
332+
GGML_LOG_ERROR("send failed (bytes_sent=%zu, size_to_send=%zu)\n",
333+
bytes_sent, size_to_send);
328334
return false;
329335
}
330-
bytes_sent += n;
336+
bytes_sent += (size_t)n;
331337
}
332338
return true;
333339
}
334340

335341
static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
336342
size_t bytes_recv = 0;
337343
while (bytes_recv < size) {
338-
ssize_t n = recv(sockfd, (char *)data + bytes_recv, size - bytes_recv, 0);
339-
if (n <= 0) {
344+
size_t size_to_recv = std::min(size - bytes_recv, MAX_CHUNK_SIZE);
345+
ssize_t n = recv(sockfd, (char *)data + bytes_recv, size_to_recv, 0);
346+
if (n < 0) {
347+
GGML_LOG_ERROR("recv failed (bytes_recv=%zu, size_to_recv=%zu)\n",
348+
bytes_recv, size_to_recv);
349+
return false;
350+
}
351+
if (n == 0) {
352+
GGML_LOG_ERROR("recv returned 0 (peer closed?)\n");
340353
return false;
341354
}
342-
bytes_recv += n;
355+
bytes_recv += (size_t)n;
343356
}
344357
return true;
345358
}

0 commit comments

Comments
 (0)