From 0e7aa4eeb3bd29161e8f4435056bc737706a0407 Mon Sep 17 00:00:00 2001 From: Shinnosuke Takagi Date: Sat, 9 Aug 2025 08:57:22 +0900 Subject: [PATCH 1/4] ggml-rpc: chunk send()/recv() to avoid EINVAL for very large tensors over RPC (macOS & others). Fixes #15055 --- ggml/src/ggml-rpc/ggml-rpc.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index df6ba54076d20..92938c5c37a6a 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -32,6 +32,8 @@ namespace fs = std::filesystem; +static constexpr size_t RPC_IO_CHUNK = 1024ull * 1024ull * 1024ull; // 1 GiB + #ifdef _WIN32 typedef SOCKET sockfd_t; using ssize_t = __int64; @@ -323,8 +325,15 @@ static std::shared_ptr create_server_socket(const char * host, int por static bool send_data(sockfd_t sockfd, const void * data, size_t size) { size_t bytes_sent = 0; while (bytes_sent < size) { - ssize_t n = send(sockfd, (const char *)data + bytes_sent, size - bytes_sent, 0); + size_t size_to_send = size - bytes_sent; + if (size_to_send > RPC_IO_CHUNK) size_to_send = RPC_IO_CHUNK; + ssize_t n = send(sockfd, (const char *)data + bytes_sent, size_to_send, 0); if (n < 0) { +#ifndef _WIN32 + perror("send"); +#else + fprintf(stderr, "send failed (bytes_sent=%zu, size_to_send=%zu)\n", bytes_sent, size_to_send); +#endif return false; } bytes_sent += n; @@ -332,11 +341,19 @@ static bool send_data(sockfd_t sockfd, const void * data, size_t size) { return true; } + static bool recv_data(sockfd_t sockfd, void * data, size_t size) { size_t bytes_recv = 0; while (bytes_recv < size) { - ssize_t n = recv(sockfd, (char *)data + bytes_recv, size - bytes_recv, 0); + size_t size_to_recv = size - bytes_recv; + if (size_to_recv > RPC_IO_CHUNK) size_to_recv = RPC_IO_CHUNK; + ssize_t n = recv(sockfd, (char *)data + bytes_recv, size_to_recv, 0); if (n <= 0) { +#ifndef _WIN32 + perror("recv"); +#else + fprintf(stderr, "recv failed (bytes_recv=%zu, size_to_recv=%zu)\n", bytes_recv, size_to_recv); +#endif return false; } bytes_recv += n; @@ -344,6 +361,7 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) { return true; } + static bool send_msg(sockfd_t sockfd, const void * msg, size_t msg_size) { if (!send_data(sockfd, &msg_size, sizeof(msg_size))) { return false; From 829d6b648d8aff6d8dea40d4e2551758ce9c699e Mon Sep 17 00:00:00 2001 From: Tak-RS <209332245+Tak-RS@users.noreply.github.com> Date: Mon, 11 Aug 2025 23:27:13 +0900 Subject: [PATCH 2/4] ggml-rpc: rename RPC_IO_CHUNK->MAX_CHUNK_SIZE, use std::min() for cap, switch to GGML_LOG_ERROR, handle 0-length send/recv --- ggml/src/ggml-rpc/ggml-rpc.cpp | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 92938c5c37a6a..394d631ed9dea 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -29,10 +29,11 @@ #include #include #include +#include namespace fs = std::filesystem; -static constexpr size_t RPC_IO_CHUNK = 1024ull * 1024ull * 1024ull; // 1 GiB +static constexpr size_t MAX_CHUNK_SIZE = 1024ull * 1024ull * 1024ull; // 1 GiB #ifdef _WIN32 typedef SOCKET sockfd_t; @@ -325,43 +326,45 @@ static std::shared_ptr create_server_socket(const char * host, int por static bool send_data(sockfd_t sockfd, const void * data, size_t size) { size_t bytes_sent = 0; while (bytes_sent < size) { - size_t size_to_send = size - bytes_sent; - if (size_to_send > RPC_IO_CHUNK) size_to_send = RPC_IO_CHUNK; + size_t size_to_send = std::min(size - bytes_sent, MAX_CHUNK_SIZE); ssize_t n = send(sockfd, (const char *)data + bytes_sent, size_to_send, 0); if (n < 0) { -#ifndef _WIN32 - perror("send"); -#else - fprintf(stderr, "send failed (bytes_sent=%zu, size_to_send=%zu)\n", bytes_sent, size_to_send); -#endif + GGML_LOG_ERROR("send failed (bytes_sent=%zu, size_to_send=%zu)\n", + bytes_sent, size_to_send); + return false; + } + if (n == 0) { + GGML_LOG_ERROR("send returned 0 (peer closed?)\n"); return false; } - bytes_sent += n; + bytes_sent += (size_t)n; } return true; } + static bool recv_data(sockfd_t sockfd, void * data, size_t size) { size_t bytes_recv = 0; while (bytes_recv < size) { - size_t size_to_recv = size - bytes_recv; - if (size_to_recv > RPC_IO_CHUNK) size_to_recv = RPC_IO_CHUNK; + size_t size_to_recv = std::min(size - bytes_recv, MAX_CHUNK_SIZE); ssize_t n = recv(sockfd, (char *)data + bytes_recv, size_to_recv, 0); - if (n <= 0) { -#ifndef _WIN32 - perror("recv"); -#else - fprintf(stderr, "recv failed (bytes_recv=%zu, size_to_recv=%zu)\n", bytes_recv, size_to_recv); -#endif + if (n < 0) { + GGML_LOG_ERROR("recv failed (bytes_recv=%zu, size_to_recv=%zu)\n", + bytes_recv, size_to_recv); return false; } - bytes_recv += n; + if (n == 0) { + GGML_LOG_ERROR("recv returned 0 (peer closed?)\n"); + return false; + } + bytes_recv += (size_t)n; } return true; } + static bool send_msg(sockfd_t sockfd, const void * msg, size_t msg_size) { if (!send_data(sockfd, &msg_size, sizeof(msg_size))) { return false; From b44560e0562a2d04247bb0810406e85b11d0bab8 Mon Sep 17 00:00:00 2001 From: Tak-RS <209332245+Tak-RS@users.noreply.github.com> Date: Tue, 12 Aug 2025 20:22:46 +0900 Subject: [PATCH 3/4] rpc: drop n==0 special case in send_data(); retry in loop per review --- ggml/src/ggml-rpc/ggml-rpc.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 394d631ed9dea..224abe40e8aa3 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -333,17 +333,11 @@ static bool send_data(sockfd_t sockfd, const void * data, size_t size) { bytes_sent, size_to_send); return false; } - if (n == 0) { - GGML_LOG_ERROR("send returned 0 (peer closed?)\n"); - return false; - } - bytes_sent += (size_t)n; + bytes_sent += (size_t)n; } return true; } - - static bool recv_data(sockfd_t sockfd, void * data, size_t size) { size_t bytes_recv = 0; while (bytes_recv < size) { @@ -363,8 +357,6 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) { return true; } - - static bool send_msg(sockfd_t sockfd, const void * msg, size_t msg_size) { if (!send_data(sockfd, &msg_size, sizeof(msg_size))) { return false; From 7c7c3d9f0b05186082a0a418992cee70f11aa9ed Mon Sep 17 00:00:00 2001 From: Tak-RS <209332245+Tak-RS@users.noreply.github.com> Date: Wed, 13 Aug 2025 00:34:05 +0900 Subject: [PATCH 4/4] rpc: remove trailing whitespace in send_data() --- ggml/src/ggml-rpc/ggml-rpc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 224abe40e8aa3..e84ff93efc32c 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -333,7 +333,7 @@ static bool send_data(sockfd_t sockfd, const void * data, size_t size) { bytes_sent, size_to_send); return false; } - bytes_sent += (size_t)n; + bytes_sent += (size_t)n; } return true; }