Skip to content

Commit cc8fee7

Browse files
committed
rpc : use GGML_LOG_* for logging
Use GGML_LOG_DEBUG() for debug messages and configure the logger to print them only when rpc-server is started with --verbose. Make sure we log a debug message for every server function.
1 parent 459c0c2 commit cc8fee7

File tree

2 files changed

+52
-39
lines changed

2 files changed

+52
-39
lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct socket_t {
4747
sockfd_t fd;
4848
socket_t(sockfd_t fd) : fd(fd) {}
4949
~socket_t() {
50-
GGML_PRINT_DEBUG("[%s] closing socket %d\n", __func__, this->fd);
50+
GGML_LOG_DEBUG("[%s] closing socket %d\n", __func__, this->fd);
5151
#ifdef _WIN32
5252
closesocket(this->fd);
5353
#else
@@ -265,18 +265,19 @@ static std::shared_ptr<socket_t> socket_connect(const char * host, int port) {
265265
return nullptr;
266266
}
267267
if (!set_no_delay(sockfd)) {
268-
fprintf(stderr, "Failed to set TCP_NODELAY\n");
268+
GGML_LOG_ERROR("Failed to set TCP_NODELAY\n");
269269
return nullptr;
270270
}
271271
addr.sin_family = AF_INET;
272272
addr.sin_port = htons(port);
273273
struct hostent * server = gethostbyname(host);
274274
if (server == NULL) {
275-
fprintf(stderr, "Cannot resolve host '%s'\n", host);
275+
GGML_LOG_ERROR("Cannot resolve host '%s'\n", host);
276276
return nullptr;
277277
}
278278
memcpy(&addr.sin_addr.s_addr, server->h_addr, server->h_length);
279279
if (connect(sock_ptr->fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
280+
GGML_LOG_ERROR("Failed to connect to %s:%d\n", host, port);
280281
return nullptr;
281282
}
282283
return sock_ptr;
@@ -289,7 +290,7 @@ static std::shared_ptr<socket_t> socket_accept(sockfd_t srv_sockfd) {
289290
return nullptr;
290291
}
291292
if (!set_no_delay(client_socket_fd)) {
292-
fprintf(stderr, "Failed to set TCP_NODELAY\n");
293+
GGML_LOG_ERROR("Failed to set TCP_NODELAY\n");
293294
return nullptr;
294295
}
295296
return client_socket;
@@ -302,11 +303,11 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
302303
return nullptr;
303304
}
304305
if (!set_reuse_addr(sockfd)) {
305-
fprintf(stderr, "Failed to set SO_REUSEADDR\n");
306+
GGML_LOG_ERROR("Failed to set SO_REUSEADDR\n");
306307
return nullptr;
307308
}
308309
if (inet_addr(host) == INADDR_NONE) {
309-
fprintf(stderr, "Invalid host address: %s\n", host);
310+
GGML_LOG_ERROR("Invalid host address: %s\n", host);
310311
return nullptr;
311312
}
312313
struct sockaddr_in serv_addr;
@@ -349,7 +350,7 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
349350
return false;
350351
}
351352
if (n == 0) {
352-
GGML_LOG_ERROR("recv returned 0 (peer closed?)\n");
353+
GGML_LOG_DEBUG("recv returned 0 (peer closed?)\n");
353354
return false;
354355
}
355356
bytes_recv += (size_t)n;
@@ -383,7 +384,7 @@ static bool recv_msg(sockfd_t sockfd, std::vector<uint8_t> & input) {
383384
try {
384385
input.resize(size);
385386
} catch (const std::bad_alloc & e) {
386-
fprintf(stderr, "Failed to allocate input buffer of size %" PRIu64 "\n", size);
387+
GGML_LOG_ERROR("Failed to allocate input buffer of size %" PRIu64 "\n", size);
387388
return false;
388389
}
389390
return recv_data(sockfd, input.data(), size);
@@ -443,11 +444,11 @@ static bool check_server_version(const std::shared_ptr<socket_t> & sock) {
443444
bool status = send_rpc_cmd(sock, RPC_CMD_HELLO, nullptr, 0, &response, sizeof(response));
444445
RPC_STATUS_ASSERT(status);
445446
if (response.major != RPC_PROTO_MAJOR_VERSION || response.minor > RPC_PROTO_MINOR_VERSION) {
446-
fprintf(stderr, "RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
447+
GGML_LOG_ERROR("RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
447448
return false;
448449
}
449450
if (response.minor != RPC_PROTO_MINOR_VERSION || response.patch != RPC_PROTO_PATCH_VERSION) {
450-
fprintf(stderr, "WARNING: RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
451+
GGML_LOG_INFO("WARNING: RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
451452
}
452453
return true;
453454
}
@@ -488,7 +489,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
488489
if (!check_server_version(sock)) {
489490
return nullptr;
490491
}
491-
GGML_PRINT_DEBUG("[%s] connected to %s, sockfd=%d\n", __func__, endpoint.c_str(), sock->fd);
492+
GGML_LOG_DEBUG("[%s] connected to %s, sockfd=%d\n", __func__, endpoint.c_str(), sock->fd);
492493
sockets[endpoint] = sock;
493494
return sock;
494495
}
@@ -809,7 +810,7 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
809810
}
810811
auto sock = get_socket(endpoint);
811812
if (sock == nullptr) {
812-
fprintf(stderr, "Failed to connect to %s\n", endpoint);
813+
GGML_LOG_ERROR("Failed to connect to %s\n", endpoint);
813814
return nullptr;
814815
}
815816
size_t alignment = get_alignment(sock);
@@ -909,7 +910,7 @@ void rpc_server::hello(rpc_msg_hello_rsp & response) {
909910
response.major = RPC_PROTO_MAJOR_VERSION;
910911
response.minor = RPC_PROTO_MINOR_VERSION;
911912
response.patch = RPC_PROTO_PATCH_VERSION;
912-
GGML_PRINT_DEBUG("[%s] version: %d.%d.%d\n", __func__, response.major, response.minor, response.patch);
913+
GGML_LOG_DEBUG("[%s] version: %d.%d.%d\n", __func__, response.major, response.minor, response.patch);
913914
}
914915

915916
bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) {
@@ -929,15 +930,15 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
929930
GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n");
930931
return false;
931932
}
932-
933+
GGML_LOG_DEBUG("[%s] buffer: %p, data: %p\n", __func__, (void*)tensor->buffer, tensor->data);
933934
if (tensor->buffer == nullptr) {
934935
//No buffer allocated.
935936
buft = ggml_backend_get_default_buffer_type(backend);
936937
} else {
937938
buft = tensor->buffer->buft;
938939
}
939940

940-
response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);
941+
response.alloc_size = ggml_backend_buft_get_alloc_size(buft, tensor);
941942

942943
return true;
943944
}
@@ -950,7 +951,7 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
950951
if (buffer != nullptr) {
951952
response.remote_ptr = reinterpret_cast<uint64_t>(buffer);
952953
response.remote_size = buffer->size;
953-
GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, request.size, response.remote_ptr, response.remote_size);
954+
GGML_LOG_DEBUG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, request.size, response.remote_ptr, response.remote_size);
954955
buffers.insert(buffer);
955956
} else {
956957
GGML_LOG_ERROR("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size);
@@ -960,19 +961,19 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
960961
void rpc_server::get_alignment(rpc_msg_get_alignment_rsp & response) {
961962
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
962963
size_t alignment = ggml_backend_buft_get_alignment(buft);
963-
GGML_PRINT_DEBUG("[%s] alignment: %lu\n", __func__, alignment);
964+
GGML_LOG_DEBUG("[%s] alignment: %lu\n", __func__, alignment);
964965
response.alignment = alignment;
965966
}
966967

967968
void rpc_server::get_max_size(rpc_msg_get_max_size_rsp & response) {
968969
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
969970
size_t max_size = ggml_backend_buft_get_max_size(buft);
970-
GGML_PRINT_DEBUG("[%s] max_size: %lu\n", __func__, max_size);
971+
GGML_LOG_DEBUG("[%s] max_size: %lu\n", __func__, max_size);
971972
response.max_size = max_size;
972973
}
973974

974975
bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response) {
975-
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
976+
GGML_LOG_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
976977
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
977978
if (buffers.find(buffer) == buffers.end()) {
978979
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
@@ -984,7 +985,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
984985
}
985986

986987
bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
987-
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
988+
GGML_LOG_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
988989
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
989990
if (buffers.find(buffer) == buffers.end()) {
990991
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
@@ -996,7 +997,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
996997
}
997998

998999
bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
999-
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
1000+
GGML_LOG_DEBUG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
10001001
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
10011002
if (buffers.find(buffer) == buffers.end()) {
10021003
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
@@ -1073,7 +1074,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
10731074
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
10741075
return false;
10751076
}
1076-
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
1077+
GGML_LOG_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
10771078

10781079
// sanitize tensor->data
10791080
{
@@ -1096,7 +1097,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
10961097
fs::path cache_file = fs::path(cache_dir) / hash_str;
10971098
std::ofstream ofs(cache_file, std::ios::binary);
10981099
ofs.write((const char *)data, size);
1099-
printf("[%s] saved to '%s'\n", __func__, cache_file.c_str());
1100+
GGML_LOG_INFO("[%s] saved to '%s'\n", __func__, cache_file.c_str());
11001101
}
11011102
ggml_backend_tensor_set(tensor, data, offset, size);
11021103
return true;
@@ -1142,7 +1143,7 @@ bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rp
11421143
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
11431144
return false;
11441145
}
1145-
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n",
1146+
GGML_LOG_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n",
11461147
__func__, (void*)tensor->buffer, tensor->data, request.offset, size, request.hash);
11471148

11481149
// sanitize tensor->data
@@ -1177,7 +1178,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
11771178
GGML_LOG_ERROR("Null tensor pointer passed to server init_tensor function.\n");
11781179
return false;
11791180
}
1180-
1181+
GGML_LOG_DEBUG("[%s] buffer: %p, data: %p\n", __func__, (void*)tensor->buffer, tensor->data);
11811182
// Call the backend's buffer_init_tensor function
11821183
ggml_backend_buffer_t buffer = tensor->buffer;
11831184
if (buffer && buffer->iface.init_tensor) {
@@ -1210,7 +1211,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
12101211
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
12111212
return false;
12121213
}
1213-
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
1214+
GGML_LOG_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
12141215

12151216
// sanitize tensor->data
12161217
{
@@ -1254,7 +1255,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
12541255
uint64_t dst_buf_sz = (uint64_t) ggml_backend_buffer_get_size(dst->buffer);
12551256

12561257
if (dst_data + src_size > dst_base + dst_buf_sz) {
1257-
GGML_PRINT_DEBUG("[%s] out-of-bounds write in rpc_server::copy_tensor:\n"
1258+
GGML_LOG_DEBUG("[%s] out-of-bounds write in rpc_server::copy_tensor:\n"
12581259
" write range : [0x%" PRIx64 ", 0x%" PRIx64 "]\n"
12591260
" buffer base: [0x%" PRIx64 ", 0x%" PRIx64 "]\n",
12601261
__func__,
@@ -1265,7 +1266,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
12651266
return false;
12661267
}
12671268

1268-
GGML_PRINT_DEBUG("[%s] src->buffer: %p, dst->buffer: %p\n",
1269+
GGML_LOG_DEBUG("[%s] src->buffer: %p, dst->buffer: %p\n",
12691270
__func__, (void*) src->buffer, (void*) dst->buffer);
12701271

12711272
response.result = ggml_backend_buffer_copy_tensor(src, dst);
@@ -1342,7 +1343,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
13421343
return false;
13431344
}
13441345
const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
1345-
GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
1346+
GGML_LOG_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
13461347

13471348
size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
13481349

@@ -1394,7 +1395,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
13941395
}
13951396
// the first command sent by the client must be HELLO
13961397
if (cmd != RPC_CMD_HELLO) {
1397-
fprintf(stderr, "Expected HELLO command, update client\n");
1398+
GGML_LOG_ERROR("Expected HELLO command, update client\n");
13981399
return;
13991400
}
14001401
if (!recv_msg(sockfd, nullptr, 0)) {
@@ -1411,7 +1412,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
14111412
}
14121413
if (cmd >= RPC_CMD_COUNT) {
14131414
// fail fast if the command is invalid
1414-
fprintf(stderr, "Unknown command: %d\n", cmd);
1415+
GGML_LOG_ERROR("Unknown command: %d\n", cmd);
14151416
break;
14161417
}
14171418
switch (cmd) {
@@ -1599,7 +1600,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
15991600
break;
16001601
}
16011602
default: {
1602-
fprintf(stderr, "Unknown command: %d\n", cmd);
1603+
GGML_LOG_ERROR("Unknown command: %d\n", cmd);
16031604
return;
16041605
}
16051606
}

tools/rpc/rpc-server.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ static std::string fs_get_cache_directory() {
131131
}
132132

133133
struct rpc_server_params {
134+
bool verbose = false;
134135
std::string host = "127.0.0.1";
135136
int port = 50052;
136137
size_t backend_mem = 0;
@@ -143,20 +144,23 @@ static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
143144
fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
144145
fprintf(stderr, "options:\n");
145146
fprintf(stderr, " -h, --help show this help message and exit\n");
146-
fprintf(stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n", params.n_threads);
147-
fprintf(stderr, " -d DEV, --device device to use\n");
148-
fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
149-
fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
150-
fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
151-
fprintf(stderr, " -c, --cache enable local file cache\n");
147+
fprintf(stderr, " -v, --verbose increase verbosity level\n");
148+
fprintf(stderr, " -t, --threads N number of threads for the CPU backend (default: %d)\n", params.n_threads);
149+
fprintf(stderr, " -d, --device DEV device to use\n");
150+
fprintf(stderr, " -H, --host HOST host to bind to (default: %s)\n", params.host.c_str());
151+
fprintf(stderr, " -p, --port PORT port to bind to (default: %d)\n", params.port);
152+
fprintf(stderr, " -m, --mem MEM backend memory size (in MB)\n");
153+
fprintf(stderr, " -c, --cache enable local file cache\n");
152154
fprintf(stderr, "\n");
153155
}
154156

155157
static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & params) {
156158
std::string arg;
157159
for (int i = 1; i < argc; i++) {
158160
arg = argv[i];
159-
if (arg == "-H" || arg == "--host") {
161+
if (arg == "-v" || arg == "--verbose") {
162+
params.verbose = true;
163+
} else if (arg == "-H" || arg == "--host") {
160164
if (++i >= argc) {
161165
return false;
162166
}
@@ -263,6 +267,14 @@ int main(int argc, char * argv[]) {
263267
return 1;
264268
}
265269

270+
ggml_log_set([](enum ggml_log_level level, const char * text, void * user_data) {
271+
auto verbose = *static_cast<bool *>(user_data);
272+
if (verbose || level >= GGML_LOG_LEVEL_INFO) {
273+
fputs(text, stderr);
274+
fflush(stderr);
275+
}
276+
}, &params.verbose);
277+
266278
if (params.host != "127.0.0.1") {
267279
fprintf(stderr, "\n");
268280
fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");

0 commit comments

Comments
 (0)