3333#include  < filesystem> 
3434#include  < algorithm> 
3535
36+ static  const  char  * RPC_DEBUG = std::getenv(" GGML_RPC_DEBUG" 
37+ 
38+ #define  LOG_DBG (...) \
39+     do  { if  (RPC_DEBUG) GGML_LOG_DEBUG (__VA_ARGS__); } while  (0 )
40+ 
41+ 
3642namespace  fs  =  std::filesystem;
3743
3844static  constexpr  size_t  MAX_CHUNK_SIZE = 1024ull  * 1024ull  * 1024ull ; //  1 GiB
@@ -49,7 +55,7 @@ struct socket_t {
4955    sockfd_t  fd;
5056    socket_t (sockfd_t  fd) : fd(fd) {}
5157    ~socket_t () {
52-         GGML_PRINT_DEBUG (" [%s] closing socket %d\n " this ->fd );
58+         LOG_DBG (" [%s] closing socket %d\n " this ->fd );
5359#ifdef  _WIN32
5460        closesocket (this ->fd );
5561#else 
@@ -277,14 +283,14 @@ static std::shared_ptr<socket_t> socket_connect(const char * host, int port) {
277283        return  nullptr ;
278284    }
279285    if  (!set_no_delay (sockfd)) {
280-         fprintf (stderr,  " Failed to set TCP_NODELAY\n " 
286+         GGML_LOG_ERROR ( " Failed to set TCP_NODELAY\n " 
281287        return  nullptr ;
282288    }
283289    addr.sin_family  = AF_INET;
284290    addr.sin_port  = htons (port);
285291    struct  hostent  * server = gethostbyname (host);
286292    if  (server == NULL ) {
287-         fprintf (stderr,  " Cannot resolve host '%s'\n " 
293+         GGML_LOG_ERROR ( " Cannot resolve host '%s'\n " 
288294        return  nullptr ;
289295    }
290296    memcpy (&addr.sin_addr .s_addr , server->h_addr , server->h_length );
@@ -301,7 +307,7 @@ static std::shared_ptr<socket_t> socket_accept(sockfd_t srv_sockfd) {
301307        return  nullptr ;
302308    }
303309    if  (!set_no_delay (client_socket_fd)) {
304-         fprintf (stderr,  " Failed to set TCP_NODELAY\n " 
310+         GGML_LOG_ERROR ( " Failed to set TCP_NODELAY\n " 
305311        return  nullptr ;
306312    }
307313    return  client_socket;
@@ -314,11 +320,11 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
314320        return  nullptr ;
315321    }
316322    if  (!set_reuse_addr (sockfd)) {
317-         fprintf (stderr,  " Failed to set SO_REUSEADDR\n " 
323+         GGML_LOG_ERROR ( " Failed to set SO_REUSEADDR\n " 
318324        return  nullptr ;
319325    }
320326    if  (inet_addr (host) == INADDR_NONE) {
321-         fprintf (stderr,  " Invalid host address: %s\n " 
327+         GGML_LOG_ERROR ( " Invalid host address: %s\n " 
322328        return  nullptr ;
323329    }
324330    struct  sockaddr_in  serv_addr;
@@ -361,7 +367,7 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
361367            return  false ;
362368        }
363369        if  (n == 0 ) {
364-             GGML_LOG_ERROR (" recv returned 0 (peer closed?)\n " 
370+             LOG_DBG (" recv returned 0 (peer closed?)\n " 
365371            return  false ;
366372        }
367373        bytes_recv += (size_t )n;
@@ -395,7 +401,7 @@ static bool recv_msg(sockfd_t sockfd, std::vector<uint8_t> & input) {
395401    try  {
396402        input.resize (size);
397403    } catch  (const  std::bad_alloc & e) {
398-         fprintf (stderr,  " Failed to allocate input buffer of size %" " \n " 
404+         GGML_LOG_ERROR ( " Failed to allocate input buffer of size %" " \n " 
399405        return  false ;
400406    }
401407    return  recv_data (sockfd, input.data (), size);
@@ -479,11 +485,11 @@ static bool check_server_version(const std::shared_ptr<socket_t> & sock) {
479485    status = send_rpc_cmd (sock, RPC_CMD_HELLO, nullptr , 0 , &response, sizeof (response));
480486    RPC_STATUS_ASSERT (status);
481487    if  (response.major  != RPC_PROTO_MAJOR_VERSION || response.minor  > RPC_PROTO_MINOR_VERSION) {
482-         fprintf (stderr,  " RPC server version mismatch: %d.%d.%d\n " major , response.minor , response.patch );
488+         GGML_LOG_ERROR ( " RPC server version mismatch: %d.%d.%d\n " major , response.minor , response.patch );
483489        return  false ;
484490    }
485491    if  (response.minor  != RPC_PROTO_MINOR_VERSION || response.patch  != RPC_PROTO_PATCH_VERSION) {
486-         fprintf (stderr,  " WARNING: RPC server version mismatch: %d.%d.%d\n " major , response.minor , response.patch );
492+         GGML_LOG_INFO ( " WARNING: RPC server version mismatch: %d.%d.%d\n " major , response.minor , response.patch );
487493    }
488494    return  true ;
489495}
@@ -524,7 +530,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
524530    if  (!check_server_version (sock)) {
525531        return  nullptr ;
526532    }
527-     GGML_PRINT_DEBUG (" [%s] connected to %s, sockfd=%d\n " c_str (), sock->fd );
533+     LOG_DBG (" [%s] connected to %s, sockfd=%d\n " c_str (), sock->fd );
528534    sockets[endpoint] = sock;
529535    return  sock;
530536}
@@ -845,7 +851,7 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
845851    }
846852    auto  sock = get_socket (endpoint);
847853    if  (sock == nullptr ) {
848-         fprintf (stderr,  " Failed to connect to %s\n " 
854+         GGML_LOG_ERROR ( " Failed to connect to %s\n " 
849855        return  nullptr ;
850856    }
851857    size_t  alignment = get_alignment (sock);
@@ -954,7 +960,7 @@ void rpc_server::hello(rpc_msg_hello_rsp & response) {
954960    response.major  = RPC_PROTO_MAJOR_VERSION;
955961    response.minor  = RPC_PROTO_MINOR_VERSION;
956962    response.patch  = RPC_PROTO_PATCH_VERSION;
957-     GGML_PRINT_DEBUG (" [%s] version: %d.%d.%d\n " major , response.minor , response.patch );
963+     LOG_DBG (" [%s] version: %d.%d.%d\n " major , response.minor , response.patch );
958964}
959965
960966bool  rpc_server::get_alloc_size (const  rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) {
@@ -974,15 +980,15 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
974980        GGML_LOG_ERROR (" Null tensor pointer passed to server get_alloc_size function.\n " 
975981        return  false ;
976982    }
977- 
983+      LOG_DBG ( " [%s] buffer: %p, data: %p \n " , __func__, ( void *)tensor-> buffer , tensor-> data ); 
978984    if  (tensor->buffer  == nullptr ) {
979985        // No buffer allocated.
980986        buft = ggml_backend_get_default_buffer_type (backend);
981987    } else  {
982988        buft = tensor->buffer ->buft ;
983989    }
984990
985-     response.alloc_size  = ggml_backend_buft_get_alloc_size (buft,tensor);
991+     response.alloc_size  = ggml_backend_buft_get_alloc_size (buft,  tensor);
986992
987993    return  true ;
988994}
@@ -996,30 +1002,30 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
9961002        uint64_t  rpk = random_id ();
9971003        response.remote_ptr  = rpk;
9981004        response.remote_size  = buffer->size ;
999-         GGML_PRINT_DEBUG (" [%s] size: %" "  -> handle: %" " , remote_size: %" " \n " 
1005+         LOG_DBG (" [%s] size: %" "  -> handle: %" " , remote_size: %" " \n " 
10001006                         __func__, request.size , rpk, response.remote_size );
10011007        buffers[rpk] = buffer;
10021008    } else  {
1003-         GGML_LOG_ERROR (" [%s] size: %" "  -> failed\n " size );
1009+         LOG_DBG (" [%s] size: %" "  -> failed\n " size );
10041010    }
10051011}
10061012
10071013void  rpc_server::get_alignment (rpc_msg_get_alignment_rsp & response) {
10081014    ggml_backend_buffer_type_t  buft = ggml_backend_get_default_buffer_type (backend);
10091015    size_t  alignment = ggml_backend_buft_get_alignment (buft);
1010-     GGML_PRINT_DEBUG (" [%s] alignment: %lu\n " 
1016+     LOG_DBG (" [%s] alignment: %lu\n " 
10111017    response.alignment  = alignment;
10121018}
10131019
10141020void  rpc_server::get_max_size (rpc_msg_get_max_size_rsp & response) {
10151021    ggml_backend_buffer_type_t  buft = ggml_backend_get_default_buffer_type (backend);
10161022    size_t  max_size = ggml_backend_buft_get_max_size (buft);
1017-     GGML_PRINT_DEBUG (" [%s] max_size: %lu\n " 
1023+     LOG_DBG (" [%s] max_size: %lu\n " 
10181024    response.max_size  = max_size;
10191025}
10201026
10211027bool  rpc_server::buffer_get_base (const  rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response) {
1022-     GGML_PRINT_DEBUG (" [%s] remote_ptr: %" " \n " remote_ptr );
1028+     LOG_DBG (" [%s] remote_ptr: %" " \n " remote_ptr );
10231029    auto  it = buffers.find (request.remote_ptr );
10241030    if  (it == buffers.end ()) {
10251031        GGML_LOG_ERROR (" [%s] buffer handle not found: %" " \n " remote_ptr );
@@ -1032,7 +1038,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
10321038}
10331039
10341040bool  rpc_server::free_buffer (const  rpc_msg_free_buffer_req & request) {
1035-     GGML_PRINT_DEBUG (" [%s] remote_ptr: %" " \n " remote_ptr );
1041+     LOG_DBG (" [%s] remote_ptr: %" " \n " remote_ptr );
10361042    auto  it = buffers.find (request.remote_ptr );
10371043    if  (it == buffers.end ()) {
10381044        GGML_LOG_ERROR (" [%s] buffer handle not found: %" " \n " remote_ptr );
@@ -1045,7 +1051,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
10451051}
10461052
10471053bool  rpc_server::buffer_clear (const  rpc_msg_buffer_clear_req & request) {
1048-     GGML_PRINT_DEBUG (" [%s] remote_ptr: %" " , value: %u\n " remote_ptr , request.value );
1054+     LOG_DBG (" [%s] remote_ptr: %" " , value: %u\n " remote_ptr , request.value );
10491055    auto  it = buffers.find (request.remote_ptr );
10501056    if  (it == buffers.end ()) {
10511057        GGML_LOG_ERROR (" [%s] buffer handle not found: %" " \n " remote_ptr );
@@ -1126,7 +1132,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
11261132        GGML_LOG_ERROR (" [%s] error deserializing tensor\n " 
11271133        return  false ;
11281134    }
1129-     GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" " , size: %zu\n " void *)tensor->buffer , tensor->data , offset, size);
1135+     LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" " , size: %zu\n " void *)tensor->buffer , tensor->data , offset, size);
11301136
11311137    //  sanitize tensor->data
11321138    {
@@ -1149,7 +1155,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
11491155        fs::path cache_file = fs::path (cache_dir) / hash_str;
11501156        std::ofstream ofs (cache_file, std::ios::binary);
11511157        ofs.write ((const  char  *)data, size);
1152-         printf (" [%s] saved to '%s'\n " c_str ());
1158+         GGML_LOG_INFO (" [%s] saved to '%s'\n " c_str ());
11531159    }
11541160    ggml_backend_tensor_set (tensor, data, offset, size);
11551161    return  true ;
@@ -1195,8 +1201,8 @@ bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rp
11951201        GGML_LOG_ERROR (" [%s] error deserializing tensor\n " 
11961202        return  false ;
11971203    }
1198-     GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" " , size: %zu, hash: %" " \n " 
1199-         __func__, (void *)tensor->buffer , tensor->data , request.offset , size, request.hash );
1204+     LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" " , size: %zu, hash: %" " \n " 
1205+              __func__, (void *)tensor->buffer , tensor->data , request.offset , size, request.hash );
12001206
12011207    //  sanitize tensor->data
12021208    {
@@ -1230,7 +1236,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
12301236        GGML_LOG_ERROR (" Null tensor pointer passed to server init_tensor function.\n " 
12311237        return  false ;
12321238    }
1233- 
1239+      LOG_DBG ( " [%s] buffer: %p, data: %p \n " , __func__, ( void *)tensor-> buffer , tensor-> data ); 
12341240    //  Call the backend's buffer_init_tensor function
12351241    ggml_backend_buffer_t  buffer = tensor->buffer ;
12361242    if  (buffer && buffer->iface .init_tensor ) {
@@ -1263,7 +1269,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
12631269        GGML_LOG_ERROR (" [%s] error deserializing tensor\n " 
12641270        return  false ;
12651271    }
1266-     GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" " , size: %" " \n " void *)tensor->buffer , tensor->data , request.offset , request.size );
1272+     LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" " , size: %" " \n " void *)tensor->buffer , tensor->data , request.offset , request.size );
12671273
12681274    //  sanitize tensor->data
12691275    {
@@ -1307,7 +1313,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
13071313    uint64_t  dst_buf_sz = (uint64_t ) ggml_backend_buffer_get_size(dst->buffer);
13081314
13091315    if  (dst_data + src_size > dst_base + dst_buf_sz) {
1310-         GGML_PRINT_DEBUG (" [%s] out-of-bounds write in rpc_server::copy_tensor:\n " 
1316+         GGML_LOG_ERROR (" [%s] out-of-bounds write in rpc_server::copy_tensor:\n " 
13111317                         "     write range : [0x%" " , 0x%" " ]\n " 
13121318                         "     buffer base: [0x%" " , 0x%" " ]\n " 
13131319                         __func__,
@@ -1318,8 +1324,8 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
13181324        return  false ;
13191325    }
13201326
1321-     GGML_PRINT_DEBUG (" [%s] src->buffer: %p, dst->buffer: %p\n " 
1322-                       __func__, (void *) src->buffer, (void *) dst->buffer);
1327+     LOG_DBG (" [%s] src->buffer: %p, dst->buffer: %p\n " 
1328+             __func__, (void *) src->buffer, (void *) dst->buffer);
13231329
13241330    response.result = ggml_backend_buffer_copy_tensor(src, dst);
13251331    return  true ;
@@ -1395,7 +1401,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
13951401        return  false ;
13961402    }
13971403    const  rpc_tensor * tensors = (const  rpc_tensor *)(input.data () + sizeof (n_nodes) + n_nodes*sizeof (uint64_t ) + sizeof (n_tensors));
1398-     GGML_PRINT_DEBUG (" [%s] n_nodes: %u, n_tensors: %u\n " 
1404+     LOG_DBG (" [%s] n_nodes: %u, n_tensors: %u\n " 
13991405
14001406    size_t  buf_size = ggml_tensor_overhead ()*(n_nodes + n_tensors) + ggml_graph_overhead_custom (n_nodes, false );
14011407
@@ -1518,7 +1524,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
15181524    }
15191525    //  the second command sent by the client must be HELLO
15201526    if  (cmd != RPC_CMD_HELLO) {
1521-         fprintf (stderr,  " Expected HELLO command, update client\n " 
1527+         GGML_LOG_ERROR ( " Expected HELLO command, update client\n " 
15221528        return ;
15231529    }
15241530    if  (!recv_msg (sockfd, nullptr , 0 )) {
@@ -1535,7 +1541,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
15351541        }
15361542        if  (cmd >= RPC_CMD_COUNT) {
15371543            //  fail fast if the command is invalid
1538-             fprintf (stderr,  " Unknown command: %d\n " 
1544+             GGML_LOG_ERROR ( " Unknown command: %d\n " 
15391545            break ;
15401546        }
15411547        switch  (cmd) {
@@ -1723,7 +1729,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
17231729                break ;
17241730            }
17251731            default : {
1726-                 fprintf (stderr,  " Unknown command: %d\n " 
1732+                 GGML_LOG_ERROR ( " Unknown command: %d\n " 
17271733                return ;
17281734            }
17291735        }
0 commit comments