@@ -683,7 +683,7 @@ struct cmd_params_instance {
683683 bool cpu_strict;
684684 int poll;
685685 int n_gpu_layers;
686- std::string rpc_servers ;
686+ std::string rpc_servers_str ;
687687 llama_split_mode split_mode;
688688 int main_gpu;
689689 bool no_kv_offload;
@@ -696,8 +696,45 @@ struct cmd_params_instance {
696696 llama_model_params mparams = llama_model_default_params ();
697697
698698 mparams.n_gpu_layers = n_gpu_layers;
699- if (!rpc_servers.empty ()) {
700- mparams.rpc_servers = rpc_servers.c_str ();
699+ if (!rpc_servers_str.empty ()) {
700+ std::vector<std::string> rpc_servers;
701+ std::string servers (rpc_servers_str);
702+ size_t pos = 0 ;
703+ while ((pos = servers.find (' ,' )) != std::string::npos) {
704+ std::string server = servers.substr (0 , pos);
705+ rpc_servers.push_back (server);
706+ servers.erase (0 , pos + 1 );
707+ }
708+ rpc_servers.push_back (servers);
709+
710+ // add RPC devices
711+ if (!rpc_servers.empty ()) {
712+ ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name (" RPC" );
713+ if (!rpc_reg) {
714+ fprintf (stderr, " %s: failed to find RPC backend\n " , __func__);
715+ exit (1 );
716+ }
717+
718+ typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t )(const char * endpoint);
719+ ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t ) ggml_backend_reg_get_proc_address (rpc_reg, " ggml_backend_rpc_add_device" );
720+ if (!ggml_backend_rpc_add_device_fn) {
721+ fprintf (stderr, " %s: failed to find RPC device add function\n " , __func__);
722+ exit (1 );
723+ }
724+ static std::vector<ggml_backend_dev_t > devices;
725+ devices.clear ();
726+ for (const std::string & server : rpc_servers) {
727+ ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn (server.c_str ());
728+ if (dev) {
729+ devices.push_back (dev);
730+ } else {
731+ fprintf (stderr, " %s: failed to add RPC device for server '%s'\n " , __func__, server.c_str ());
732+ exit (1 );
733+ }
734+ }
735+ devices.push_back (nullptr );
736+ mparams.devices = devices.data ();
737+ }
701738 }
702739 mparams.split_mode = split_mode;
703740 mparams.main_gpu = main_gpu;
@@ -708,7 +745,7 @@ struct cmd_params_instance {
708745 }
709746
// equal_mparams: reports whether this instance and `other` hold identical values for
// model, n_gpu_layers, the RPC server list string, split_mode, main_gpu, use_mmap and
// tensor_split. All seven comparisons are AND-ed, so a single mismatch yields false.
// NOTE(review): presumably used to decide whether an already-loaded model can be kept
// between consecutive runs -- the caller is not visible in this hunk, confirm.
// The -/+ pair below is the only change to this function in the diff: the compared
// member goes from rpc_servers to rpc_servers_str, following the field rename in the
// first hunk.
710747 bool equal_mparams (const cmd_params_instance & other) const {
711- return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers &&
748+ return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
712749 split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
713750 tensor_split == other.tensor_split ;
714751 }
0 commit comments