    bool cpu_strict;
    int poll;
    int n_gpu_layers;
    // Raw comma-separated RPC endpoint list (e.g. "host1:port,host2:port").
    // Kept unsplit so instances can be compared cheaply in equal_mparams();
    // it is parsed only when the model params are actually built.
    std::string rpc_servers_str;
    llama_split_mode split_mode;
    int main_gpu;
    bool no_kv_offload;
        llama_model_params mparams = llama_model_default_params();

        mparams.n_gpu_layers = n_gpu_layers;
        if (!rpc_servers_str.empty()) {
            // Split the raw endpoint string on commas into individual servers.
            auto rpc_servers = string_split<std::string>(rpc_servers_str, ',');

            // add RPC devices
            if (!rpc_servers.empty()) {
                ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
                if (!rpc_reg) {
                    fprintf(stderr, "%s: failed to find RPC backend\n", __func__);
                    exit(1);
                }

                // The RPC backend may be loaded dynamically, so resolve its
                // device-add entry point through the registry at runtime
                // instead of calling it directly.
                typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
                ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
                if (!ggml_backend_rpc_add_device_fn) {
                    fprintf(stderr, "%s: failed to find RPC device add function\n", __func__);
                    exit(1);
                }
                // static: mparams.devices stores a raw pointer into this vector,
                // so its storage must outlive this call.
                // NOTE(review): not thread-safe, and each call clears the list
                // handed out by the previous one — presumably instances are
                // created serially; confirm against callers.
                static std::vector<ggml_backend_dev_t> devices;
                devices.clear();
                for (const std::string & server : rpc_servers) {
                    ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
                    if (dev) {
                        devices.push_back(dev);
                    } else {
                        // Any unreachable endpoint is fatal: a partial device
                        // list would silently benchmark the wrong configuration.
                        fprintf(stderr, "%s: failed to add RPC device for server '%s'\n", __func__, server.c_str());
                        exit(1);
                    }
                }
                // devices must be nullptr-terminated for llama_model_params.
                devices.push_back(nullptr);
                mparams.devices = devices.data();
            }
        }
        mparams.split_mode = split_mode;
        mparams.main_gpu = main_gpu;
@@ -708,7 +737,7 @@ struct cmd_params_instance {
708737 }
709738
710739 bool equal_mparams (const cmd_params_instance & other) const {
711- return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers &&
740+ return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
712741 split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
713742 tensor_split == other.tensor_split ;
714743 }
0 commit comments