Diff summary: 2 files changed, +13 lines added, -1 line removed.
lines changed Original file line number Diff line number Diff line change @@ -1303,10 +1303,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
13031303 const int act_gpu_layers = devices.empty () ? 0 : std::min (n_gpu_layers, (int )n_layer + 1 );
13041304 auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
13051305 if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
1306+ LLAMA_LOG_DEBUG (" load_tensors: layer %3d assigned to device %s\n " , il, ggml_backend_dev_name (cpu_dev));
13061307 return {cpu_dev, &pimpl->cpu_buft_list };
13071308 }
13081309 const int layer_gpu = std::upper_bound (splits.begin (), splits.begin () + n_devices (), float (il - i_gpu_start)/act_gpu_layers) - splits.begin ();
13091310 auto * dev = devices.at (layer_gpu);
1311+ LLAMA_LOG_DEBUG (" load_tensors: layer %3d assigned to device %s\n " , il, ggml_backend_dev_name (dev));
13101312 return {dev, &pimpl->gpu_buft_list .at (dev)};
13111313 };
13121314
Original file line number Diff line number Diff line change @@ -9405,6 +9405,7 @@ static struct llama_model * llama_model_load_from_file_impl(
94059405 model->devices .push_back (*dev);
94069406 }
94079407 } else {
9408+ std::vector<ggml_backend_dev_t > rpc_servers;
94089409 // use all available devices
94099410 for (size_t i = 0 ; i < ggml_backend_dev_count (); ++i) {
94109411 ggml_backend_dev_t dev = ggml_backend_dev_get (i);
@@ -9415,10 +9416,19 @@ static struct llama_model * llama_model_load_from_file_impl(
94159416 break ;
94169417
94179418 case GGML_BACKEND_DEVICE_TYPE_GPU:
9418- model->devices .push_back (dev);
9419+ ggml_backend_reg_t reg = ggml_backend_dev_backend_reg (dev);
9420+ if (ggml_backend_reg_name (reg) == std::string (" RPC" )) {
9421+ rpc_servers.push_back (dev);
9422+ } else {
9423+ model->devices .push_back (dev);
9424+ }
94199425 break ;
94209426 }
94219427 }
9428+ // add RPC servers at the front of the list
9429+ if (!rpc_servers.empty ()) {
9430+ model->devices .insert (model->devices .begin (), rpc_servers.begin (), rpc_servers.end ());
9431+ }
94229432 }
94239433
94249434 // if using single GPU mode, remove all except the main GPU
0 commit comments.