@@ -211,34 +211,6 @@ static std::vector<ggml_backend_dev_t> register_rpc_device_list(const std::strin
     return devices;
 }
 
-[[noreturn]] static void print_available_devices_and_exit() {
-    std::vector<ggml_backend_dev_t> devices;
-    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
-        auto * dev = ggml_backend_dev_get(i);
-        auto ty = ggml_backend_dev_type(dev);
-        if (ty == GGML_BACKEND_DEVICE_TYPE_CPU) {
-            continue;
-        }
-        devices.push_back(dev);
-    }
-
-    printf("Available devices:\n");
-    if (devices.empty()) {
-        printf("  (none)\n");
-    }
-    for (auto * dev : devices) {
-        size_t free = 0;
-        size_t total = 0;
-        ggml_backend_dev_memory(dev, &free, &total);
-        printf("  %s: %s (%zu MiB, %zu MiB free)\n",
-               ggml_backend_dev_name(dev),
-               ggml_backend_dev_description(dev),
-               total / 1024 / 1024,
-               free / 1024 / 1024);
-    }
-    exit(0);
-}
-
 static std::string devices_to_string(const std::vector<ggml_backend_dev_t> & devices) {
     if (devices.empty()) {
         return "auto";
@@ -375,8 +347,6 @@ struct cmd_params {
     std::vector<int> poll;
     std::vector<int> n_gpu_layers;
    std::vector<int> n_cpu_moe;
-    std::vector<std::string> rpc_servers;
-    std::vector<std::vector<ggml_backend_dev_t>> rpc_device_sets;
     std::vector<llama_split_mode> split_mode;
     std::vector<int> main_gpu;
     std::vector<bool> no_kv_offload;
@@ -396,7 +366,6 @@ struct cmd_params {
     bool no_warmup;
     output_formats output_format;
     output_formats output_format_stderr;
-    bool list_devices;
 };
 
 static const cmd_params cmd_params_defaults = {
@@ -415,13 +384,11 @@ static const cmd_params cmd_params_defaults = {
     /* poll */ { 50 },
     /* n_gpu_layers */ { 99 },
     /* n_cpu_moe */ { 0 },
-    /* rpc_servers */ { "" },
-    /* rpc_device_sets */ { std::vector<ggml_backend_dev_t>() },
     /* split_mode */ { LLAMA_SPLIT_MODE_LAYER },
     /* main_gpu */ { 0 },
     /* no_kv_offload */ { false },
     /* flash_attn */ { false },
-    /* devices */ { std::vector<ggml_backend_dev_t>() },
+    /* devices */ { {} },
     /* tensor_split */ { std::vector<float>(llama_max_devices(), 0.0f) },
     /* tensor_buft_overrides*/ { std::vector<llama_model_tensor_buft_override>{ { nullptr, nullptr } } },
     /* use_mmap */ { true },
@@ -436,7 +403,6 @@ static const cmd_params cmd_params_defaults = {
     /* no_warmup */ false,
     /* output_format */ MARKDOWN,
     /* output_format_stderr */ NONE,
-    /* list_devices */ false,
 };
 
 static void print_usage(int /* argc */, char ** argv) {
@@ -459,6 +425,9 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("  -v, --verbose                         verbose output\n");
     printf("  --progress                            print test progress indicators\n");
     printf("  --no-warmup                           skip warmup runs before benchmarking\n");
+    if (llama_supports_rpc()) {
+        printf("  -rpc, --rpc <rpc_servers>             register RPC devices (comma separated)\n");
+    }
     printf("\n");
     printf("test parameters:\n");
     printf("  -m, --model <filename>                (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
@@ -488,10 +457,6 @@ static void print_usage(int /* argc */, char ** argv) {
            join(cmd_params_defaults.n_gpu_layers, ",").c_str());
     printf("  -ncmoe, --n-cpu-moe <n>               (default: %s)\n",
            join(cmd_params_defaults.n_cpu_moe, ",").c_str());
-    if (llama_supports_rpc()) {
-        printf("  -rpc, --rpc <rpc_servers>             (default: %s)\n",
-               join(cmd_params_defaults.rpc_servers, ",").c_str());
-    }
     printf("  -sm, --split-mode <none|layer|row>    (default: %s)\n",
            join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
     printf("  -mg, --main-gpu <i>                   (default: %s)\n",
@@ -561,7 +526,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     params.delay = cmd_params_defaults.delay;
     params.progress = cmd_params_defaults.progress;
     params.no_warmup = cmd_params_defaults.no_warmup;
-    params.list_devices = cmd_params_defaults.list_devices;
 
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
@@ -676,7 +640,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 try {
                     params.devices.push_back(parse_devices_arg(combo));
                 } catch (const std::exception & e) {
-                    fprintf(stderr, "error: %s\\n", e.what());
+                    fprintf(stderr, "error: %s\n", e.what());
                     invalid_param = true;
                     break;
                 }
@@ -685,7 +649,23 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 break;
             }
         } else if (arg == "--list-devices") {
-            params.list_devices = true;
+            std::vector<ggml_backend_dev_t> devices;
+            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
+                auto * dev = ggml_backend_dev_get(i);
+                if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) {
+                    devices.push_back(dev);
+                }
+            }
+            printf("Available devices:\n");
+            if (devices.empty()) {
+                printf("  (none)\n");
+            }
+            for (auto * dev : devices) {
+                size_t free, total;
+                ggml_backend_dev_memory(dev, &free, &total);
+                printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+            }
+            exit(0);
         } else if (arg == "-t" || arg == "--threads") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -734,9 +714,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 break;
             }
             try {
-                auto devices = register_rpc_device_list(argv[i]);
-                params.rpc_servers.push_back(argv[i]);
-                params.rpc_device_sets.push_back(devices);
+                register_rpc_device_list(argv[i]);
             } catch (const std::exception & e) {
                 fprintf(stderr, "error: %s\n", e.what());
                 invalid_param = true;
@@ -1016,12 +994,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     if (params.n_cpu_moe.empty()) {
         params.n_cpu_moe = cmd_params_defaults.n_cpu_moe;
     }
-    if (params.rpc_servers.empty()) {
-        params.rpc_servers = cmd_params_defaults.rpc_servers;
-    }
-    if (params.rpc_device_sets.empty()) {
-        params.rpc_device_sets = cmd_params_defaults.rpc_device_sets;
-    }
     if (params.split_mode.empty()) {
         params.split_mode = cmd_params_defaults.split_mode;
     }
@@ -1083,8 +1055,6 @@ struct cmd_params_instance {
     int poll;
     int n_gpu_layers;
     int n_cpu_moe;
-    std::string rpc_servers_str;
-    std::vector<ggml_backend_dev_t> rpc_devices;
     llama_split_mode split_mode;
     int main_gpu;
     bool no_kv_offload;
@@ -1102,24 +1072,6 @@ struct cmd_params_instance {
         mparams.n_gpu_layers = n_gpu_layers;
         if (!devices.empty()) {
             mparams.devices = const_cast<ggml_backend_dev_t *>(devices.data());
-        } else if (!rpc_devices.empty()) {
-            static std::vector<ggml_backend_dev_t> merged_devices;
-            merged_devices.clear();
-            merged_devices.insert(merged_devices.end(), rpc_devices.begin(), rpc_devices.end());
-
-            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
-                ggml_backend_dev_t dev = ggml_backend_dev_get(i);
-                auto dev_type = ggml_backend_dev_type(dev);
-                if (dev_type == GGML_BACKEND_DEVICE_TYPE_CPU || dev_type == GGML_BACKEND_DEVICE_TYPE_ACCEL) {
-                    continue;
-                }
-                if (std::find(merged_devices.begin(), merged_devices.end(), dev) == merged_devices.end()) {
-                    merged_devices.push_back(dev);
-                }
-            }
-
-            merged_devices.push_back(nullptr);
-            mparams.devices = merged_devices.data();
         }
         mparams.split_mode = split_mode;
         mparams.main_gpu = main_gpu;
@@ -1167,7 +1119,7 @@ struct cmd_params_instance {
 
     bool equal_mparams(const cmd_params_instance & other) const {
         return model == other.model && n_gpu_layers == other.n_gpu_layers && n_cpu_moe == other.n_cpu_moe &&
-               rpc_servers_str == other.rpc_servers_str && rpc_devices == other.rpc_devices && split_mode == other.split_mode &&
+               split_mode == other.split_mode &&
               main_gpu == other.main_gpu && use_mmap == other.use_mmap && tensor_split == other.tensor_split &&
               devices == other.devices &&
               vec_tensor_buft_override_equal(tensor_buft_overrides, other.tensor_buft_overrides);
@@ -1199,7 +1151,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
     for (const auto & m : params.model)
     for (const auto & nl : params.n_gpu_layers)
     for (const auto & ncmoe : params.n_cpu_moe)
-    for (size_t rpc_idx = 0; rpc_idx < params.rpc_servers.size(); ++rpc_idx)
     for (const auto & sm : params.split_mode)
     for (const auto & mg : params.main_gpu)
     for (const auto & devs : params.devices)
@@ -1219,9 +1170,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
     for (const auto & cs : params.cpu_strict)
     for (const auto & nd : params.n_depth)
     for (const auto & pl : params.poll) {
-        const auto & rpc = params.rpc_servers[rpc_idx];
-        const auto & rpc_set = params.rpc_device_sets[rpc_idx];
-
         for (const auto & n_prompt : params.n_prompt) {
             if (n_prompt == 0) {
                 continue;
@@ -1241,8 +1189,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
                 /* .poll = */ pl,
                 /* .n_gpu_layers = */ nl,
                 /* .n_cpu_moe = */ ncmoe,
-                /* .rpc_servers = */ rpc,
-                /* .rpc_devices = */ rpc_set,
                 /* .split_mode = */ sm,
                 /* .main_gpu = */ mg,
                 /* .no_kv_offload= */ nkvo,
@@ -1276,8 +1222,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
                 /* .poll = */ pl,
                 /* .n_gpu_layers = */ nl,
                 /* .n_cpu_moe = */ ncmoe,
-                /* .rpc_servers = */ rpc,
-                /* .rpc_devices = */ rpc_set,
                 /* .split_mode = */ sm,
                 /* .main_gpu = */ mg,
                 /* .no_kv_offload= */ nkvo,
@@ -1311,8 +1255,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
                 /* .poll = */ pl,
                 /* .n_gpu_layers = */ nl,
                 /* .n_cpu_moe = */ ncmoe,
-                /* .rpc_servers = */ rpc,
-                /* .rpc_devices = */ rpc_set,
                 /* .split_mode = */ sm,
                 /* .main_gpu = */ mg,
                 /* .no_kv_offload= */ nkvo,
@@ -2050,19 +1992,6 @@ int main(int argc, char ** argv) {
 
     cmd_params params = parse_cmd_params(argc, argv);
 
-    if (params.list_devices) {
-        for (const auto & rpc : params.rpc_servers) {
-            if (!rpc.empty()) {
-                try {
-                    register_rpc_device_list(rpc);
-                } catch (const std::exception & e) {
-                    fprintf(stderr, "warning: %s\n", e.what());
-                }
-            }
-        }
-        print_available_devices_and_exit();
-    }
-
     auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
     if (!cpu_dev) {
         fprintf(stderr, "%s: error: CPU backend is not loaded\n", __func__);
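For reference, the device listing that this change inlines into the --list-devices handler can be reproduced as a standalone program against the public ggml backend API. The following is a minimal sketch, not part of the commit; it assumes ggml-backend.h is available and that the desired backends are linked and registered at startup.

// Minimal sketch: enumerate non-CPU ggml backend devices and report their memory,
// mirroring the --list-devices handler in the diff above.
#include <cstdio>
#include <vector>

#include "ggml-backend.h"

int main() {
    std::vector<ggml_backend_dev_t> devices;
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        // skip the CPU device, as the handler above does
        if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) {
            devices.push_back(dev);
        }
    }
    printf("Available devices:\n");
    if (devices.empty()) {
        printf("  (none)\n");
    }
    for (ggml_backend_dev_t dev : devices) {
        size_t free = 0, total = 0;
        ggml_backend_dev_memory(dev, &free, &total);   // reports memory in bytes
        printf("  %s: %s (%zu MiB, %zu MiB free)\n",
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
               total / 1024 / 1024, free / 1024 / 1024);
    }
    return 0;
}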