@@ -298,6 +298,27 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
+    std::vector<ggml_backend_dev_t> devices;
+    auto dev_names = string_split<std::string>(value, ',');
+    if (dev_names.empty()) {
+        throw std::invalid_argument("no devices specified");
+    }
+    if (dev_names.size() == 1 && dev_names[0] == "none") {
+        devices.push_back(nullptr);
+    } else {
+        for (const auto & device : dev_names) {
+            auto * dev = ggml_backend_dev_by_name(device.c_str());
+            if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
+                throw std::invalid_argument(string_format("invalid device: %s", device.c_str()));
+            }
+            devices.push_back(dev);
+        }
+        devices.push_back(nullptr);
+    }
+    return devices;
+}
+
 bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void (*print_usage)(int, char **)) {
     auto ctx_arg = common_params_parser_init(params, ex, print_usage);
     const common_params params_org = ctx_arg.params; // the example can modify the default params
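The new `parse_device_list` helper returns a nullptr-terminated vector: a lone `none` yields just `{nullptr}` (offload nothing), otherwise each name must resolve to a GPU device and a trailing `nullptr` terminates the list so it can be passed on as a C-style array. For context, here is a minimal sketch that exercises the same ggml device API used above, assuming a build whose backends register at least one GPU device; the name `CUDA0` is illustrative only, the real names come from `--list-devices`:

```cpp
// Sketch only: enumerates devices the way the --list-devices handler does and
// resolves one name the way parse_device_list does. Build against ggml headers.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_load_all(); // load dynamic backends, as common_params_parser_init now does

    // Enumerate GPU devices, mirroring the --list-devices handler.
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
            size_t free, total;
            ggml_backend_dev_memory(dev, &free, &total);
            printf("%s: %s (%zu MiB, %zu MiB free)\n",
                   ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
                   total / 1024 / 1024, free / 1024 / 1024);
        }
    }

    // Resolve one device by name, as parse_device_list does per list entry.
    // "CUDA0" is an assumed example name, not guaranteed to exist on a given build.
    ggml_backend_dev_t dev = ggml_backend_dev_by_name("CUDA0");
    printf("CUDA0: %s\n", dev ? "found" : "not found");
    return 0;
}
```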
@@ -324,6 +345,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
 }
 
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void (*print_usage)(int, char **)) {
+    // load dynamic backends
+    ggml_backend_load_all();
+
     common_params_context ctx_arg(params);
     ctx_arg.print_usage = print_usage;
     ctx_arg.ex = ex;
@@ -1312,6 +1336,30 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             else { throw std::invalid_argument("invalid value"); }
         }
     ).set_env("LLAMA_ARG_NUMA"));
+    add_opt(common_arg(
+        {"-dev", "--device"}, "<dev1,dev2,..>",
+        "comma-separated list of devices to use for offloading (none = don't offload)\n"
+        "use --list-devices to see a list of available devices",
+        [](common_params & params, const std::string & value) {
+            params.devices = parse_device_list(value);
+        }
+    ).set_env("LLAMA_ARG_DEVICE"));
+    add_opt(common_arg(
+        {"--list-devices"},
+        "print list of available devices and exit",
+        [](common_params &) {
+            printf("Available devices:\n");
+            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
+                auto * dev = ggml_backend_dev_get(i);
+                if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
+                    size_t free, total;
+                    ggml_backend_dev_memory(dev, &free, &total);
+                    printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+                }
+            }
+            exit(0);
+        }
+    ));
     add_opt(common_arg(
         {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
         "number of layers to store in VRAM",
@@ -1336,10 +1384,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             } else if (arg_next == "layer") {
                 params.split_mode = LLAMA_SPLIT_MODE_LAYER;
             } else if (arg_next == "row") {
-#ifdef GGML_USE_SYCL
-                fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
-                exit(1);
-#endif // GGML_USE_SYCL
                 params.split_mode = LLAMA_SPLIT_MODE_ROW;
             } else {
                 throw std::invalid_argument("invalid value");
@@ -2042,6 +2086,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.speculative.n_ctx = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"-devd", "--device-draft"}, "<dev1,dev2,..>",
+        "comma-separated list of devices to use for offloading the draft model (none = don't offload)\n"
+        "use --list-devices to see a list of available devices",
+        [](common_params & params, const std::string & value) {
+            params.speculative.devices = parse_device_list(value);
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft"}, "N",
         "number of layers to store in VRAM for the draft model",