@@ -79,6 +79,7 @@ class Opt {
         ctx_params           = llama_context_default_params();
         model_params         = llama_model_default_params();
         context_size_default = ctx_params.n_batch;
+        n_threads_default    = ctx_params.n_threads;
         ngl_default          = model_params.n_gpu_layers;
         common_params_sampling sampling;
         temperature_default = sampling.temp;
@@ -104,6 +105,7 @@ class Opt {
 
         ctx_params.n_batch        = context_size >= 0 ? context_size : context_size_default;
         ctx_params.n_ctx          = ctx_params.n_batch;
+        ctx_params.n_threads      = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default;
         model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
         temperature               = temperature >= 0 ? temperature : temperature_default;
 
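The new option follows the same pattern as the existing ones: `-1` is an "unset" sentinel, and the constructor captures llama.cpp's default so an omitted `-t` falls back to `ctx_params.n_threads`. Assigning `n_threads_batch` in the same chained statement means prompt processing and generation use the same thread count. A minimal standalone sketch of the idiom (names are illustrative, not from the patch):

```cpp
// Illustrative sketch of the "unset sentinel" idiom used above: a negative
// CLI value means "not given on the command line", so the default captured
// from llama_context_default_params() wins.
static int resolve_or_default(int cli_value, int default_value) {
    return cli_value >= 0 ? cli_value : default_value;
}
```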
@@ -116,12 +118,12 @@ class Opt {
     std::string chat_template_file;
     std::string user;
     bool use_jinja = false;
-    int context_size = -1, ngl = -1;
+    int context_size = -1, ngl = -1, n_threads = -1;
     float temperature = -1;
     bool verbose = false;
 
   private:
-    int context_size_default = -1, ngl_default = -1;
+    int context_size_default = -1, ngl_default = -1, n_threads_default = -1;
     float temperature_default = -1;
     bool help = false;
 
@@ -159,53 +161,94 @@ class Opt {
         return 0;
     }
 
+    int parse_options_with_value(int argc, const char ** argv, int & i, bool & options_parsing) {
+        if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
+            if (handle_option_with_value(argc, argv, i, context_size) == 1) {
+                return 1;
+            }
+        } else if (options_parsing &&
+                   (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
+            if (handle_option_with_value(argc, argv, i, ngl) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) {
+            if (handle_option_with_value(argc, argv, i, n_threads) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
+            if (handle_option_with_value(argc, argv, i, temperature) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
+            if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
+                return 1;
+            }
+            use_jinja = true;
+        } else {
+            return 2;
+        }
+
+        return 0;
+    }
+
+    int parse_options(const char ** argv, int & i, bool & options_parsing) {
+        if (options_parsing && (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
+            verbose = true;
+        } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
+            use_jinja = true;
+        } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
+            help = true;
+            return 0;
+        } else if (options_parsing && strcmp(argv[i], "--") == 0) {
+            options_parsing = false;
+        } else {
+            return 2;
+        }
+
+        return 0;
+    }
+
+    int parse_positional_args(const char ** argv, int & i, int & positional_args_i) {
+        if (positional_args_i == 0) {
+            if (!argv[i][0] || argv[i][0] == '-') {
+                return 1;
+            }
+
+            ++positional_args_i;
+            model_ = argv[i];
+        } else if (positional_args_i == 1) {
+            ++positional_args_i;
+            user = argv[i];
+        } else {
+            user += " " + std::string(argv[i]);
+        }
+
+        return 0;
+    }
+
     int parse(int argc, const char ** argv) {
         bool options_parsing = true;
         for (int i = 1, positional_args_i = 0; i < argc; ++i) {
-            if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
-                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-                verbose = true;
-            } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
-                use_jinja = true;
-            } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0){
-                if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
-                    return 1;
-                }
-                use_jinja = true;
-            } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-                help = true;
-                return 0;
-            } else if (options_parsing && strcmp(argv[i], "--") == 0) {
-                options_parsing = false;
-            } else if (positional_args_i == 0) {
-                if (!argv[i][0] || argv[i][0] == '-') {
-                    return 1;
-                }
-
-                ++positional_args_i;
-                model_ = argv[i];
-            } else if (positional_args_i == 1) {
-                ++positional_args_i;
-                user = argv[i];
-            } else {
-                user += " " + std::string(argv[i]);
+            int ret = parse_options_with_value(argc, argv, i, options_parsing);
+            if (ret == 0) {
+                continue;
+            } else if (ret == 1) {
+                return ret;
+            }
+
+            ret = parse_options(argv, i, options_parsing);
+            if (ret == 0) {
+                continue;
+            } else if (ret == 1) {
+                return ret;
+            }
+
+            if (parse_positional_args(argv, i, positional_args_i)) {
+                return 1;
             }
         }
 
-        if (model_.empty()){
+        if (model_.empty()) {
             return 1;
         }
 
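The refactor splits the old monolithic loop into three helpers sharing a small return-code protocol: `0` means the argument was consumed, `1` is a hard error, and `2` means "not mine, try the next parser". `handle_option_with_value` is not shown in this hunk; below is a plausible minimal sketch of its `int` overload, assuming it consumes the next `argv` entry (the calls above with `temperature` and `chat_template_file` imply `float` and `std::string` overloads also exist):

```cpp
#include <cstdlib>

// Sketch only -- not part of this diff. Assumes the helper advances i past
// the option's value and reports 1 when the flag is given without one.
int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
    if (i + 1 >= argc) {
        return 1;  // e.g. "llama-run -t" with no thread count following
    }
    option_value = std::atoi(argv[++i]);
    return 0;
}
```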
@@ -232,6 +275,8 @@ class Opt {
             "  Number of GPU layers (default: %d)\n"
             "  --temp <value>\n"
             "  Temperature (default: %.1f)\n"
+            "  -t, --threads <value>\n"
+            "  Number of threads to use during generation (default: %d)\n"
             "  -v, --verbose, --log-verbose\n"
             "  Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
             "  -h, --help\n"
@@ -260,7 +305,7 @@ class Opt {
             "  llama-run file://some-file3.gguf\n"
             "  llama-run --ngl 999 some-file4.gguf\n"
             "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
+            context_size_default, ngl_default, temperature_default, n_threads_default);
     }
 };
 
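Taken together, an invocation such as `llama-run -t 8 --ngl 999 model.gguf "Hello"` (an illustrative command, not one from the patch) pins both generation and prompt processing to 8 threads, while omitting `-t` falls back to whatever thread count `llama_context_default_params()` reports.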