@@ -55,6 +55,17 @@ static int printe(const char * fmt, ...) {
 class Opt {
   public:
     int init(int argc, const char ** argv) {
+        context_size_default = llama_context_default_params().n_batch;
+        ngl_default          = llama_model_default_params().n_gpu_layers;
+        common_params_sampling sampling;
+        temperature_default = sampling.temp;
+
+        if (argc < 2) {
+            printe("Error: No arguments provided.\n");
+            help();
+            return 1;
+        }
+
         // Parse arguments
         if (parse(argc, argv)) {
             printe("Error: Failed to parse arguments.\n");
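Note how `init()` snapshots the library defaults before any parsing happens: `common_params_sampling` is default-constructed solely to read its `temp` field, and the cached values do double duty as parse-time fallbacks and as the numbers printed by `help()`. A minimal sketch of the pattern, with illustrative names not taken from the patch:

```cpp
// Hedged sketch of the "capture defaults once" idiom; the struct and values
// here are illustrative stand-ins for the llama.cpp calls used in the patch.
#include <cstdio>

struct Defaults {
    int   context_size = 2048;  // stands in for llama_context_default_params().n_batch
    float temperature  = 0.8f;  // stands in for common_params_sampling{}.temp
};

int main() {
    Defaults d;  // captured once, reused for both fallback logic and help output
    std::printf("Context size (default: %d)\n", d.context_size);
    std::printf("Temperature (default: %.1f)\n", d.temperature);
    return 0;
}
```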
@@ -73,7 +84,10 @@ class Opt {
 
     std::string model_;
     std::string user_;
-    int context_size_ = -1, ngl_ = -1;
+    int   context_size_default = -1, ngl_default = -1;
+    float temperature_default  = -1;
+    int   context_size_ = -1, ngl_ = -1;
+    float temperature_  = -1;
     bool verbose_ = false;
 
   private:
@@ -89,6 +103,17 @@ class Opt {
         }
 
         option_value = std::atoi(argv[++i]);
+
+        return 0;
+    }
+
+    int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
+        if (i + 1 >= argc) {
+            return 1;
+        }
+
+        option_value = std::atof(argv[++i]);
+
         return 0;
     }
 
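The float overload mirrors the existing `int` version, so `parse()` picks the right parser purely from the type of the destination member. One caveat worth noting: `std::atof`, like `std::atoi`, returns 0 on malformed input with no error signal. A stricter variant (a sketch, not part of the patch) could be built on `std::strtof`:

```cpp
// Sketch of a stricter parser: std::strtof reports where conversion stopped,
// so malformed input can be rejected instead of silently becoming 0.0f.
#include <cstdlib>

static int parse_float_arg(const char * s, float & out) {
    char *      end = nullptr;
    const float v   = std::strtof(s, &end);
    if (end == s || *end != '\0') {
        return 1;  // no digits consumed, or trailing garbage
    }
    out = v;
    return 0;
}
```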
@@ -103,6 +128,10 @@ class Opt {
                 if (handle_option_with_value(argc, argv, i, ngl_) == 1) {
                     return 1;
                 }
+            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
+                if (handle_option_with_value(argc, argv, i, temperature_) == 1) {
+                    return 1;
+                }
             } else if (options_parsing &&
                        (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
                 verbose_ = true;
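With this branch in place the new flag composes with the existing ones, e.g. `llama-run --temp 0.2 some-file.gguf` (an illustrative invocation in the style of the help text's examples). Since `temperature_` starts at -1 and the fallback check is `>= 0`, omitting the flag, or passing a negative value, selects `temperature_default`.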
@@ -142,6 +171,8 @@ class Opt {
142171 " Context size (default: %d)\n "
143172 " -n, --ngl <value>\n "
144173 " Number of GPU layers (default: %d)\n "
174+ " --temp <value>\n "
175+ " Temperature (default: %.1f)\n "
145176 " -v, --verbose, --log-verbose\n "
146177 " Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n "
147178 " -h, --help\n "
@@ -170,7 +201,7 @@ class Opt {
170201 " llama-run file://some-file3.gguf\n "
171202 " llama-run --ngl 999 some-file4.gguf\n "
172203 " llama-run --ngl 999 some-file5.gguf Hello World\n " ,
173- llama_context_default_params (). n_batch , llama_model_default_params (). n_gpu_layers );
204+ context_size_default, ngl_default, temperature_default );
174205 }
175206};
176207
@@ -495,12 +526,12 @@ class LlamaData {
             return 1;
         }
 
-        context = initialize_context(model, opt.context_size_);
+        context = initialize_context(model, opt);
         if (!context) {
             return 1;
         }
 
-        sampler = initialize_sampler();
+        sampler = initialize_sampler(opt);
         return 0;
     }
 
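Threading the whole `Opt` through `initialize_context()` and `initialize_sampler()` instead of individual fields keeps these call sites stable as further options are added; each initializer picks out just the members it needs.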
@@ -620,7 +651,7 @@ class LlamaData {
     llama_model_ptr initialize_model(Opt & opt) {
         ggml_backend_load_all();
         llama_model_params model_params = llama_model_default_params();
-        model_params.n_gpu_layers = opt.ngl_ >= 0 ? opt.ngl_ : model_params.n_gpu_layers;
+        model_params.n_gpu_layers = opt.ngl_ >= 0 ? opt.ngl_ : opt.ngl_default;
         resolve_model(opt.model_);
         printe(
             "\r%*s"
@@ -636,9 +667,9 @@ class LlamaData {
     }
 
     // Initializes the context with the specified parameters
-    llama_context_ptr initialize_context(const llama_model_ptr & model, const int n_ctx) {
+    llama_context_ptr initialize_context(const llama_model_ptr & model, const Opt & opt) {
         llama_context_params ctx_params = llama_context_default_params();
-        ctx_params.n_ctx = ctx_params.n_batch = n_ctx >= 0 ? n_ctx : ctx_params.n_batch;
+        ctx_params.n_ctx = ctx_params.n_batch = opt.context_size_ >= 0 ? opt.context_size_ : opt.context_size_default;
         llama_context_ptr context(llama_new_context_with_model(model.get(), ctx_params));
         if (!context) {
             printe("%s: error: failed to create the llama_context\n", __func__);
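The behavior is unchanged apart from the fallback's source: one value still drives both `n_ctx` and `n_batch`, but the default now comes from the value cached in `Opt::init()` rather than a fresh `llama_context_default_params()` call at the use site.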
@@ -648,10 +679,11 @@ class LlamaData {
     }
 
     // Initializes and configures the sampler
-    llama_sampler_ptr initialize_sampler() {
+    llama_sampler_ptr initialize_sampler(const Opt & opt) {
         llama_sampler_ptr sampler(llama_sampler_chain_init(llama_sampler_chain_default_params()));
         llama_sampler_chain_add(sampler.get(), llama_sampler_init_min_p(0.05f, 1));
-        llama_sampler_chain_add(sampler.get(), llama_sampler_init_temp(0.8f));
+        llama_sampler_chain_add(
+            sampler.get(), llama_sampler_init_temp(opt.temperature_ >= 0 ? opt.temperature_ : opt.temperature_default));
         llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
 
         return sampler;
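Chain order matters here: min-p first prunes unlikely candidates, the temperature sampler then rescales the surviving logits, and the dist sampler draws the final token. The `>= 0` test is the same "-1 means unset" sentinel used for `ngl_` and `context_size_`; a tiny sketch of the helper it implies (hypothetical, not in the patch):

```cpp
// Hypothetical helper capturing the sentinel convention used by the patch:
// negative means "not set on the command line", so fall back to the default.
static float effective(float cli_value, float fallback) {
    return cli_value >= 0 ? cli_value : fallback;
}
// usage (illustrative): llama_sampler_init_temp(effective(opt.temperature_, opt.temperature_default))
```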