 #include <string>
 #include <vector>

-#include "common.h"
 #include "json.hpp"
-#include "llama-cpp.h"
+#include "opt.h"

 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 [[noreturn]] static void sigint_handler(int) {
@@ -52,178 +51,6 @@ static std::string fmt(const char * fmt, ...) {
     return buf;
 }

-GGML_ATTRIBUTE_FORMAT(1, 2)
-static int printe(const char * fmt, ...) {
-    va_list args;
-    va_start(args, fmt);
-    const int ret = vfprintf(stderr, fmt, args);
-    va_end(args);
-
-    return ret;
-}
-
-class Opt {
-  public:
-    int init(int argc, const char ** argv) {
-        ctx_params           = llama_context_default_params();
-        model_params         = llama_model_default_params();
-        context_size_default = ctx_params.n_batch;
-        ngl_default          = model_params.n_gpu_layers;
-        common_params_sampling sampling;
-        temperature_default = sampling.temp;
-
-        if (argc < 2) {
-            printe("Error: No arguments provided.\n");
-            print_help();
-            return 1;
-        }
-
-        // Parse arguments
-        if (parse(argc, argv)) {
-            printe("Error: Failed to parse arguments.\n");
-            print_help();
-            return 1;
-        }
-
-        // If help is requested, show help and exit
-        if (help) {
-            print_help();
-            return 2;
-        }
-
-        ctx_params.n_batch        = context_size >= 0 ? context_size : context_size_default;
-        ctx_params.n_ctx          = ctx_params.n_batch;
-        model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
-        temperature               = temperature >= 0 ? temperature : temperature_default;
-
-        return 0;  // Success
-    }
-
-    llama_context_params ctx_params;
-    llama_model_params model_params;
-    std::string model_;
-    std::string user;
-    int context_size = -1, ngl = -1;
-    float temperature = -1;
-    bool verbose = false;
-
-  private:
-    int context_size_default = -1, ngl_default = -1;
-    float temperature_default = -1;
-    bool help = false;
-
-    bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
-        return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
-    }
-
-    int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
-        if (i + 1 >= argc) {
-            return 1;
-        }
-
-        option_value = std::atoi(argv[++i]);
-
-        return 0;
-    }
-
-    int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
-        if (i + 1 >= argc) {
-            return 1;
-        }
-
-        option_value = std::atof(argv[++i]);
-
-        return 0;
-    }
-
-    int parse(int argc, const char ** argv) {
-        bool options_parsing = true;
-        for (int i = 1, positional_args_i = 0; i < argc; ++i) {
-            if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
-                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-                verbose = true;
-            } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-                help = true;
-                return 0;
-            } else if (options_parsing && strcmp(argv[i], "--") == 0) {
-                options_parsing = false;
-            } else if (positional_args_i == 0) {
-                if (!argv[i][0] || argv[i][0] == '-') {
-                    return 1;
-                }
-
-                ++positional_args_i;
-                model_ = argv[i];
-            } else if (positional_args_i == 1) {
-                ++positional_args_i;
-                user = argv[i];
-            } else {
-                user += " " + std::string(argv[i]);
-            }
-        }
-
-        return 0;
-    }
-
-    void print_help() const {
-        printf(
-            "Description:\n"
-            "  Runs a llm\n"
-            "\n"
-            "Usage:\n"
-            "  llama-run [options] model [prompt]\n"
-            "\n"
-            "Options:\n"
-            "  -c, --context-size <value>\n"
-            "      Context size (default: %d)\n"
-            "  -n, --ngl <value>\n"
-            "      Number of GPU layers (default: %d)\n"
-            "  --temp <value>\n"
-            "      Temperature (default: %.1f)\n"
-            "  -v, --verbose, --log-verbose\n"
-            "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
-            "  -h, --help\n"
-            "      Show help message\n"
-            "\n"
-            "Commands:\n"
-            "  model\n"
-            "      Model is a string with an optional prefix of \n"
-            "      huggingface:// (hf://), ollama://, https:// or file://.\n"
-            "      If no protocol is specified and a file exists in the specified\n"
-            "      path, file:// is assumed, otherwise if a file does not exist in\n"
-            "      the specified path, ollama:// is assumed. Models that are being\n"
-            "      pulled are downloaded with .partial extension while being\n"
-            "      downloaded and then renamed as the file without the .partial\n"
-            "      extension when complete.\n"
-            "\n"
-            "Examples:\n"
-            "  llama-run llama3\n"
-            "  llama-run ollama://granite-code\n"
-            "  llama-run ollama://smollm:135m\n"
-            "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
-            "  llama-run "
-            "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
-            "  llama-run https://example.com/some-file1.gguf\n"
-            "  llama-run some-file2.gguf\n"
-            "  llama-run file://some-file3.gguf\n"
-            "  llama-run --ngl 999 some-file4.gguf\n"
-            "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
-    }
-};
-
 struct progress_data {
     size_t file_size = 0;
     std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();