Commit ce7dfee

Refactor llama-run to split out opt struct

Put it in its own file.

Signed-off-by: Eric Curtin <[email protected]>

1 parent 8d59d91 commit ce7dfee

File tree (4 files changed: 195 additions, 175 deletions)

  examples/run/CMakeLists.txt  (+1, -1)
  examples/run/opt.cpp         (+158)
  examples/run/opt.h           (+35)
  examples/run/run.cpp         (+1, -174)
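This commit only moves the option handling; the diff does not show how run.cpp's main() consumes the relocated struct. Given init()'s return codes (1 for argument errors, 2 when --help was handled, 0 otherwise), a caller would look roughly like the sketch below; the exact wiring is an assumption, not part of this diff.

#include "opt.h"

int main(int argc, const char ** argv) {
    Opt opt;
    const int ret = opt.init(argc, argv);
    if (ret == 2) {
        return 0;   // --help: usage has already been printed, exit cleanly
    } else if (ret != 0) {
        return 1;   // missing or invalid arguments: error and usage already printed
    }

    // opt.ctx_params, opt.model_params, opt.model_, opt.user and opt.temperature
    // now hold the effective settings (unset options replaced by library defaults).
    return 0;
}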

examples/run/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 set(TARGET llama-run)
-add_executable(${TARGET} run.cpp)
+add_executable(${TARGET} run.cpp opt.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)

examples/run/opt.cpp

Lines changed: 158 additions & 0 deletions

@@ -0,0 +1,158 @@
#include "opt.h"

GGML_ATTRIBUTE_FORMAT(1, 2)
int printe(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    const int ret = vfprintf(stderr, fmt, args);
    va_end(args);

    return ret;
}

int Opt::init(int argc, const char ** argv) {
    ctx_params           = llama_context_default_params();
    model_params         = llama_model_default_params();
    context_size_default = ctx_params.n_batch;
    ngl_default          = model_params.n_gpu_layers;
    common_params_sampling sampling;
    temperature_default = sampling.temp;

    if (argc < 2) {
        printe("Error: No arguments provided.\n");
        print_help();
        return 1;
    }

    // Parse arguments
    if (parse(argc, argv)) {
        printe("Error: Failed to parse arguments.\n");
        print_help();
        return 1;
    }

    // If help is requested, show help and exit
    if (help) {
        print_help();
        return 2;
    }

    ctx_params.n_batch        = context_size >= 0 ? context_size : context_size_default;
    ctx_params.n_ctx          = ctx_params.n_batch;
    model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
    temperature               = temperature >= 0 ? temperature : temperature_default;

    return 0;  // Success
}

bool Opt::parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
    return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
}

int Opt::handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
    if (i + 1 >= argc) {
        return 1;
    }

    option_value = std::atoi(argv[++i]);

    return 0;
}

int Opt::handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
    if (i + 1 >= argc) {
        return 1;
    }

    option_value = std::atof(argv[++i]);

    return 0;
}

int Opt::parse(int argc, const char ** argv) {
    bool options_parsing = true;
    for (int i = 1, positional_args_i = 0; i < argc; ++i) {
        if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
            if (handle_option_with_value(argc, argv, i, context_size) == 1) {
                return 1;
            }
        } else if (options_parsing && (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0)) {
            if (handle_option_with_value(argc, argv, i, ngl) == 1) {
                return 1;
            }
        } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
            if (handle_option_with_value(argc, argv, i, temperature) == 1) {
                return 1;
            }
        } else if (options_parsing &&
                   (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
            verbose = true;
        } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
            help = true;
            return 0;
        } else if (options_parsing && strcmp(argv[i], "--") == 0) {
            options_parsing = false;
        } else if (positional_args_i == 0) {
            if (!argv[i][0] || argv[i][0] == '-') {
                return 1;
            }

            ++positional_args_i;
            model_ = argv[i];
        } else if (positional_args_i == 1) {
            ++positional_args_i;
            user = argv[i];
        } else {
            user += " " + std::string(argv[i]);
        }
    }

    return 0;
}

void Opt::print_help() const {
    printf(
        "Description:\n"
        "  Runs a llm\n"
        "\n"
        "Usage:\n"
        "  llama-run [options] model [prompt]\n"
        "\n"
        "Options:\n"
        "  -c, --context-size <value>\n"
        "      Context size (default: %d)\n"
        "  -n, --ngl <value>\n"
        "      Number of GPU layers (default: %d)\n"
        "  --temp <value>\n"
        "      Temperature (default: %.1f)\n"
        "  -v, --verbose, --log-verbose\n"
        "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
        "  -h, --help\n"
        "      Show help message\n"
        "\n"
        "Commands:\n"
        "  model\n"
        "      Model is a string with an optional prefix of \n"
        "      huggingface:// (hf://), ollama://, https:// or file://.\n"
        "      If no protocol is specified and a file exists in the specified\n"
        "      path, file:// is assumed, otherwise if a file does not exist in\n"
        "      the specified path, ollama:// is assumed. Models that are being\n"
        "      pulled are downloaded with .partial extension while being\n"
        "      downloaded and then renamed as the file without the .partial\n"
        "      extension when complete.\n"
        "\n"
        "Examples:\n"
        "  llama-run llama3\n"
        "  llama-run ollama://granite-code\n"
        "  llama-run ollama://smollm:135m\n"
        "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
        "  llama-run "
        "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
        "  llama-run https://example.com/some-file1.gguf\n"
        "  llama-run some-file2.gguf\n"
        "  llama-run file://some-file3.gguf\n"
        "  llama-run --ngl 999 some-file4.gguf\n"
        "  llama-run --ngl 999 some-file5.gguf Hello World\n",
        context_size_default, ngl_default, temperature_default);
}
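parse() fills the positional arguments in order (the model first, then every remaining argument is appended to the prompt), and a literal "--" switches off option parsing so later arguments are never treated as flags. The following harness is not part of the commit; it is a minimal sketch that exercises this behaviour and assumes linking against the same common/llama libraries as the llama-run target.

#include "opt.h"

int main() {
    Opt opt;
    const char * argv[] = { "llama-run", "-c", "2048", "model.gguf", "--", "say", "-v", "literally" };

    if (opt.parse(8, argv) != 0) {
        printe("parse failed\n");
        return 1;
    }

    // context_size == 2048 and model_ == "model.gguf"; because of "--", the
    // trailing "-v" is treated as prompt text rather than the verbose flag,
    // so user == "say -v literally" and verbose stays false.
    printf("model=%s prompt='%s'\n", opt.model_.c_str(), opt.user.c_str());
    return 0;
}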

examples/run/opt.h

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
#pragma once

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

#include "common.h"
#include "llama-cpp.h"

GGML_ATTRIBUTE_FORMAT(1, 2)
int printe(const char * fmt, ...);

struct Opt {
    int init(int argc, const char ** argv);

    // Public members
    llama_context_params ctx_params;
    llama_model_params   model_params;
    std::string          model_;
    std::string          user;
    int                  context_size = -1, ngl = -1;
    float                temperature  = -1;
    bool                 verbose      = false;

    int   context_size_default = -1, ngl_default = -1;
    float temperature_default  = -1;
    bool  help                 = false;

    bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt);
    int  handle_option_with_value(int argc, const char ** argv, int & i, int & option_value);
    int  handle_option_with_value(int argc, const char ** argv, int & i, float & option_value);
    int  parse(int argc, const char ** argv);
    void print_help() const;
};
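The two handle_option_with_value overloads let parse() pass the destination member directly and have overload resolution pick the conversion: the int overload parses with std::atoi, the float overload with std::atof, and both advance the argument index. A minimal sketch, not part of the commit, again assuming it is linked the same way as llama-run:

#include "opt.h"

int main() {
    Opt opt;
    const char * argv[] = { "llama-run", "--temp", "0.7", "-c", "4096" };

    int i = 1;  // argv[1] == "--temp": the float overload consumes argv[2] via std::atof
    opt.handle_option_with_value(5, argv, i, opt.temperature);   // temperature ~= 0.7f, i advanced to 2

    i = 3;      // argv[3] == "-c": the int overload consumes argv[4] via std::atoi
    opt.handle_option_with_value(5, argv, i, opt.context_size);  // context_size == 4096, i advanced to 4

    return 0;
}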

examples/run/run.cpp

Lines changed: 1 addition & 174 deletions

@@ -23,9 +23,8 @@
 #include <string>
 #include <vector>
 
-#include "common.h"
 #include "json.hpp"
-#include "llama-cpp.h"
+#include "opt.h"
 
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 [[noreturn]] static void sigint_handler(int) {
@@ -52,178 +51,6 @@ static std::string fmt(const char * fmt, ...) {
     return buf;
 }
 
-GGML_ATTRIBUTE_FORMAT(1, 2)
-static int printe(const char * fmt, ...) {
-    va_list args;
-    va_start(args, fmt);
-    const int ret = vfprintf(stderr, fmt, args);
-    va_end(args);
-
-    return ret;
-}
-
-class Opt {
-  public:
-    int init(int argc, const char ** argv) {
-        ctx_params           = llama_context_default_params();
-        model_params         = llama_model_default_params();
-        context_size_default = ctx_params.n_batch;
-        ngl_default          = model_params.n_gpu_layers;
-        common_params_sampling sampling;
-        temperature_default = sampling.temp;
-
-        if (argc < 2) {
-            printe("Error: No arguments provided.\n");
-            print_help();
-            return 1;
-        }
-
-        // Parse arguments
-        if (parse(argc, argv)) {
-            printe("Error: Failed to parse arguments.\n");
-            print_help();
-            return 1;
-        }
-
-        // If help is requested, show help and exit
-        if (help) {
-            print_help();
-            return 2;
-        }
-
-        ctx_params.n_batch        = context_size >= 0 ? context_size : context_size_default;
-        ctx_params.n_ctx          = ctx_params.n_batch;
-        model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
-        temperature               = temperature >= 0 ? temperature : temperature_default;
-
-        return 0;  // Success
-    }
-
-    llama_context_params ctx_params;
-    llama_model_params   model_params;
-    std::string          model_;
-    std::string          user;
-    int                  context_size = -1, ngl = -1;
-    float                temperature  = -1;
-    bool                 verbose      = false;
-
-  private:
-    int   context_size_default = -1, ngl_default = -1;
-    float temperature_default  = -1;
-    bool  help                 = false;
-
-    bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
-        return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
-    }
-
-    int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
-        if (i + 1 >= argc) {
-            return 1;
-        }
-
-        option_value = std::atoi(argv[++i]);
-
-        return 0;
-    }
-
-    int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
-        if (i + 1 >= argc) {
-            return 1;
-        }
-
-        option_value = std::atof(argv[++i]);
-
-        return 0;
-    }
-
-    int parse(int argc, const char ** argv) {
-        bool options_parsing = true;
-        for (int i = 1, positional_args_i = 0; i < argc; ++i) {
-            if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
-                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-                verbose = true;
-            } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-                help = true;
-                return 0;
-            } else if (options_parsing && strcmp(argv[i], "--") == 0) {
-                options_parsing = false;
-            } else if (positional_args_i == 0) {
-                if (!argv[i][0] || argv[i][0] == '-') {
-                    return 1;
-                }
-
-                ++positional_args_i;
-                model_ = argv[i];
-            } else if (positional_args_i == 1) {
-                ++positional_args_i;
-                user = argv[i];
-            } else {
-                user += " " + std::string(argv[i]);
-            }
-        }
-
-        return 0;
-    }
-
-    void print_help() const {
-        printf(
-            "Description:\n"
-            "  Runs a llm\n"
-            "\n"
-            "Usage:\n"
-            "  llama-run [options] model [prompt]\n"
-            "\n"
-            "Options:\n"
-            "  -c, --context-size <value>\n"
-            "      Context size (default: %d)\n"
-            "  -n, --ngl <value>\n"
-            "      Number of GPU layers (default: %d)\n"
-            "  --temp <value>\n"
-            "      Temperature (default: %.1f)\n"
-            "  -v, --verbose, --log-verbose\n"
-            "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
-            "  -h, --help\n"
-            "      Show help message\n"
-            "\n"
-            "Commands:\n"
-            "  model\n"
-            "      Model is a string with an optional prefix of \n"
-            "      huggingface:// (hf://), ollama://, https:// or file://.\n"
-            "      If no protocol is specified and a file exists in the specified\n"
-            "      path, file:// is assumed, otherwise if a file does not exist in\n"
-            "      the specified path, ollama:// is assumed. Models that are being\n"
-            "      pulled are downloaded with .partial extension while being\n"
-            "      downloaded and then renamed as the file without the .partial\n"
-            "      extension when complete.\n"
-            "\n"
-            "Examples:\n"
-            "  llama-run llama3\n"
-            "  llama-run ollama://granite-code\n"
-            "  llama-run ollama://smollm:135m\n"
-            "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
-            "  llama-run "
-            "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
-            "  llama-run https://example.com/some-file1.gguf\n"
-            "  llama-run some-file2.gguf\n"
-            "  llama-run file://some-file3.gguf\n"
-            "  llama-run --ngl 999 some-file4.gguf\n"
-            "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
-    }
-};
-
 struct progress_data {
     size_t file_size = 0;
     std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
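After this change, printe is no longer a static helper private to run.cpp: it is declared in opt.h with GGML_ATTRIBUTE_FORMAT(1, 2), so any translation unit that includes opt.h gets the same stderr logger, with printf-style format checking on compilers where that macro expands to the format attribute. A small illustration, not part of the commit:

#include "opt.h"

int main() {
    // Correct call: %s and %d match the argument types.
    printe("failed to load '%s' (ngl=%d)\n", "model.gguf", 99);

    // On GCC/Clang the format attribute would flag this mismatch at compile
    // time (-Wformat): %d is given a string argument.
    // printe("failed to load '%d'\n", "model.gguf");
    return 0;
}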
