@@ -9,9 +9,9 @@ def parse_args_into_params(argv) -> llamacpp.gpt_params:
     parser = argparse.ArgumentParser(description="llama.cpp CLI")
     parser.add_argument("-i", "--interactive", action="store_true", help="run in interactive mode")
     parser.add_argument(
-        "--interactive-start",
+        "-ins", "--instruct",
         action="store_true",
-        help="run in interactive mode and poll user input at startup",
+        help="run in 'instruct mode' where the user is prompted to enter a command",
         default=False,
     )
     parser.add_argument(
@@ -39,11 +39,10 @@ def parse_args_into_params(argv) -> llamacpp.gpt_params:
         "--prompt",
         type=str,
         help="prompt to start generation with (default: random)",
-        required=True,
     )
-    # parser.add_argument(
-    #     "-f", "--file", type=str, default="", help="prompt file to start generation."
-    # )
+    parser.add_argument(
+        "-f", "--file", type=str, default="", help="prompt file to start generation."
+    )
     parser.add_argument(
         "-n", "--n_predict", type=int, default=128, help="number of tokens to predict (default: 128)"
     )
@@ -81,29 +80,7 @@ def parse_args_into_params(argv) -> llamacpp.gpt_params:

     args = parser.parse_args(argv[1:])

-    # Add a space in front of the first character to match OG llama tokenizer behavior
-    args.prompt = " " + args.prompt
-
-    # Initialize gpt_params object
-    params = llamacpp.gpt_params(
-        args.model,
-        args.prompt,
-        args.reverse_prompt,
-        args.ctx_size,
-        args.n_predict,
-        args.top_k,
-        args.top_p,
-        args.temp,
-        args.repeat_penalty,
-        args.seed,
-        args.threads,
-        args.repeat_last_n,
-        args.batch_size,
-        args.color,
-        args.interactive,
-    )
-
-    return params
+    return args


 def process_interactive_input(model: llamacpp.PyLLAMA):
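
After these hunks, `parse_args_into_params` returns the raw `argparse.Namespace` instead of a prebuilt `gpt_params`. A minimal, self-contained sketch of the new shape, trimmed to just the flags this commit touches (the real parser defines many more options):

import argparse

def parse_args_into_params(argv):
    parser = argparse.ArgumentParser(description="llama.cpp CLI")
    parser.add_argument("-i", "--interactive", action="store_true",
                        help="run in interactive mode")
    parser.add_argument("-ins", "--instruct", action="store_true", default=False,
                        help="run in 'instruct mode' where the user is prompted to enter a command")
    parser.add_argument("--prompt", type=str,
                        help="prompt to start generation with (default: random)")
    parser.add_argument("-f", "--file", type=str, default="",
                        help="prompt file to start generation.")
    # Return the Namespace as-is; gpt_params is now built later, in main(),
    # once the prompt source (--prompt or --file) has been resolved.
    return parser.parse_args(argv[1:])

# --prompt is no longer required; a file can supply the prompt instead.
args = parse_args_into_params(["cli.py", "-ins", "-f", "prompt.txt"])
print(args.instruct, args.file)  # -> True prompt.txt
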
@@ -121,24 +98,57 @@ def process_interactive_input(model: llamacpp.PyLLAMA):
             break


-def main(params):
+def main(args):
     """Main function"""
+
+    # if args.file is specified, read the file and set the prompt to the contents
+    if args.file:
+        with open(args.file, "r") as f:
+            args.prompt = f.read().strip()
+
+    # Add a space in front of the first character to match OG llama tokenizer behavior
+    args.prompt = " " + args.prompt
+
+    # Initialize the gpt_params object
+    params = llamacpp.gpt_params(
+        args.model,
+        args.ctx_size,
+        args.n_predict,
+        args.top_k,
+        args.top_p,
+        args.temp,
+        args.repeat_penalty,
+        args.seed,
+        args.threads,
+        args.repeat_last_n,
+        args.batch_size,
+    )
+
     model = llamacpp.PyLLAMA(params)
     model.add_bos()
-    model.update_input(params.prompt)
+    model.update_input(args.prompt)
     model.print_startup_stats()
     model.prepare_context()

+    inp_pfx = model.tokenize("\n\n### Instruction:\n\n", True)
+    inp_sfx = model.tokenize("\n\n### Response:\n\n", False)
+
+    if args.instruct:
+        args.interactive = True
+        args.antiprompt = "### Instruction:\n\n"
+
     # Set antiprompt if we are in interactive mode
-    if params.interactive:
-        model.set_antiprompt(params.antiprompt)
+    if args.antiprompt:
+        args.interactive = True
+        model.set_antiprompt(args.antiprompt)

-    if params.interactive:
+    if args.interactive:
         print("== Running in interactive mode. ==")
         print(" - Press Ctrl+C to interject at any time.")
         print(" - Press Return to return control to LLaMa.")
         print(" - If you want to submit another line, end your input in '\\'.")
         print()
+        is_interacting = True

     input_noecho = False
     is_finished = False
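
The instruct-mode setup above wraps each user turn in Alpaca-style markers: the `inp_pfx` tokens are injected before the input, the `inp_sfx` tokens after it, and the antiprompt pauses generation whenever the model emits the next instruction header. A toy, llamacpp-free illustration of the resulting stream, with plain strings standing in for the token batches that `model.tokenize` produces:

# Toy sketch of the instruct-mode turn structure; strings stand in for
# the token batches that model.tokenize() produces in the real code.
INP_PFX = "\n\n### Instruction:\n\n"  # injected before each user turn
INP_SFX = "\n\n### Response:\n\n"     # injected after each user turn
ANTIPROMPT = "### Instruction:\n\n"   # generation pauses when this reappears

def wrap_turn(user_input: str) -> str:
    # Mirrors update_input_tokens(inp_pfx) ... update_input_tokens(inp_sfx)
    return INP_PFX + user_input + INP_SFX

stream = " <initial prompt>" + wrap_turn("Summarize the file.")
print(stream)
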
@@ -147,33 +157,50 @@ def main(params):
         if model.has_unconsumed_input():
             model.ingest_all_pending_input(not input_noecho)
             # # reset color to default if there is no pending user input
-            # if (!input_noecho && params.use_color) {
+            # if (!input_noecho && args.use_color) {
             #     printf(ANSI_COLOR_RESET);
             # }
         else:
             text, is_finished = model.infer_text()
             print(text, end="")
             input_noecho = False

-        if params.interactive:
+        if args.interactive:
             if model.is_antiprompt_present():
                 # reverse prompt found
                 is_interacting = True
             if is_interacting:
+                if args.instruct:
+                    model.update_input_tokens(inp_pfx)
+                    print("\n> ", end="")
+
                 process_interactive_input(model)
+
+                if args.instruct:
+                    model.update_input_tokens(inp_sfx)
+
                 input_noecho = True
                 is_interacting = False
-
+
+        # end of text token was found
         if is_finished:
-            break
+            if args.interactive:
+                is_interacting = True
+            else:
+                print(" [end of text]")
+                break
+
+        if args.interactive and model.is_finished():
+            model.reset_remaining_tokens()
+            is_interacting = True

     return 0


 def run():
     # Parse params into a gpt_params object
-    params = parse_args_into_params(sys.argv)
-    return main(params)
+    args = parse_args_into_params(sys.argv)
+    return main(args)


 if __name__ == "__main__":
     sys.exit(run())
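
The loop's end-of-text handling also changes shape: an EOS token no longer unconditionally breaks out; in interactive mode it hands control back to the user, and a finished context is reset so the session can continue. A reduced sketch of that decision (the state names mirror the loop above; this is an illustration, not the library API):

# Reduced sketch of the reworked EOS handling in the generation loop.
def on_end_of_text(interactive: bool) -> str:
    if interactive:
        return "prompt user"  # sets is_interacting = True; loop continues
    print(" [end of text]")
    return "stop"             # break out of the generation loop

assert on_end_of_text(True) == "prompt user"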