@@ -185,6 +185,14 @@ class LlamaRunner:
185
185
def _add_llama_args (self , cmd : List [str ], args : argparse .Namespace ) -> List [str ]:
186
186
"""Add LLaMA-specific arguments to the command."""
187
187
188
+ # For service mode, only pass the model path and max-tokens
189
+ if hasattr (args , 'service' ) and args .service :
190
+ llama_args = [
191
+ "--model" , args .model_path ,
192
+ "--max-tokens" , str (args .max_tokens ),
193
+ ]
194
+ return cmd + llama_args
195
+
188
196
llama_args = [
189
197
"--model" , args .model_path ,
190
198
"--temperature" , str (args .temperature ),
@@ -219,14 +227,19 @@ class LlamaRunner:
219
227
220
228
# Show service-specific information
221
229
if args .service :
222
- print ("Starting TornadoVM LLM REST API Service..." )
230
+ print ("Starting GPULlama3.java REST API Service..." )
223
231
print (f"Model: { args .model_path } " )
224
- print ("API endpoints will be available at:" )
225
- print (" - http://localhost:8080/v1/completions" )
226
- print (" - http://localhost:8080/v1/completions/stream" )
227
- print (" - http://localhost:8080/v1/models" )
228
- print (" - http://localhost:8080/v1/health" )
229
- print ("\n Press Ctrl+C to stop the service" )
232
+ print ("API endpoints available at:" )
233
+ print (" - http://localhost:8080/chat" )
234
+ print (" - http://localhost:8080/chat/stream" )
235
+ print (" - http://localhost:8080/health" )
236
+ print ("" )
237
+ print ("Example usage:" )
238
+ print (' curl -X POST http://localhost:8080/chat \\ ' )
239
+ print (' -H "Content-Type: application/json" \\ ' )
240
+ print (' -d \' {"message": "Hello!"}\' ' )
241
+ print ("" )
242
+ print ("Press Ctrl+C to stop the service" )
230
243
print ("-" * 60 )
231
244
232
245
0 commit comments