@@ -204,6 +204,14 @@ class LlamaRunner:
204
204
def _add_llama_args (self , cmd : List [str ], args : argparse .Namespace ) -> List [str ]:
205
205
"""Add LLaMA-specific arguments to the command."""
206
206
207
+ # For service mode, only pass the model path and max-tokens
208
+ if hasattr (args , 'service' ) and args .service :
209
+ llama_args = [
210
+ "--model" , args .model_path ,
211
+ "--max-tokens" , str (args .max_tokens ),
212
+ ]
213
+ return cmd + llama_args
214
+
207
215
llama_args = [
208
216
"--model" , args .model_path ,
209
217
"--temperature" , str (args .temperature ),
@@ -238,14 +246,19 @@ class LlamaRunner:
238
246
239
247
# Show service-specific information
240
248
if args .service :
241
- print ("Starting TornadoVM LLM REST API Service..." )
249
+ print ("Starting GPULlama3.java REST API Service..." )
242
250
print (f"Model: { args .model_path } " )
243
- print ("API endpoints will be available at:" )
244
- print (" - http://localhost:8080/v1/completions" )
245
- print (" - http://localhost:8080/v1/completions/stream" )
246
- print (" - http://localhost:8080/v1/models" )
247
- print (" - http://localhost:8080/v1/health" )
248
- print ("\n Press Ctrl+C to stop the service" )
251
+ print ("API endpoints available at:" )
252
+ print (" - http://localhost:8080/chat" )
253
+ print (" - http://localhost:8080/chat/stream" )
254
+ print (" - http://localhost:8080/health" )
255
+ print ("" )
256
+ print ("Example usage:" )
257
+ print (' curl -X POST http://localhost:8080/chat \\ ' )
258
+ print (' -H "Content-Type: application/json" \\ ' )
259
+ print (' -d \' {"message": "Hello!"}\' ' )
260
+ print ("" )
261
+ print ("Press Ctrl+C to stop the service" )
249
262
print ("-" * 60 )
250
263
251
264
0 commit comments