Commit ce65bfd

Update llama-tornado python script for service
1 parent: 55eb896

File tree

1 file changed: +20 -7 lines

llama-tornado

Lines changed: 20 additions & 7 deletions
@@ -185,6 +185,14 @@ class LlamaRunner:
     def _add_llama_args(self, cmd: List[str], args: argparse.Namespace) -> List[str]:
         """Add LLaMA-specific arguments to the command."""

+        # For service mode, only pass the model path and max-tokens
+        if hasattr(args, 'service') and args.service:
+            llama_args = [
+                "--model", args.model_path,
+                "--max-tokens", str(args.max_tokens),
+            ]
+            return cmd + llama_args
+
         llama_args = [
             "--model", args.model_path,
             "--temperature", str(args.temperature),
@@ -219,14 +227,19 @@ class LlamaRunner:

         # Show service-specific information
         if args.service:
-            print("Starting TornadoVM LLM REST API Service...")
+            print("Starting GPULlama3.java REST API Service...")
             print(f"Model: {args.model_path}")
-            print("API endpoints will be available at:")
-            print("  - http://localhost:8080/v1/completions")
-            print("  - http://localhost:8080/v1/completions/stream")
-            print("  - http://localhost:8080/v1/models")
-            print("  - http://localhost:8080/v1/health")
-            print("\nPress Ctrl+C to stop the service")
+            print("API endpoints available at:")
+            print("  - http://localhost:8080/chat")
+            print("  - http://localhost:8080/chat/stream")
+            print("  - http://localhost:8080/health")
+            print("")
+            print("Example usage:")
+            print('  curl -X POST http://localhost:8080/chat \\')
+            print('    -H "Content-Type: application/json" \\')
+            print('    -d \'{"message": "Hello!"}\'')
+            print("")
+            print("Press Ctrl+C to stop the service")
             print("-" * 60)
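
The banner above prints a curl example; a small Python equivalent for smoke-testing the /chat endpoint is sketched below, assuming the service is already running on localhost:8080 and accepts the {"message": ...} JSON body shown in the diff (the response schema is not shown, so the raw body is printed):

import requests

# Hypothetical smoke test mirroring the curl command printed by the service banner.
resp = requests.post(
    "http://localhost:8080/chat",
    json={"message": "Hello!"},  # same payload as the printed curl example
    timeout=60,
)
print(resp.status_code)
print(resp.text)  # raw response body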
