
Commit 9a1a475

Update llama-tornado python script for service
1 parent a4e103d commit 9a1a475

File tree

1 file changed: llama-tornado (+20, -7 lines)

llama-tornado

Lines changed: 20 additions & 7 deletions
@@ -204,6 +204,14 @@ class LlamaRunner:
     def _add_llama_args(self, cmd: List[str], args: argparse.Namespace) -> List[str]:
         """Add LLaMA-specific arguments to the command."""
 
+        # For service mode, only pass the model path and max-tokens
+        if hasattr(args, 'service') and args.service:
+            llama_args = [
+                "--model", args.model_path,
+                "--max-tokens", str(args.max_tokens),
+            ]
+            return cmd + llama_args
+
         llama_args = [
             "--model", args.model_path,
             "--temperature", str(args.temperature),
@@ -238,14 +246,19 @@ class LlamaRunner:
 
         # Show service-specific information
         if args.service:
-            print("Starting TornadoVM LLM REST API Service...")
+            print("Starting GPULlama3.java REST API Service...")
             print(f"Model: {args.model_path}")
-            print("API endpoints will be available at:")
-            print(" - http://localhost:8080/v1/completions")
-            print(" - http://localhost:8080/v1/completions/stream")
-            print(" - http://localhost:8080/v1/models")
-            print(" - http://localhost:8080/v1/health")
-            print("\nPress Ctrl+C to stop the service")
+            print("API endpoints available at:")
+            print(" - http://localhost:8080/chat")
+            print(" - http://localhost:8080/chat/stream")
+            print(" - http://localhost:8080/health")
+            print("")
+            print("Example usage:")
+            print(' curl -X POST http://localhost:8080/chat \\')
+            print(' -H "Content-Type: application/json" \\')
+            print(' -d \'{"message": "Hello!"}\'')
+            print("")
+            print("Press Ctrl+C to stop the service")
             print("-" * 60)
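
The curl example printed above can also be exercised from Python. A minimal sketch, assuming the service is running on localhost:8080; the response format is not shown in this diff, so the raw body is printed as-is.

# Minimal sketch: POST to the /chat endpoint printed by the script above.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8080/chat",
    data=json.dumps({"message": "Hello!"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))
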
