@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-llama-tornado: GPU-accelerated LLaMA.java runner with TornadoVM
-Run LLaMA models using either OpenCL or PTX backends.
+llama-tornado: GPU-accelerated Java LLM runner with TornadoVM
+Run LLM models using either OpenCL or PTX backends.
 """
 
 import argparse
@@ -19,7 +19,7 @@ class Backend(Enum):
     PTX = "ptx"
 
 class LlamaRunner:
-    """Main class for managing LLaMA model execution with GPU acceleration."""
+    """Main class for managing LLM execution with GPU acceleration."""
 
     def __init__(self):
         self.java_home = os.environ.get('JAVA_HOME')
@@ -266,29 +266,29 @@ def create_parser() -> argparse.ArgumentParser:
     """Create and configure the argument parser."""
     parser = argparse.ArgumentParser(
         prog="llama-tornado",
-        description="GPU-accelerated LLaMA.java model runner using TornadoVM",
+        description="GPU-accelerated LLM runner using TornadoVM",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
 
     # Required arguments
     parser.add_argument("--model", dest="model_path", required=True,
-                        help="Path to the LLaMA model file (e.g., Llama-3.2-1B-Instruct-Q8_0.gguf)")
+                        help="Path to the LLM gguf file (e.g., Llama-3.2-1B-Instruct-Q8_0.gguf)")
 
-    # LLaMA arguments
-    llama_group = parser.add_argument_group("LLaMA Configuration")
-    llama_group.add_argument("--prompt", help="Input prompt for the model")
-    llama_group.add_argument("-sp", "--system-prompt", help="System prompt for the model")
-    llama_group.add_argument("--temperature", type=float, default=0.1,
+    # LLM arguments
+    llm_group = parser.add_argument_group("LLaMA Configuration")
+    llm_group.add_argument("--prompt", help="Input prompt for the model")
+    llm_group.add_argument("-sp", "--system-prompt", help="System prompt for the model")
+    llm_group.add_argument("--temperature", type=float, default=0.1,
                            help="Sampling temperature (0.0 to 2.0)")
-    llama_group.add_argument("--top-p", type=float, default=0.95,
+    llm_group.add_argument("--top-p", type=float, default=0.95,
                            help="Top-p sampling parameter")
-    llama_group.add_argument("--seed", type=int, default=None,
+    llm_group.add_argument("--seed", type=int, default=None,
                            help="Random seed (default: current timestamp)")
-    llama_group.add_argument("-n", "--max-tokens", type=int, default=512,
+    llm_group.add_argument("-n", "--max-tokens", type=int, default=512,
                            help="Maximum number of tokens to generate")
-    llama_group.add_argument("--stream", type=bool, default=True,
+    llm_group.add_argument("--stream", type=bool, default=True,
                            help="Enable streaming output")
-    llama_group.add_argument("--echo", type=bool, default=False,
+    llm_group.add_argument("--echo", type=bool, default=False,
                            help="Echo the input prompt")
     llm_group.add_argument("--suffix", help="Suffix for fill-in-the-middle request (Codestral)")
 
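For reference, below is a minimal, self-contained sketch of the argparse argument-group pattern touched by the hunk above. It reuses the option names and defaults visible in the diff, but it is an illustrative standalone example (the function name build_parser and the sample model path are hypothetical), not part of the commit.

#!/usr/bin/env python3
"""Minimal sketch of the argument-group pattern shown in the diff (illustrative only)."""
import argparse


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        prog="llama-tornado",
        description="GPU-accelerated LLM runner using TornadoVM",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--model", dest="model_path", required=True,
                        help="Path to the LLM gguf file")
    # add_argument_group only affects how options are grouped in --help output;
    # parsing behaviour is the same whether the variable is named llama_group or llm_group.
    llm_group = parser.add_argument_group("LLaMA Configuration")
    llm_group.add_argument("--prompt", help="Input prompt for the model")
    llm_group.add_argument("--temperature", type=float, default=0.1,
                           help="Sampling temperature (0.0 to 2.0)")
    llm_group.add_argument("-n", "--max-tokens", type=int, default=512,
                           help="Maximum number of tokens to generate")
    return parser


if __name__ == "__main__":
    # Example invocation with a hypothetical model path.
    args = build_parser().parse_args(
        ["--model", "Llama-3.2-1B-Instruct-Q8_0.gguf", "--prompt", "Hello"]
    )
    print(args.model_path, args.temperature, args.max_tokens)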