Skip to content

Commit 4195fb3

Browse files
Feat: add command-line arguments for backend parameters (#86)
1 parent 1dcd7d0 commit 4195fb3

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

gpt_oss/generate.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,10 @@ def main(args):
1919
from gpt_oss.torch.utils import init_distributed
2020
from gpt_oss.triton.model import TokenGenerator as TritonGenerator
2121
device = init_distributed()
22-
generator = TritonGenerator(args.checkpoint, context=4096, device=device)
22+
generator = TritonGenerator(args.checkpoint, context=args.context_length, device=device)
2323
case "vllm":
2424
from gpt_oss.vllm.token_generator import TokenGenerator as VLLMGenerator
25-
generator = VLLMGenerator(args.checkpoint, tensor_parallel_size=2)
25+
generator = VLLMGenerator(args.checkpoint, tensor_parallel_size=args.tensor_parallel_size)
2626
case _:
2727
raise ValueError(f"Invalid backend: {args.backend}")
2828

@@ -31,9 +31,9 @@ def main(args):
3131
max_tokens = None if args.limit == 0 else args.limit
3232
for token, logprob in generator.generate(tokens, stop_tokens=[tokenizer.eot_token], temperature=args.temperature, max_tokens=max_tokens, return_logprobs=True):
3333
tokens.append(token)
34-
decoded_token = tokenizer.decode([token])
34+
token_text = tokenizer.decode([token])
3535
print(
36-
f"Generated token: {repr(decoded_token)}, logprob: {logprob}"
36+
f"Generated token: {repr(token_text)}, logprob: {logprob}"
3737
)
3838

3939

@@ -78,6 +78,18 @@ def main(args):
7878
choices=["triton", "torch", "vllm"],
7979
help="Inference backend",
8080
)
81+
parser.add_argument(
82+
"--tensor-parallel-size",
83+
type=int,
84+
default=2,
85+
help="Tensor parallel size for vLLM backend",
86+
)
87+
parser.add_argument(
88+
"--context-length",
89+
type=int,
90+
default=4096,
91+
help="Context length for Triton backend",
92+
)
8193
args = parser.parse_args()
8294

8395
main(args)

0 commit comments

Comments (0)