Commit ccf47ee

Author: zhuyuhua-v
Update arguments for LLM running scripts (#3581)
Signed-off-by: zhuyuhua-v <[email protected]>
1 parent e2f1913 · commit ccf47ee

File tree: 2 files changed (+8, -6 lines)


examples/gpu/inference/python/llm/run_generation.py

Lines changed: 5 additions & 4 deletions
@@ -52,9 +52,9 @@
 parser.add_argument(
     "--dtype",
     type=str,
-    choices=["float32", "bfloat16", "float16"],
-    default="bfloat16",
-    help="float16, bfloat16, float32",
+    choices=["float16"],
+    default="float16",
+    help="ipex.optimize_transformers only supports float16 for now",
 )
 parser.add_argument(
     "--input-tokens",
@@ -319,6 +319,7 @@ def _model_generate(self, context, max_length, eos_token_id):
 results = evaluator.evaluate(
     hfmodel,
     task_dict,
+    limit=2
 )

 print(evaluator.make_table(results))
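
Note: limit=2 caps each task at its first two documents, which turns the accuracy pass into a quick smoke test rather than a full evaluation. Assuming the lm-evaluation-harness evaluator this script uses, the call shape is roughly the following sketch (the task name is illustrative):

    from lm_eval import evaluator, tasks

    task_dict = tasks.get_task_dict(["lambada"])  # hypothetical task selection
    # limit=2: score only the first 2 examples per task; the resulting
    # numbers are not meaningful accuracy figures.
    results = evaluator.evaluate(hfmodel, task_dict, limit=2)
    print(evaluator.make_table(results))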
@@ -362,7 +363,7 @@ def run_generate(num_tokens, num_input_tokens, num_beams):
 ref_prompt=None
 ref_prompt_cuda=None
 token_support = [(32, 32), (1024, 128)]
-if (int(num_input_tokens), num_tokens) in token_support:
+if (int(num_input_tokens), num_tokens) in token_support and args.sub_model_name is not None:
     ref_prompt = prompt_json[args.sub_model_name][f"{num_input_tokens}-{num_tokens}"][f"{num_beams}"]
 try:
     ref_prompt_cuda = prompt_json[args.sub_model_name][f"{num_input_tokens}-{num_tokens}"][f"cuda-result: {num_beams}"]
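
Note: args.sub_model_name defaults to None when the flag is omitted, so the old unconditional lookup prompt_json[args.sub_model_name] would raise a KeyError even for supported token shapes. A standalone sketch of the failure mode the added guard avoids (dictionary contents are illustrative):

    prompt_json = {"gpt-j-6b": {"32-32": {"1": "..."}}}  # hypothetical reference prompts
    sub_model_name = None  # default when no sub-model flag is passed

    token_support = [(32, 32), (1024, 128)]
    # Old: prompt_json[sub_model_name] -> KeyError: None
    # New: the None check skips the reference-prompt lookup entirely.
    if (32, 32) in token_support and sub_model_name is not None:
        ref_prompt = prompt_json[sub_model_name]["32-32"]["1"]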

examples/gpu/inference/python/llm/run_generation_with_deepspeed.py

Lines changed: 3 additions & 2 deletions
@@ -66,7 +66,8 @@
     default='xpu',
 )
 parser.add_argument(
-    "--dtype", type=str, help="float16 or bfloat16 or int8", choices=["int8", "float16", "bfloat16", "float32"], default="float16"
+    "--dtype", type=str, help="ipex.optimize_transformers only supports float16 for now",
+    choices=["float16"], default="float16"
 )
 parser.add_argument("--local_rank", required=False, type=int, help="used by dist launchers")
 parser.add_argument("--batch_size", "--batch-size", default=1, type=int, help="batch size")
@@ -485,7 +486,7 @@ def generate():
 ref_prompt=None
 ref_prompt_cuda=None
 token_support = [(32, 32), (1024, 128)]
-if (int(num_input_tokens), num_tokens) in token_support:
+if (int(num_input_tokens), num_tokens) in token_support and args.sub_model_name is not None:
     ref_prompt = prompt_json[args.sub_model_name][f"{num_input_tokens}-{num_tokens}"][f"{num_beams}"]
 try:
     ref_prompt_cuda = prompt_json[args.sub_model_name][f"{num_input_tokens}-{num_tokens}"][f"cuda-result: {num_beams}"]
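
Note: the deepspeed script mirrors both changes, so existing invocations need updating the same way. A hypothetical usage example under the new argument set (model selection and other required flags omitted):

    # Only float16 is accepted now:
    python run_generation_with_deepspeed.py --dtype float16 --batch_size 1

    # Previously valid values fail argparse validation up front:
    python run_generation_with_deepspeed.py --dtype int8
    # error: argument --dtype: invalid choice: 'int8' (choose from 'float16')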
