Skip to content

Commit ea25095

Browse files
committed
Add batch_size parameterization and lower to 16, restore default optimization level and prompt.
1 parent 95baad3 commit ea25095

File tree

2 files changed

+25
-8
lines changed

2 files changed

+25
-8
lines changed

benchmark/tt-xla/llm_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
MIN_STEPS = 16
4141

4242
# Default input prompt
43-
DEFAULT_INPUT_PROMPT = "Explain quantum mechanics."
43+
DEFAULT_INPUT_PROMPT = "Here is an exhaustive list of the best practices for writing clean code:"
4444

4545
MODULE_EXPORT_PATH = "modules"
4646

benchmark/tt-xla/test_llms.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import numpy as np
1717

1818
# Defaults for all llms
19-
DEFAULT_OPTIMIZATION_LEVEL = 0
19+
DEFAULT_OPTIMIZATION_LEVEL = 1
2020
DEFAULT_MEMORY_LAYOUT_ANALYSIS = False
2121
DEFAULT_TRACE_ENABLED = False
2222
DEFAULT_BATCH_SIZE = 32
@@ -78,6 +78,8 @@ def test_llm(
7878
model_loader = create_model_loader(ModelLoaderModule, num_layers=num_layers, variant=variant)
7979
if num_layers is not None and model_loader is None:
8080
pytest.fail("num_layers override requested but ModelLoader does not support it.")
81+
assert optimization_level in [0, 1, 2], "optimization_level must be 0, 1, or 2"
82+
8183
model_info_name = model_loader.get_model_info(variant=variant).name
8284
display_name = resolve_display_name(request=request, fallback=model_info_name)
8385

@@ -164,20 +166,25 @@ def test_llm(
164166
json.dump(results, file, indent=2)
165167

166168

167-
def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, request=None, **kwargs):
169+
def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, batch_size=None, optimization_level=None, request=None, **kwargs):
168170
# Need to define arch since get_xla_device_arch() doesn't work when spmd is enabled
169171
arch = "wormhole_llmbox"
170172
mesh_config_fn = ModelLoaderModule.get_mesh_config
171173
shard_spec_fn = ModelLoaderModule.load_shard_spec
172-
174+
if batch_size is None:
175+
batch_size = DEFAULT_BATCH_SIZE
176+
if optimization_level is None:
177+
optimization_level = DEFAULT_OPTIMIZATION_LEVEL
178+
173179
test_llm(
174180
ModelLoaderModule=ModelLoaderModule,
175181
variant=variant,
176182
output_file=output_file,
177183
mesh_config_fn=mesh_config_fn,
178184
shard_spec_fn=shard_spec_fn,
179-
batch_size=32,
180-
input_sequence_length=128,
185+
batch_size=batch_size,
186+
input_sequence_length=DEFAULT_INPUT_SEQUENCE_LENGTH,
187+
optimization_level=optimization_level,
181188
arch=arch,
182189
num_layers=num_layers,
183190
request=request,
@@ -606,8 +613,18 @@ def test_llama_3_1_70b_tp(output_file, num_layers, request):
606613
) # https://github.com/tenstorrent/tt-xla/issues/2976
607614

608615

609-
def test_gpt_oss_20b_tp(output_file):
616+
def test_gpt_oss_20b_tp(output_file, num_layers, request):
617+
num_layers = 1
610618
from third_party.tt_forge_models.gpt_oss.pytorch.loader import ModelLoader, ModelVariant
611619

612620
variant = ModelVariant.GPT_OSS_20B
613-
test_llm_tp(ModelLoader, variant, output_file, required_pcc=0.86)
621+
test_llm_tp(
622+
ModelLoader,
623+
variant,
624+
output_file,
625+
num_layers=num_layers,
626+
batch_size=16, # https://github.com/tenstorrent/tt-xla/issues/3251
627+
optimization_level=0,
628+
request=request,
629+
required_pcc=0.86
630+
)

0 commit comments

Comments (0)