Skip to content

Commit cdb4a5f

Browse files
committed
Add batch_size parameterization and lower to 16, restore default optimization level and prompt.
1 parent 8e47a24 commit cdb4a5f

File tree

2 files changed

+32
-7
lines changed

2 files changed

+32
-7
lines changed

benchmark/tt-xla/llm_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
MIN_STEPS = 16
4141

4242
# Default input prompt
43-
DEFAULT_INPUT_PROMPT = "Explain quantum mechanics."
43+
DEFAULT_INPUT_PROMPT = "Here is an exhaustive list of the best practices for writing clean code:"
4444

4545
MODULE_EXPORT_PATH = "modules"
4646

benchmark/tt-xla/test_llms.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import numpy as np
1717

1818
# Defaults for all llms
19-
DEFAULT_OPTIMIZATION_LEVEL = 0
19+
DEFAULT_OPTIMIZATION_LEVEL = 1
2020
DEFAULT_MEMORY_LAYOUT_ANALYSIS = False
2121
DEFAULT_TRACE_ENABLED = False
2222
DEFAULT_BATCH_SIZE = 32
@@ -78,6 +78,8 @@ def test_llm(
7878
model_loader = create_model_loader(ModelLoaderModule, num_layers=num_layers, variant=variant)
7979
if num_layers is not None and model_loader is None:
8080
pytest.fail("num_layers override requested but ModelLoader does not support it.")
81+
assert optimization_level in [0, 1, 2], "optimization_level must be 0, 1, or 2"
82+
8183
model_info_name = model_loader.get_model_info(variant=variant).name
8284
display_name = resolve_display_name(request=request, fallback=model_info_name)
8385

@@ -164,20 +166,34 @@ def test_llm(
164166
json.dump(results, file, indent=2)
165167

166168

167-
def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, request=None, **kwargs):
169+
def test_llm_tp(
170+
ModelLoaderModule,
171+
variant,
172+
output_file,
173+
num_layers=None,
174+
batch_size=None,
175+
optimization_level=None,
176+
request=None,
177+
**kwargs,
178+
):
168179
# Need to define arch since get_xla_device_arch() doesn't work when spmd is enabled
169180
arch = "wormhole_llmbox"
170181
mesh_config_fn = ModelLoaderModule.get_mesh_config
171182
shard_spec_fn = ModelLoaderModule.load_shard_spec
183+
if batch_size is None:
184+
batch_size = DEFAULT_BATCH_SIZE
185+
if optimization_level is None:
186+
optimization_level = DEFAULT_OPTIMIZATION_LEVEL
172187

173188
test_llm(
174189
ModelLoaderModule=ModelLoaderModule,
175190
variant=variant,
176191
output_file=output_file,
177192
mesh_config_fn=mesh_config_fn,
178193
shard_spec_fn=shard_spec_fn,
179-
batch_size=32,
180-
input_sequence_length=128,
194+
batch_size=batch_size,
195+
input_sequence_length=DEFAULT_INPUT_SEQUENCE_LENGTH,
196+
optimization_level=optimization_level,
181197
arch=arch,
182198
num_layers=num_layers,
183199
request=request,
@@ -606,8 +622,17 @@ def test_llama_3_1_70b_tp(output_file, num_layers, request):
606622
) # https://github.com/tenstorrent/tt-xla/issues/2976
607623

608624

609-
def test_gpt_oss_20b_tp(output_file):
625+
def test_gpt_oss_20b_tp(output_file, num_layers, request):
610626
from third_party.tt_forge_models.gpt_oss.pytorch.loader import ModelLoader, ModelVariant
611627

612628
variant = ModelVariant.GPT_OSS_20B
613-
test_llm_tp(ModelLoader, variant, output_file, required_pcc=0.86)
629+
test_llm_tp(
630+
ModelLoader,
631+
variant,
632+
output_file,
633+
num_layers=num_layers,
634+
batch_size=16, # https://github.com/tenstorrent/tt-xla/issues/3251
635+
optimization_level=0,
636+
request=request,
637+
required_pcc=0.86,
638+
)

0 commit comments

Comments
 (0)