Skip to content

Commit 12d0c86

Browse files
committed
Add batch_size parameterization and lower to 16, restore default optimization level and prompt.
1 parent ea25095 commit 12d0c86

File tree

1 file changed

+16
-7
lines changed

1 file changed

+16
-7
lines changed

benchmark/tt-xla/test_llms.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,16 @@ def test_llm(
166166
json.dump(results, file, indent=2)
167167

168168

169-
def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, batch_size=None, optimization_level=None, request=None, **kwargs):
169+
def test_llm_tp(
170+
ModelLoaderModule,
171+
variant,
172+
output_file,
173+
num_layers=None,
174+
batch_size=None,
175+
optimization_level=None,
176+
request=None,
177+
**kwargs,
178+
):
170179
# Need to define arch since get_xla_device_arch() doesn't work when spmd is enabled
171180
arch = "wormhole_llmbox"
172181
mesh_config_fn = ModelLoaderModule.get_mesh_config
@@ -175,7 +184,7 @@ def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, batch_
175184
batch_size = DEFAULT_BATCH_SIZE
176185
if optimization_level is None:
177186
optimization_level = DEFAULT_OPTIMIZATION_LEVEL
178-
187+
179188
test_llm(
180189
ModelLoaderModule=ModelLoaderModule,
181190
variant=variant,
@@ -619,12 +628,12 @@ def test_gpt_oss_20b_tp(output_file, num_layers, request):
619628

620629
variant = ModelVariant.GPT_OSS_20B
621630
test_llm_tp(
622-
ModelLoader,
623-
variant,
624-
output_file,
631+
ModelLoader,
632+
variant,
633+
output_file,
625634
num_layers=num_layers,
626-
batch_size=16, # https://github.com/tenstorrent/tt-xla/issues/3251
635+
batch_size=16, # https://github.com/tenstorrent/tt-xla/issues/3251
627636
optimization_level=0,
628637
request=request,
629-
required_pcc=0.86
638+
required_pcc=0.86,
630639
)

0 commit comments

Comments (0)