|
16 | 16 | import numpy as np |
17 | 17 |
|
18 | 18 | # Defaults for all llms |
19 | | -DEFAULT_OPTIMIZATION_LEVEL = 0 |
| 19 | +DEFAULT_OPTIMIZATION_LEVEL = 1 |
20 | 20 | DEFAULT_MEMORY_LAYOUT_ANALYSIS = False |
21 | 21 | DEFAULT_TRACE_ENABLED = False |
22 | 22 | DEFAULT_BATCH_SIZE = 32 |
@@ -78,6 +78,8 @@ def test_llm( |
78 | 78 | model_loader = create_model_loader(ModelLoaderModule, num_layers=num_layers, variant=variant) |
79 | 79 | if num_layers is not None and model_loader is None: |
80 | 80 | pytest.fail("num_layers override requested but ModelLoader does not support it.") |
| 81 | + assert optimization_level in [0, 1, 2], "optimization_level must be 0, 1, or 2" |
| 82 | + |
81 | 83 | model_info_name = model_loader.get_model_info(variant=variant).name |
82 | 84 | display_name = resolve_display_name(request=request, fallback=model_info_name) |
83 | 85 |
|
@@ -164,20 +166,34 @@ def test_llm( |
164 | 166 | json.dump(results, file, indent=2) |
165 | 167 |
|
166 | 168 |
|
167 | | -def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, request=None, **kwargs): |
| 169 | +def test_llm_tp( |
| 170 | + ModelLoaderModule, |
| 171 | + variant, |
| 172 | + output_file, |
| 173 | + num_layers=None, |
| 174 | + batch_size=None, |
| 175 | + optimization_level=None, |
| 176 | + request=None, |
| 177 | + **kwargs, |
| 178 | +): |
168 | 179 | # Need to define arch since get_xla_device_arch() doesn't work when spmd is enabled |
169 | 180 | arch = "wormhole_llmbox" |
170 | 181 | mesh_config_fn = ModelLoaderModule.get_mesh_config |
171 | 182 | shard_spec_fn = ModelLoaderModule.load_shard_spec |
| 183 | + if batch_size is None: |
| 184 | + batch_size = DEFAULT_BATCH_SIZE |
| 185 | + if optimization_level is None: |
| 186 | + optimization_level = DEFAULT_OPTIMIZATION_LEVEL |
172 | 187 |
|
173 | 188 | test_llm( |
174 | 189 | ModelLoaderModule=ModelLoaderModule, |
175 | 190 | variant=variant, |
176 | 191 | output_file=output_file, |
177 | 192 | mesh_config_fn=mesh_config_fn, |
178 | 193 | shard_spec_fn=shard_spec_fn, |
179 | | - batch_size=32, |
180 | | - input_sequence_length=128, |
| 194 | + batch_size=batch_size, |
| 195 | + input_sequence_length=DEFAULT_INPUT_SEQUENCE_LENGTH, |
| 196 | + optimization_level=optimization_level, |
181 | 197 | arch=arch, |
182 | 198 | num_layers=num_layers, |
183 | 199 | request=request, |
@@ -606,8 +622,17 @@ def test_llama_3_1_70b_tp(output_file, num_layers, request): |
606 | 622 | ) # https://github.com/tenstorrent/tt-xla/issues/2976 |
607 | 623 |
|
608 | 624 |
|
609 | | -def test_gpt_oss_20b_tp(output_file): |
| 625 | +def test_gpt_oss_20b_tp(output_file, num_layers, request): |
610 | 626 | from third_party.tt_forge_models.gpt_oss.pytorch.loader import ModelLoader, ModelVariant |
611 | 627 |
|
612 | 628 | variant = ModelVariant.GPT_OSS_20B |
613 | | - test_llm_tp(ModelLoader, variant, output_file, required_pcc=0.86) |
| 629 | + test_llm_tp( |
| 630 | + ModelLoader, |
| 631 | + variant, |
| 632 | + output_file, |
| 633 | + num_layers=num_layers, |
| 634 | + batch_size=16, # https://github.com/tenstorrent/tt-xla/issues/3251 |
| 635 | + optimization_level=0, |
| 636 | + request=request, |
| 637 | + required_pcc=0.86, |
| 638 | + ) |