|
16 | 16 | import numpy as np |
17 | 17 |
|
18 | 18 | # Defaults for all llms |
19 | | -DEFAULT_OPTIMIZATION_LEVEL = 0 |
| 19 | +DEFAULT_OPTIMIZATION_LEVEL = 1 |
20 | 20 | DEFAULT_MEMORY_LAYOUT_ANALYSIS = False |
21 | 21 | DEFAULT_TRACE_ENABLED = False |
22 | 22 | DEFAULT_BATCH_SIZE = 32 |
@@ -78,6 +78,8 @@ def test_llm( |
78 | 78 | model_loader = create_model_loader(ModelLoaderModule, num_layers=num_layers, variant=variant) |
79 | 79 | if num_layers is not None and model_loader is None: |
80 | 80 | pytest.fail("num_layers override requested but ModelLoader does not support it.") |
| 81 | + assert optimization_level in [0, 1, 2], "optimization_level must be 0, 1, or 2" |
| 82 | + |
81 | 83 | model_info_name = model_loader.get_model_info(variant=variant).name |
82 | 84 | display_name = resolve_display_name(request=request, fallback=model_info_name) |
83 | 85 |
|
@@ -164,20 +166,25 @@ def test_llm( |
164 | 166 | json.dump(results, file, indent=2) |
165 | 167 |
|
166 | 168 |
|
167 | | -def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, request=None, **kwargs): |
| 169 | +def test_llm_tp(ModelLoaderModule, variant, output_file, num_layers=None, batch_size=None, optimization_level=None, request=None, **kwargs): |
168 | 170 | # Need to define arch since get_xla_device_arch() doesn't work when spmd is enabled |
169 | 171 | arch = "wormhole_llmbox" |
170 | 172 | mesh_config_fn = ModelLoaderModule.get_mesh_config |
171 | 173 | shard_spec_fn = ModelLoaderModule.load_shard_spec |
172 | | - |
| 174 | + if batch_size is None: |
| 175 | + batch_size = DEFAULT_BATCH_SIZE |
| 176 | + if optimization_level is None: |
| 177 | + optimization_level = DEFAULT_OPTIMIZATION_LEVEL |
| 178 | + |
173 | 179 | test_llm( |
174 | 180 | ModelLoaderModule=ModelLoaderModule, |
175 | 181 | variant=variant, |
176 | 182 | output_file=output_file, |
177 | 183 | mesh_config_fn=mesh_config_fn, |
178 | 184 | shard_spec_fn=shard_spec_fn, |
179 | | - batch_size=32, |
180 | | - input_sequence_length=128, |
| 185 | + batch_size=batch_size, |
| 186 | + input_sequence_length=DEFAULT_INPUT_SEQUENCE_LENGTH, |
| 187 | + optimization_level=optimization_level, |
181 | 188 | arch=arch, |
182 | 189 | num_layers=num_layers, |
183 | 190 | request=request, |
@@ -606,8 +613,18 @@ def test_llama_3_1_70b_tp(output_file, num_layers, request): |
606 | 613 | ) # https://github.com/tenstorrent/tt-xla/issues/2976 |
607 | 614 |
|
608 | 615 |
|
609 | | -def test_gpt_oss_20b_tp(output_file): |
| 616 | +def test_gpt_oss_20b_tp(output_file, num_layers, request): |
| 617 | +    num_layers = 1  # NOTE(review): hardcoded override shadows the num_layers fixture parameter added above — confirm this is an intentional single-layer workaround (cf. issue linked below) and document/remove accordingly
610 | 618 | from third_party.tt_forge_models.gpt_oss.pytorch.loader import ModelLoader, ModelVariant |
611 | 619 |
|
612 | 620 | variant = ModelVariant.GPT_OSS_20B |
613 | | - test_llm_tp(ModelLoader, variant, output_file, required_pcc=0.86) |
| 621 | + test_llm_tp( |
| 622 | + ModelLoader, |
| 623 | + variant, |
| 624 | + output_file, |
| 625 | + num_layers=num_layers, |
| 626 | + batch_size=16, # https://github.com/tenstorrent/tt-xla/issues/3251 |
| 627 | + optimization_level=0, |
| 628 | + request=request, |
| 629 | +        required_pcc=0.86,
| 630 | + ) |
0 commit comments