|
18 | 18 | class PyTorchMegatronLM_AMD(rfm.RunOnlyRegressionTest): |
19 | 19 | num_tasks_per_node = 1 |
20 | 20 | default_num_nodes = variable(int, type(None), value=None) |
21 | | - time_limit = '30m' |
| 21 | + time_limit = '50m' |
22 | 22 | megatron_repo = variable( |
23 | 23 | str, value='https://github.com/ROCm/Megatron-LM' |
24 | 24 | ) |
@@ -379,37 +379,15 @@ def throughput_per_gpu(self): |
379 | 379 | )) |
380 | 380 |
|
381 | 381 |
|
382 | | -class pytorch_image_import(rfm.RunOnlyRegressionTest): |
383 | | - sourcesdir = None |
384 | | - image = variable( |
385 | | - str, |
386 | | - value=('docker://rocm/megatron-lm:v25.6_py312') |
387 | | - ) |
388 | | - archive_name = 'pytorch.sqsh' |
389 | | - executable = 'enroot' |
390 | | - valid_systems = ['+ce'] |
391 | | - valid_prog_environs = ['builtin'] |
392 | | - |
393 | | - @run_before('run') |
394 | | - def set_executable_opts(self): |
395 | | - self.executable_opts = ['import', '-o', self.archive_name, self.image] |
396 | | - |
397 | | - @sanity_function |
398 | | - def assert_image_imported(self): |
399 | | - return sn.path_exists(os.path.join(self.stagedir, self.archive_name)) |
400 | | - |
401 | | - |
402 | 382 | @rfm.simple_test |
403 | 383 | class PyTorchMegatronLM_AMD_CE(PyTorchMegatronLM_AMD, ContainerEngineMixin): |
404 | 384 | valid_systems = ['+amdgpu +ce'] |
405 | 385 | valid_prog_environs = ['builtin'] |
406 | 386 | maintainers = ['ml-team'] |
407 | | - pytorch_image = fixture(pytorch_image_import, scope='session') |
| 387 | + container_image = 'rocm/megatron-lm:v25.6_py312' |
408 | 388 |
|
409 | 389 | @run_after('setup') |
410 | 390 | def set_container_config(self): |
411 | | - self.container_image = os.path.join(self.pytorch_image.stagedir, |
412 | | - self.pytorch_image.archive_name) |
413 | 391 | self.container_env_table = { |
414 | 392 | 'annotations.com.hooks': { |
415 | 393 | 'aws_ofi_nccl.enabled': 'true', |
|
0 commit comments