2 files changed: +6 -6 lines changed
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ class PyTorchMegatronLM(rfm.RunOnlyRegressionTest):
4747 checkpoint_steps = variable (int , value = 10 )
4848
4949 hf_home = variable (
50- str , value = str (pathlib .Path . home ( ) / '.cache' / 'huggingface' )
50+ str , value = str (pathlib .Path ( os . environ [ 'SCRATCH' ] ) / '.cache' / 'huggingface' )
5151 )
5252
5353 # The number of training steps
@@ -505,7 +505,7 @@ def throughput_per_gpu(self):
505505 class PyTorchMegatronLM_CE (PyTorchMegatronLM , ContainerEngineMixin ):
506506 valid_systems = ['+nvgpu +ce' ]
507507 valid_prog_environs = ['builtin' ]
508- maintainers = ['ml-team ' ]
508+ maintainers = ['VCUE' , 'SSA ' ]
509509 container_image = 'docker://jfrog.svc.cscs.ch#reframe-oci/pytorch:25.01-py3_nvrtc-12.9'
510510
511511 @run_after ('setup' )
@@ -543,7 +543,7 @@ def set_container_mounts(self):
543543 class PyTorchMegatronLM_UENV (PyTorchMegatronLM ):
544544 valid_systems = ['+nvgpu +uenv' ]
545545 valid_prog_environs = ['+pytorch' ]
546- maintainers = ['ml-team ' ]
546+ maintainers = ['VCUE' , 'SSA ' ]
547547
548548 @run_after ('setup' )
549549 def patch_numpy (self ):
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class PyTorchMegatronLM_AMD(rfm.RunOnlyRegressionTest):
4646 batch_size_per_node = variable (int , value = 256 )
4747 checkpoint_steps = variable (int , value = 10 )
4848 hf_home = variable (
49- str , value = str (pathlib .Path . home ( ) / '.cache' / 'huggingface' )
49+ str , value = str (pathlib .Path ( os . environ [ 'SCRATCH' ] ) / '.cache' / 'huggingface' )
5050 )
5151 training_steps = variable (int , value = 10 )
5252 wandb_logging = variable (bool , value = False )
@@ -88,7 +88,7 @@ class PyTorchMegatronLM_AMD(rfm.RunOnlyRegressionTest):
8888 sourcesdir = None
8989 executable = 'bash'
9090
91- maintainers = ['VCUE' ]
91+ maintainers = ['VCUE' , 'SSA' ]
9292 tags = {'ml' , 'bencher' }
9393
9494 @run_after ('setup' )
@@ -383,7 +383,7 @@ def throughput_per_gpu(self):
383383 class PyTorchMegatronLM_AMD_CE (PyTorchMegatronLM_AMD , ContainerEngineMixin ):
384384 valid_systems = ['+amdgpu +ce' ]
385385 valid_prog_environs = ['builtin' ]
386- maintainers = ['ml-team ' ]
386+ maintainers = ['VCUE' , 'SSA ' ]
387387 container_image = 'rocm/megatron-lm:v25.6_py312'
388388
389389 @run_after ('setup' )
You can’t perform that action at this time.
0 commit comments