Merge branch 'a100' into lm_workload_priya

priyakasimbeg · priyakasimbeg · commit ee40a719ad56 · 2025-12-18T18:56:27.000Z
diff --git a/algoperf/pytorch_utils.py b/algoperf/pytorch_utils.py
@@ -20,6 +20,7 @@
 
 
 def pytorch_setup() -> Tuple[bool, int, torch.device, int]:
+  torch.set_float32_matmul_precision('high')
   use_pytorch_ddp = 'LOCAL_RANK' in os.environ
   rank = int(os.environ['LOCAL_RANK']) if use_pytorch_ddp else 0
   device = torch.device(f'cuda:{rank}' if torch.cuda.is_available() else 'cpu')
diff --git a/algoperf/workloads/criteo1tb/workload.py b/algoperf/workloads/criteo1tb/workload.py
@@ -95,11 +95,11 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 7_703  # ~2.1 hours.
+    return 8_915  # ~2.4 hours.
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 2 * 60  # 2 mins.
+    return 356  # approx 25 evals
 
   def _build_input_queue(
     self,
diff --git a/algoperf/workloads/fastmri/workload.py b/algoperf/workloads/fastmri/workload.py
@@ -95,11 +95,11 @@ def accelerations(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 4_430  # ~1.2 hours
+    return 2_745  # ~0.7 hours
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 80
+    return 110  # approx 25 evals
 
   @property
   def step_hint(self) -> int:
diff --git a/algoperf/workloads/imagenet_resnet/workload.py b/algoperf/workloads/imagenet_resnet/workload.py
@@ -103,11 +103,11 @@ def resize_size(self) -> int:
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 66_159  # ~18.4 hours
+    return 49_918  # ~13.8 hours
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 510  # 8.5 minutes.
+    return 1_996  # approx 25 evals
 
   def _build_dataset(
     self,
diff --git a/algoperf/workloads/imagenet_vit/workload.py b/algoperf/workloads/imagenet_vit/workload.py
@@ -88,11 +88,11 @@ def eval_batch_size(self) -> int:
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 69_768  # ~19.4 hours
+    return 64_292  # ~17.8 hours
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 7 * 60  # 7 mins.
+    return 2_571  # 7 mins.
 
   def _build_dataset(
     self,
diff --git a/algoperf/workloads/librispeech_conformer/workload.py b/algoperf/workloads/librispeech_conformer/workload.py
@@ -80,11 +80,11 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 58_015  # ~16.1 hours
+    return 43_680  # ~16.1 hours
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 24 * 60
+    return 1747  # approx 25 evals
 
   @property
   def step_hint(self) -> int:
diff --git a/algoperf/workloads/librispeech_deepspeech/librispeech_jax/workload.py b/algoperf/workloads/librispeech_deepspeech/librispeech_jax/workload.py
@@ -100,7 +100,11 @@ def step_hint(self) -> int:
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 44_405  # ~12.3 hours
+    return 36_949  # ~12.3 hours
+
+  @property
+  def eval_period_time_sec(self) -> int:
+    return 1447  # approx 25 evals
 
   @property
   def use_tanh(self) -> bool:
diff --git a/algoperf/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py b/algoperf/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py
@@ -96,7 +96,11 @@ def step_hint(self) -> int:
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 44_405  # ~12.3 hours
+    return 36_949  # 10.3 hours
+
+  @property
+  def eval_period_time_sec(self) -> int:
+    return 1447  # approx 25 evals
 
   @property
   def use_tanh(self) -> bool:
diff --git a/algoperf/workloads/ogbg/workload.py b/algoperf/workloads/ogbg/workload.py
@@ -88,11 +88,11 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 12_011  # ~3.3 hours
+    return 11_303  # ~3.1 hours
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 4 * 60
+    return 452 # approx 25 evals
 
   def _build_input_queue(
     self,
diff --git a/algoperf/workloads/wmt/workload.py b/algoperf/workloads/wmt/workload.py
@@ -89,11 +89,11 @@ def train_stddev(self):
 
   @property
   def max_allowed_runtime_sec(self) -> int:
-    return 43_336  # ~12.0 hours
+    return 16_114  # ~12.0 hours
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 14 * 60
+    return 644
 
   @property
   def step_hint(self) -> int:
diff --git a/docker/build_docker_images.sh b/docker/build_docker_images.sh
@@ -27,7 +27,7 @@ then
 GIT_BRANCH='main' # Set default argument
 fi 
 
-FRAMEWORKS=( "jax" "pythorch" "both" )
+FRAMEWORKS=( "jax" "pytorch")
 
 if [[ -n "$FRAMEWORK" ]];
 then
diff --git a/pyproject.toml b/pyproject.toml
@@ -108,16 +108,15 @@ jax_cpu = [
 jax_gpu = [
   "jax[cuda12]==0.7.0",
   "algoperf[jax_core_deps]",
-  "nvidia-cudnn-cu12==9.10.2.21", # temporary workaround for https://github.com/jax-ml/jax/issues/30663
 ]
 
 pytorch_cpu = [
   "torch==2.5.1", 
   "torchvision==0.20.1"
 ]
 pytorch_gpu = [
-  "torch==2.5.1",
-  "torchvision==0.20.1",
+  "torch==2.9.0",
+  "torchvision==0.24.0",
 ] # Note: omit the cuda suffix and installing from the appropriate wheel will result in using locally installed CUDA.
 
 ###############################################################################
diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py
@@ -71,6 +71,7 @@
   'wer',
   'l1_loss',
   'loss',
+  'ppl'
 ]
 
 MAX_EVAL_METRICS = ['mean_average_precision', 'ssim', 'accuracy', 'bleu']
diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py
@@ -123,6 +123,8 @@ def get_summary_df(workload, workload_df, include_test_split=False):
     workload_df['accumulated_submission_time'] / workload_df['global_step']
   ).iloc[-1][-1]
 
+  summary_df['step_hint'] = scoring_utils.get_workload_stephint(workload)
+
   # test metrics
   if include_test_split:
     test_metric, test_target = scoring_utils.get_workload_metrics_and_targets(
@@ -157,7 +159,7 @@ def get_summary_df(workload, workload_df, include_test_split=False):
   return summary_df
 
 
-def get_submission_summary(df, include_test_split=True):
+def get_submission_summary(df, include_test_split=False):
   """Summarizes the submission results into metric and time tables
   organized by workload.
   """
diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py
@@ -240,3 +240,23 @@ def get_workload_metrics_and_targets(workload, split='validation'):
     metric = f'test/{metric_name}'
     target = workload_obj.test_target_value
   return metric, target
+
+
+def get_workload_stephint(workload):
+  workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1)
+  framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2)
+  workload_metadata = copy.copy(WORKLOADS[workload_name])
+
+  # Extend path according to framework.
+  workload_metadata['workload_path'] = os.path.join(
+    BASE_WORKLOADS_DIR,
+    workload_metadata['workload_path'] + f'{framework}',
+    'workload.py',
+  )
+  workload_init_kwargs = {}
+  workload_obj = workloads_registry.import_workload(
+    workload_path=workload_metadata['workload_path'],
+    workload_class_name=workload_metadata['workload_class_name'],
+    workload_init_kwargs=workload_init_kwargs,
+  )
+  return workload_obj.step_hint
diff --git a/scoring/utils/run_workloads.py b/scoring/utils/run_workloads.py
@@ -241,7 +241,8 @@ def main(_):
 
     # For each runnable workload check if there are any containers running and if not launch next container command
     for workload in workloads:
-      run_key = prng.fold_in(rng_subkey, hash(workload))
+      workload_foldin = hash(workload) % 9
+      run_key = prng.fold_in(rng_subkey, workload_foldin)
       run_seed = run_key[0]  # arbitrary
       base_workload_name = get_base_workload_name(workload)
       wait_until_container_not_running()
@@ -270,6 +271,7 @@ def main(_):
         'docker run -t -d -v /home/kasimbeg/data/:/data/ '
         '-v /home/kasimbeg/experiment_runs/:/experiment_runs '
         '-v /home/kasimbeg/experiment_runs/logs:/logs '
+        '-v /home/kasimbeg/algorithmic-efficiency:/algorithmic-efficiency '
         f'{mount_repo_flag}'
         '--gpus all --ipc=host '
         f'{docker_image_url} '
diff --git a/scoring/utils/workload_metadata_external_tuning.json b/scoring/utils/workload_metadata_external_tuning.json
@@ -24,7 +24,7 @@
     "dataset": "librispeech"
   },
   "criteo1tb": {
-    "max_steps": 10666,
+    "max_steps": 15666,
     "dataset": "criteo1tb"
   },
   "librispeech_conformer": {