Commit cbeb594

Move datasets/ to algoperf/datasets (otherwise it raises an import error, because we now import the `datasets` library for the LM workload)
1 parent 3608624 commit cbeb594
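
For context on the failure this rename avoids: with the repository root on `sys.path`, a top-level `datasets/` directory shadows the PyPI `datasets` package that the LM workload now imports, so `import datasets` resolves to the local code instead of the library. A minimal sketch of how to check which module wins (the paths in the comments are illustrative, not taken from this repo):

```python
import importlib.util

# Ask the import system what `import datasets` would actually load.
spec = importlib.util.find_spec('datasets')
print(spec.origin if spec else 'datasets not found')
# Before this commit: .../algorithmic-efficiency/datasets/... (local, shadowing)
# After this commit:  .../site-packages/datasets/__init__.py (the PyPI library)
```

Nesting the directory under the `algoperf` package removes it from the top-level module namespace, so `algoperf.datasets` and the external `datasets` library can coexist.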

File tree

13 files changed (+143 -93 lines)

Lines changed: 7 additions & 7 deletions

@@ -24,7 +24,7 @@ This document provides instructions on downloading and preparing all datasets ut
 *TL;DR to download and prepare a dataset, run `dataset_setup.py`:*
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir=~/data \
 --<dataset_name>
 --<optional_flags>
@@ -88,7 +88,7 @@ By default, a user will be prompted before any files are deleted. If you do not
 From `algorithmic-efficiency` run:
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir $DATA_DIR \
 --ogbg
 ```
@@ -124,7 +124,7 @@ In total, it should contain 13 files (via `find -type f | wc -l`) for a total of
 From `algorithmic-efficiency` run:
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir $DATA_DIR \
 --wmt
 ```
@@ -194,7 +194,7 @@ you should get an email containing the URLS for "knee_singlecoil_train",
 "knee_singlecoil_val" and "knee_singlecoil_test".
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir $DATA_DIR \
 --fastmri \
 --fastmri_knee_singlecoil_train_url '<knee_singlecoil_train_url>' \
@@ -235,7 +235,7 @@ The ImageNet data pipeline differs between the PyTorch and JAX workloads.
 Therefore, you will have to specify the framework (either `pytorch` or `jax`) through the framework flag.
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir $DATA_DIR \
 --imagenet \
 --temp_dir $DATA_DIR/tmp \
@@ -349,7 +349,7 @@ In total, it should contain 20 files (via `find -type f | wc -l`) for a total of
 ### Criteo1TB
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir $DATA_DIR \
 --temp_dir $DATA_DIR/tmp \
 --criteo1tb
@@ -378,7 +378,7 @@ In total, it should contain 885 files (via `find -type f | wc -l`) for a total o
 To download, train a tokenizer and preprocess the librispeech dataset:
 
 ```bash
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir $DATA_DIR \
 --temp_dir $DATA_DIR/tmp \
 --librispeech

Lines changed: 3 additions & 3 deletions

@@ -56,7 +56,7 @@
 
 Example command:
 
-python3 datasets/dataset_setup.py \
+python3 algoperf/datasets/dataset_setup.py \
 --data_dir=~/data \
 --temp_dir=/tmp/mlcommons_data
 --imagenet \
@@ -73,8 +73,8 @@
 
 from algoperf.workloads.wmt import tokenizer
 from algoperf.workloads.wmt.input_pipeline import normalize_feature_names
-from datasets import librispeech_preprocess
-from datasets import librispeech_tokenizer
+from algoperf.datasets import librispeech_preprocess
+from algoperf.datasets import librispeech_tokenizer
 
 import functools
 import os

datasets/librispeech_preprocess.py renamed to algoperf/datasets/librispeech_preprocess.py

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 from absl import logging
 from pydub import AudioSegment
 
-from datasets import librispeech_tokenizer
+from algoperf.datasets import librispeech_tokenizer
 
 gfile = tf.io.gfile
 copy = tf.io.gfile.copy
File renamed without changes.

algoperf/workloads/imagenet_resnet/imagenet_pytorch/workload.py

Lines changed: 3 additions & 2 deletions

@@ -94,7 +94,6 @@ def _build_dataset(
       batch_size = global_batch_size // N_GPUS
     else:
       batch_size = global_batch_size
-
 
     ds = input_pipeline.create_split(
       split,
@@ -107,7 +106,9 @@ def _build_dataset(
       mean_rgb=self.train_mean,
       stddev_rgb=self.train_stddev,
       cache=not train if cache is None else cache,
-      repeat_final_dataset=repeat_final_dataset if repeat_final_dataset is not None else train,
+      repeat_final_dataset=repeat_final_dataset
+      if repeat_final_dataset is not None
+      else train,
       aspect_ratio_range=self.aspect_ratio_range,
       area_range=self.scale_ratio_range,
       use_mixup=use_mixup,

algoperf/workloads/imagenet_resnet/input_pipeline.py

Lines changed: 6 additions & 2 deletions

@@ -396,9 +396,13 @@ def transpose_batch(batch):
       batch['inputs'] = tf.transpose(batch['inputs'], [0, 3, 1, 2])
       return batch
 
-    ds = ds.map(transpose_batch, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+    ds = ds.map(
+      transpose_batch, num_parallel_calls=tf.data.experimental.AUTOTUNE
+    )
   elif image_format != 'NHWC':
-    raise ValueError(f"image_format must be 'NHWC' or 'NCHW', got {image_format}")
+    raise ValueError(
+      f"image_format must be 'NHWC' or 'NCHW', got {image_format}"
+    )
 
   ds = ds.prefetch(10)

algorithms/baselines/external_tuning/pytorch_nadamw_full_budget.py

Lines changed: 1 addition & 3 deletions

@@ -5,7 +5,6 @@
 
 import torch
 import torch.distributed.nn as dist_nn
-from absl import logging
 from torch import Tensor
 from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR
 
@@ -300,8 +299,7 @@ def update_params(
   optimizer_state['optimizer'].step()
   optimizer_state['scheduler'].step()
 
-  # Log training metrics - loss, grad_norm, batch_size.
-
+  # Log training metrics - loss, grad_norm.
   if global_step % 100 == 0 and workload.metrics_logger is not None:
     with torch.no_grad():
       parameters = [p for p in current_model.parameters() if p.grad is not None]

debug/benchmark_dataloader_jax.py

Lines changed: 11 additions & 11 deletions

@@ -18,14 +18,14 @@
 
 
 def main():
-  data_dir = '/home/ak4605/algoperf-data/imagenet/jax'
+  data_dir = '/home/ak4605/data/imagenet/jax'
   global_batch_size = 1024
   num_batches = 100
 
   rng = jax.random.PRNGKey(0)
   ds_builder = tfds.builder('imagenet2012:5.1.0', data_dir=data_dir)
 
-  print(f'Creating JAX ImageNet dataloader...')
+  print('Creating JAX ImageNet dataloader...')
   print(f'Batch size: {global_batch_size}')
   print(f'Num devices: {jax.local_device_count()}')
 
@@ -56,7 +56,7 @@ def main():
     start = time.perf_counter()
     batch = next(ds_iter)
     end = time.perf_counter()
-    print(f' Warmup batch {i+1}/5: {(end - start)*1000:.2f}ms')
+    print(f' Warmup batch {i + 1}/5: {(end - start) * 1000:.2f}ms')
 
   print(f"Batch 'inputs' shape: {batch['inputs'].shape}")
 
@@ -71,19 +71,19 @@ def main():
     end = time.perf_counter()
     times.append(end - start)
     if (i + 1) % 20 == 0:
-      print(f' Batch {i+1}/{num_batches}: {times[-1]*1000:.2f}ms')
+      print(f' Batch {i + 1}/{num_batches}: {times[-1] * 1000:.2f}ms')
 
   times = np.array(times)
-  print(f'\n=== JAX DataLoader Results ===')
-  print(f'Mean time per batch: {times.mean()*1000:.2f}ms')
-  print(f'Std time per batch: {times.std()*1000:.2f}ms')
-  print(f'Min time per batch: {times.min()*1000:.2f}ms')
-  print(f'Max time per batch: {times.max()*1000:.2f}ms')
+  print('\n=== JAX DataLoader Results ===')
+  print(f'Mean time per batch: {times.mean() * 1000:.2f}ms')
+  print(f'Std time per batch: {times.std() * 1000:.2f}ms')
+  print(f'Min time per batch: {times.min() * 1000:.2f}ms')
+  print(f'Max time per batch: {times.max() * 1000:.2f}ms')
   print(f'Throughput: {global_batch_size / times.mean():.2f} images/sec')
 
   # Print machine-readable results for the fish script
-  print(f'\n=== RESULTS ===')
-  print(f'MEAN_MS={times.mean()*1000:.2f}')
+  print('\n=== RESULTS ===')
+  print(f'MEAN_MS={times.mean() * 1000:.2f}')
   print(f'THROUGHPUT={global_batch_size / times.mean():.2f}')
 
debug/benchmark_dataloader_pytorch.py

Lines changed: 20 additions & 17 deletions

@@ -5,13 +5,14 @@
 import jax
 import numpy as np
 import tensorflow as tf
+
 tf.config.set_visible_devices([], 'GPU')  # Disable TF GPU usage
-import tensorflow_datasets as tfds
-import torch
-import torch.distributed as dist
+import tensorflow_datasets as tfds  # noqa: E402
+import torch  # noqa: E402
+import torch.distributed as dist  # noqa: E402
 
-from algoperf import pytorch_utils
-from algoperf.workloads.imagenet_resnet import input_pipeline
+from algoperf import pytorch_utils  # noqa: E402
+from algoperf.workloads.imagenet_resnet import input_pipeline  # noqa: E402
 
 # ImageNet constants (same as workload)
 TRAIN_MEAN = (0.485 * 255, 0.456 * 255, 0.406 * 255)
@@ -30,12 +31,12 @@ def main():
   torch.cuda.set_device(RANK)
   dist.init_process_group('nccl')
 
-  data_dir = '/home/ak4605/algoperf-data/imagenet/jax'
+  data_dir = '/home/ak4605/data/imagenet/jax'
   global_batch_size = 1024
   num_batches = 100
 
   if RANK == 0:
-    print(f'Creating PyTorch ImageNet dataloader (shared TFDS pipeline)...')
+    print('Creating PyTorch ImageNet dataloader (shared TFDS pipeline)...')
     print(f'Batch size: {global_batch_size}')
     print(f'Num GPUs: {N_GPUS}')
     print(f'USE_PYTORCH_DDP: {USE_PYTORCH_DDP}')
@@ -77,7 +78,9 @@ def main():
   def get_batch():
     batch = next(ds_iter)
     inputs = torch.from_numpy(batch['inputs'].numpy()).to(DEVICE)
-    targets = torch.from_numpy(batch['targets'].numpy()).to(DEVICE, dtype=torch.long)
+    targets = torch.from_numpy(batch['targets'].numpy()).to(
+      DEVICE, dtype=torch.long
+    )
     return {'inputs': inputs, 'targets': targets}
 
   # Warmup
@@ -88,7 +91,7 @@ def get_batch():
     batch = get_batch()
     end = time.perf_counter()
     if RANK == 0:
-      print(f' Warmup batch {i+1}/5: {(end - start)*1000:.2f}ms')
+      print(f' Warmup batch {i + 1}/5: {(end - start) * 1000:.2f}ms')
 
   if RANK == 0:
     print(f"Batch 'inputs' shape: {batch['inputs'].shape}")
@@ -109,20 +112,20 @@ def get_batch():
     end = time.perf_counter()
     times.append(end - start)
     if RANK == 0 and (i + 1) % 20 == 0:
-      print(f' Batch {i+1}/{num_batches}: {times[-1]*1000:.2f}ms')
+      print(f' Batch {i + 1}/{num_batches}: {times[-1] * 1000:.2f}ms')
 
   times = np.array(times)
   if RANK == 0:
-    print(f'\n=== PyTorch DataLoader Results ===')
-    print(f'Mean time per batch: {times.mean()*1000:.2f}ms')
-    print(f'Std time per batch: {times.std()*1000:.2f}ms')
-    print(f'Min time per batch: {times.min()*1000:.2f}ms')
-    print(f'Max time per batch: {times.max()*1000:.2f}ms')
+    print('\n=== PyTorch DataLoader Results ===')
+    print(f'Mean time per batch: {times.mean() * 1000:.2f}ms')
+    print(f'Std time per batch: {times.std() * 1000:.2f}ms')
+    print(f'Min time per batch: {times.min() * 1000:.2f}ms')
+    print(f'Max time per batch: {times.max() * 1000:.2f}ms')
     print(f'Throughput: {global_batch_size / times.mean():.2f} images/sec')
 
     # Print machine-readable results for the fish script
-    print(f'\n=== RESULTS ===')
-    print(f'MEAN_MS={times.mean()*1000:.2f}')
+    print('\n=== RESULTS ===')
+    print(f'MEAN_MS={times.mean() * 1000:.2f}')
     print(f'THROUGHPUT={global_batch_size / times.mean():.2f}')
 
   if USE_PYTORCH_DDP:
