Skip to content

Commit ee40a71

Browse files
committed
Merge branch 'a100' into lm_workload_priya
2 parents 617e1a3 + c9899cf commit ee40a71

File tree

17 files changed

+56
-23
lines changed

17 files changed

+56
-23
lines changed

algoperf/pytorch_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121

2222
def pytorch_setup() -> Tuple[bool, int, torch.device, int]:
23+
torch.set_float32_matmul_precision('high')
2324
use_pytorch_ddp = 'LOCAL_RANK' in os.environ
2425
rank = int(os.environ['LOCAL_RANK']) if use_pytorch_ddp else 0
2526
device = torch.device(f'cuda:{rank}' if torch.cuda.is_available() else 'cpu')

algoperf/workloads/criteo1tb/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ def train_stddev(self):
9595

9696
@property
9797
def max_allowed_runtime_sec(self) -> int:
98-
return 7_703 # ~2.1 hours.
98+
return 8_915 # ~2.4 hours.
9999

100100
@property
101101
def eval_period_time_sec(self) -> int:
102-
return 2 * 60 # 2 mins.
102+
return 356 # approx 25 evals
103103

104104
def _build_input_queue(
105105
self,

algoperf/workloads/fastmri/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ def accelerations(self):
9595

9696
@property
9797
def max_allowed_runtime_sec(self) -> int:
98-
return 4_430 # ~1.2 hours
98+
return 2_745 # ~0.7 hours
9999

100100
@property
101101
def eval_period_time_sec(self) -> int:
102-
return 80
102+
return 110 # approx 25 evals
103103

104104
@property
105105
def step_hint(self) -> int:

algoperf/workloads/imagenet_resnet/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,11 @@ def resize_size(self) -> int:
103103

104104
@property
105105
def max_allowed_runtime_sec(self) -> int:
106-
return 66_159 # ~18.4 hours
106+
return 49_918 # ~13.8 hours
107107

108108
@property
109109
def eval_period_time_sec(self) -> int:
110-
return 510 # 8.5 minutes.
110+
return 1_996 # approx 25 evals
111111

112112
def _build_dataset(
113113
self,

algoperf/workloads/imagenet_vit/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def eval_batch_size(self) -> int:
8888

8989
@property
9090
def max_allowed_runtime_sec(self) -> int:
91-
return 69_768 # ~19.4 hours
91+
return 64_292 # ~17.8 hours
9292

9393
@property
9494
def eval_period_time_sec(self) -> int:
95-
return 7 * 60 # 7 mins.
95+
return 2_571 # approx 25 evals (~43 mins).
9696

9797
def _build_dataset(
9898
self,

algoperf/workloads/librispeech_conformer/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ def train_stddev(self):
8080

8181
@property
8282
def max_allowed_runtime_sec(self) -> int:
83-
return 58_015 # ~16.1 hours
83+
return 43_680 # ~12.1 hours
8484

8585
@property
8686
def eval_period_time_sec(self) -> int:
87-
return 24 * 60
87+
return 1747 # approx 25 evals
8888

8989
@property
9090
def step_hint(self) -> int:

algoperf/workloads/librispeech_deepspeech/librispeech_jax/workload.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,11 @@ def step_hint(self) -> int:
100100

101101
@property
102102
def max_allowed_runtime_sec(self) -> int:
103-
return 44_405 # ~12.3 hours
103+
return 36_949 # ~10.3 hours
104+
105+
@property
106+
def eval_period_time_sec(self) -> int:
107+
return 1447 # approx 25 evals
104108

105109
@property
106110
def use_tanh(self) -> bool:

algoperf/workloads/librispeech_deepspeech/librispeech_pytorch/workload.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,11 @@ def step_hint(self) -> int:
9696

9797
@property
9898
def max_allowed_runtime_sec(self) -> int:
99-
return 44_405 # ~12.3 hours
99+
return 36_949 # ~10.3 hours
100+
101+
@property
102+
def eval_period_time_sec(self) -> int:
103+
return 1447 # approx 25 evals
100104

101105
@property
102106
def use_tanh(self) -> bool:

algoperf/workloads/ogbg/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def train_stddev(self):
8888

8989
@property
9090
def max_allowed_runtime_sec(self) -> int:
91-
return 12_011 # ~3.3 hours
91+
return 11_303 # ~3.1 hours
9292

9393
@property
9494
def eval_period_time_sec(self) -> int:
95-
return 4 * 60
95+
return 452 # approx 25 evals
9696

9797
def _build_input_queue(
9898
self,

algoperf/workloads/wmt/workload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,11 @@ def train_stddev(self):
8989

9090
@property
9191
def max_allowed_runtime_sec(self) -> int:
92-
return 43_336 # ~12.0 hours
92+
return 16_114 # ~4.5 hours
9393

9494
@property
9595
def eval_period_time_sec(self) -> int:
96-
return 14 * 60
96+
return 644
9797

9898
@property
9999
def step_hint(self) -> int:

0 commit comments

Comments
 (0)