
Commit d57d222

Authored and committed by Copybara
Copybara import of gpu-recipes:

- 984c4b15df2f97682936541a0cd47daa5a08a2f2 Merge "Adding A3 Mega Llama-2-7B MaxText/JAX" into main
- 9ec22b974a257f4f92f299a12028700e614820c9 Removing hard coded NCCL settings

GitOrigin-RevId: 9ec22b974a257f4f92f299a12028700e614820c9
1 parent: dc6ef1a

File tree

11 files changed: +861, -62 lines


README.md

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ Welcome to the reproducible benchmark recipes repository for GPUs! This reposito
 | Models | GPU Machine Type | Framework | Workload Type | Orchestrator | Link to the recipe |
 | ---------------- | ---------------- | --------- | ------------------- | ------------ | ------------------ |
 | **GPT3-175B** | [A3 Mega (NVIDIA H100)](https://cloud.google.com/compute/docs/accelerator-optimized-machines#a3-mega-vms) | NeMo | Pre-training | GKE | [Link](./training/a3mega/gpt3-175b/nemo-pretraining-gke/README.md) |
+| **Llama-2-7B** | [A3 Mega (NVIDIA H100)](https://cloud.google.com/compute/docs/accelerator-optimized-machines#a3-mega-vms) | MaxText | Pre-training | GKE | [Link](./training/a3mega/llama-2-7b/maxtext-pretraining-gke/README.md) |
 | **Llama-3-70B** | [A3 Mega (NVIDIA H100)](https://cloud.google.com/compute/docs/accelerator-optimized-machines#a3-mega-vms) | NeMo | Pre-training | GKE | [Link](./training/a3mega/llama-3-70b/nemo-pretraining-gke/README.md) |
 | **Llama-3.1-70B** | [A3 Mega (NVIDIA H100)](https://cloud.google.com/compute/docs/accelerator-optimized-machines#a3-mega-vms) | NeMo | Pre-training | GKE | [Link](./training/a3mega/llama-3.1-70b/nemo-pretraining-gke/README.md) |
 | **Mixtral-8-7B** | [A3 Mega (NVIDIA H100)](https://cloud.google.com/compute/docs/accelerator-optimized-machines#a3-mega-vms) | NeMo | Pre-training | GKE | [Link](./training/a3mega/mixtral-8x7b/nemo-pretraining-gke/README.md) |
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+hardware: gpu
+dcn_data_parallelism: 16
+ici_fsdp_parallelism: 8
+per_device_batch_size: 4
+max_target_length: 4096
+model_name: llama2-7b
+enable_checkpointing: false
+attention: cudnn_flash_te
+remat_policy: minimal_flash
+use_iota_embed: true
+scan_layers: false
+dataset_type: synthetic
+logits_dot_in_fp32: false
+enable_goodput_recording: false
+monitor_goodput: false
+save_config_to_gcs: true
+
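This values file (its path is not shown in this commit view) describes a 16-node job: 16-way data parallelism across nodes (DCN) combined with 8-way FSDP within each node (ICI) covers 128 GPUs, assuming the 8 H100s of an A3 Mega (a3-megagpu-8g) machine. A minimal sketch of that arithmetic, under that assumption:

# Topology and batch arithmetic implied by the values above, assuming
# 8 H100 GPUs per A3 Mega node; the values file's path is not shown here.
dcn_data_parallelism = 16   # data-parallel replicas across nodes (DCN)
ici_fsdp_parallelism = 8    # FSDP shards within a node (ICI / NVLink)
per_device_batch_size = 4   # sequences per GPU per step
max_target_length = 4096    # tokens per sequence

num_gpus = dcn_data_parallelism * ici_fsdp_parallelism   # 128 GPUs = 16 nodes
global_batch = num_gpus * per_device_batch_size          # 512 sequences/step
tokens_per_step = global_batch * max_target_length       # 2,097,152 tokens/step
print(num_gpus, global_batch, tokens_per_step)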
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+hardware: gpu
+dcn_data_parallelism: 32
+ici_fsdp_parallelism: 8
+per_device_batch_size: 4
+max_target_length: 4096
+model_name: llama2-7b
+enable_checkpointing: false
+attention: cudnn_flash_te
+remat_policy: minimal_flash
+use_iota_embed: true
+scan_layers: false
+dataset_type: synthetic
+logits_dot_in_fp32: false
+enable_goodput_recording: false
+monitor_goodput: false
+save_config_to_gcs: true
+
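The second values file is identical except for dcn_data_parallelism: 32, which doubles the data-parallel replicas to 32 nodes (256 GPUs, a global batch of 1024 sequences, about 4.2M tokens per step). MaxText's train.py accepts a base config followed by key=value overrides on the command line, which is how settings like these are typically applied; a hedged sketch of such an invocation (run_name and the base config path below are illustrative placeholders, not taken from this commit):

import shlex

overrides = {
    "hardware": "gpu",
    "dcn_data_parallelism": 32,
    "ici_fsdp_parallelism": 8,
    "per_device_batch_size": 4,
    "max_target_length": 4096,
    "model_name": "llama2-7b",
    "attention": "cudnn_flash_te",
    "dataset_type": "synthetic",
}

# Build a MaxText launch command: base config first, then key=value
# overrides. run_name and the config path are placeholders.
cmd = ["python3", "MaxText/train.py", "MaxText/configs/base.yml",
       "run_name=llama2-7b-32node"] + [f"{k}={v}" for k, v in overrides.items()]
print(shlex.join(cmd))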
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v2
+name: maxtext_pretraining_workload
+description: maxtext_pretraining_workload_a3mega
+type: application
+version: 0.1.0
+appVersion: "1.16.0"
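Chart.yaml only declares the chart's identity; a recipe would apply it with helm install together with one of the values files above. A hypothetical invocation, driven from Python for consistency with the other sketches (release name, chart path, and values-file name are placeholders, not from this commit):

import subprocess

# Hypothetical: install this chart with a chosen values file. All names
# and paths are placeholders; the recipe README has the actual command.
subprocess.run(
    ["helm", "install", "llama2-7b-maxtext",   # placeholder release name
     "./maxtext_pretraining_workload",         # path to this chart
     "-f", "values.yaml"],                     # placeholder values file
    check=True,
)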
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: "{{ .Release.Name }}"
+data:
+  maxtext-configuration.yaml: |-
+    {{ .Values.maxtext_config | nindent 4 }}
+  xla-flags: >-
+    --xla_gpu_enable_latency_hiding_scheduler=true
+    --xla_gpu_enable_triton_gemm=false
+    --xla_gpu_graph_level=0
+    --xla_gpu_enable_highest_priority_async_stream=true
+    --xla_gpu_all_reduce_combine_threshold_bytes=536870912
+    --xla_gpu_all_gather_combine_threshold_bytes=134217728
+    --xla_gpu_reduce_scatter_combine_threshold_bytes=67108864
+    --xla_gpu_enable_pipelined_all_gather=true
+    --xla_gpu_enable_pipelined_reduce_scatter=true
+    --xla_gpu_enable_pipelined_all_reduce=true
+    --xla_gpu_enable_while_loop_double_buffering=true
+    --xla_gpu_enable_triton_softmax_fusion=false
+    --xla_gpu_enable_all_gather_combine_by_dim=false
+    --xla_gpu_enable_reduce_scatter_combine_by_dim=false
+    --xla_disable_hlo_passes=rematerialization
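The xla-flags entry collects GPU compiler tuning: a latency-hiding scheduler, pipelined all-gather/reduce-scatter/all-reduce, large collective combine thresholds, and disabled Triton GEMM/softmax fusion. JAX reads these from the XLA_FLAGS environment variable, which must be set before the backend initializes; a minimal sketch of the consumption side (the flag list is abridged, and the pod wiring that injects the ConfigMap value is not shown in this excerpt):

import os

# XLA_FLAGS must be set before JAX initializes its GPU backend. In the
# recipe, the workload pod would presumably surface the ConfigMap's
# "xla-flags" value into this variable; here it is set inline, abridged.
os.environ["XLA_FLAGS"] = " ".join([
    "--xla_gpu_enable_latency_hiding_scheduler=true",
    "--xla_gpu_enable_triton_gemm=false",
    "--xla_gpu_all_reduce_combine_threshold_bytes=536870912",
    # ...remaining flags from the ConfigMap above...
])

import jax  # import after XLA_FLAGS is set so the flags take effect

print(jax.devices())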
