File tree Expand file tree Collapse file tree 2 files changed +18
-3
lines changed
Expand file tree Collapse file tree 2 files changed +18
-3
lines changed
Original file line number Diff line number Diff line change
1+ # Copyright (c) 2025, NVIDIA CORPORATION.
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
15+
1 16 import random
2 17 from abc import ABC , abstractmethod
3 18 from typing import List
Original file line number Diff line number Diff line change
@@ -89,7 +89,7 @@ def pretrain_config(
89 89 use_megatron_fsdp : bool = False ,
90 90 # Training hyperparameters
91 91 train_iters : int = 10000 ,
92- global_batch_size : int = 8 ,
92+ global_batch_size : int = 2 , # TODO: set it to num devices
93 93 micro_batch_size : int = 1 ,
94 94 lr : float = 0.9e-4 ,
95 95 lr_warmup_iters : int = 2000 ,
@@ -163,7 +163,7 @@ def pretrain_config(
163 163 model = model_cfg ,
164 164 train = TrainingConfig (
165 165 train_iters = train_iters ,
166- eval_interval = 2000 ,
166+ eval_interval = 1000 ,
167 167 eval_iters = 32 ,
168 168 global_batch_size = global_batch_size ,
169 169 micro_batch_size = micro_batch_size ,
@@ -186,7 +186,7 @@ def pretrain_config(
186 186 path = dataset_path ,
187 187 seq_length = 2048 ,
188 188 task_encoder_seq_length = 8000 ,
189- packing_buffer_size = 32 ,
189+ packing_buffer_size = 40 ,
190 190 micro_batch_size = micro_batch_size ,
191 191 global_batch_size = global_batch_size ,
192 192 num_workers = 10 ,
You can’t perform that action at this time.
0 commit comments