Skip to content

Commit a120cce

Browse files
committed
Add the missing copyright. Minor update to the config.
Signed-off-by: Sajad Norouzi <snorouzi@nvidia.com>
1 parent a23e360 commit a120cce

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

dfm/src/megatron/data/dit/diffusion_task_encoder_with_sp.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
116
import random
217
from abc import ABC, abstractmethod
318
from typing import List

dfm/src/megatron/recipes/dit/dit.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def pretrain_config(
8989
use_megatron_fsdp: bool = False,
9090
# Training hyperparameters
9191
train_iters: int = 10000,
92-
global_batch_size: int = 8,
92+
global_batch_size: int = 2, # TODO: set it to num devices
9393
micro_batch_size: int = 1,
9494
lr: float = 0.9e-4,
9595
lr_warmup_iters: int = 2000,
@@ -163,7 +163,7 @@ def pretrain_config(
163163
model=model_cfg,
164164
train=TrainingConfig(
165165
train_iters=train_iters,
166-
eval_interval=2000,
166+
eval_interval=1000,
167167
eval_iters=32,
168168
global_batch_size=global_batch_size,
169169
micro_batch_size=micro_batch_size,
@@ -186,7 +186,7 @@ def pretrain_config(
186186
path=dataset_path,
187187
seq_length=2048,
188188
task_encoder_seq_length=8000,
189-
packing_buffer_size=32,
189+
packing_buffer_size=40,
190190
micro_batch_size=micro_batch_size,
191191
global_batch_size=global_batch_size,
192192
num_workers=10,

0 commit comments

Comments (0)