Add the missing copyright header. Minor update to the config.

sajadn · sajadn · commit a120cce45ef9 · 2025-11-10T08:38:01.000-08:00
Signed-off-by: Sajad Norouzi <snorouzi@nvidia.com>
diff --git a/dfm/src/megatron/data/dit/diffusion_task_encoder_with_sp.py b/dfm/src/megatron/data/dit/diffusion_task_encoder_with_sp.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
 import random
 from abc import ABC, abstractmethod
 from typing import List
diff --git a/dfm/src/megatron/recipes/dit/dit.py b/dfm/src/megatron/recipes/dit/dit.py
@@ -89,7 +89,7 @@ def pretrain_config(
     use_megatron_fsdp: bool = False,
     # Training hyperparameters
     train_iters: int = 10000,
-    global_batch_size: int = 8,
+    global_batch_size: int = 2,  # TODO: set it to num devices
     micro_batch_size: int = 1,
     lr: float = 0.9e-4,
     lr_warmup_iters: int = 2000,
@@ -163,7 +163,7 @@ def pretrain_config(
         model=model_cfg,
         train=TrainingConfig(
             train_iters=train_iters,
-            eval_interval=2000,
+            eval_interval=1000,
             eval_iters=32,
             global_batch_size=global_batch_size,
             micro_batch_size=micro_batch_size,
@@ -186,7 +186,7 @@ def pretrain_config(
             path=dataset_path,
             seq_length=2048,
             task_encoder_seq_length=8000,
-            packing_buffer_size=32,
+            packing_buffer_size=40,
             micro_batch_size=micro_batch_size,
             global_batch_size=global_batch_size,
             num_workers=10,