perf: Add num_workers in DPO, GRPO and SFT for loading data (#1314)

katec846 · web-flow · commit 355aa98f13d7 · 2025-10-14T10:47:55.000-07:00
Signed-off-by: Kate Cheng <yunhsuanc@nvidia.com>
diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml
@@ -158,6 +158,7 @@ policy:
 data:
   max_input_seq_length: ${policy.max_total_sequence_length}
   shuffle: true
+  num_workers: 1
 
   dataset_name: HelpSteer3
   # You can use custom preference datasets for training and validation. For example:
diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
@@ -219,6 +219,7 @@ data:
   prompt_file: "examples/prompts/cot.txt"
   system_prompt_file: null
   shuffle: true
+  num_workers: 1
 
   dataset_name: "OpenMathInstruct-2"
   # You can use custom response datasets for training and validation. For example:
diff --git a/examples/configs/rm.yaml b/examples/configs/rm.yaml
@@ -129,6 +129,7 @@ policy:
 data:
   max_input_seq_length: ${policy.max_total_sequence_length}
   shuffle: true
+  num_workers: 1
 
   dataset_name: HelpSteer3
   # You can use custom preference datasets for training and validation. For example:
diff --git a/examples/configs/sft_openmathinstruct2_megatron.yaml b/examples/configs/sft_openmathinstruct2_megatron.yaml
@@ -132,6 +132,7 @@ data:
   add_eos: true
   add_generation_prompt: true
   output_key: 'generated_solution'
+  num_workers: 1
 
 logger:
   log_dir: "logs"  # Base directory for all logs
diff --git a/examples/configs/vlm_grpo_3B.yaml b/examples/configs/vlm_grpo_3B.yaml
@@ -203,6 +203,7 @@ data:
   dataset_name: "clevr-cogent"
   split: "trainA"
   shuffle: true
+  num_workers: 1
 
 env:
   clevr-cogent:
diff --git a/examples/configs/vlm_grpo_3B_megatron.yaml b/examples/configs/vlm_grpo_3B_megatron.yaml
@@ -156,6 +156,7 @@ data:
   dataset_name: clevr-cogent
   split: trainA
   shuffle: true
+  num_workers: 1
 env:
   clevr-cogent:
     num_workers: 8
diff --git a/nemo_rl/algorithms/dpo.py b/nemo_rl/algorithms/dpo.py
@@ -176,6 +176,7 @@ def setup(
             add_loss_mask=True,
         ),
         drop_last=True,
+        num_workers=data_config["num_workers"],
     )
 
     if last_checkpoint_path is not None:
@@ -198,6 +199,7 @@ def setup(
                 add_loss_mask=True,
             ),
             drop_last=False,
+            num_workers=data_config["num_workers"],
         )
         for k, v in val_dataset.items()
     }
diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py
@@ -207,6 +207,7 @@ def setup(
         shuffle=data_config["shuffle"],
         collate_fn=rl_collate_fn,
         drop_last=True,
+        num_workers=data_config["num_workers"],
     )
     if last_checkpoint_path is not None:
         dataloader_state_dict = torch.load(
@@ -228,6 +229,7 @@ def setup(
             batch_size=grpo_config["val_batch_size"],
             shuffle=False,
             collate_fn=rl_collate_fn,
+            num_workers=data_config["num_workers"],
         )
         print(
             f"  ✓ Validation dataloader loaded with {len(val_dataset)} samples",
diff --git a/nemo_rl/algorithms/rm.py b/nemo_rl/algorithms/rm.py
@@ -151,6 +151,7 @@ def setup(
             add_loss_mask=False,
         ),
         drop_last=True,
+        num_workers=data_config["num_workers"],
     )
 
     if last_checkpoint_path is not None:
@@ -173,6 +174,7 @@ def setup(
                 add_loss_mask=False,
             ),
             drop_last=False,
+            num_workers=data_config["num_workers"],
         )
         for k, v in val_dataset.items()
     }
diff --git a/tests/unit/algorithms/test_grpo.py b/tests/unit/algorithms/test_grpo.py
@@ -240,7 +240,7 @@ def test_noncolocated_inference_requires_explicit_gpus_per_node_single_node():
             "val_period": 0,
             "val_at_start": False,
         },
-        "data": {"shuffle": False},
+        "data": {"shuffle": False, "num_workers": 1},
         "logger": {},  # Config extraction requires this key
         "checkpointing": {},  # Config extraction requires this key
         "cluster": {
@@ -296,7 +296,7 @@ def test_noncolocated_inference_requires_explicit_gpus_per_node_multi_node():
             "val_period": 0,
             "val_at_start": False,
         },
-        "data": {"shuffle": False},
+        "data": {"shuffle": False, "num_workers": 1},
         "logger": {},  # Config extraction requires this key
         "checkpointing": {},  # Config extraction requires this key
         "cluster": {

Original file line number	Diff line number	Diff line change
`@@ -176,6 +176,7 @@ def setup(`
`176`	`176`	`add_loss_mask=True,`
`177`	`177`	`),`
`178`	`178`	`drop_last=True,`
	`179`	`+ num_workers=data_config["num_workers"],`
`179`	`180`	`)`
`180`	`181`
`181`	`182`	`if last_checkpoint_path is not None:`
`@@ -198,6 +199,7 @@ def setup(`
`198`	`199`	`add_loss_mask=True,`
`199`	`200`	`),`
`200`	`201`	`drop_last=False,`
	`202`	`+ num_workers=data_config["num_workers"],`
`201`	`203`	`)`
`202`	`204`	`for k, v in val_dataset.items()`
`203`	`205`	`}`
Original file line number	Diff line number	Diff line change
`@@ -151,6 +151,7 @@ def setup(`
`151`	`151`	`add_loss_mask=False,`
`152`	`152`	`),`
`153`	`153`	`drop_last=True,`
	`154`	`+ num_workers=data_config["num_workers"],`
`154`	`155`	`)`
`155`	`156`
`156`	`157`	`if last_checkpoint_path is not None:`
`@@ -173,6 +174,7 @@ def setup(`
`173`	`174`	`add_loss_mask=False,`
`174`	`175`	`),`
`175`	`176`	`drop_last=False,`
	`177`	`+ num_workers=data_config["num_workers"],`
`176`	`178`	`)`
`177`	`179`	`for k, v in val_dataset.items()`
`178`	`180`	`}`