update comments

youngeunkwon0405 · youngeunkwon0405 · commit 67cd8512a242 · 2025-10-07T00:01:08.000-07:00
Signed-off-by: Youngeun Kwon <youngeunk@nvidia.com>
diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py
@@ -827,7 +827,8 @@ def init_collective(
         from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
         from vllm.distributed.utils import StatelessProcessGroup
 
-        # All training ranks [0..train_world_size-1] join the communicator with their rank
+        # world_size = train_world_size + inference_world_size
+        # variable train_world_size is used in inference cluster
         pg = StatelessProcessGroup.create(
             host=ip, port=port, rank=self.rank, world_size=world_size
         )

Original file line number	Diff line number	Diff line change
`@@ -827,7 +827,8 @@ def init_collective(`
`827`	`827`	`from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator`
`828`	`828`	`from vllm.distributed.utils import StatelessProcessGroup`
`829`	`829`
`830`		`- # All training ranks [0..train_world_size-1] join the communicator with their rank`
	`830`	`+ # world_size = train_world_size + inference_world_size`
	`831`	`+ # variable train_world_size is used in inference cluster`
`831`	`832`	`pg = StatelessProcessGroup.create(`
`832`	`833`	`host=ip, port=port, rank=self.rank, world_size=world_size`
`833`	`834`	`)`