address comments and lint

ZhiyuLi-Nvidia · ZhiyuLi-Nvidia · commit b9f427b3a8e0 · 2025-10-03T17:34:26.000-07:00
Signed-off-by: Zhiyu Li <zhiyul@NVIDIA.com>
diff --git a/nemo_rl/models/generation/vllm/vllm_backend.py b/nemo_rl/models/generation/vllm/vllm_backend.py
@@ -15,9 +15,9 @@
 from typing import Any
 
 import torch
+import zmq
 from torch.multiprocessing.reductions import rebuild_cuda_tensor
 
-import zmq
 from nemo_rl.utils.nsys import wrap_with_nvtx_name
 
 try:
diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py
@@ -23,6 +23,7 @@
 
 import ray
 import torch
+import zmq
 from accelerate import init_empty_weights
 from torch import nn
 from torch.distributed.checkpoint.state_dict import (
@@ -45,7 +46,6 @@
 )
 from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
 
-import zmq
 from nemo_rl.algorithms.interfaces import LossFunction, LossType
 from nemo_rl.algorithms.loss_functions import SequencePackingLossWrapper
 from nemo_rl.distributed.batched_data_dict import BatchedDataDict
@@ -1708,9 +1708,7 @@ def maybe_init_zmq(self):
     def prepare_refit_info(self) -> Optional[dict[str, Any]]:
         state_dict_info = {}
         for name, tensor in self.model.state_dict().items():
-            assert tensor.dtype == self.dtype, (
-                f"Tensor {name} has dtype {tensor.dtype} but expected {self.dtype}"
-            )
+            # all tensors will be cast to self.dtype in stream_weights_via_ipc_zmq/broadcast_weights_for_collective
             state_dict_info[name] = (tensor.shape, self.dtype)
 
         return state_dict_info
diff --git a/nemo_rl/models/policy/dtensor_policy_worker_v2.py b/nemo_rl/models/policy/dtensor_policy_worker_v2.py
@@ -22,6 +22,7 @@
 
 import ray
 import torch
+import zmq
 from accelerate import init_empty_weights
 from nemo_automodel import (
     NeMoAutoModelForSequenceClassification,
@@ -62,7 +63,6 @@
 )
 from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
 
-import zmq
 from nemo_rl.algorithms.interfaces import LossFunction, LossType
 from nemo_rl.algorithms.loss_functions import SequencePackingLossWrapper
 from nemo_rl.distributed.batched_data_dict import BatchedDataDict
@@ -1670,9 +1670,7 @@ def maybe_init_zmq(self):
     def prepare_refit_info(self) -> Optional[dict[str, Any]]:
         state_dict_info = {}
         for name, tensor in self.model.state_dict().items():
-            assert tensor.dtype == self.dtype, (
-                f"Tensor {name} has dtype {tensor.dtype} but expected {self.dtype}"
-            )
+            # all tensors will be cast to self.dtype in stream_weights_via_ipc_zmq/broadcast_weights_for_collective
             state_dict_info[name] = (tensor.shape, self.dtype)
 
         return state_dict_info
@@ -1698,10 +1696,10 @@ def dtensor_params_generator():
                     # Convert DTensor to full tensor for streaming
                     full_tensor = tensor.full_tensor()
                     # Convert to target dtype
-                    yield name, full_tensor.to(self.dtype, non_blocking=True)
+                    yield name, full_tensor.to(self.dtype, non_blocking=True).contiguous()
                 else:
                     # Convert to target dtype
-                    yield name, tensor.to(self.dtype, non_blocking=True)
+                    yield name, tensor.to(self.dtype, non_blocking=True).contiguous()
 
         # Use the shared implementation
         stream_weights_via_ipc_zmq_impl(
diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py
@@ -23,6 +23,7 @@
 
 import ray
 import torch
+import zmq
 from megatron.bridge import AutoBridge
 from megatron.bridge.models.model_provider import get_model
 from megatron.bridge.training import fault_tolerance
@@ -97,7 +98,6 @@
 from ray.util.queue import Queue
 from transformers import PreTrainedTokenizerBase
 
-import zmq
 from nemo_rl.algorithms.interfaces import LossFunction, LossType
 from nemo_rl.distributed.batched_data_dict import BatchedDataDict
 from nemo_rl.distributed.model_utils import (
diff --git a/pyproject.toml b/pyproject.toml
@@ -50,7 +50,7 @@ dependencies = [
     "mlflow",
     "nvidia-nvshmem-cu12", # for deep_ep build
     "swanlab",
-    "zmq",
+    "pyzmq",
 ]
 
 [project.optional-dependencies]
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@ dependencies = [`
`50`	`50`	`"mlflow",`
`51`	`51`	`"nvidia-nvshmem-cu12", # for deep_ep build`
`52`	`52`	`"swanlab",`
`53`		`- "zmq",`
	`53`	`+ "pyzmq",`
`54`	`54`	`]`
`55`	`55`
`56`	`56`	`[project.optional-dependencies]`