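fix the fixes 2: send a meta-device policy (parameters with requires_grad=False) to workers when weight sync schemes are used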

vmoens · vmoens · commit 35afb9ca33d1 · 2025-11-10T15:40:56.000Z
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -155,19 +155,20 @@ def _map_to_cpu_if_needed(x):
 def _make_meta_policy(policy: nn.Module) -> nn.Module:
     """Create policy structure with parameters on meta device.
 
-    This is used when policy_factory is provided but we still want to send
-    the policy structure to workers. The actual weights will be sent via queue.
+    This is used with weight sync schemes to send policy structure without weights.
+    The actual weights are distributed by the schemes.
 
     Args:
         policy: Policy module to extract structure from.
 
     Returns:
-        A copy of the policy with all parameters on meta device.
+        A copy of the policy with all parameters on meta device and requires_grad=False.
     """
 
     def _cast(p, param_maybe_buffer):
         if isinstance(param_maybe_buffer, Parameter):
-            return Parameter(p)
+            # Create parameter without gradients to avoid serialization issues
+            return Parameter(p, requires_grad=False)
         if isinstance(param_maybe_buffer, Buffer):
             return Buffer(p)
         return p
@@ -182,19 +183,20 @@ def _cast(p, param_maybe_buffer):
 def _make_meta_policy(policy: nn.Module) -> nn.Module:  # noqa: F811
     """Create policy structure with parameters on meta device.
 
-    This is used when policy_factory is provided but we still want to send
-    the policy structure to workers. The actual weights will be sent via queue.
+    This is used with weight sync schemes to send policy structure without weights.
+    The actual weights are distributed by the schemes.
 
     Args:
         policy: Policy module to extract structure from.
 
     Returns:
-        A copy of the policy with all parameters on meta device.
+        A copy of the policy with all parameters on meta device and requires_grad=False.
     """
 
     def _cast(p, param_maybe_buffer):
         if isinstance(param_maybe_buffer, Parameter):
-            return Parameter(p)
+            # Create parameter without gradients to avoid serialization issues
+            return Parameter(p, requires_grad=False)
         return p
 
     param_and_buf = TensorDict.from_module(policy, as_module=True)
@@ -3142,8 +3144,11 @@ def _run_processes(self) -> None:
                 # Schemes handle weight distribution on worker side
                 if any(policy_factory):
                     policy_to_send = None  # Factory will create policy in worker
+                elif policy is not None:
+                    # Send meta-device policy (empty structure) - schemes apply weights
+                    policy_to_send = _make_meta_policy(policy)
                 else:
-                    policy_to_send = policy  # Stateless - schemes apply weights
+                    policy_to_send = None
                 cm = contextlib.nullcontext()
             else:
                 # With weight updater, use in-place weight replacement