Skip device placement for meta-device policies in collector setup (fix the fixes 4)

vmoens · vmoens · commit 81b1ed513d2c · 2025-11-10T18:34:13.000Z
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -1192,7 +1192,23 @@ def _setup_replay_buffer(
     def _setup_policy_and_weights(self, policy: TensorDictModule | Callable) -> None:
         """Set up policy, wrapped policy, and extract weights."""
         self._original_policy = policy
-        policy, self.get_weights_fn = self._get_policy_and_device(policy=policy)
+
+        # Check if policy has meta-device parameters (sent from weight sync schemes)
+        # In that case, skip device placement - weights will come from the receiver
+        has_meta_params = False
+        if isinstance(policy, nn.Module):
+            for p in policy.parameters():
+                if p.device.type == "meta":
+                    has_meta_params = True
+                    break
+
+        if has_meta_params:
+            # Skip device placement for meta policies - schemes handle weight application
+            # Policy stays as-is, weights will be applied by the receiver
+            self.get_weights_fn = lambda: TensorDict.from_module(policy).data
+        else:
+            # Normal path: move policy to correct device
+            policy, self.get_weights_fn = self._get_policy_and_device(policy=policy)
 
         if not self.trust_policy:
             self.policy = policy