
Commit 79217c8

[BugFix] Fix parameter conflict resolution and validation tests in Wrappers (#3114)
1 parent 6dfbaf8 commit 79217c8

File tree

2 files changed: +140 / -43 lines changed


test/llm/test_wrapper.py

Lines changed: 120 additions & 27 deletions
@@ -2705,7 +2705,7 @@ def test_parameter_validation(
                 tokenizer=tokenizer,
                 input_mode="text",
                 generate=True,
-                generate_kwargs={"repetition_penalty": 0.5},
+                generate_kwargs={"repetition_penalty": 0.0},
             )

         # Test conflicting do_sample and temperature
@@ -2725,36 +2725,129 @@ def test_parameter_validation(
         [vLLMWrapper, TransformersWrapperMaxTokens],
         ids=["vllm", "transformers"],
     )
-    def test_parameter_conflict_errors(
+    def test_batching_null_dimension(
         self, wrapper_class, vllm_instance, transformers_instance
     ):
-        """Test that parameter conflicts are properly detected."""
-        model = vllm_instance if wrapper_class == vLLMWrapper else transformers_instance
-        tokenizer = model.get_tokenizer() if hasattr(model, "get_tokenizer") else None
+        """Test that null dimension inputs (batch_dims=0) work correctly.

-        # Test conflicting max_tokens and max_new_tokens
-        with pytest.raises(
-            ValueError, match="Cannot specify both 'max_tokens'.*'max_new_tokens'"
-        ):
-            wrapper_class(
-                model,
-                tokenizer=tokenizer,
-                input_mode="text",
-                generate=True,
-                generate_kwargs={"max_tokens": 10, "max_new_tokens": 20},
-            )
+        This test specifically verifies the fix for handling TensorDicts with batch_dims=0
+        in the batching decorator, ensuring proper squeeze operation and result handling.
+        """
+        # Handle the case where vLLM is not available
+        if wrapper_class == vLLMWrapper:
+            try:
+                model, tokenizer = vllm_instance
+            except Exception as e:
+                if "vLLM compatibility issue" in str(e):
+                    pytest.skip("vLLM not available due to compatibility issues")
+                raise
+        else:
+            model, tokenizer = transformers_instance

-        # Test conflicting n and num_return_sequences
-        with pytest.raises(
-            ValueError, match="Cannot specify both 'n'.*'num_return_sequences'"
-        ):
-            wrapper_class(
-                model,
-                tokenizer=tokenizer,
-                input_mode="text",
-                generate=True,
-                generate_kwargs={"n": 1, "num_return_sequences": 2},
-            )
+        # Test without batching first to verify basic functionality
+        wrapper_no_batch = wrapper_class(
+            model,
+            tokenizer=tokenizer,
+            input_mode="text",
+            generate=True,
+            return_log_probs=True,
+            # No batching parameters to avoid batching issues
+        )
+
+        # Test 1: Single null dimension input should work
+        # This is the key test case - a TensorDict without batch dimensions
+        null_dim_input = TensorDict(
+            text=Text(prompt="Single question without batch dimension?"),
+            batch_size=(),  # Empty tuple means no batch dimension
+        )
+
+        result_null = wrapper_no_batch(null_dim_input)
+
+        # Verify the result structure
+        assert "text" in result_null
+        assert "tokens" in result_null
+        assert "masks" in result_null
+        assert "log_probs" in result_null
+
+        # Verify the result has the expected shape (should maintain null dimension)
+        assert result_null.batch_size == ()
+        assert isinstance(
+            result_null["text"].prompt, str
+        )  # Should be a single string, not a list
+
+        # Test 2: Batch input should work normally
+        batch_input = TensorDict(
+            text=Text(prompt=["Question 1?", "Question 2?"]),
+            batch_size=(2,),
+        )
+
+        result_batch = wrapper_no_batch(batch_input)
+        assert result_batch.batch_size == (2,)
+        assert isinstance(result_batch["text"].prompt, list)
+        assert len(result_batch["text"].prompt) == 2
+
+        # Test 3: Test with batching enabled but with min_batch_size=1 to avoid complex batching
+        wrapper_with_batch = wrapper_class(
+            model,
+            tokenizer=tokenizer,
+            input_mode="text",
+            generate=True,
+            return_log_probs=True,
+            min_batch_size=1,  # Set to 1 to avoid complex batching scenarios
+        )
+
+        # Test null dimension with batching enabled
+        result_null_batch = wrapper_with_batch(null_dim_input)
+
+        # Verify the result structure
+        assert "text" in result_null_batch
+        assert "tokens" in result_null_batch
+        assert "masks" in result_null_batch
+        assert "log_probs" in result_null_batch
+
+        # Verify the result has the expected shape (should maintain null dimension)
+        assert result_null_batch.batch_size == ()
+        assert isinstance(
+            result_null_batch["text"].prompt, str
+        )  # Should be a single string, not a list
+
+        # Test 4: Verify that the _batching decorator correctly handles the squeeze logic
+        # This tests the specific fix in the _batching decorator
+        from torchrl.modules.llm.policies.common import _batching
+
+        # Create a simple mock function to test the decorator
+        def mock_forward(self, td_input, **kwargs):
+            # Return the input as-is for testing
+            return td_input
+
+        # Apply the batching decorator
+        batched_mock = _batching(mock_forward)
+
+        # Create a mock self object with batching attributes
+        class MockSelf:
+            def __init__(self):
+                self._min_batch_size = 1
+                self._max_batch_size = None
+                self._batch_queue = []
+                self._futures = []
+                self._batching_lock = type(
+                    "MockLock",
+                    (),
+                    {
+                        "__enter__": lambda self: None,
+                        "__exit__": lambda self, *args: None,
+                    },
+                )()
+                self._batching_timeout = 10.0
+
+        mock_self = MockSelf()
+
+        # Test the decorator with null dimension input
+        result = batched_mock(mock_self, null_dim_input)
+
+        # The result should be the same as the input since our mock just returns the input
+        assert result.batch_size == ()
+        assert result["text"].prompt == "Single question without batch dimension?"


 if __name__ == "__main__":
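
The new test above exercises the `_batching` fix in `torchrl/modules/llm/policies/common.py` (second file below). As a rough, self-contained sketch of the idea — not the actual decorator, which queues inputs and resolves futures with `lazy_stack` — the null-dimension handling amounts to: wrap a `batch_size=()` TensorDict as a one-element batch, and on the way out return the single result directly instead of stacking it. The helper name `run_batched` and the use of `torch.stack` are illustrative choices only.

```python
import torch
from tensordict import TensorDict


def run_batched(process_one, td_input):
    # Sketch only: mirrors the squeeze logic of the fix, not torchrl's decorator.
    squeeze = td_input.batch_dims == 0
    # A 0-dim ("null dimension") input becomes a one-element batch;
    # a batched input is unbound along dim 0.
    inputs = [td_input] if squeeze else list(td_input.unbind(0))
    results = [process_one(item) for item in inputs]
    if squeeze:
        if len(results) > 1:
            raise RuntimeError("More results than expected")
        return results[0]  # keep batch_size=() on the way out, no stacking
    return torch.stack(results)


single = TensorDict(x=torch.zeros(3), batch_size=())
batch = TensorDict(x=torch.zeros(2, 3), batch_size=(2,))
assert run_batched(lambda td: td, single).batch_size == ()
assert run_batched(lambda td: td, batch).batch_size == (2,)
```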

torchrl/modules/llm/policies/common.py

Lines changed: 20 additions & 16 deletions
@@ -384,12 +384,12 @@ class LLMWrapperBase(TensorDictModuleBase):
     **Parameter Conflict Resolution:**

     When both legacy (backend-specific) and standardized parameter names are provided,
-    a :exc:`ValueError` is raised to prevent confusion. For example:
+    the legacy names silently prevail. This ensures backward compatibility with existing code.

-    * If both ``max_tokens`` and ``max_new_tokens`` are passed, an error is raised
-    * If both ``n`` and ``num_return_sequences`` are passed, an error is raised
+    * If both ``max_tokens`` and ``max_new_tokens`` are passed, ``max_tokens`` wins
+    * If both ``n`` and ``num_return_sequences`` are passed, ``n`` wins

-    This ensures clear parameter usage and prevents unexpected behavior.
+    This behavior allows existing code to continue working without modification.

     **Parameter Validation:**

@@ -520,35 +520,34 @@ def _standardize_generate_kwargs(cls, generate_kwargs: dict | None) -> dict:
         * vLLM's ``max_tokens`` -> ``max_new_tokens``
         * vLLM's ``n`` -> ``num_return_sequences``

+        **Parameter Conflict Resolution:**
+
+        When both legacy (backend-specific) and standardized parameter names are provided,
+        the legacy names silently prevail. This ensures backward compatibility with existing code.
+
         Args:
             generate_kwargs: The generation parameters to standardize

         Returns:
             Standardized generation parameters
-
-        Raises:
-            ValueError: If conflicting parameter names are provided
         """
         if generate_kwargs is None:
             return {}

         standardized = dict(generate_kwargs)

         # Convert vLLM parameter names to common names
+        # Legacy names prevail in conflicts (backward compatibility)
         if "max_tokens" in standardized:
             if "max_new_tokens" in standardized:
-                raise ValueError(
-                    "Cannot specify both 'max_tokens' (legacy vLLM) and 'max_new_tokens' (standardized). "
-                    "Use 'max_new_tokens' for cross-backend compatibility."
-                )
+                # Legacy name wins - remove the standardized name
+                standardized.pop("max_new_tokens")
             standardized["max_new_tokens"] = standardized.pop("max_tokens")

         if "n" in standardized:
             if "num_return_sequences" in standardized:
-                raise ValueError(
-                    "Cannot specify both 'n' (legacy vLLM) and 'num_return_sequences' (standardized). "
-                    "Use 'num_return_sequences' for cross-backend compatibility."
-                )
+                # Legacy name wins - remove the standardized name
+                standardized.pop("num_return_sequences")
             standardized["num_return_sequences"] = standardized.pop("n")

         # Validate parameter combinations
@@ -1259,7 +1258,9 @@ def _batched_func(self, td_input: TensorDictBase, **kwargs):
         max_batch_size = getattr(self, "_max_batch_size", None)
         if min_batch_size is not None or max_batch_size is not None:
             # put elements in a queue until the batch size is reached
+            squeeze = False
             if td_input.batch_dims == 0:
+                squeeze = True
                 inputs = [td_input]
             else:
                 if td_input.batch_dims > 1:
@@ -1339,7 +1340,10 @@ def _batched_func(self, td_input: TensorDictBase, **kwargs):
                     if not future.done():
                         future.set_exception(e)
                 raise
-
+            if squeeze:
+                if len(futures) > 1:
+                    raise RuntimeError("More results than expected")
+                return futures[0].result()
             return lazy_stack([future.result() for future in futures])
         return func(self, td_input, **kwargs)
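
To make the new conflict-resolution rule concrete, the snippet below is a standalone replica of the logic in `_standardize_generate_kwargs` shown above, written here only for illustration (it is not imported from torchrl, and the function name is hypothetical). Legacy vLLM names are mapped to the standardized names, and when both forms are passed the legacy value wins instead of raising a `ValueError` as before.

```python
def standardize_generate_kwargs(generate_kwargs: dict | None) -> dict:
    """Illustrative replica of the conflict-resolution logic in the diff above."""
    if generate_kwargs is None:
        return {}
    standardized = dict(generate_kwargs)
    # Legacy vLLM names prevail when both legacy and standardized names are given.
    if "max_tokens" in standardized:
        standardized.pop("max_new_tokens", None)  # drop the standardized duplicate, if any
        standardized["max_new_tokens"] = standardized.pop("max_tokens")
    if "n" in standardized:
        standardized.pop("num_return_sequences", None)
        standardized["num_return_sequences"] = standardized.pop("n")
    return standardized


# Both names supplied: the legacy values (max_tokens=10, n=1) silently win.
assert standardize_generate_kwargs(
    {"max_tokens": 10, "max_new_tokens": 20, "n": 1, "num_return_sequences": 2}
) == {"max_new_tokens": 10, "num_return_sequences": 1}
```

Under the previous behavior the same call would have raised a `ValueError`; with this commit the legacy parameters simply take precedence, matching the updated docstrings.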
