-import pytest
+import re
 
 import llama_cpp
-import llguidance.hf
-import numpy as np
-import torch
+import llguidance
+import pytest
 import transformers
 from llguidance import LLTokenizer
 
     LLGuidanceBackend,
     LLGuidanceLogitsProcessor
 )
+from tests.backends.test_backends_utils import simulate_model_calling_processor
 
 try:
-    import mlx.core as mx
     import mlx_lm
     HAS_MLX = True
 except ImportError:
@@ -40,20 +39,6 @@ def model_mlxlm():
         *mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit")
     )
 
-@pytest.fixture
-def llg_tokenizer():
-    return llguidance.hf.from_tokenizer(
-        transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"),
-    )
-
-@pytest.fixture
-def llg_grammar_spec():
-    return (
-        '{"grammars": [{ "json_schema": {"type": "object", "properties":'
-        + ' {"name": {"type": "string"}, "age": {"type": "integer"}}, "requ'
-        + 'ired": ["name", "age"], "additionalProperties": false} }] }'
-    )
-
 @pytest.fixture
 def json_schema():
     return (
@@ -97,42 +82,61 @@ def cfg_ebnf():
9782"""
9883
9984
100- def test_llguidance_processor_torch (llg_grammar_spec , llg_tokenizer ):
101- processor = LLGuidanceLogitsProcessor (llg_grammar_spec , llg_tokenizer , "torch" )
102- logits = torch .randn (2 , llg_tokenizer .vocab_size )
103- input_ids = torch .randint (0 , llg_tokenizer .vocab_size , (2 , 10 ))
104- output = processor (input_ids , logits )
105- assert output .shape == (2 , llg_tokenizer .vocab_size )
106- processor (input_ids , logits )
107-
85+ def test_llguidance_processor_torch (regex ):
86+ model = model_transformers ()
87+ tokenizer = model .tokenizer
88+ hf_tokenizer = model .hf_tokenizer
89+ llg_tokenizer = LLGuidanceBackend (model ).llg_tokenizer
90+ grammar_spec = llguidance .grammar_from ("regex" , regex )
91+ processor = LLGuidanceLogitsProcessor (grammar_spec , llg_tokenizer , "torch" )
92+ for _ in range (2 ):
93+ input_ids = simulate_model_calling_processor (
94+ processor ,
95+ "torch" ,
96+ len (tokenizer .get_vocab ()),
97+ tokenizer .eos_token_id ,
98+ 2
99+ )
100+ assert re .match (regex , hf_tokenizer .decode (input_ids [0 ]))
101+ assert re .match (regex , hf_tokenizer .decode (input_ids [1 ]))
102+
103+
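The three rewritten processor tests follow one pattern: derive a grammar spec from the regex fixture, wrap it in an LLGuidanceLogitsProcessor for the backend's tensor library, run two full simulated generations (which also verifies that the processor resets its parser state between calls), and assert that each decoded sequence matches the regex. The body of simulate_model_calling_processor is not part of this diff; what follows is a minimal hypothetical sketch of what it presumably does, written numpy-only for brevity (the real helper must build tensors for whichever library name it is given), assuming the processor sets every grammar-forbidden logit to -inf.

import numpy as np

def simulate_model_calling_processor(
    processor, tensor_library_name, vocab_size, eos_token_id, batch_size
):
    # Hypothetical sketch, not the actual helper from test_backends_utils.
    # Greedy decoding driven by random logits: because the processor masks
    # every token the grammar forbids, argmax can only ever pick a
    # grammar-legal token, until EOS becomes the sole legal choice.
    rng = np.random.default_rng(0)
    input_ids = np.empty((batch_size, 0), dtype=np.int64)
    while True:
        logits = rng.standard_normal((batch_size, vocab_size)).astype(np.float32)
        masked = processor(input_ids, logits)
        next_tokens = np.argmax(masked, axis=-1, keepdims=True)
        input_ids = np.concatenate([input_ids, next_tokens], axis=1)
        # Simplification: assumes all rows reach EOS on the same step.
        if np.all(next_tokens[:, 0] == eos_token_id):
            return input_ids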
+def test_llguidance_processor_numpy(regex):
+    model = model_llamacpp()
+    tokenizer = model.tokenizer
+    llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer
+    grammar_spec = llguidance.grammar_from("regex", regex)
+    processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, "numpy")
+    for _ in range(2):
+        input_ids = simulate_model_calling_processor(
+            processor,
+            "numpy",
+            len(tokenizer.vocabulary),
+            tokenizer.eos_token_id,
+            2
+        )
+        assert re.match(regex, tokenizer.decode(input_ids[0])[0])
+        assert re.match(regex, tokenizer.decode(input_ids[1])[0])
 
-def test_llguidance_processor_numpy(llg_grammar_spec, llg_tokenizer):
-    processor = LLGuidanceLogitsProcessor(llg_grammar_spec, llg_tokenizer, "numpy")
-    logits = np.random.randn(2, llg_tokenizer.vocab_size)
-    input_ids = np.random.randint(0, llg_tokenizer.vocab_size, (2, 10))
-    output = processor(input_ids, logits)
-    assert output.shape == (2, llg_tokenizer.vocab_size)
-    processor(input_ids, logits)
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
-def test_llguidance_processor_mlx(llg_grammar_spec, llg_tokenizer):
-    processor = LLGuidanceLogitsProcessor(llg_grammar_spec, llg_tokenizer, "mlx")
-    logits = mx.random.normal((2, llg_tokenizer.vocab_size))
-    input_ids = mx.random.randint(0, llg_tokenizer.vocab_size, (2, 10))
-    output = processor(input_ids, logits)
-    assert output.shape == (2, llg_tokenizer.vocab_size)
-    processor(input_ids, logits)
-
-
-def test_llguidance_processor_tensorflow(llg_grammar_spec, llg_tokenizer):
-    with pytest.raises(TypeError):
-        LLGuidanceLogitsProcessor(llg_grammar_spec, llg_tokenizer, "tensorflow")
-
-
-def test_llguidance_processor_jax(llg_grammar_spec, llg_tokenizer):
-    with pytest.raises(TypeError):
-        LLGuidanceLogitsProcessor(llg_grammar_spec, llg_tokenizer, "jax")
+def test_llguidance_processor_mlx(regex):
+    model = model_mlxlm()
+    tokenizer = model.mlx_tokenizer
+    llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer
+    grammar_spec = llguidance.grammar_from("regex", regex)
+    processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, "mlx")
+    for _ in range(2):
+        input_ids = simulate_model_calling_processor(
+            processor,
+            "mlx",
+            len(tokenizer.vocabulary),
+            tokenizer.eos_token_id,
+            2
+        )
+        assert re.match(regex, tokenizer.decode(input_ids[0]))
+        assert re.match(regex, tokenizer.decode(input_ids[1]))
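Note the decode differences across the three paths: the transformers test decodes with hf_tokenizer.decode(input_ids[0]) and gets a string back, while the llama.cpp tokenizer's decode apparently returns a list, hence the extra [0] in the numpy assertions. All three tests also replace the deleted hand-written '{"grammars": [...]}' fixture with llguidance.grammar_from("regex", regex), which serializes a single-constraint grammar spec for the processor. Shown here only as usage (the returned format is an llguidance internal):

spec = llguidance.grammar_from("regex", r"[a-z]{3}")
processor = LLGuidanceLogitsProcessor(spec, llg_tokenizer, "numpy")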
 
 
 models = [
@@ -155,7 +159,6 @@ def test_llguidance_backend(model, tensor_library_name, json_schema, regex, cfg_
     generator = outlines.Generator(model, backend="llguidance", processor=processor)
     response = generator("Hello, how are you?")
     assert response[0] == "{"
-    assert "name" in response
 
     # regex
     processor = backend.get_regex_logits_processor(regex)
@@ -184,3 +187,16 @@ def test_llguidance_backend(model, tensor_library_name, json_schema, regex, cfg_
     generator = outlines.Generator(model, backend="llguidance", processor=processor)
     response = generator("Hello, how are you?")
     assert response == "yes" or response == "no"
+
+    # batch + multiple generations
+    processor = backend.get_json_schema_logits_processor(json_schema)
+    generator = outlines.Generator(model, backend="llguidance", processor=processor)
+    for _ in range(2):
+        if tensor_library_name == "torch":
+            response = generator.batch(["Create a character", "Hello, how are you?"], max_new_tokens=200)
+            assert len(response) == 2
+            for r in response:
+                assert r[0] == "{"
+        else:
+            response = generator("Create a character", max_tokens=20)
+            assert response[0] == "{"
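The new block runs the generator twice to confirm the processor survives reuse across generations. Only the torch-backed models exercise generator.batch with two prompts; the llama.cpp and mlx models fall back to a single prompt per call, which is also why the keyword argument differs (max_new_tokens for the transformers batch API, max_tokens elsewhere).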