Changes from 4 commits
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -241,7 +241,7 @@ Documentation improvements are always welcome! Follow these guidelines:
 
 We encourage contributions to our evaluation tests:
 
-1. Explore existing evals in the [evals directory](https://github.com/instructor-ai/instructor/tree/main/tests/llm/test_openai/evals)
+1. Explore existing evals in the [evals directory](https://github.com/instructor-ai/instructor/tree/main/tests/llm)
 2. Contribute new evals as pytest tests
 3. Evals should test specific capabilities or edge cases of the library or models
 4. Follow the existing patterns for structuring eval tests
38 changes: 31 additions & 7 deletions instructor/dsl/partial.py
@@ -242,9 +242,16 @@ def model_from_chunks(
         partial_mode = (
             "on" if issubclass(cls, PartialLiteralMixin) else "trailing-strings"
         )
-        chunk_buffer = []
+        chunk_buffer: list[str] = []
         for chunk in json_chunks:
-            chunk_buffer += chunk
+            if chunk is None:
+                continue
+            if not isinstance(chunk, str):
+                try:
+                    chunk = str(chunk)
+                except Exception:
+                    continue
+            chunk_buffer.append(chunk)
             if len(chunk_buffer) < 2:
                 continue
             potential_object += remove_control_chars("".join(chunk_buffer))
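
Reviewer note: as a minimal standalone sketch of the chunk-normalization rules this hunk introduces (the `normalize_chunks` helper is hypothetical, not part of this PR):

```python
# Hypothetical helper illustrating the guards above; not part of this PR.
from typing import Any
from collections.abc import Iterable, Iterator


def normalize_chunks(chunks: Iterable[Any]) -> Iterator[str]:
    """Yield chunks as strings: drop None, coerce non-str, skip failures."""
    for chunk in chunks:
        if chunk is None:
            continue  # None chunks are ignored entirely
        if not isinstance(chunk, str):
            try:
                chunk = str(chunk)  # best-effort coercion, as in the diff
            except Exception:
                continue  # unconvertible chunks are dropped
        yield chunk


# Mixed input: whitespace, None, an int, and real JSON fragments.
assert list(normalize_chunks(["\n", None, 1, '{"b":', " 1}"])) == ["\n", "1", '{"b":', " 1}"]
```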
@@ -254,7 +261,7 @@
             )
             yield obj
         if chunk_buffer:
-            potential_object += remove_control_chars(chunk_buffer[0])
+            potential_object += remove_control_chars("".join(chunk_buffer))
             obj = process_potential_object(
                 potential_object, partial_mode, partial_model, **kwargs
             )
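
`process_potential_object` itself is not shown in this diff; judging from the `from_json`/`model_validate` calls it replaces in the async path below, it presumably looks roughly like the following sketch (an assumption, not the repository's actual code):

```python
# Assumed shape of process_potential_object, inferred from the calls it
# replaces in this diff -- NOT the repository's actual implementation.
from jiter import from_json


def process_potential_object(potential_object, partial_mode, partial_model, **kwargs):
    # Parse the accumulated (possibly incomplete) JSON, defaulting to "{}".
    obj = from_json(
        (potential_object.strip() or "{}").encode(), partial_mode=partial_mode
    )
    # Validate into the partial pydantic model, as the old inline code did.
    return partial_model.model_validate(obj, strict=None, **kwargs)
```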
@@ -269,12 +276,29 @@ async def model_from_chunks_async(
         partial_mode = (
             "on" if issubclass(cls, PartialLiteralMixin) else "trailing-strings"
         )
+        chunk_buffer: list[str] = []
         async for chunk in json_chunks:
-            potential_object += chunk
-            obj = from_json(
-                (potential_object.strip() or "{}").encode(), partial_mode=partial_mode
+            if chunk is None:
+                continue
+            if not isinstance(chunk, str):
+                try:
+                    chunk = str(chunk)
+                except Exception:
+                    continue
+            chunk_buffer.append(chunk)
+            if len(chunk_buffer) < 2:
+                continue
+            potential_object += remove_control_chars("".join(chunk_buffer))
+            chunk_buffer = []
+            obj = process_potential_object(
+                potential_object, partial_mode, partial_model, **kwargs
             )
+            yield obj
+        if chunk_buffer:
+            potential_object += remove_control_chars("".join(chunk_buffer))
+            obj = process_potential_object(
+                potential_object, partial_mode, partial_model, **kwargs
+            )
             obj = partial_model.model_validate(obj, strict=None, **kwargs)
             yield obj
 
     @staticmethod
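
For context, a small consumer-side example of the async path with the new guards (a sketch: `Sample`/`Inner` are stand-in models assumed to have roughly the same shape as `SamplePartial` in the tests below):

```python
# Sketch only: Sample/Inner are stand-in models, not taken from the test suite.
import asyncio
from typing import Optional

from pydantic import BaseModel

from instructor.dsl.partial import Partial


class Inner(BaseModel):
    b: Optional[int] = None


class Sample(BaseModel):
    a: Optional[int] = None
    b: Optional[Inner] = None


async def main() -> None:
    async def chunks():
        # Leading whitespace and a None chunk, then the actual JSON.
        for c in ["\n", None, '{"b": {"b": 1}}']:
            yield c

    # Each yielded object is a partial model built from the buffered chunks.
    async for partial in Partial[Sample].model_from_chunks_async(chunks()):
        print(partial.model_dump())


asyncio.run(main())
```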
43 changes: 17 additions & 26 deletions tests/dsl/test_partial.py
@@ -92,26 +92,25 @@ def test_partial():
}, "Partial model JSON schema has changed"


partial_chunks = ["\n", "\t", " ", "\x00", '{"b": {"b": 1}}']
expected_model_from_chunks = [
# First model has default values
{"a": None, "b": {}},
# Second model has default values, unaffected by control characters
{"a": None, "b": {}},
# Last model has b populated from JSON (from the JSON chunk)
{"a": None, "b": {"b": 1}},
]


def test_partial_with_whitespace():
partial = Partial[SamplePartial]

# Get the actual models from chunks
models = list(partial.model_from_chunks(["\n", "\t", " ", '{"b": {"b": 1}}']))

# Print actual values for debugging
print(f"Number of models: {len(models)}")
models = list(partial.model_from_chunks(partial_chunks))
for i, model in enumerate(models):
print(f"Model {i}: {model.model_dump()}")

# Actual behavior: When whitespace chunks are processed, we may get models
# First model has default values
assert models[0].model_dump() == {"a": None, "b": {}}

# Last model has b populated from JSON (from the JSON chunk)
assert models[-1].model_dump() == {"a": None, "b": {"b": 1}}

# Check we have the expected number of models (2 instead of 4)
assert len(models) == 2
# Expected behavior: When whitespace chunks are processed, we should always get a model
assert model.model_dump() == expected_model_from_chunks[i]


@pytest.mark.asyncio
@@ -120,23 +119,15 @@ async def test_async_partial_with_whitespace():
 
     # Handle any leading whitespace from the model
     async def async_generator():
-        for chunk in ["\n", "\t", " ", '{"b": {"b": 1}}']:
+        for chunk in partial_chunks:
             yield chunk
 
-    expected_model_dicts = [
-        {"a": None, "b": {}},
-        {"a": None, "b": {}},
-        {"a": None, "b": {}},
-        {"a": None, "b": {"b": 1}},
-    ]
-
     i = 0
     async for model in partial.model_from_chunks_async(async_generator()):
-        assert model.model_dump() == expected_model_dicts[i]
+        # Expected behavior: When whitespace chunks are processed, we should always get a model
+        assert model.model_dump() == expected_model_from_chunks[i]
         i += 1
 
-    assert model.model_dump() == {"a": None, "b": {"b": 1}}
-
 
 @pytest.mark.skipif(not os.getenv("OPENAI_API_KEY"), reason="OPENAI_API_KEY not set")
 def test_summary_extraction():
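
To exercise just these cases locally, running `pytest tests/dsl/test_partial.py -k whitespace` against this branch should cover both the sync and async variants.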