Aider-AI · graydeon · Apr 3, 2026
diff --git a/aider/models.py b/aider/models.py
@@ -1006,7 +1006,7 @@ def send_completion(self, messages, functions, stream, temperature=None):
             kwargs["timeout"] = request_timeout
         if self.verbose:
             dump(kwargs)
-        kwargs["messages"] = messages
+        kwargs["messages"] = sanitize_for_utf8(messages)
 
         # Are we using github copilot?
         if "GITHUB_COPILOT_TOKEN" in os.environ:
@@ -1067,6 +1067,24 @@ def simple_send_with_retries(self, messages):
                 return None
 
 
+def sanitize_for_utf8(obj):
+    """Return *obj* with every string made safe to encode as UTF-8.
+
+    On some Windows systems (e.g. Chinese locale), file content or console
+    input can contain surrogate code points that cause UnicodeEncodeError
+    when httpx tries to JSON-encode the outgoing LLM request.
+
+    Strings that already encode cleanly are returned as-is.  Otherwise a
+    surrogate pair stored as two separate code points is joined into the
+    character it spells, and each remaining lone surrogate becomes U+FFFD,
+    the Unicode replacement character.
+
+    Lists and dicts (keys and values) are rebuilt recursively, so the input
+    is never mutated; any other object is returned unchanged.
+    """
+    if isinstance(obj, str):
+        try:
+            # Surrogates are the only code points a str cannot encode to UTF-8,
+            # so a successful encode means there is nothing to repair.
+            obj.encode("utf-8")
+        except UnicodeEncodeError:
+            # encode(errors="replace") would substitute "?" rather than U+FFFD.
+            # A UTF-16 round trip instead lets the decoder merge valid pairs
+            # and emit one U+FFFD per unpaired surrogate.
+            return obj.encode("utf-16", "surrogatepass").decode("utf-16", "replace")
+        return obj
+    if isinstance(obj, dict):
+        # Keys end up in the JSON payload too, so they get the same treatment.
+        return {sanitize_for_utf8(k): sanitize_for_utf8(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [sanitize_for_utf8(item) for item in obj]
+    return obj
+
+
 def register_models(model_settings_fnames):
     files_loaded = []
     for model_settings_fname in model_settings_fnames:

diff --git a/tests/basic/test_models.py b/tests/basic/test_models.py
@@ -6,6 +6,7 @@
     Model,
     ModelInfoManager,
     register_models,
+    sanitize_for_utf8,
     sanity_check_model,
     sanity_check_models,
 )
@@ -556,5 +557,37 @@ def test_use_temperature_in_send_completion(self, mock_completion):
         )
 
 
+class TestSanitizeForUtf8(unittest.TestCase):
+    """Tests for sanitize_for_utf8.
+
+    The assertions pin the contract (no surrogate survives, the result encodes
+    as UTF-8, clean data is preserved) without fixing which substitute
+    character is used.
+    """
+
+    def test_replaces_surrogates_in_string(self):
+        text = "hello \udcb0 world"
+        result = sanitize_for_utf8(text)
+        self.assertNotIn("\udcb0", result)
+        self.assertTrue(result.startswith("hello "))
+        self.assertTrue(result.endswith(" world"))
+        # Raises UnicodeEncodeError if any surrogate survived.
+        result.encode("utf-8")
+
+    def test_handles_nested_messages(self):
+        import json
+
+        messages = [
+            {"role": "user", "content": "test \udcb0 content"},
+            # Multi-part content: a list of dicts nested inside the message.
+            {"role": "user", "content": [{"type": "text", "text": "part \udcb0"}]},
+            {"role": "assistant", "content": "clean content"},
+        ]
+        result = sanitize_for_utf8(messages)
+        # ensure_ascii=False keeps raw code points in the output; the default
+        # would escape surrogates to ASCII and make this check pass even on
+        # unsanitized input.
+        json.dumps(result, ensure_ascii=False).encode("utf-8")
+        self.assertNotIn("\udcb0", result[0]["content"])
+        self.assertNotIn("\udcb0", result[1]["content"][0]["text"])
+        self.assertEqual(result[2]["content"], "clean content")
+        # The caller's message list must not be modified in place.
+        self.assertEqual(messages[0]["content"], "test \udcb0 content")
+        self.assertEqual(messages[1]["content"][0]["text"], "part \udcb0")
+
+    def test_preserves_non_surrogate_unicode(self):
+        # Chinese characters plus a character outside the BMP.
+        text = "hello \u4e16\u754c \U0001f600"
+        result = sanitize_for_utf8(text)
+        self.assertEqual(result, text)
+
+    def test_passes_through_non_string_types(self):
+        self.assertEqual(sanitize_for_utf8(42), 42)
+        self.assertEqual(sanitize_for_utf8(1.5), 1.5)
+        self.assertIsNone(sanitize_for_utf8(None))
+        # assertIs: True must stay the bool singleton, not merely equal 1.
+        self.assertIs(sanitize_for_utf8(True), True)
+
+
 if __name__ == "__main__":
     unittest.main()