diff --git a/langfuse/_client/attributes.py b/langfuse/_client/attributes.py
index 5ae81000c..9bd01c44e 100644
--- a/langfuse/_client/attributes.py
+++ b/langfuse/_client/attributes.py
@@ -164,7 +164,7 @@ def _serialize(obj: Any) -> Optional[str]:
     if obj is None or isinstance(obj, str):
         return obj
 
-    return json.dumps(obj, cls=EventSerializer)
+    return json.dumps(obj, cls=EventSerializer, ensure_ascii=False)
 
 
 def _flatten_and_serialize_metadata(
diff --git a/langfuse/_task_manager/score_ingestion_consumer.py b/langfuse/_task_manager/score_ingestion_consumer.py
index 1a5b61f91..7ad781be0 100644
--- a/langfuse/_task_manager/score_ingestion_consumer.py
+++ b/langfuse/_task_manager/score_ingestion_consumer.py
@@ -85,7 +85,7 @@ def _next(self) -> list:
 
                 # check for serialization errors
                 try:
-                    json.dumps(event, cls=EventSerializer)
+                    json.dumps(event, cls=EventSerializer, ensure_ascii=False)
                 except Exception as e:
                     self._log.error(
                         f"Data error: Failed to serialize score object for ingestion. Score will be dropped. Error: {e}"
@@ -117,7 +117,7 @@ def _next(self) -> list:
 
     def _get_item_size(self, item: Any) -> int:
         """Return the size of the item in bytes."""
-        return len(json.dumps(item, cls=EventSerializer).encode())
+        return len(json.dumps(item, cls=EventSerializer, ensure_ascii=False).encode())
 
     def run(self) -> None:
         """Run the consumer."""
diff --git a/langfuse/_utils/request.py b/langfuse/_utils/request.py
index b106cee2f..a7be90f47 100644
--- a/langfuse/_utils/request.py
+++ b/langfuse/_utils/request.py
@@ -60,7 +60,7 @@ def post(self, **kwargs: Any) -> httpx.Response:
         """Post the `kwargs` to the API"""
         log = logging.getLogger("langfuse")
         url = self._remove_trailing_slash(self._base_url) + "/api/public/ingestion"
-        data = json.dumps(kwargs, cls=EventSerializer)
+        data = json.dumps(kwargs, cls=EventSerializer, ensure_ascii=False)
         log.debug("making request: %s to %s", data, url)
         headers = self.generate_headers()
         res = self._session.post(
diff --git a/tests/test_unicode_serialization.py b/tests/test_unicode_serialization.py
new file mode 100644
index 000000000..9c9080390
--- /dev/null
+++ b/tests/test_unicode_serialization.py
@@ -0,0 +1,20 @@
+"""Test Unicode character handling in serialization."""
+
+from langfuse._client.attributes import _serialize
+
+def test_mixed_unicode_preserved():
+    """Test that mixed Unicode content is preserved."""
+    data = {
+        "japanese": "こんにちは",
+        "chinese": "你好",
+        "korean": "안녕하세요",
+        "arabic": "مرحبا",
+        "russian": "Привет",
+        "emoji": "Hello, 🌍!",
+    }
+    serialized = _serialize(data)
+    assert serialized is not None
+
+    assert "\\u" not in serialized, "Should not contain Unicode escapes"
+    for value in data.values():
+        assert value in serialized, f"Should contain {value}"