Commit d74be96
Fixed an issue where non-ASCII characters were escaped to Unicode escape sequences within the prompt (#1490)

- By default, json.dumps escapes non-ASCII characters to \uXXXX Unicode escape sequences, so non-ASCII text in the prompt is mangled.
- Because of this, users who write in non-English characters receive abnormal responses when communicating with the LLM.
- The issue is resolved by passing ensure_ascii=False to json.dumps.
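A minimal standalone sketch (standard library only, not code from this repository) of the behavior the fix addresses:

```python
import json

# By default, json.dumps escapes every non-ASCII character to a \uXXXX
# escape sequence, so non-English text reaches the LLM in escaped form.
escaped = json.dumps({"question": "안녕하세요"})
print(escaped)  # {"question": "\uc548\ub155\ud558\uc138\uc694"}

# With ensure_ascii=False, the original characters are preserved verbatim.
preserved = json.dumps({"question": "안녕하세요"}, ensure_ascii=False)
print(preserved)  # {"question": "안녕하세요"}
```

Both strings parse back to the same dict; the difference only matters when the serialized text itself is shown to a reader, as it is inside a prompt.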
1 parent 1cc92da commit d74be96

File tree

2 files changed: +2 -2 lines changed
src/ragas/llms/output_parser.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ def get_json_format_instructions(pydantic_object: t.Type[TBaseModel]) -> str:
     if "title" in reduced_schema:
         del reduced_schema["title"]
     # Ensure json in context is well-formed with double quotes.
-    schema_str = json.dumps(reduced_schema)
+    schema_str = json.dumps(reduced_schema, ensure_ascii=False)

     resp = JSON_FORMAT_INSTRUCTIONS.format(schema=schema_str)
     return resp

src/ragas/llms/prompt.py

Lines changed: 1 addition & 1 deletion

@@ -160,7 +160,7 @@ def format(self, **kwargs: t.Any) -> PromptValue:
         )
         for key, value in kwargs.items():
             if isinstance(value, str):
-                kwargs[key] = json.dumps(value)
+                kwargs[key] = json.dumps(value, ensure_ascii=False).encode("utf8").decode()

         prompt = self.to_string()
         return PromptValue(prompt_str=prompt.format(**kwargs))
