adding plotly

matteocacciola · matteocacciola · commit 21e5e9d07b14 · 2025-06-16T19:47:15.000+02:00
Instruct the prompt templates that plotly may be used to generate charts
diff --git a/pandasai/core/code_execution/environment.py b/pandasai/core/code_execution/environment.py
@@ -29,6 +29,7 @@ def get_environment() -> dict:
         "pd": import_dependency("pandas"),
         "plt": import_dependency("matplotlib.pyplot"),
         "np": import_dependency("numpy"),
+        "px": import_dependency("plotly.express"),
     }
 
     return env
diff --git a/pandasai/core/code_generation/code_cleaning.py b/pandasai/core/code_generation/code_cleaning.py
@@ -136,9 +136,10 @@ def clean_code(self, code: str) -> str:
             tuple: Cleaned code as a string and a list of additional dependencies.
         """
         code = self._replace_output_filenames_with_temp_chart(code)
+        code = self._replace_output_filenames_with_temp_json_chart(code)
 
-        # If plt.show is in the code, remove that line
-        code = re.sub(r"plt.show\(\)", "", code)
+        # Strip "<name>.show()" calls such as plt.show() or fig.show() from the code
+        code = re.sub(r"\b[A-Za-z_][\w.]*\.show\(\)", "", code)
 
         tree = ast.parse(code)
         new_body = []
@@ -166,3 +167,16 @@ def _replace_output_filenames_with_temp_chart(self, code: str) -> str:
             lambda m: f"{m.group(1)}{chart_path}{m.group(1)}",
             code,
         )
+
+    def _replace_output_filenames_with_temp_json_chart(self, code: str) -> str:
+        """Redirect quoted ``*.json`` literals in ``code`` to one temp chart path."""
+        file_name = f"temp_chart_{uuid.uuid4()}.json"
+        target = os.path.join(DEFAULT_CHART_DIRECTORY, file_name)
+        # Double any backslashes before the path is embedded in the code string.
+        target = target.replace("\\", "\\\\")
+
+        def _swap(match):
+            quote = match.group(1)  # reuse whichever quote character opened the literal
+            return f"{quote}{target}{quote}"
+
+        return re.sub(r"""(['"])([^'"]*\.json)\1""", _swap, code)
diff --git a/pandasai/core/prompts/templates/generate_python_code_with_sql.tmpl b/pandasai/core/prompts/templates/generate_python_code_with_sql.tmpl
@@ -10,6 +10,9 @@ def execute_sql_query(sql_query: str) -> pd.Dataframe
     """This method connects to the database, executes the sql query and returns the dataframe"""
 </function>
 
+For the charts, you can either use `matplotlib.pyplot` or `plotly.express` to generate the charts.
+If you use `plotly.express`, you have to save each chart as a dictionary into a JSON file.
+
 {% if last_code_generated != "" and context.memory.count() > 0 %}
 {{ last_code_generated }}
 {% else %}
@@ -31,4 +34,4 @@ At the end, declare "result" variable as a dictionary of type and value.
 
 Generate python code and return full updated code:
 
-### Note: Use only relevant table for query and do aggregation, sorting, joins and grouby through sql query
+### Note: Use only relevant table for query and do aggregation, sorting, joins and group by through sql query
diff --git a/pandasai/core/prompts/templates/shared/output_type_template.tmpl b/pandasai/core/prompts/templates/shared/output_type_template.tmpl
@@ -1,5 +1,5 @@
 {% if not output_type %}
-type (possible values "string", "number", "dataframe", "plot"). No other type available. Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }
+type (possible values "string", "number", "dataframe", "plot", "iplot"). No other type available. "plot" is when "matplotlib" is used; "iplot" when "plotly" si used. Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" } or { "type": "iplot", "value": "temp_chart.json" }
 {% elif output_type == "number" %}
 type (must be "number"), value must int. Example: { "type": "number", "value": 125 }
 {% elif output_type == "string" %}
@@ -8,4 +8,6 @@ type (must be "string"), value must be string. Example: { "type": "string", "val
 type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) }
 {% elif output_type == "plot" %}
 type (must be "plot"), value must be string. Example: { "type": "plot", "value": "temp_chart.png" }
+{% elif output_type == "iplot" %}
+type (must be "iplot"), value must be string. Example: { "type": "iplot", "value": "temp_chart.json" }
 {% endif %}
diff --git a/pandasai/core/response/__init__.py b/pandasai/core/response/__init__.py
@@ -2,6 +2,7 @@
 from .chart import ChartResponse
 from .dataframe import DataFrameResponse
 from .error import ErrorResponse
+from .interactive_chart import InteractiveChartResponse
 from .number import NumberResponse
 from .parser import ResponseParser
 from .string import StringResponse
@@ -10,6 +11,7 @@
     "ResponseParser",
     "BaseResponse",
     "ChartResponse",
+    "InteractiveChartResponse",
     "DataFrameResponse",
     "NumberResponse",
     "StringResponse",
diff --git a/pandasai/core/response/interactive_chart.py b/pandasai/core/response/interactive_chart.py
@@ -0,0 +1,34 @@
+import json
+import os
+from typing import Any
+
+from .base import BaseResponse
+
+
+class InteractiveChartResponse(BaseResponse):
+    """Chart response whose value is a dict, a JSON string, or a JSON file path."""
+
+    def __init__(self, value: Any, last_code_executed: str):
+        super().__init__(value, "ichart", last_code_executed)
+
+    def _get_chart(self) -> dict:
+        """Return the chart as a dict, reading a file or parsing JSON for str values."""
+        if isinstance(self.value, dict):
+            return self.value
+        if not isinstance(self.value, str):
+            raise ValueError("Invalid value type for InteractiveChartResponse. Expected dict or str.")
+        if not os.path.exists(self.value):
+            return json.loads(self.value)  # no such file: parse the string itself as JSON
+        with open(self.value, "rb") as source:
+            return json.load(source)
+
+    def save(self, path: str):
+        chart = self._get_chart()  # resolve the chart before ``path`` is opened for writing
+        with open(path, "w") as target:
+            json.dump(chart, target)
+
+    def __str__(self) -> str:
+        return json.dumps(self.value) if not isinstance(self.value, str) else self.value
+
+    def get_dict_image(self) -> dict:
+        return self._get_chart()
diff --git a/pandasai/core/response/parser.py b/pandasai/core/response/parser.py
@@ -8,6 +8,7 @@
 from .base import BaseResponse
 from .chart import ChartResponse
 from .dataframe import DataFrameResponse
+from .interactive_chart import InteractiveChartResponse
 from .number import NumberResponse
 from .string import StringResponse
 
@@ -26,6 +27,8 @@ def _generate_response(self, result: dict, last_code_executed: str = None):
             return DataFrameResponse(result["value"], last_code_executed)
         elif result["type"] == "plot":
             return ChartResponse(result["value"], last_code_executed)
+        elif result["type"] == "iplot":
+            return InteractiveChartResponse(result["value"], last_code_executed)
         else:
             raise InvalidOutputValueMismatch(f"Invalid output type: {result['type']}")
 
@@ -72,4 +75,16 @@ def _validate_response(self, result: dict):
                     "Invalid output: Expected a plot save path str but received an incompatible type."
                 )
 
+        elif result["type"] == "iplot":
+            value = result["value"]
+            path_to_plot_pattern = r"^(\/[\w.-]+)+(/[\w.-]+)*$|^[^\s/]+(/[\w.-]+)*$"
+            # A dict is the chart itself and needs no path check; calling re.match on it
+            # would raise TypeError. Only str values must look like a file path.
+            is_dict = isinstance(value, dict)
+            is_path = isinstance(value, str) and bool(re.match(path_to_plot_pattern, value))
+            if not (is_dict or is_path):
+                raise InvalidOutputValueMismatch(
+                    "Invalid output: Expected a plot save path str but received an incompatible type."
+                )
+
         return True
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,7 @@ seaborn = "^0.12.2"
 sqlglot = "^25.0.3" 
 pyarrow = "^14.0.1"
 pyyaml = "^6.0.2"
+plotly = "^6.1.1"
 
 [tool.poetry.group.dev]
 optional = true
diff --git a/tests/unit_tests/core/code_execution/test_environment.py b/tests/unit_tests/core/code_execution/test_environment.py
@@ -18,6 +18,7 @@ def test_get_environment_with_secure_mode(self, mock_import_dependency):
         self.assertIn("pd", env)
         self.assertIn("plt", env)
         self.assertIn("np", env)
+        self.assertIn("px", env)
 
     @patch("pandasai.core.code_execution.environment.import_dependency")
     def test_get_environment_without_secure_mode(self, mock_import_dependency):
@@ -28,6 +29,7 @@ def test_get_environment_without_secure_mode(self, mock_import_dependency):
         self.assertIn("pd", env)
         self.assertIn("plt", env)
         self.assertIn("np", env)
+        self.assertIn("px", env)
         self.assertIsInstance(env["pd"], MagicMock)
 
     @patch("pandasai.core.code_execution.environment.importlib.import_module")
diff --git a/tests/unit_tests/core/code_generation/test_code_cleaning.py b/tests/unit_tests/core/code_generation/test_code_cleaning.py
@@ -104,6 +104,23 @@ def test_replace_output_filenames_with_temp_chart(self):
         )
         self.assertRegex(code, expected_pattern)
 
+    def test_replace_output_filenames_with_temp_json_chart(self):
+        """A quoted ``.json`` file name is rewritten to a ``temp_chart_*`` JSON path."""
+        handler = self.cleaner
+        handler.context = context = MagicMock()
+        context.config.save_charts = True
+        context.logger = MagicMock()  # Mock logger
+        context.last_prompt_id = 123
+        context.config.save_charts_path = "/custom/path"
+
+        source = 'some text "hello.json" more text'
+        rewritten = handler._replace_output_filenames_with_temp_json_chart(source)
+
+        # The pattern accepts "/" as well as "\" between the path components.
+        expected = r'some text "exports[/\\]+charts[/\\]+temp_chart_.*\.json" more text'
+        expected_pattern = re.compile(expected)
+        self.assertRegex(rewritten, expected_pattern)
+
     def test_replace_output_filenames_with_temp_chart_windows_paths(self):
         handler = self.cleaner
         handler.context = MagicMock()
@@ -156,6 +173,18 @@ def test_replace_output_filenames_with_temp_chart_empty_code(self):
             result, expected_code, f"Expected '{expected_code}', but got '{result}'"
         )
 
+    def test_replace_output_filenames_with_temp_json_chart_empty_code(self):
+        """Empty source has nothing to rewrite, so it must come back unchanged."""
+        expected_code = ""
+
+        result = self.cleaner._replace_output_filenames_with_temp_json_chart("")
+
+        self.assertEqual(
+            result,
+            expected_code,
+            f"Expected '{expected_code}', but got '{result}'",
+        )
+
     def test_replace_output_filenames_with_temp_chart_no_png(self):
         handler = self.cleaner
 
@@ -168,6 +197,18 @@ def test_replace_output_filenames_with_temp_chart_no_png(self):
             result, expected_code, f"Expected '{expected_code}', but got '{result}'"
         )
 
+    def test_replace_output_filenames_with_temp_json_chart_no_json(self):
+        """Source without a quoted ``.json`` name must come back unchanged."""
+        code = expected_code = "some text without json"  # No change should occur
+
+        result = self.cleaner._replace_output_filenames_with_temp_json_chart(code)
+
+        self.assertEqual(
+            result,
+            expected_code,
+            f"Expected '{expected_code}', but got '{result}'",
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/unit_tests/helpers/test_optional_dependency.py b/tests/unit_tests/helpers/test_optional_dependency.py
@@ -32,3 +32,4 @@ def test_env_for_necessary_deps():
     assert "pd" in env
     assert "plt" in env
     assert "np" in env
+    assert "px" in env
diff --git a/tests/unit_tests/helpers/test_responses.py b/tests/unit_tests/helpers/test_responses.py
@@ -9,6 +9,7 @@
 from pandasai.core.response import (
     ChartResponse,
     DataFrameResponse,
+    InteractiveChartResponse,
     NumberResponse,
     StringResponse,
 )
@@ -55,6 +56,14 @@ def test_parse_valid_plot(self):
         self.assertEqual(response.last_code_executed, None)
         self.assertEqual(response.type, "chart")
 
+    def test_parse_valid_interactive_plot(self):
+        """An "iplot" result with a path value parses into an InteractiveChartResponse."""
+        plot_path = "path/to/plot.json"
+        response = self.response_parser.parse({"type": "iplot", "value": plot_path})
+        self.assertIsInstance(response, InteractiveChartResponse)
+        observed = (response.value, response.last_code_executed, response.type)
+        self.assertEqual(observed, (plot_path, None, "ichart"))
+
     def test_plot_img_show_triggered(self):
         result = {
             "type": "plot",
diff --git a/tests/unit_tests/prompts/test_sql_prompt.py b/tests/unit_tests/prompts/test_sql_prompt.py
@@ -21,7 +21,7 @@ class TestGeneratePythonCodeWithSQLPrompt:
         [
             (
                 "",
-                """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""",
+                """type (possible values "string", "number", "dataframe", "plot", "iplot"). No other type available. "plot" is when "matplotlib" is used; "iplot" when "plotly" si used. Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" } or { "type": "iplot", "value": "temp_chart.json" }""",
             ),
             (
                 "number",
@@ -35,6 +35,10 @@ class TestGeneratePythonCodeWithSQLPrompt:
                 "plot",
                 """type (must be "plot"), value must be string. Example: { "type": "plot", "value": "temp_chart.png" }""",
             ),
+            (
+                "iplot",
+                """type (must be "iplot"), value must be string. Example: { "type": "iplot", "value": "temp_chart.json" }""",
+            ),
             (
                 "string",
                 """type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." }""",
@@ -77,6 +81,9 @@ def execute_sql_query(sql_query: str) -> pd.Dataframe
     """This method connects to the database, executes the sql query and returns the dataframe"""
 </function>
 
+For the charts, you can either use `matplotlib.pyplot` or `plotly.express` to generate the charts.
+If you use `plotly.express`, you have to save each chart as a dictionary into a JSON file.
+
 
 Update this initial code:
 ```python
@@ -99,5 +106,5 @@ def execute_sql_query(sql_query: str) -> pd.Dataframe
 
 Generate python code and return full updated code:
 
-### Note: Use only relevant table for query and do aggregation, sorting, joins and grouby through sql query'''  # noqa: E501
+### Note: Use only relevant table for query and do aggregation, sorting, joins and group by through sql query'''  # noqa: E501
         )
diff --git a/tests/unit_tests/response/test_interactive_chart_response.py b/tests/unit_tests/response/test_interactive_chart_response.py

Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@ def get_environment() -> dict:`
`29`	`29`	`"pd": import_dependency("pandas"),`
`30`	`30`	`"plt": import_dependency("matplotlib.pyplot"),`
`31`	`31`	`"np": import_dependency("numpy"),`
	`32`	`+ "px": import_dependency("plotly.express"),`
`32`	`33`	`}`
`33`	`34`
`34`	`35`	`return env`