Skip to content

Commit 65ad267

Browse files
authored
Merge pull request #1733 from matteocacciola/fix/serialize-dataframe-columns
serialization of columns added into the definition of the table
2 parents 8961e05 + ab7c8fe commit 65ad267

File tree

5 files changed

+34
-16
lines changed

5 files changed

+34
-16
lines changed

CONTRIBUTING.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,25 @@ make spell_fix
6565
We use `pytest` to test our code. You can run the tests by running the following command:
6666

6767
```bash
68-
make tests
68+
make test_all
69+
```
70+
71+
If you prefer, you can run only the core tests with the command:
72+
73+
```bash
74+
make test_core
75+
```
76+
77+
or only the extension tests with the command:
78+
79+
```bash
80+
make test_extensions
81+
```
82+
83+
You can also run the tests with coverage by running the following command:
84+
85+
```bash
86+
make test-coverage
6987
```
7088

7189
Make sure that all tests pass before submitting a pull request.

pandasai/agent/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ def generate_code_with_retries(self, query: str) -> Any:
175175
self._state.logger.log(
176176
f"Retrying Code Generation ({attempts}/{max_retries})..."
177177
)
178+
return None
178179

179180
def execute_with_retries(self, code: str) -> Any:
180181
"""Execute the code with retry logic."""
@@ -194,6 +195,7 @@ def execute_with_retries(self, code: str) -> Any:
194195
f"Retrying execution ({attempts}/{max_retries})..."
195196
)
196197
code = self._regenerate_code_after_error(code, e)
198+
return None
197199

198200
def train(
199201
self,

pandasai/data_loader/semantic_layer_schema.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ class Column(BaseModel):
4747
name: str = Field(..., description="Name of the column.")
4848
type: Optional[str] = Field(None, description="Data type of the column.")
4949
description: Optional[str] = Field(None, description="Description of the column")
50-
expression: Optional[str] = Field(
51-
None, description="Aggregation expression (avg, min, max, sum)"
52-
)
50+
expression: Optional[str] = Field(None, description="Aggregation expression (avg, min, max, sum)")
5351
alias: Optional[str] = Field(None, description="Alias for the column")
5452

5553
@field_validator("type")
@@ -63,7 +61,9 @@ def is_column_type_supported(cls, type: str) -> str:
6361

6462
@field_validator("expression")
6563
@classmethod
66-
def is_expression_valid(cls, expr: str) -> str:
64+
def is_expression_valid(cls, expr: str) -> Optional[str]:
65+
if expr is None:
66+
return expr
6767
try:
6868
parse_one(expr)
6969
return expr
@@ -73,12 +73,8 @@ def is_expression_valid(cls, expr: str) -> str:
7373

7474
class Relation(BaseModel):
7575
name: Optional[str] = Field(None, description="Name of the relationship.")
76-
description: Optional[str] = Field(
77-
None, description="Description of the relationship."
78-
)
79-
from_: str = Field(
80-
..., alias="from", description="Source column for the relationship."
81-
)
76+
description: Optional[str] = Field(None, description="Description of the relationship.")
77+
from_: str = Field(..., alias="from", description="Source column for the relationship.")
8278
to: str = Field(..., description="Target column for the relationship.")
8379

8480

pandasai/helpers/dataframe_serializer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ def serialize(cls, df: "DataFrame", dialect: str = "postgres") -> str:
2828
if df.schema.description is not None:
2929
dataframe_info += f' description="{df.schema.description}"'
3030

31+
if df.schema.columns:
32+
columns = [column.model_dump() for column in df.schema.columns]
33+
dataframe_info += f' columns="{json.dumps(columns, ensure_ascii=False)}"'
34+
3135
dataframe_info += f' dimensions="{df.rows_count}x{df.columns_count}">'
3236

3337
# Truncate long values

tests/unit_tests/helpers/test_dataframe_serializer.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import pandas as pd
2-
31
from pandasai.helpers.dataframe_serializer import DataframeSerializer
42

53

@@ -8,7 +6,7 @@ def test_serialize_with_name_and_description(self, sample_df):
86
"""Test serialization with name and description attributes."""
97

108
result = DataframeSerializer.serialize(sample_df)
11-
expected = """<table dialect="postgres" table_name="table_6c30b42101939c7bdf95f4c1052d615c" dimensions="3x2">
9+
expected = """<table dialect="postgres" table_name="table_6c30b42101939c7bdf95f4c1052d615c" columns="[{"name": "A", "type": "integer", "description": null, "expression": null, "alias": null}, {"name": "B", "type": "integer", "description": null, "expression": null, "alias": null}]" dimensions="3x2">
1210
A,B
1311
1,4
1412
2,5
@@ -21,7 +19,7 @@ def test_serialize_with_name_and_description_with_dialect(self, sample_df):
2119
"""Test serialization with name and description attributes."""
2220

2321
result = DataframeSerializer.serialize(sample_df, dialect="mysql")
24-
expected = """<table dialect="mysql" table_name="table_6c30b42101939c7bdf95f4c1052d615c" dimensions="3x2">
22+
expected = """<table dialect="mysql" table_name="table_6c30b42101939c7bdf95f4c1052d615c" columns="[{"name": "A", "type": "integer", "description": null, "expression": null, "alias": null}, {"name": "B", "type": "integer", "description": null, "expression": null, "alias": null}]" dimensions="3x2">
2523
A,B
2624
1,4
2725
2,5
@@ -44,7 +42,7 @@ def test_serialize_with_dataframe_long_strings(self, sample_df):
4442
truncated_text = long_text[: DataframeSerializer.MAX_COLUMN_TEXT_LENGTH] + "…"
4543

4644
# Expected output
47-
expected = f"""<table dialect="mysql" table_name="table_6c30b42101939c7bdf95f4c1052d615c" dimensions="3x2">
45+
expected = f"""<table dialect="mysql" table_name="table_6c30b42101939c7bdf95f4c1052d615c" columns="[{{"name": "A", "type": "integer", "description": null, "expression": null, "alias": null}}, {{"name": "B", "type": "integer", "description": null, "expression": null, "alias": null}}]" dimensions="3x2">
4846
A,B
4947
{truncated_text},4
5048
2,5

0 commit comments

Comments
 (0)