Skip to content

Commit df047b2

Browse files
committed
feat: support Pandas v3
1 parent b5b1b0d commit df047b2

File tree

6 files changed

+1611
-1119
lines changed

6 files changed

+1611
-1119
lines changed

.github/workflows/python.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ jobs:
1818
strategy:
1919
matrix:
2020
python-version: ["3.10", 3.11, 3.12, 3.13]
21+
# Each entry is appended verbatim to "pandas" in the install step and is also
# interpolated into the cache key, so it must be a complete version specifier
# and must not contain commas. "~=2.2" means >=2.2,<3.0; "~=3.0" means >=3.0,<4.0.
pandas-version: ["~=2.2", "~=3.0"]
2122
defaults:
2223
run:
2324
working-directory: ./python-client
@@ -31,8 +32,10 @@ jobs:
3132
- uses: actions/cache@v4
3233
with:
3334
path: ~/.cache/uv
34-
key: uv-${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
35-
- run: uv sync --frozen --all-extras
35+
key: uv-${{ matrix.python-version }}-${{ matrix.pandas-version }}-${{ hashFiles('uv.lock') }}
36+
- run: |
37+
uv sync --frozen --all-extras
38+
uv pip install "pandas${{ matrix.pandas-version }}"
3639
# --no-sync: `uv run` otherwise re-syncs the environment to uv.lock before
# running, which would replace a pandas version installed outside the lockfile.
- run: uv run --no-sync pytest
3740
lint:
3841
runs-on: ubuntu-latest

python-client/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ dependencies = [
1616
"loguru~=0.7",
1717
"micawber~=0.5",
1818
"multimethod~=2.0",
19-
"pandas~=2.2",
19+
"pandas>=2.2,<4.0",
2020
"pyarrow~=21.0.0",
2121
"pydantic>2.0.1,!=2.1.0,<3.0.0", # same as FastAPI
2222
"pyhumps~=3.8",

python-client/report.html

Lines changed: 43 additions & 0 deletions
Large diffs are not rendered by default.

python-client/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
uv==0.9.18
1+
uv==0.9.26

python-client/tests/common/test_df_processor.py

Lines changed: 94 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
)
1919
from arakawa.types import SList
2020

21+
IS_PANDAS_V2 = pd.__version__.startswith("2.")
22+
IS_PANDAS_V3 = pd.__version__.startswith("3.")
23+
2124

2225
def _check_categories_parsed(df: pd.DataFrame, categorical_columns: SList):
2326
assert set(df.select_dtypes("category").columns) == set(categorical_columns)
@@ -153,7 +156,12 @@ def test_timedelta_to_str():
153156
}
154157
)
155158
timedelta_to_str(df)
156-
assert [str(x) for x in df.dtypes] == ["object", "int64", "object"]
159+
160+
if IS_PANDAS_V2:
161+
assert [str(x) for x in df.dtypes] == ["object", "int64", "object"]
162+
if IS_PANDAS_V3:
163+
assert [str(x) for x in df.dtypes] == ["str", "int64", "str"]
164+
157165
df1 = df.convert_dtypes()
158166
assert [str(x) for x in df1.dtypes] == ["string", "Int64", "string"]
159167

@@ -211,7 +219,7 @@ def _test_order(df: pd.DataFrame):
211219
def test_e2e_df_processing(tmp_path: Path):
212220
def _test_df(
213221
df: pd.DataFrame,
214-
expected_types_pd13: SList,
222+
expected_types: SList,
215223
):
216224
df_conv = df.convert_dtypes()
217225
df_proc = process_df(df, copy=True)
@@ -235,26 +243,40 @@ def _test_df(
235243
check_column_type=False,
236244
)
237245

238-
expected_types = expected_types_pd13
239-
240246
assert [str(x) for x in df2.dtypes] == expected_types
241247

242248
# DF 1
243249
df = vd.data.cars()
244-
_test_df(
245-
df,
246-
[
247-
"string",
248-
"Float64",
249-
"UInt8",
250-
"Float64",
251-
"UInt8",
252-
"UInt16",
253-
"Float64",
254-
"datetime64[ns]",
255-
"category",
256-
],
257-
)
250+
if IS_PANDAS_V2:
251+
_test_df(
252+
df,
253+
[
254+
"string",
255+
"Float64",
256+
"UInt8",
257+
"Float64",
258+
"UInt8",
259+
"UInt16",
260+
"Float64",
261+
"datetime64[ns]",
262+
"category",
263+
],
264+
)
265+
if IS_PANDAS_V3:
266+
_test_df(
267+
df,
268+
[
269+
"string",
270+
"Float64",
271+
"UInt8",
272+
"Float64",
273+
"UInt8",
274+
"UInt16",
275+
"Float64",
276+
"datetime64[us]",
277+
"category",
278+
],
279+
)
258280

259281
# DF 2 - float64/int64 downcasting for older pandas versions
260282
df = pd.DataFrame(
@@ -296,20 +318,36 @@ def _test_df(
296318
"cat_obj_col": [("a", "b") for x in range(30)],
297319
}
298320
)
299-
_test_df(
300-
df,
301-
[
302-
"string",
303-
"category",
304-
"UInt8",
305-
"Int64",
306-
"Float64",
307-
"string",
308-
"datetime64[ns]",
309-
"string",
310-
"category",
311-
],
312-
)
321+
if IS_PANDAS_V2:
322+
_test_df(
323+
df,
324+
[
325+
"string",
326+
"category",
327+
"UInt8",
328+
"Int64",
329+
"Float64",
330+
"string",
331+
"datetime64[ns]",
332+
"string",
333+
"category",
334+
],
335+
)
336+
if IS_PANDAS_V3:
337+
_test_df(
338+
df,
339+
[
340+
"string",
341+
"category",
342+
"UInt8",
343+
"Int64",
344+
"Float64",
345+
"string",
346+
"datetime64[us]",
347+
"string",
348+
"category",
349+
],
350+
)
313351

314352
# DF 4 - nullable
315353
df = pd.DataFrame(
@@ -323,4 +361,27 @@ def _test_df(
323361
"date_col": [datetime.utcnow() for x in range(30)] + [pd.NaT, pd.NaT],
324362
}
325363
)
326-
_test_df(df, ["string", "category", "UInt8", "Float64", "string", "datetime64[ns]"])
364+
if IS_PANDAS_V2:
365+
_test_df(
366+
df,
367+
[
368+
"string",
369+
"category",
370+
"UInt8",
371+
"Float64",
372+
"string",
373+
"datetime64[ns]",
374+
],
375+
)
376+
if IS_PANDAS_V3:
377+
_test_df(
378+
df,
379+
[
380+
"string",
381+
"category",
382+
"UInt8",
383+
"Float64",
384+
"string",
385+
"datetime64[us]",
386+
],
387+
)

0 commit comments

Comments
 (0)