Skip to content

Commit 8764224

Browse files
authored
INTPYTHON-653 & INTPYTHON-620 Make pandas optional and add free-threading support on Windows (#324)
1 parent fc50c17 commit 8764224

File tree

16 files changed

+149
-38
lines changed

16 files changed

+149
-38
lines changed

.github/workflows/test-python.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@ jobs:
3838
matrix:
3939
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
4040
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
41-
exclude:
42-
- os: "windows-latest"
43-
python-version: "3.13t"
4441
fail-fast: false
4542
name: CPython ${{ matrix.python-version }}-${{ matrix.os }}
4643
steps:
@@ -89,10 +86,14 @@ jobs:
8986
- name: Ensure imports with no test deps
9087
run: just import-check
9188
- name: Run the tests
89+
if: ${{ ! endsWith(matrix.python-version, 't') }}
9290
env:
9391
UV_PYTHON: ${{matrix.python-version}}
9492
run: just test
95-
93+
- name: Run the tests with no optional deps
94+
env:
95+
UV_PYTHON: ${{matrix.python-version}}
96+
run: just test-no-optional
9697
docs:
9798
runs-on: ubuntu-latest
9899
steps:

bindings/python/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33

44
---
55

6+
# Changes in Version 1.10.0 (2025/xx/yy)
7+
8+
- Make `pandas` an optional dependency.
9+
- Add support for free-threaded Python on Windows.
10+
611
# Changes in Version 1.9.0 (2025/05/27)
712

813
- Providing a schema now enforces strict type adherence for data.

bindings/python/justfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import-check:
1414
uv run python -c "from pymongoarrow.lib import libbson_version"
1515

1616
benchmark *args:
17-
uv sync --dev --extra test --extra test-polars
17+
uv sync --dev --extra test --extra test-polars --extra test-pandas
1818
uv run asv run -e --python=$(uv run python -c "import sys;print(sys.executable)") {{args}}
1919

2020
install:
@@ -23,8 +23,10 @@ install:
2323
uv run pre-commit install
2424

2525
test *args:
26-
uv sync --extra test --extra test-polars || uv sync --extra test
27-
uv run pytest {{args}}
26+
uv run --no-dev --extra test --extra test-polars --extra test-pandas pytest {{args}}
27+
28+
test-no-optional *args:
29+
uv run --no-dev --extra test pytest {{args}}
2830

2931
lint:
3032
uv sync --no-install-project --dev --frozen

bindings/python/pymongoarrow/api.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
from decimal import Decimal
1616

1717
import numpy as np
18-
import pandas as pd
18+
19+
try:
20+
import pandas as pd
21+
except ImportError:
22+
pd = None
1923

2024
try:
2125
import polars as pl
@@ -170,6 +174,9 @@ def _arrow_to_pandas(arrow_table):
170174
See https://arrow.apache.org/docs/python/pandas.html#reducing-memory-use-in-table-to-pandas
171175
for details.
172176
"""
177+
if pd is None:
178+
msg = "pandas is not installed. Try pip install pandas."
179+
raise ValueError(msg)
173180
return arrow_table.to_pandas(split_blocks=True, self_destruct=True)
174181

175182

@@ -238,10 +245,10 @@ def _arrow_to_numpy(arrow_table, schema=None):
238245

239246
for fname in schema:
240247
dtype = get_numpy_type(schema[fname])
248+
container[fname] = arrow_table[fname].to_numpy()
241249
if dtype == np.str_:
242-
container[fname] = arrow_table[fname].to_pandas().to_numpy(dtype=dtype)
243-
else:
244-
container[fname] = arrow_table[fname].to_numpy()
250+
container[fname] = container[fname].astype(np.str_)
251+
245252
return container
246253

247254

@@ -427,7 +434,7 @@ def _tabular_generator(tabular, *, exclude_none=False):
427434
yield {k: v for k, v in row.items() if v is not None}
428435
else:
429436
yield row
430-
elif isinstance(tabular, pd.DataFrame):
437+
elif pd is not None and isinstance(tabular, pd.DataFrame):
431438
for row in tabular.to_dict("records"):
432439
if exclude_none:
433440
yield {k: v for k, v in row.items() if not np.isnan(v)}
@@ -498,7 +505,7 @@ def write(collection, tabular, *, exclude_none: bool = False):
498505
cols = [tabular.column(i).cast(new_types[i]) for i in range(tabular.num_columns)]
499506
tabular = Table.from_arrays(cols, names=tabular.column_names)
500507
_validate_schema(tabular.schema.types)
501-
elif isinstance(tabular, pd.DataFrame):
508+
elif pd is not None and isinstance(tabular, pd.DataFrame):
502509
_validate_schema(ArrowSchema.from_pandas(tabular).types)
503510
elif pl is not None and isinstance(tabular, pl.DataFrame):
504511
tabular = tabular.to_arrow() # zero-copy in most cases and done in tabular_gen anyway
@@ -523,7 +530,10 @@ def write(collection, tabular, *, exclude_none: bool = False):
523530

524531
# Add handling for special case types.
525532
codec_options = collection.codec_options
526-
type_registry = TypeRegistry([_PandasNACodec(), _DecimalCodec()])
533+
if pd is not None:
534+
type_registry = TypeRegistry([_PandasNACodec(), _DecimalCodec()])
535+
else:
536+
type_registry = TypeRegistry([_DecimalCodec()])
527537
codec_options = codec_options.with_options(type_registry=type_registry)
528538

529539
while cur_offset < tab_size:

bindings/python/pymongoarrow/pandas_types.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,22 @@
1919
import re
2020

2121
import numpy as np
22-
import pandas as pd
2322
import pyarrow as pa
2423
from bson import Binary, Code, Decimal128, ObjectId
25-
from pandas.api.extensions import (
26-
ExtensionArray,
27-
ExtensionDtype,
28-
register_extension_dtype,
29-
)
24+
25+
try:
26+
import pandas as pd
27+
from pandas.api.extensions import (
28+
ExtensionArray,
29+
ExtensionDtype,
30+
register_extension_dtype,
31+
)
32+
except ImportError:
33+
ExtensionDtype = object
34+
ExtensionArray = object
35+
36+
def register_extension_dtype(func):
37+
return func
3038

3139

3240
class PandasBSONDtype(ExtensionDtype):

bindings/python/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies = [
3939
# Must be kept in sync with "build-system.requires" above.
4040
"pyarrow >=20.0,<20.1",
4141
"pymongo >=4.4,<5",
42-
"pandas >=1.3.5,<3",
42+
"numpy>=2.0.1",
4343
"packaging >=23.2",
4444
]
4545
dynamic = ["version"]
@@ -53,6 +53,7 @@ Tracker = "https://jira.mongodb.org/projects/INTPYTHON/issues"
5353
[project.optional-dependencies]
5454
test = ["pytz", "pytest"]
5555
test-polars = ["polars"]
56+
test-pandas = ["pandas>=1.3.5,<3"]
5657

5758
[tool.setuptools]
5859
zip-safe = false

bindings/python/test/conftest.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,14 @@
1515

1616
import pytest
1717

18-
pytest_plugins = [
19-
"pandas.tests.extension.conftest",
20-
]
18+
try:
19+
import pandas as pd # noqa: F401
20+
21+
pytest_plugins = [
22+
"pandas.tests.extension.conftest",
23+
]
24+
except ImportError:
25+
pass
2126

2227

2328
@pytest.fixture(autouse=True, scope="session")

bindings/python/test/pandas_types/test_binary.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,14 @@
1414
import numpy as np
1515
import pytest
1616
from bson import Binary
17-
from pandas.tests.extension import base
1817

1918
from pymongoarrow.pandas_types import PandasBinary, PandasBinaryArray
2019

20+
try:
21+
from pandas.tests.extension import base
22+
except ImportError:
23+
pytest.skip("skipping pandas tests", allow_module_level=True)
24+
2125
try:
2226
base.BaseIndexTests
2327
except AttributeError:

bindings/python/test/pandas_types/test_code.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,15 @@
1414
import numpy as np
1515
import pytest
1616
from bson import Code
17-
from pandas.tests.extension import base
1817

1918
from pymongoarrow.pandas_types import PandasCode, PandasCodeArray
2019

20+
try:
21+
from pandas.tests.extension import base
22+
except ImportError:
23+
pytest.skip("skipping pandas tests", allow_module_level=True)
24+
25+
2126
try:
2227
base.BaseIndexTests
2328
except AttributeError:

bindings/python/test/pandas_types/test_decimal128.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,15 @@
1414
import numpy as np
1515
import pytest
1616
from bson import Decimal128
17-
from pandas.tests.extension import base
1817

1918
from pymongoarrow.pandas_types import PandasDecimal128, PandasDecimal128Array
2019

20+
try:
21+
from pandas.tests.extension import base
22+
except ImportError:
23+
pytest.skip("skipping pandas tests", allow_module_level=True)
24+
25+
2126
try:
2227
base.BaseIndexTests
2328
except AttributeError:

0 commit comments

Comments
 (0)