Skip to content

Commit dfc19ed

Browse files
authored
ARROW-188 Support PyArrow 14 (#178)
* ARROW-188 Support PyArrow 14 * add setuptools * fix py312 compat * fix version spec * address warning * cleanup
1 parent f30d7f7 commit dfc19ed

File tree

6 files changed

+57
-29
lines changed

6 files changed

+57
-29
lines changed

.github/workflows/release-python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
- [ubuntu-20.04, manylinux_aarch64]
3333
- [macos-11, macosx_*]
3434
- [windows-2019, win_amd64]
35-
python: ["cp38", "cp39", "cp310", "cp311"]
35+
python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
3636

3737
steps:
3838
- name: Checkout pymongoarrow

.github/workflows/test-python.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
strategy:
3434
matrix:
3535
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
36-
python-version: [3.8, 3.9, "3.10", "3.11"]
36+
python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
3737
fail-fast: false
3838
name: CPython ${{ matrix.python-version }}-${{ matrix.os }}
3939
steps:
@@ -72,6 +72,7 @@ jobs:
7272
- name: Install libbson
7373
run: |
7474
pip install packaging # needed for mongo-c-driver-1.24.4/build/calc_release_version.py
75+
pip install setuptools # needed for use of distutils
7576
./build-libbson.sh
7677
- name: Install Python dependencies
7778
run: |

bindings/python/pymongoarrow/__init__.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,10 @@
1717

1818
# We must import pyarrow before attempting to load the Cython module.
1919
import pyarrow as pa # noqa: F401
20+
from packaging.version import parse as _parse_version
2021

2122
from pymongoarrow.version import _MIN_LIBBSON_VERSION, __version__ # noqa: F401
2223

23-
try:
24-
from pkg_resources import parse_version as _parse_version
25-
except ImportError:
26-
from distutils.version import LooseVersion as _LooseVersion
27-
28-
def _parse_version(version):
29-
return _LooseVersion(version)
30-
31-
3224
try:
3325
from pymongoarrow.lib import libbson_version
3426
except ImportError:

bindings/python/pymongoarrow/types.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from pyarrow import DataType as _ArrowDataType
2222
from pyarrow import (
2323
ExtensionScalar,
24-
PyExtensionType,
24+
ExtensionType,
2525
binary,
2626
bool_,
2727
float64,
@@ -71,11 +71,11 @@ class ObjectIdScalar(BSONExtensionScalar):
7171
_bson_class = ObjectId
7272

7373

74-
class ObjectIdType(PyExtensionType):
74+
class ObjectIdType(ExtensionType):
7575
_type_marker = _BsonArrowTypes.objectid
7676

7777
def __init__(self):
78-
super().__init__(binary(12))
78+
super().__init__(binary(12), "pymongoarrow.objectid")
7979

8080
def __reduce__(self):
8181
return ObjectIdType, ()
@@ -86,6 +86,13 @@ def __arrow_ext_scalar_class__(self):
8686
def to_pandas_dtype(self):
8787
return PandasObjectId()
8888

89+
def __arrow_ext_serialize__(self):
90+
return b""
91+
92+
@classmethod
93+
def __arrow_ext_deserialize__(self, storage_type, serialized):
94+
return ObjectIdType()
95+
8996

9097
class Decimal128Scalar(ExtensionScalar):
9198
def as_py(self):
@@ -94,11 +101,11 @@ def as_py(self):
94101
return Decimal128.from_bid(self.value.as_py())
95102

96103

97-
class Decimal128Type(PyExtensionType):
104+
class Decimal128Type(ExtensionType):
98105
_type_marker = _BsonArrowTypes.decimal128
99106

100107
def __init__(self):
101-
super().__init__(binary(16))
108+
super().__init__(binary(16), "pymongoarrow.decimal128")
102109

103110
def __reduce__(self):
104111
return Decimal128Type, ()
@@ -109,6 +116,13 @@ def __arrow_ext_scalar_class__(self):
109116
def to_pandas_dtype(self):
110117
return PandasDecimal128()
111118

119+
def __arrow_ext_serialize__(self):
120+
return b""
121+
122+
@classmethod
123+
def __arrow_ext_deserialize__(self, storage_type, serialized):
124+
return Decimal128Type()
125+
112126

113127
class BinaryScalar(ExtensionScalar):
114128
def as_py(self):
@@ -118,12 +132,12 @@ def as_py(self):
118132
return Binary(self.value.as_py(), self.type.subtype)
119133

120134

121-
class BinaryType(PyExtensionType):
135+
class BinaryType(ExtensionType):
122136
_type_marker = _BsonArrowTypes.binary
123137

124138
def __init__(self, subtype):
125139
self._subtype = subtype
126-
super().__init__(binary())
140+
super().__init__(binary(), "pymongoarrow.binary")
127141

128142
@property
129143
def subtype(self):
@@ -138,16 +152,26 @@ def __arrow_ext_scalar_class__(self):
138152
def to_pandas_dtype(self):
139153
return PandasBinary(self.subtype)
140154

155+
def __arrow_ext_serialize__(self):
156+
return f"subtype={self.subtype}".encode()
157+
158+
@classmethod
159+
def __arrow_ext_deserialize__(cls, storage_type, serialized):
160+
serialized = serialized.decode()
161+
assert serialized.startswith("subtype=") # noqa: S101
162+
subtype = int(serialized.split("=")[1])
163+
return BinaryType(subtype)
164+
141165

142166
class CodeScalar(BSONExtensionScalar):
143167
_bson_class = Code
144168

145169

146-
class CodeType(PyExtensionType):
170+
class CodeType(ExtensionType):
147171
_type_marker = _BsonArrowTypes.code
148172

149173
def __init__(self):
150-
super().__init__(string())
174+
super().__init__(string(), "pymongoarrow.code")
151175

152176
def __reduce__(self):
153177
return CodeType, ()
@@ -158,6 +182,18 @@ def __arrow_ext_scalar_class__(self):
158182
def to_pandas_dtype(self):
159183
return PandasCode()
160184

185+
def __arrow_ext_serialize__(self):
186+
return b""
187+
188+
@classmethod
189+
def __arrow_ext_deserialize__(self, storage_type, serialized):
190+
return CodeType()
191+
192+
193+
# Register all of the extension types.
194+
for dtype in [ObjectIdType, CodeType, Decimal128Type]:
195+
pa.register_extension_type(dtype())
196+
pa.register_extension_type(BinaryType(0))
161197

162198
# Internal Type Handling.
163199

bindings/python/pyproject.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = [
44
"wheel>=0.37",
55
"cython>=0.29",
66
# Must be kept in sync with "project.dependencies" below.
7-
"pyarrow>=13.0,<13.1.0",
7+
"pyarrow>=14.0,<14.1.0",
88
]
99

1010
[project]
@@ -33,9 +33,10 @@ classifiers = [
3333
requires-python = ">=3.8"
3434
dependencies = [
3535
# Must be kept in sync with "build-system.requires" above.
36-
"pyarrow >=13.0,<13.1",
36+
"pyarrow >=14.0,<14.1",
3737
"pymongo >=4.4,<5",
3838
"pandas >=1.3.5,<3",
39+
"packaging >=23.2,<24"
3940
]
4041
dynamic = ["version"]
4142

@@ -100,7 +101,9 @@ norecursedirs = ["test/*"]
100101
faulthandler_timeout = 1500
101102
xfail_strict = true
102103
filterwarnings = [
103-
"error"
104+
"error",
105+
# https://github.com/dateutil/dateutil/issues/1314
106+
"module:datetime.datetime.utc:DeprecationWarning",
104107
]
105108

106109
[tool.ruff]

bindings/python/test/test_arrow.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -327,18 +327,14 @@ def test_write_batching(self, mock):
327327
self.assertEqual(mock.call_count, 2)
328328

329329
def _create_nested_data(self, nested_elem=None):
330-
schema = {k.__name__: v(True) for k, v in _TYPE_NORMALIZER_FACTORY.items()}
330+
schema = {k.__name__: v(0) for k, v in _TYPE_NORMALIZER_FACTORY.items()}
331331
if nested_elem:
332332
schem_ent, nested_elem = nested_elem
333333
schema["list"] = list_(schem_ent)
334334

335335
# PyArrow does not support from_pydict with nested extension types.
336336
schema["nested"] = struct(
337-
[
338-
field(a, b)
339-
for (a, b) in list(schema.items())
340-
if not isinstance(b, pa.PyExtensionType)
341-
]
337+
[field(a, b) for (a, b) in list(schema.items()) if not isinstance(b, pa.ExtensionType)]
342338
)
343339
raw_data = {
344340
"str": [None] + [str(i) for i in range(2)],

0 commit comments

Comments
 (0)