Skip to content

Commit 8cfddb5

Browse files
ENH: Fix deprecation warnings about __array__ and quantile (xorbitsai#851)
1 parent 9b34b65 commit 8cfddb5

File tree

11 files changed

+122
-131
lines changed

11 files changed

+122
-131
lines changed

.github/workflows/python.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ jobs:
156156
else
157157
pip install -e "git+https://github.com/xorbitsai/xoscar.git@main#subdirectory=python&egg=xoscar"
158158
# TODO: pandas v2.2.3 does not yet work well with numpy v2.2.0 and pyarrow v19.0.0.
159-
pip install -U "numpy<2.2.0" "pyarrow<19.0.0" scipy cython pyftpdlib coverage flaky numexpr openpyxl
159+
pip install -U numpy pyarrow scipy cython pyftpdlib coverage flaky numexpr openpyxl
160160
161161
if [[ "$MODULE" == "mars-core" ]]; then
162162
pip install oss2
@@ -291,7 +291,7 @@ jobs:
291291
run: |
292292
source activate ${{ env.CONDA_ENV }}
293293
pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12==24.10.*
294-
pip install ucxx-cu12 cython "numpy>=1.14.0,<2.0.0" cloudpickle scikit-learn \
294+
pip install ucxx-cu12 cython numpy cloudpickle scikit-learn \
295295
pyyaml psutil tornado sqlalchemy defusedxml tqdm uvloop coverage \
296296
pytest pytest-cov pytest-timeout pytest-forked pytest-asyncio pytest-mock
297297
pip install -U xoscar

python/xorbits/_mars/dataframe/core.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -954,15 +954,15 @@ def __str__(self):
954954
def __repr__(self):
955955
return self._to_str(representation=True)
956956

957-
def _to_arr(self):
957+
def _to_arr(self, dtype=None, copy=None):
958958
if len(self._executed_sessions) == 0: # pragma: no cover
959959
raise NotImplementedError
960960

961961
data = self.fetch(session=self._executed_sessions[-1])
962-
return np.asarray(data)
962+
return np.asarray(data, dtype=dtype, copy=copy)
963963

964-
def __array__(self):
965-
return self._to_arr()
964+
def __array__(self, dtype=None, copy=None):
965+
return self._to_arr(dtype=dtype, copy=copy)
966966

967967
def _to_mars_tensor(self, dtype=None, order="K", extract_multi_index=False):
968968
tensor = self.to_tensor(extract_multi_index=extract_multi_index)
@@ -1455,15 +1455,15 @@ def __str__(self):
14551455
def __repr__(self):
14561456
return self._to_str(representation=False)
14571457

1458-
def _to_arr(self):
1458+
def _to_arr(self, dtype=None, copy=None):
14591459
if len(self._executed_sessions) == 0: # pragma: no cover
14601460
raise NotImplementedError
14611461

14621462
data = self.fetch(session=self._executed_sessions[-1])
1463-
return np.asarray(data)
1463+
return np.asarray(data, dtype=dtype, copy=copy)
14641464

1465-
def __array__(self):
1466-
return self._to_arr()
1465+
def __array__(self, dtype=None, copy=None):
1466+
return self._to_arr(dtype=dtype, copy=copy)
14671467

14681468
@property
14691469
def dtype(self):
@@ -2054,9 +2054,9 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
20542054
"chunks",
20552055
FieldTypes.reference(DataFrameChunkData),
20562056
on_serialize=lambda x: [it.data for it in x] if x is not None else x,
2057-
on_deserialize=lambda x: [DataFrameChunk(it) for it in x]
2058-
if x is not None
2059-
else x,
2057+
on_deserialize=lambda x: (
2058+
[DataFrameChunk(it) for it in x] if x is not None else x
2059+
),
20602060
)
20612061

20622062
def __init__(
@@ -2263,15 +2263,15 @@ def _to_str(self, representation=False):
22632263
def __str__(self):
22642264
return self._to_str(representation=False)
22652265

2266-
def _to_arr(self):
2266+
def _to_arr(self, dtype=None, copy=None):
22672267
if len(self._executed_sessions) == 0:
22682268
raise NotImplementedError
22692269

22702270
data = self.fetch(session=self._executed_sessions[-1])
2271-
return np.asarray(data)
2271+
return np.asarray(data, dtype=dtype, copy=copy)
22722272

2273-
def __array__(self):
2274-
return self._to_arr()
2273+
def __array__(self, dtype=None, copy=None):
2274+
return self._to_arr(dtype=dtype, copy=copy)
22752275

22762276
def __repr__(self):
22772277
return self._to_str(representation=True)
@@ -2767,9 +2767,9 @@ class DataFrameGroupByData(BaseDataFrameData):
27672767
"chunks",
27682768
FieldTypes.reference(DataFrameGroupByChunkData),
27692769
on_serialize=lambda x: [it.data for it in x] if x is not None else x,
2770-
on_deserialize=lambda x: [DataFrameGroupByChunk(it) for it in x]
2771-
if x is not None
2772-
else x,
2770+
on_deserialize=lambda x: (
2771+
[DataFrameGroupByChunk(it) for it in x] if x is not None else x
2772+
),
27732773
)
27742774

27752775
@property
@@ -2816,9 +2816,9 @@ class SeriesGroupByData(BaseSeriesData):
28162816
"chunks",
28172817
FieldTypes.reference(SeriesGroupByChunkData),
28182818
on_serialize=lambda x: [it.data for it in x] if x is not None else x,
2819-
on_deserialize=lambda x: [SeriesGroupByChunk(it) for it in x]
2820-
if x is not None
2821-
else x,
2819+
on_deserialize=lambda x: (
2820+
[SeriesGroupByChunk(it) for it in x] if x is not None else x
2821+
),
28222822
)
28232823

28242824
@property
@@ -2995,9 +2995,9 @@ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
29952995
"chunks",
29962996
FieldTypes.reference(CategoricalChunkData),
29972997
on_serialize=lambda x: [it.data for it in x] if x is not None else x,
2998-
on_deserialize=lambda x: [CategoricalChunk(it) for it in x]
2999-
if x is not None
3000-
else x,
2998+
on_deserialize=lambda x: (
2999+
[CategoricalChunk(it) for it in x] if x is not None else x
3000+
),
30013001
)
30023002

30033003
def __init__(
@@ -3195,9 +3195,9 @@ class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
31953195
"chunks",
31963196
FieldTypes.reference(DataFrameOrSeriesChunkData),
31973197
on_serialize=lambda x: [it.data for it in x] if x is not None else x,
3198-
on_deserialize=lambda x: [DataFrameOrSeriesChunk(it) for it in x]
3199-
if x is not None
3200-
else x,
3198+
on_deserialize=lambda x: (
3199+
[DataFrameOrSeriesChunk(it) for it in x] if x is not None else x
3200+
),
32013201
)
32023202

32033203
_data_type = StringField("data_type")

python/xorbits/_mars/dataframe/plotting/tests/test_plot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def assert_is_valid_plot_return_object(objs): # pragma: no cover
4444
import matplotlib.pyplot as plt
4545

4646
if isinstance(objs, (pd.Series, np.ndarray)):
47-
for el in objs.ravel():
47+
for el in objs.to_numpy():
4848
msg = (
4949
"one of 'objs' is not a matplotlib Axes instance, "
5050
f"type encountered {type(el).__name__}"

python/xorbits/_mars/dataframe/reduction/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ def _execute_map(cls, ctx, op):
566566
kwargs["skipna"] = op.skipna
567567
partial = getattr(in_data, getattr(cls, "_func_name"))(**kwargs)
568568
if op.skipna:
569-
partial.fillna(method="ffill", axis=op.axis, inplace=True)
569+
partial.ffill(axis=op.axis, inplace=True)
570570
ctx[op.outputs[0].key] = cls._get_last_slice(op, partial, -1)
571571

572572
@classmethod
@@ -583,7 +583,7 @@ def _execute_combine(cls, ctx, op):
583583
pd.concat(ref_datas, axis=op.axis), getattr(cls, "_func_name")
584584
)(**kwargs)
585585
if op.skipna:
586-
concat_df.fillna(method="ffill", axis=op.axis, inplace=True)
586+
concat_df.ffill(axis=op.axis, inplace=True)
587587

588588
in_data = ctx[op.inputs[0].key]
589589
concat_df = pd.concat(

python/xorbits/_mars/dataframe/statistics/quantile.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def _calc_dtype_on_axis_1(self, a, dtypes):
106106
dt = tensor_quantile(
107107
tensor_from_series(a[name]),
108108
self._q,
109-
interpolation=self._interpolation,
109+
method=self._interpolation,
110110
handle_non_numeric=not self._numeric_only,
111111
).dtype
112112
quantile_dtypes.append(dt)
@@ -152,7 +152,7 @@ def _call_dataframe(self, a, inputs):
152152
dt = tensor_quantile(
153153
empty(a.shape[1], dtype=find_common_type(list(dtypes))),
154154
self._q,
155-
interpolation=self._interpolation,
155+
method=self._interpolation,
156156
handle_non_numeric=not self._numeric_only,
157157
).dtype
158158
return self.new_series(
@@ -173,7 +173,7 @@ def _call_dataframe(self, a, inputs):
173173
tensor_quantile(
174174
tensor_from_series(a[name]),
175175
self._q,
176-
interpolation=self._interpolation,
176+
method=self._interpolation,
177177
handle_non_numeric=not self._numeric_only,
178178
).dtype
179179
)
@@ -222,7 +222,7 @@ def _call_series(self, a, inputs):
222222
self._dtype = dtype = tensor_quantile(
223223
a_t,
224224
self._q,
225-
interpolation=self._interpolation,
225+
method=self._interpolation,
226226
handle_non_numeric=not self._numeric_only,
227227
).dtype
228228

@@ -266,7 +266,7 @@ def _tile_dataframe(cls, op):
266266
t = tensor_quantile(
267267
a,
268268
op.q,
269-
interpolation=op.interpolation,
269+
method=op.interpolation,
270270
handle_non_numeric=not op.numeric_only,
271271
)
272272
ts.append(t)
@@ -288,7 +288,7 @@ def _tile_dataframe(cls, op):
288288
t,
289289
op.q,
290290
axis=1,
291-
interpolation=op.interpolation,
291+
method=op.interpolation,
292292
handle_non_numeric=not op.numeric_only,
293293
)
294294
r = series_from_tensor(tr, index=op.input.index, name=tr.op.q.item())
@@ -301,7 +301,7 @@ def _tile_dataframe(cls, op):
301301
t = tensor_quantile(
302302
a,
303303
op.q,
304-
interpolation=op.interpolation,
304+
method=op.interpolation,
305305
handle_non_numeric=not op.numeric_only,
306306
)
307307
d[name] = t
@@ -315,7 +315,7 @@ def _tile_dataframe(cls, op):
315315
t,
316316
op.q,
317317
axis=1,
318-
interpolation=op.interpolation,
318+
method=op.interpolation,
319319
handle_non_numeric=not op.numeric_only,
320320
)
321321
if not op.input.index_value.has_value():
@@ -333,7 +333,7 @@ def _tile_series(cls, op):
333333
t = tensor_quantile(
334334
a,
335335
op.q,
336-
interpolation=op.interpolation,
336+
method=op.interpolation,
337337
handle_non_numeric=not op.numeric_only,
338338
)
339339
if isinstance(op.outputs[0], TENSOR_TYPE):

python/xorbits/_mars/lib/groupby_wrapper.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ def __init__(
4040
keys=None,
4141
axis=0,
4242
level=None,
43-
grouper=None,
4443
exclusions=None,
4544
selection=None,
4645
as_index=True,
@@ -85,7 +84,6 @@ def _is_frame_groupby(data: Any) -> bool:
8584
keys=keys,
8685
axis=axis,
8786
level=level,
88-
grouper=grouper,
8987
exclusions=exclusions,
9088
as_index=as_index,
9189
group_keys=group_keys,
@@ -115,7 +113,6 @@ def __getitem__(self, item):
115113
keys=self.keys,
116114
axis=self.axis,
117115
level=self.level,
118-
grouper=self.groupby_obj.grouper,
119116
exclusions=self.exclusions,
120117
selection=item,
121118
as_index=self.as_index,

python/xorbits/_mars/tensor/statistics/percentile.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def percentile(
2828
axis=None,
2929
out=None,
3030
overwrite_input=False,
31-
interpolation="linear",
31+
method="linear",
3232
keepdims=False,
3333
):
3434
"""
@@ -53,8 +53,8 @@ def percentile(
5353
but the type (of the output) will be cast if necessary.
5454
overwrite_input : bool, optional
5555
Just for compatibility with Numpy, would not take effect.
56-
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
57-
This optional parameter specifies the interpolation method to
56+
method : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
57+
This optional parameter specifies the interpolation method to
5858
use when the desired percentile lies between two data points
5959
``i < j``:
6060
@@ -94,7 +94,7 @@ def percentile(
9494
Given a vector ``V`` of length ``N``, the q-th percentile of
9595
``V`` is the value ``q/100`` of the way from the minimum to the
9696
maximum in a sorted copy of ``V``. The values and distances of
97-
the two nearest neighbors as well as the `interpolation` parameter
97+
the two nearest neighbors as well as the `method` parameter
9898
will determine the percentile if the normalized ranking does not
9999
match the location of ``q`` exactly. This function is the same as
100100
the median if ``q=50``, the same as the minimum if ``q=0`` and the
@@ -124,7 +124,7 @@ def percentile(
124124
>>> m.execute()
125125
array([6.5, 4.5, 2.5])
126126
127-
The different types of interpolation can be visualized graphically:
127+
The different interpolation methods can be visualized graphically:
128128
129129
.. plot::
130130
@@ -142,10 +142,10 @@ def percentile(
142142
('nearest', '-.'),
143143
('midpoint', '-.'),
144144
]
145-
for interpolation, style in lines:
145+
for method, style in lines:
146146
ax.plot(
147-
np.asarray(p), np.asarray(mt.percentile(a, p, interpolation=interpolation)),
148-
label=interpolation, linestyle=style)
147+
np.asarray(p), np.asarray(mt.percentile(a, p, method=method)),
148+
label=method, linestyle=style)
149149
ax.set(
150150
title='Interpolation methods for list: ' + str(a),
151151
xlabel='Percentile',
@@ -170,7 +170,7 @@ def percentile(
170170
axis=axis,
171171
out=out,
172172
overwrite_input=overwrite_input,
173-
interpolation=interpolation,
173+
method=method,
174174
keepdims=keepdims,
175175
q_error_msg=q_error_msg,
176176
)

0 commit comments

Comments
 (0)