Skip to content

Commit db6a83f

Browse files
authored
Merge branch 'main' into cooolheater-issue59965
2 parents ee6f45c + c52846f commit db6a83f

File tree

26 files changed

+242
-88
lines changed

26 files changed

+242
-88
lines changed

.circleci/config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ jobs:
3434
fi
3535
python -m pip install --no-build-isolation -ve . -Csetup-args="--werror"
3636
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
37-
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
3837
ci/run_tests.sh
3938
test-linux-musl:
4039
docker:

ci/code_checks.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8484
-i "pandas.arrays.IntervalArray.length SA01" \
8585
-i "pandas.arrays.NumpyExtensionArray SA01" \
8686
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
87-
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
8887
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
8988
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
9089
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
@@ -95,9 +94,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9594
-i "pandas.core.resample.Resampler.std SA01" \
9695
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
9796
-i "pandas.core.resample.Resampler.var SA01" \
98-
-i "pandas.errors.NullFrequencyError SA01" \
99-
-i "pandas.errors.NumbaUtilError SA01" \
100-
-i "pandas.errors.PerformanceWarning SA01" \
10197
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
10298
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
10399
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \

doc/source/reference/frame.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ Reindexing / selection / label manipulation
185185
DataFrame.duplicated
186186
DataFrame.equals
187187
DataFrame.filter
188-
DataFrame.head
189188
DataFrame.idxmax
190189
DataFrame.idxmin
191190
DataFrame.reindex
@@ -196,7 +195,6 @@ Reindexing / selection / label manipulation
196195
DataFrame.sample
197196
DataFrame.set_axis
198197
DataFrame.set_index
199-
DataFrame.tail
200198
DataFrame.take
201199
DataFrame.truncate
202200

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ Datetimelike
627627
- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`)
628628
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
629629
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
630+
- Bug in :meth:`to_datetime` where a float32 :class:`DataFrame` with year, month, day, etc. columns led to precision issues and incorrect results. (:issue:`60506`)
630631
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
631632
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
632633
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
@@ -799,6 +800,7 @@ Other
799800
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
800801
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
801802
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
803+
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
802804
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
803805

804806
.. ***DO NOT USE THIS SECTION***

pandas/core/arrays/arrow/accessors.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@ def len(self) -> Series:
117117

118118
value_lengths = pc.list_value_length(self._pa_array)
119119
return Series(
120-
value_lengths, dtype=ArrowDtype(value_lengths.type), index=self._data.index
120+
value_lengths,
121+
dtype=ArrowDtype(value_lengths.type),
122+
index=self._data.index,
123+
name=self._data.name,
121124
)
122125

123126
def __getitem__(self, key: int | slice) -> Series:
@@ -162,7 +165,10 @@ def __getitem__(self, key: int | slice) -> Series:
162165
# key = pc.add(key, pc.list_value_length(self._pa_array))
163166
element = pc.list_element(self._pa_array, key)
164167
return Series(
165-
element, dtype=ArrowDtype(element.type), index=self._data.index
168+
element,
169+
dtype=ArrowDtype(element.type),
170+
index=self._data.index,
171+
name=self._data.name,
166172
)
167173
elif isinstance(key, slice):
168174
if pa_version_under11p0:
@@ -181,7 +187,12 @@ def __getitem__(self, key: int | slice) -> Series:
181187
if step is None:
182188
step = 1
183189
sliced = pc.list_slice(self._pa_array, start, stop, step)
184-
return Series(sliced, dtype=ArrowDtype(sliced.type), index=self._data.index)
190+
return Series(
191+
sliced,
192+
dtype=ArrowDtype(sliced.type),
193+
index=self._data.index,
194+
name=self._data.name,
195+
)
185196
else:
186197
raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
187198

@@ -223,7 +234,12 @@ def flatten(self) -> Series:
223234
counts = pa.compute.list_value_length(self._pa_array)
224235
flattened = pa.compute.list_flatten(self._pa_array)
225236
index = self._data.index.repeat(counts.fill_null(pa.scalar(0, counts.type)))
226-
return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
237+
return Series(
238+
flattened,
239+
dtype=ArrowDtype(flattened.type),
240+
index=index,
241+
name=self._data.name,
242+
)
227243

228244

229245
class StructAccessor(ArrowAccessor):

pandas/core/computation/expressions.py

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -65,23 +65,23 @@ def set_numexpr_threads(n=None) -> None:
6565
ne.set_num_threads(n)
6666

6767

68-
def _evaluate_standard(op, op_str, a, b):
68+
def _evaluate_standard(op, op_str, left_op, right_op):
6969
"""
7070
Standard evaluation.
7171
"""
7272
if _TEST_MODE:
7373
_store_test_result(False)
74-
return op(a, b)
74+
return op(left_op, right_op)
7575

7676

77-
def _can_use_numexpr(op, op_str, a, b, dtype_check) -> bool:
78-
"""return a boolean if we WILL be using numexpr"""
77+
def _can_use_numexpr(op, op_str, left_op, right_op, dtype_check) -> bool:
78+
"""return a boolean if we WILL be using numexpr"""
7979
if op_str is not None:
8080
# required min elements (otherwise we are adding overhead)
81-
if a.size > _MIN_ELEMENTS:
81+
if left_op.size > _MIN_ELEMENTS:
8282
# check for dtype compatibility
8383
dtypes: set[str] = set()
84-
for o in [a, b]:
84+
for o in [left_op, right_op]:
8585
# ndarray and Series Case
8686
if hasattr(o, "dtype"):
8787
dtypes |= {o.dtype.name}
@@ -93,43 +93,43 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check) -> bool:
9393
return False
9494

9595

96-
def _evaluate_numexpr(op, op_str, a, b):
96+
def _evaluate_numexpr(op, op_str, left_op, right_op):
9797
result = None
9898

99-
if _can_use_numexpr(op, op_str, a, b, "evaluate"):
99+
if _can_use_numexpr(op, op_str, left_op, right_op, "evaluate"):
100100
is_reversed = op.__name__.strip("_").startswith("r")
101101
if is_reversed:
102102
# we were originally called by a reversed op method
103-
a, b = b, a
103+
left_op, right_op = right_op, left_op
104104

105-
a_value = a
106-
b_value = b
105+
left_value = left_op
106+
right_value = right_op
107107

108108
try:
109109
result = ne.evaluate(
110-
f"a_value {op_str} b_value",
111-
local_dict={"a_value": a_value, "b_value": b_value},
110+
f"left_value {op_str} right_value",
111+
local_dict={"left_value": left_value, "right_value": right_value},
112112
casting="safe",
113113
)
114114
except TypeError:
115115
# numexpr raises eg for array ** array with integers
116116
# (https://github.com/pydata/numexpr/issues/379)
117117
pass
118118
except NotImplementedError:
119-
if _bool_arith_fallback(op_str, a, b):
119+
if _bool_arith_fallback(op_str, left_op, right_op):
120120
pass
121121
else:
122122
raise
123123

124124
if is_reversed:
125125
# reverse order to original for fallback
126-
a, b = b, a
126+
left_op, right_op = right_op, left_op
127127

128128
if _TEST_MODE:
129129
_store_test_result(result is not None)
130130

131131
if result is None:
132-
result = _evaluate_standard(op, op_str, a, b)
132+
result = _evaluate_standard(op, op_str, left_op, right_op)
133133

134134
return result
135135

@@ -170,24 +170,24 @@ def _evaluate_numexpr(op, op_str, a, b):
170170
}
171171

172172

173-
def _where_standard(cond, a, b):
173+
def _where_standard(cond, left_op, right_op):
174174
# Caller is responsible for extracting ndarray if necessary
175-
return np.where(cond, a, b)
175+
return np.where(cond, left_op, right_op)
176176

177177

178-
def _where_numexpr(cond, a, b):
178+
def _where_numexpr(cond, left_op, right_op):
179179
# Caller is responsible for extracting ndarray if necessary
180180
result = None
181181

182-
if _can_use_numexpr(None, "where", a, b, "where"):
182+
if _can_use_numexpr(None, "where", left_op, right_op, "where"):
183183
result = ne.evaluate(
184184
"where(cond_value, a_value, b_value)",
185-
local_dict={"cond_value": cond, "a_value": a, "b_value": b},
185+
local_dict={"cond_value": cond, "a_value": left_op, "b_value": right_op},
186186
casting="safe",
187187
)
188188

189189
if result is None:
190-
result = _where_standard(cond, a, b)
190+
result = _where_standard(cond, left_op, right_op)
191191

192192
return result
193193

@@ -206,13 +206,13 @@ def _has_bool_dtype(x):
206206
_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"}
207207

208208

209-
def _bool_arith_fallback(op_str, a, b) -> bool:
209+
def _bool_arith_fallback(op_str, left_op, right_op) -> bool:
210210
"""
211211
Check if we should fallback to the python `_evaluate_standard` in case
212212
of an unsupported operation by numexpr, which is the case for some
213213
boolean ops.
214214
"""
215-
if _has_bool_dtype(a) and _has_bool_dtype(b):
215+
if _has_bool_dtype(left_op) and _has_bool_dtype(right_op):
216216
if op_str in _BOOL_OP_UNSUPPORTED:
217217
warnings.warn(
218218
f"evaluating in Python space because the {op_str!r} "
@@ -224,40 +224,43 @@ def _bool_arith_fallback(op_str, a, b) -> bool:
224224
return False
225225

226226

227-
def evaluate(op, a, b, use_numexpr: bool = True):
227+
def evaluate(op, left_op, right_op, use_numexpr: bool = True):
228228
"""
229-
Evaluate and return the expression of the op on a and b.
229+
Evaluate and return the expression of the op on left_op and right_op.
230230
231231
Parameters
232232
----------
233233
op : the actual operator
234-
a : left operand
235-
b : right operand
234+
left_op : left operand
235+
right_op : right operand
236236
use_numexpr : bool, default True
237237
Whether to try to use numexpr.
238238
"""
239239
op_str = _op_str_mapping[op]
240240
if op_str is not None:
241241
if use_numexpr:
242242
# error: "None" not callable
243-
return _evaluate(op, op_str, a, b) # type: ignore[misc]
244-
return _evaluate_standard(op, op_str, a, b)
243+
return _evaluate(op, op_str, left_op, right_op) # type: ignore[misc]
244+
return _evaluate_standard(op, op_str, left_op, right_op)
245245

246246

247-
def where(cond, a, b, use_numexpr: bool = True):
247+
def where(cond, left_op, right_op, use_numexpr: bool = True):
248248
"""
249-
Evaluate the where condition cond on a and b.
249+
Evaluate the where condition cond on left_op and right_op.
250250
251251
Parameters
252252
----------
253253
cond : np.ndarray[bool]
254-
a : return if cond is True
255-
b : return if cond is False
254+
left_op : return if cond is True
255+
right_op : return if cond is False
256256
use_numexpr : bool, default True
257257
Whether to try to use numexpr.
258258
"""
259259
assert _where is not None
260-
return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b)
260+
if use_numexpr:
261+
return _where(cond, left_op, right_op)
262+
else:
263+
return _where_standard(cond, left_op, right_op)
261264

262265

263266
def set_test_mode(v: bool = True) -> None:

pandas/core/computation/pytables.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def generate(self, v) -> str:
205205
val = v.tostring(self.encoding)
206206
return f"({self.lhs} {self.op} {val})"
207207

208-
def convert_value(self, v) -> TermValue:
208+
def convert_value(self, conv_val) -> TermValue:
209209
"""
210210
convert the expression that is in the term to something that is
211211
accepted by pytables
@@ -219,44 +219,44 @@ def stringify(value):
219219
kind = ensure_decoded(self.kind)
220220
meta = ensure_decoded(self.meta)
221221
if kind == "datetime" or (kind and kind.startswith("datetime64")):
222-
if isinstance(v, (int, float)):
223-
v = stringify(v)
224-
v = ensure_decoded(v)
225-
v = Timestamp(v).as_unit("ns")
226-
if v.tz is not None:
227-
v = v.tz_convert("UTC")
228-
return TermValue(v, v._value, kind)
222+
if isinstance(conv_val, (int, float)):
223+
conv_val = stringify(conv_val)
224+
conv_val = ensure_decoded(conv_val)
225+
conv_val = Timestamp(conv_val).as_unit("ns")
226+
if conv_val.tz is not None:
227+
conv_val = conv_val.tz_convert("UTC")
228+
return TermValue(conv_val, conv_val._value, kind)
229229
elif kind in ("timedelta64", "timedelta"):
230-
if isinstance(v, str):
231-
v = Timedelta(v)
230+
if isinstance(conv_val, str):
231+
conv_val = Timedelta(conv_val)
232232
else:
233-
v = Timedelta(v, unit="s")
234-
v = v.as_unit("ns")._value
235-
return TermValue(int(v), v, kind)
233+
conv_val = Timedelta(conv_val, unit="s")
234+
conv_val = conv_val.as_unit("ns")._value
235+
return TermValue(int(conv_val), conv_val, kind)
236236
elif meta == "category":
237237
metadata = extract_array(self.metadata, extract_numpy=True)
238238
result: npt.NDArray[np.intp] | np.intp | int
239-
if v not in metadata:
239+
if conv_val not in metadata:
240240
result = -1
241241
else:
242-
result = metadata.searchsorted(v, side="left")
242+
result = metadata.searchsorted(conv_val, side="left")
243243
return TermValue(result, result, "integer")
244244
elif kind == "integer":
245245
try:
246-
v_dec = Decimal(v)
246+
v_dec = Decimal(conv_val)
247247
except InvalidOperation:
248248
# GH 54186
249249
# convert conv_val to float to raise float's ValueError
250-
float(v)
250+
float(conv_val)
251251
else:
252-
v = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN"))
253-
return TermValue(v, v, kind)
252+
conv_val = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN"))
253+
return TermValue(conv_val, conv_val, kind)
254254
elif kind == "float":
255-
v = float(v)
256-
return TermValue(v, v, kind)
255+
conv_val = float(conv_val)
256+
return TermValue(conv_val, conv_val, kind)
257257
elif kind == "bool":
258-
if isinstance(v, str):
259-
v = v.strip().lower() not in [
258+
if isinstance(conv_val, str):
259+
conv_val = conv_val.strip().lower() not in [
260260
"false",
261261
"f",
262262
"no",
@@ -268,13 +268,15 @@ def stringify(value):
268268
"",
269269
]
270270
else:
271-
v = bool(v)
272-
return TermValue(v, v, kind)
273-
elif isinstance(v, str):
271+
conv_val = bool(conv_val)
272+
return TermValue(conv_val, conv_val, kind)
273+
elif isinstance(conv_val, str):
274274
# string quoting
275-
return TermValue(v, stringify(v), "string")
275+
return TermValue(conv_val, stringify(conv_val), "string")
276276
else:
277-
raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")
277+
raise TypeError(
278+
f"Cannot compare {conv_val} of type {type(conv_val)} to {kind} column"
279+
)
278280

279281
def convert_values(self) -> None:
280282
pass

0 commit comments

Comments
 (0)