Skip to content

Commit f765bfe

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ref/json/lessstate
2 parents 9d42978 + db13fb5 commit f765bfe

File tree

13 files changed

+74
-57
lines changed

13 files changed

+74
-57
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ Other API changes
230230
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
231231
- :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`)
232232
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
233+
- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`; previously the output had a new :class:`RangeIndex` (:issue:`51452`)
233234
- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
234235
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
235236
- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
@@ -558,6 +559,7 @@ I/O
558559
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
559560
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
560561
- Bug in :meth:`HDFStore.get` failing to save data of dtype ``datetime64[s]`` correctly (:issue:`59004`)
562+
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
561563
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
562564
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
563565
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
@@ -594,6 +596,7 @@ Reshaping
594596
^^^^^^^^^
595597
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
596598
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
599+
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
597600

598601
Sparse
599602
^^^^^^

pandas/_libs/src/datetime/pd_datetime.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,12 @@ static int pandas_datetime_exec(PyObject *Py_UNUSED(module)) {
245245
}
246246

247247
static PyModuleDef_Slot pandas_datetime_slots[] = {
248-
{Py_mod_exec, pandas_datetime_exec}, {0, NULL}};
248+
{Py_mod_exec, pandas_datetime_exec},
249+
#if PY_VERSION_HEX >= 0x030D0000
250+
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
251+
#endif
252+
{0, NULL},
253+
};
249254

250255
static struct PyModuleDef pandas_datetimemodule = {
251256
PyModuleDef_HEAD_INIT,

pandas/_libs/src/parser/pd_parser.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,12 @@ static int pandas_parser_exec(PyObject *Py_UNUSED(module)) {
161161
}
162162

163163
static PyModuleDef_Slot pandas_parser_slots[] = {
164-
{Py_mod_exec, pandas_parser_exec}, {0, NULL}};
164+
{Py_mod_exec, pandas_parser_exec},
165+
#if PY_VERSION_HEX >= 0x030D0000
166+
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
167+
#endif
168+
{0, NULL},
169+
};
165170

166171
static struct PyModuleDef pandas_parsermodule = {
167172
PyModuleDef_HEAD_INIT,

pandas/_libs/tslibs/dtypes.pyx

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -453,10 +453,6 @@ class Resolution(Enum):
453453
"""
454454
cdef:
455455
str abbrev
456-
if freq in {"T", "t", "L", "l", "U", "u", "N", "n"}:
457-
raise ValueError(
458-
f"Frequency \'{freq}\' is no longer supported."
459-
)
460456
try:
461457
if freq in c_DEPR_ABBREVS:
462458
abbrev = c_DEPR_ABBREVS[freq]

pandas/_libs/tslibs/timedeltas.pyi

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ UnitChoices: TypeAlias = Literal[
3939
"minute",
4040
"min",
4141
"minutes",
42-
"T",
43-
"t",
4442
"s",
4543
"seconds",
4644
"sec",
@@ -50,21 +48,17 @@ UnitChoices: TypeAlias = Literal[
5048
"millisecond",
5149
"milli",
5250
"millis",
53-
"L",
54-
"l",
5551
"us",
5652
"microseconds",
5753
"microsecond",
5854
"µs",
5955
"micro",
6056
"micros",
61-
"u",
6257
"ns",
6358
"nanoseconds",
6459
"nano",
6560
"nanos",
6661
"nanosecond",
67-
"n",
6862
]
6963
_S = TypeVar("_S", bound=timedelta)
7064

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,11 +1818,6 @@ class Timedelta(_Timedelta):
18181818
* 'microseconds', 'microsecond', 'micros', 'micro', or 'us'
18191819
* 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'.
18201820
1821-
.. deprecated:: 2.2.0
1822-
1823-
Values `H`, `T`, `S`, `L`, `U`, and `N` are deprecated in favour
1824-
of the values `h`, `min`, `s`, `ms`, `us`, and `ns`.
1825-
18261821
.. deprecated:: 3.0.0
18271822
18281823
Allowing the values `w`, `d`, `MIN`, `MS`, `US` and `NS` to denote units

pandas/core/computation/eval.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,11 @@ def eval(
193193
corresponding bitwise operators. :class:`~pandas.Series` and
194194
:class:`~pandas.DataFrame` objects are supported and behave as they would
195195
with plain ol' Python evaluation.
196-
`eval` can run arbitrary code which can make you vulnerable to code
197-
injection if you pass user input to this function.
196+
197+
.. warning::
198+
199+
``eval`` can run arbitrary code which can make you vulnerable to code
200+
injection if you pass untrusted data to this function.
198201
199202
Parameters
200203
----------

pandas/core/reshape/reshape.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -288,21 +288,19 @@ def get_new_values(self, values, fill_value=None):
288288

289289
dtype = values.dtype
290290

291-
# if our mask is all True, then we can use our existing dtype
292-
if mask_all:
293-
dtype = values.dtype
294-
new_values = np.empty(result_shape, dtype=dtype)
295-
else:
296-
if isinstance(dtype, ExtensionDtype):
297-
# GH#41875
298-
# We are assuming that fill_value can be held by this dtype,
299-
# unlike the non-EA case that promotes.
300-
cls = dtype.construct_array_type()
301-
new_values = cls._empty(result_shape, dtype=dtype)
291+
if isinstance(dtype, ExtensionDtype):
292+
# GH#41875
293+
# We are assuming that fill_value can be held by this dtype,
294+
# unlike the non-EA case that promotes.
295+
cls = dtype.construct_array_type()
296+
new_values = cls._empty(result_shape, dtype=dtype)
297+
if not mask_all:
302298
new_values[:] = fill_value
303-
else:
299+
else:
300+
if not mask_all:
304301
dtype, fill_value = maybe_promote(dtype, fill_value)
305-
new_values = np.empty(result_shape, dtype=dtype)
302+
new_values = np.empty(result_shape, dtype=dtype)
303+
if not mask_all:
306304
new_values.fill(fill_value)
307305

308306
name = dtype.name

pandas/io/parsers/readers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,14 @@ def _read(
674674
# Extract some of the arguments (pass chunksize on).
675675
iterator = kwds.get("iterator", False)
676676
chunksize = kwds.get("chunksize", None)
677+
678+
# Check type of encoding_errors
679+
errors = kwds.get("encoding_errors", "strict")
680+
if not isinstance(errors, str):
681+
raise ValueError(
682+
f"encoding_errors must be a string, got {type(errors).__name__}"
683+
)
684+
677685
if kwds.get("engine") == "pyarrow":
678686
if iterator:
679687
raise ValueError(

pandas/tests/io/test_common.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ def test_explicit_encoding(io_class, mode, msg):
555555
expected.to_csv(buffer, mode=f"w{mode}")
556556

557557

558-
@pytest.mark.parametrize("encoding_errors", [None, "strict", "replace"])
558+
@pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
559559
@pytest.mark.parametrize("format", ["csv", "json"])
560560
def test_encoding_errors(encoding_errors, format):
561561
# GH39450
@@ -590,6 +590,17 @@ def test_encoding_errors(encoding_errors, format):
590590
tm.assert_frame_equal(df, expected)
591591

592592

593+
@pytest.mark.parametrize("encoding_errors", [0, None])
594+
def test_encoding_errors_badtype(encoding_errors):
595+
# GH 59075
596+
content = StringIO("A,B\n1,2\n3,4\n")
597+
reader = partial(pd.read_csv, encoding_errors=encoding_errors)
598+
expected_error = "encoding_errors must be a string, got "
599+
expected_error += f"{type(encoding_errors).__name__}"
600+
with pytest.raises(ValueError, match=expected_error):
601+
reader(content)
602+
603+
593604
def test_bad_encdoing_errors():
594605
# GH 39777
595606
with tm.ensure_clean() as path:

0 commit comments

Comments
 (0)