Skip to content

Commit ab5f337

Browse files
committed
Merge remote-tracking branch 'upstream/main' into split-arrow
2 parents cd802ac + a89f208 commit ab5f337

File tree

15 files changed

+90
-84
lines changed

15 files changed

+90
-84
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ repos:
6767
- id: fix-encoding-pragma
6868
args: [--remove]
6969
- id: trailing-whitespace
70+
args: [--markdown-linebreak-ext=md]
7071
- repo: https://github.com/PyCQA/isort
7172
rev: 5.13.2
7273
hooks:

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ Other API changes
230230
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
231231
- :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`)
232232
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
233+
- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`; previously the output had a new :class:`RangeIndex` (:issue:`51452`)
233234
- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
234235
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
235236
- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
@@ -559,6 +560,7 @@ I/O
559560
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
560561
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
561562
- Bug in :meth:`HDFStore.get` failing to save data of dtype ``datetime64[s]`` correctly (:issue:`59004`)
563+
- Bug in :meth:`read_csv` causing a segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
562564
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
563565
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
564566
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
@@ -599,7 +601,7 @@ Reshaping
599601
Sparse
600602
^^^^^^
601603
- Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
602-
-
604+
- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard-coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`)
603605

604606
ExtensionArray
605607
^^^^^^^^^^^^^^

pandas/_libs/tslibs/dtypes.pyx

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -453,10 +453,6 @@ class Resolution(Enum):
453453
"""
454454
cdef:
455455
str abbrev
456-
if freq in {"T", "t", "L", "l", "U", "u", "N", "n"}:
457-
raise ValueError(
458-
f"Frequency \'{freq}\' is no longer supported."
459-
)
460456
try:
461457
if freq in c_DEPR_ABBREVS:
462458
abbrev = c_DEPR_ABBREVS[freq]

pandas/_libs/tslibs/timedeltas.pyi

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ UnitChoices: TypeAlias = Literal[
3939
"minute",
4040
"min",
4141
"minutes",
42-
"T",
43-
"t",
4442
"s",
4543
"seconds",
4644
"sec",
@@ -50,21 +48,17 @@ UnitChoices: TypeAlias = Literal[
5048
"millisecond",
5149
"milli",
5250
"millis",
53-
"L",
54-
"l",
5551
"us",
5652
"microseconds",
5753
"microsecond",
5854
"µs",
5955
"micro",
6056
"micros",
61-
"u",
6257
"ns",
6358
"nanoseconds",
6459
"nano",
6560
"nanos",
6661
"nanosecond",
67-
"n",
6862
]
6963
_S = TypeVar("_S", bound=timedelta)
7064

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,11 +1818,6 @@ class Timedelta(_Timedelta):
18181818
* 'microseconds', 'microsecond', 'micros', 'micro', or 'us'
18191819
* 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'.
18201820
1821-
.. deprecated:: 2.2.0
1822-
1823-
Values `H`, `T`, `S`, `L`, `U`, and `N` are deprecated in favour
1824-
of the values `h`, `min`, `s`, `ms`, `us`, and `ns`.
1825-
18261821
.. deprecated:: 3.0.0
18271822
18281823
Allowing the values `w`, `d`, `MIN`, `MS`, `US` and `NS` to denote units

pandas/core/arrays/sparse/accessor.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,12 +291,12 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
291291
Examples
292292
--------
293293
>>> import scipy.sparse
294-
>>> mat = scipy.sparse.eye(3, dtype=float)
294+
>>> mat = scipy.sparse.eye(3, dtype=int)
295295
>>> pd.DataFrame.sparse.from_spmatrix(mat)
296296
0 1 2
297-
0 1.0 0 0
298-
1 0 1.0 0
299-
2 0 0 1.0
297+
0 1 0 0
298+
1 0 1 0
299+
2 0 0 1
300300
"""
301301
from pandas._libs.sparse import IntIndex
302302

@@ -313,7 +313,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
313313
indices = data.indices
314314
indptr = data.indptr
315315
array_data = data.data
316-
dtype = SparseDtype(array_data.dtype, 0)
316+
dtype = SparseDtype(array_data.dtype)
317317
arrays = []
318318
for i in range(n_columns):
319319
sl = slice(indptr[i], indptr[i + 1])
@@ -393,8 +393,6 @@ def to_coo(self) -> spmatrix:
393393
cols, rows, data = [], [], []
394394
for col, (_, ser) in enumerate(self._parent.items()):
395395
sp_arr = ser.array
396-
if sp_arr.fill_value != 0:
397-
raise ValueError("fill value must be 0 when converting to COO matrix")
398396

399397
row = sp_arr.sp_index.indices
400398
cols.append(np.repeat(col, len(row)))

pandas/core/computation/eval.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,11 @@ def eval(
193193
corresponding bitwise operators. :class:`~pandas.Series` and
194194
:class:`~pandas.DataFrame` objects are supported and behave as they would
195195
with plain ol' Python evaluation.
196-
`eval` can run arbitrary code which can make you vulnerable to code
197-
injection if you pass user input to this function.
196+
197+
.. warning::
198+
199+
``eval`` can run arbitrary code which can make you vulnerable to code
200+
injection if you pass untrusted data to this function.
198201
199202
Parameters
200203
----------

pandas/core/dtypes/dtypes.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,7 +1666,7 @@ class SparseDtype(ExtensionDtype):
16661666
"""
16671667
Dtype for data stored in :class:`SparseArray`.
16681668
1669-
`SparseDtype` is used as the data type for :class:`SparseArray`, enabling
1669+
``SparseDtype`` is used as the data type for :class:`SparseArray`, enabling
16701670
more efficient storage of data that contains a significant number of
16711671
repetitive values typically represented by a fill value. It supports any
16721672
scalar dtype as the underlying data type of the non-fill values.
@@ -1677,19 +1677,20 @@ class SparseDtype(ExtensionDtype):
16771677
The dtype of the underlying array storing the non-fill values.
16781678
fill_value : scalar, optional
16791679
The scalar value not stored in the SparseArray. By default, this
1680-
depends on `dtype`.
1680+
depends on ``dtype``.
16811681
16821682
=========== ==========
16831683
dtype na_value
16841684
=========== ==========
16851685
float ``np.nan``
1686+
complex ``np.nan``
16861687
int ``0``
16871688
bool ``False``
16881689
datetime64 ``pd.NaT``
16891690
timedelta64 ``pd.NaT``
16901691
=========== ==========
16911692
1692-
The default value may be overridden by specifying a `fill_value`.
1693+
The default value may be overridden by specifying a ``fill_value``.
16931694
16941695
Attributes
16951696
----------

pandas/core/dtypes/missing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,8 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
618618
nan
619619
>>> na_value_for_dtype(np.dtype("float64"))
620620
nan
621+
>>> na_value_for_dtype(np.dtype("complex128"))
622+
nan
621623
>>> na_value_for_dtype(np.dtype("bool"))
622624
False
623625
>>> na_value_for_dtype(np.dtype("datetime64[ns]"))
@@ -629,7 +631,7 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
629631
elif dtype.kind in "mM":
630632
unit = np.datetime_data(dtype)[0]
631633
return dtype.type("NaT", unit)
632-
elif dtype.kind == "f":
634+
elif dtype.kind in "fc":
633635
return np.nan
634636
elif dtype.kind in "iu":
635637
if compat:

pandas/io/parsers/readers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,14 @@ def _read(
674674
# Extract some of the arguments (pass chunksize on).
675675
iterator = kwds.get("iterator", False)
676676
chunksize = kwds.get("chunksize", None)
677+
678+
# Check type of encoding_errors
679+
errors = kwds.get("encoding_errors", "strict")
680+
if not isinstance(errors, str):
681+
raise ValueError(
682+
f"encoding_errors must be a string, got {type(errors).__name__}"
683+
)
684+
677685
if kwds.get("engine") == "pyarrow":
678686
if iterator:
679687
raise ValueError(

0 commit comments

Comments
 (0)