Skip to content

Commit 0d3d87c

Browse files
authored
Merge branch 'main' into bugfix-spss-kwargs
2 parents 1371a24 + 3c96b8f commit 0d3d87c

File tree

11 files changed

+42
-25
lines changed

11 files changed

+42
-25
lines changed

ci/code_checks.sh

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,8 @@ fi
6565
### DOCSTRINGS ###
6666
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
6767

68-
MSG='Validate docstrings (EX01, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
69-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
70-
RET=$(($RET + $?)) ; echo $MSG "DONE"
71-
72-
MSG='Partially validate docstrings (EX03)' ; echo $MSG
73-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX03 --ignore_functions \
74-
pandas.Series.plot.line \
75-
pandas.Series.to_sql \
76-
pandas.read_json \
77-
pandas.DataFrame.to_sql # There should be no backslash in the final line, please keep this comment in the last ignored function
68+
MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
69+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
7870
RET=$(($RET + $?)) ; echo $MSG "DONE"
7971

8072
MSG='Partially validate docstrings (PR02)' ; echo $MSG

doc/source/reference/frame.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ Conversion
4949
DataFrame.infer_objects
5050
DataFrame.copy
5151
DataFrame.bool
52+
DataFrame.to_numpy
5253

5354
Indexing, iteration
5455
~~~~~~~~~~~~~~~~~~~

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ Performance improvements
107107
- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
108108
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
109109
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
110+
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
110111
-
111112

112113
.. ---------------------------------------------------------------------------

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,9 @@ cdef class StringHashTable(HashTable):
933933
kh_destroy_str(self.table)
934934
self.table = NULL
935935

936+
def __len__(self) -> int:
937+
return self.table.size
938+
936939
def sizeof(self, deep: bool = False) -> int:
937940
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
938941
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)

pandas/_libs/index.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class UInt32Engine(IndexEngine): ...
5050
class UInt16Engine(IndexEngine): ...
5151
class UInt8Engine(IndexEngine): ...
5252
class ObjectEngine(IndexEngine): ...
53+
class StringEngine(IndexEngine): ...
5354
class DatetimeEngine(Int64Engine): ...
5455
class TimedeltaEngine(DatetimeEngine): ...
5556
class PeriodEngine(Int64Engine): ...

pandas/_libs/index.pyx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,17 @@ cdef class ObjectEngine(IndexEngine):
533533
return loc
534534

535535

536+
cdef class StringEngine(IndexEngine):
537+
538+
cdef _make_hash_table(self, Py_ssize_t n):
539+
return _hash.StringHashTable(n)
540+
541+
cdef _check_type(self, object val):
542+
if not isinstance(val, str):
543+
raise KeyError(val)
544+
return str(val)
545+
546+
536547
cdef class DatetimeEngine(Int64Engine):
537548

538549
cdef:

pandas/core/generic.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2999,7 +2999,7 @@ def to_sql(
29992999
3
30003000
>>> from sqlalchemy import text
30013001
>>> with engine.connect() as conn:
3002-
... conn.execute(text("SELECT * FROM users")).fetchall()
3002+
... conn.execute(text("SELECT * FROM users")).fetchall()
30033003
[(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]
30043004
30053005
An `sqlalchemy.engine.Connection` can also be passed to `con`:
@@ -3016,7 +3016,7 @@ def to_sql(
30163016
>>> df2.to_sql(name='users', con=engine, if_exists='append')
30173017
2
30183018
>>> with engine.connect() as conn:
3019-
... conn.execute(text("SELECT * FROM users")).fetchall()
3019+
... conn.execute(text("SELECT * FROM users")).fetchall()
30203020
[(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
30213021
(0, 'User 4'), (1, 'User 5'), (0, 'User 6'),
30223022
(1, 'User 7')]
@@ -3027,7 +3027,7 @@ def to_sql(
30273027
... index_label='id')
30283028
2
30293029
>>> with engine.connect() as conn:
3030-
... conn.execute(text("SELECT * FROM users")).fetchall()
3030+
... conn.execute(text("SELECT * FROM users")).fetchall()
30313031
[(0, 'User 6'), (1, 'User 7')]
30323032
30333033
Use ``method`` to define a callable insertion method to do nothing
@@ -3040,13 +3040,14 @@ def to_sql(
30403040
... stmt = insert(table.table).values(data).on_conflict_do_nothing(index_elements=["a"])
30413041
... result = conn.execute(stmt)
30423042
... return result.rowcount
3043-
>>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP
3043+
>>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", # noqa: F821
3044+
... method=insert_on_conflict_nothing) # doctest: +SKIP
30443045
0
30453046
30463047
For MySQL, a callable to update columns ``b`` and ``c`` if there's a conflict
30473048
on a primary key.
30483049
3049-
>>> from sqlalchemy.dialects.mysql import insert
3050+
>>> from sqlalchemy.dialects.mysql import insert # noqa: F811
30503051
>>> def insert_on_conflict_update(table, conn, keys, data_iter):
30513052
... # update columns "b" and "c" on primary key conflict
30523053
... data = [dict(zip(keys, row)) for row in data_iter]
@@ -3057,7 +3058,8 @@ def to_sql(
30573058
... stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c)
30583059
... result = conn.execute(stmt)
30593060
... return result.rowcount
3060-
>>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP
3061+
>>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", # noqa: F821
3062+
... method=insert_on_conflict_update) # doctest: +SKIP
30613063
2
30623064
30633065
Specify the dtype (especially useful for integers with missing values).
@@ -3078,7 +3080,7 @@ def to_sql(
30783080
3
30793081
30803082
>>> with engine.connect() as conn:
3081-
... conn.execute(text("SELECT * FROM integers")).fetchall()
3083+
... conn.execute(text("SELECT * FROM integers")).fetchall()
30823084
[(1,), (None,), (2,)]
30833085
""" # noqa: E501
30843086
from pandas.io import sql

pandas/core/indexes/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,8 @@ def _engine(
883883
# error: Item "ExtensionArray" of "Union[ExtensionArray,
884884
# ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
885885
target_values = self._data._ndarray # type: ignore[union-attr]
886+
elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
887+
return libindex.StringEngine(target_values)
886888

887889
# error: Argument 1 to "ExtensionEngine" has incompatible type
888890
# "ndarray[Any, Any]"; expected "ExtensionArray"

pandas/io/json/_json.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def read_json(
717717
"data":[["a","b"],["c","d"]]\
718718
}}\
719719
'
720-
>>> pd.read_json(StringIO(_), orient='split')
720+
>>> pd.read_json(StringIO(_), orient='split') # noqa: F821
721721
col 1 col 2
722722
row 1 a b
723723
row 2 c d
@@ -727,7 +727,7 @@ def read_json(
727727
>>> df.to_json(orient='index')
728728
'{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}'
729729
730-
>>> pd.read_json(StringIO(_), orient='index')
730+
>>> pd.read_json(StringIO(_), orient='index') # noqa: F821
731731
col 1 col 2
732732
row 1 a b
733733
row 2 c d
@@ -737,7 +737,7 @@ def read_json(
737737
738738
>>> df.to_json(orient='records')
739739
'[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]'
740-
>>> pd.read_json(StringIO(_), orient='records')
740+
>>> pd.read_json(StringIO(_), orient='records') # noqa: F821
741741
col 1 col 2
742742
0 a b
743743
1 c d

pandas/tests/io/xml/test_xml.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,16 +1361,18 @@ def test_stylesheet_with_etree(kml_cta_rail_lines, xsl_flatten_doc):
13611361

13621362
@pytest.mark.parametrize("val", ["", b""])
13631363
def test_empty_stylesheet(val):
1364-
pytest.importorskip("lxml")
1364+
lxml_etree = pytest.importorskip("lxml.etree")
1365+
13651366
msg = (
13661367
"Passing literal xml to 'read_xml' is deprecated and "
13671368
"will be removed in a future version. To read from a "
13681369
"literal string, wrap it in a 'StringIO' object."
13691370
)
13701371
kml = os.path.join("data", "xml", "cta_rail_lines.kml")
13711372

1372-
with pytest.raises(FutureWarning, match=msg):
1373-
read_xml(kml, stylesheet=val)
1373+
with pytest.raises(lxml_etree.XMLSyntaxError):
1374+
with tm.assert_produces_warning(FutureWarning, match=msg):
1375+
read_xml(kml, stylesheet=val)
13741376

13751377

13761378
# ITERPARSE

0 commit comments

Comments
 (0)