Skip to content

Commit 90d72c1

Browse files
committed
Fix CoW mode not to break groupby.
1 parent 1f758c2 commit 90d72c1

File tree

2 files changed: +16 -5 lines changed

2 files changed

+16
-5
lines changed

python/pyspark/pandas/indexing.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -587,6 +587,16 @@ def __setitem__(self, key: Any, value: Any) -> None:
587587
from pyspark.pandas.series import Series, first_series
588588

589589
if self._is_series:
590+
if LooseVersion(pd.__version__) >= "3.0.0":
591+
# pandas 3 CoW: mutating a Series view should not mutate the parent DataFrame.
592+
self._psdf_or_psser._update_anchor(
593+
DataFrame(
594+
self._psdf_or_psser._psdf._internal.select_column(
595+
self._psdf_or_psser._column_label
596+
)
597+
)
598+
)
599+
590600
if (
591601
isinstance(key, Series)
592602
and (isinstance(self, iLocIndexer) or not same_anchor(key, self._psdf_or_psser))
@@ -811,7 +821,11 @@ def __setitem__(self, key: Any, value: Any) -> None:
811821
internal = self._internal.with_new_columns(
812822
new_data_spark_columns, column_labels=column_labels, data_fields=new_fields
813823
)
814-
self._psdf_or_psser._update_internal_frame(internal, check_same_anchor=False)
824+
self._psdf_or_psser._update_internal_frame(
825+
internal,
826+
check_same_anchor=False,
827+
anchor_force_disconnect=LooseVersion(pd.__version__) >= "3.0.0",
828+
)
815829

816830

817831
class LocIndexer(LocIndexerLike):

python/pyspark/pandas/series.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -430,10 +430,7 @@ def __init__( # type: ignore[no-untyped-def]
430430
assert not copy
431431
assert fastpath is no_default
432432

433-
if LooseVersion(pd.__version__) < "3.0.0":
434-
self._anchor = data
435-
else:
436-
self._anchor = DataFrame(data)
433+
self._anchor = data
437434
self._col_label = index
438435

439436
elif isinstance(data, Series):

0 commit comments

Comments
 (0)