Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pandas/core/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ def preprocess_weights(obj: FrameOrSeries, weights, axis: int) -> np.ndarray:
if (weights < 0).any():
raise ValueError("weight vector many not include negative values")

weights[np.isnan(weights)] = 0
missing = np.isnan(weights)
if missing.any():
# Don't modify weights in place
weights = weights.copy()
weights[missing] = 0
return weights


Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/methods/test_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,24 @@ def test_sample_is_copy(self):
with tm.assert_produces_warning(None):
df2["d"] = 1

def test_sample_does_not_modify_weights(self):
# GH-42843
result = np.array([np.nan, 1, np.nan])
expected = result.copy()
ser = Series([1, 2, 3])

# Test numpy array weights won't be modified in place
ser.sample(weights=result)
tm.assert_numpy_array_equal(result, expected)

# Test DataFrame column won't be modified in place
df = DataFrame({"values": [1, 1, 1], "weights": [1, np.nan, np.nan]})
expected = df["weights"].copy()

df.sample(frac=1.0, replace=True, weights="weights")
result = df["weights"]
tm.assert_series_equal(result, expected)

def test_sample_ignore_index(self):
# GH 38581
df = DataFrame(
Expand Down