Skip to content

Commit 101d4fa

Browse files
authored
Merge branch 'main' into changes
2 parents e3d1310 + 197e8db commit 101d4fa

File tree

10 files changed

+155
-13
lines changed

10 files changed

+155
-13
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
131131
-i "pandas.Timestamp.nanosecond GL08" \
132132
-i "pandas.Timestamp.resolution PR02" \
133133
-i "pandas.Timestamp.tzinfo GL08" \
134-
-i "pandas.Timestamp.value GL08" \
135134
-i "pandas.Timestamp.year GL08" \
136135
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
137-
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
138136
-i "pandas.api.types.is_bool PR01,SA01" \
139137
-i "pandas.api.types.is_categorical_dtype SA01" \
140138
-i "pandas.api.types.is_complex PR01,SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Other enhancements
5353
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
56+
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
5657
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5758
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5859
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@ cdef class _Timestamp(ABCTimestamp):
240240

241241
@property
242242
def value(self) -> int:
243+
"""
244+
Return the value of the Timestamp.
245+
246+
Returns
247+
-------
248+
int
249+
The integer representation of the Timestamp object in nanoseconds
250+
since the Unix epoch (1970-01-01 00:00:00 UTC).
251+
252+
See Also
253+
--------
254+
Timestamp.second : Return the second of the Timestamp.
255+
Timestamp.minute : Return the minute of the Timestamp.
256+
257+
Examples
258+
--------
259+
>>> ts = pd.Timestamp("2024-08-31 16:16:30")
260+
>>> ts.value
261+
1725120990000000000
262+
"""
263+
243264
try:
244265
return convert_reso(self._value, self._creso, NPY_FR_ns, False)
245266
except OverflowError:
@@ -1020,8 +1041,8 @@ cdef class _Timestamp(ABCTimestamp):
10201041

10211042
See Also
10221043
--------
1023-
Timestamp.day : Return the day of the year.
1024-
Timestamp.year : Return the year of the week.
1044+
Timestamp.day : Return the day of the Timestamp.
1045+
Timestamp.year : Return the year of the Timestamp.
10251046

10261047
Examples
10271048
--------

pandas/_testing/asserters.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def assert_index_equal(
188188
check_order: bool = True,
189189
rtol: float = 1.0e-5,
190190
atol: float = 1.0e-8,
191-
obj: str = "Index",
191+
obj: str | None = None,
192192
) -> None:
193193
"""
194194
Check that left and right Index are equal.
@@ -217,7 +217,7 @@ def assert_index_equal(
217217
Relative tolerance. Only used when check_exact is False.
218218
atol : float, default 1e-8
219219
Absolute tolerance. Only used when check_exact is False.
220-
obj : str, default 'Index'
220+
obj : str, default 'Index' or 'MultiIndex'
221221
Specify object name being compared, internally used to show appropriate
222222
assertion message.
223223
@@ -235,6 +235,9 @@ def assert_index_equal(
235235
"""
236236
__tracebackhide__ = True
237237

238+
if obj is None:
239+
obj = "MultiIndex" if isinstance(left, MultiIndex) else "Index"
240+
238241
def _check_types(left, right, obj: str = "Index") -> None:
239242
if not exact:
240243
return
@@ -283,7 +286,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
283286
right = cast(MultiIndex, right)
284287

285288
for level in range(left.nlevels):
286-
lobj = f"MultiIndex level [{level}]"
289+
lobj = f"{obj} level [{level}]"
287290
try:
288291
# try comparison on levels/codes to avoid densifying MultiIndex
289292
assert_index_equal(
@@ -314,7 +317,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
314317
obj=lobj,
315318
)
316319
# get_level_values may change dtype
317-
_check_types(left.levels[level], right.levels[level], obj=obj)
320+
_check_types(left.levels[level], right.levels[level], obj=lobj)
318321

319322
# skip exact index checking when `check_categorical` is False
320323
elif check_exact and check_categorical:
@@ -527,7 +530,7 @@ def assert_interval_array_equal(
527530
kwargs["check_freq"] = False
528531

529532
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
530-
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
533+
assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)
531534

532535
assert_attr_equal("closed", left, right, obj=obj)
533536

pandas/core/interchange/from_dataframe.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame:
6060
Returns
6161
-------
6262
pd.DataFrame
63+
A pandas DataFrame built from the provided interchange
64+
protocol object.
65+
66+
See Also
67+
--------
68+
pd.DataFrame : DataFrame class which can be created from various input data
69+
formats, including objects that support the interchange protocol.
6370
6471
Examples
6572
--------

pandas/plotting/_matplotlib/core.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,22 @@ def _make_plot(self, fig: Figure) -> None:
13431343
label = self.label
13441344
else:
13451345
label = None
1346+
1347+
# if a list of non-color strings is passed in as c, color the points
1348+
# by the unique strings, so that identical strings get the same color
1349+
create_colors = not self._are_valid_colors(c_values)
1350+
if create_colors:
1351+
color_mapping = self._get_color_mapping(c_values)
1352+
c_values = [color_mapping[s] for s in c_values]
1353+
1354+
# build legend for labeling custom colors
1355+
ax.legend(
1356+
handles=[
1357+
mpl.patches.Circle((0, 0), facecolor=c, label=s)
1358+
for s, c in color_mapping.items()
1359+
]
1360+
)
1361+
13461362
scatter = ax.scatter(
13471363
data[x].values,
13481364
data[y].values,
@@ -1353,6 +1369,7 @@ def _make_plot(self, fig: Figure) -> None:
13531369
s=self.s,
13541370
**self.kwds,
13551371
)
1372+
13561373
if cb:
13571374
cbar_label = c if c_is_column else ""
13581375
cbar = self._plot_colorbar(ax, fig=fig, label=cbar_label)
@@ -1392,6 +1409,30 @@ def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool):
13921409
c_values = c
13931410
return c_values
13941411

1412+
def _are_valid_colors(self, c_values: Series) -> bool:
    """
    Decide whether ``c_values`` can be passed to matplotlib without remapping.

    Only all-string input is checked here: it is accepted when
    ``mpl.colors.to_rgba_array`` converts every distinct value. Anything
    else (numerics, already-valid mpl colors) is not inspected and is left
    for ``Axes.scatter`` to validate.

    Parameters
    ----------
    c_values : Series
        The values supplied for the scatter ``c`` argument.

    Returns
    -------
    bool
        False when ``len(c_values)`` or the color conversion raises
        ``TypeError`` or ``ValueError``; True otherwise.
    """
    distinct = np.unique(c_values)
    try:
        only_strings = len(c_values) and all(
            isinstance(item, str) for item in distinct
        )
        if only_strings:
            # raises if any of the strings is not a color matplotlib accepts
            mpl.colors.to_rgba_array(distinct)
    except (TypeError, ValueError):
        return False
    return True
1425+
1426+
def _get_color_mapping(self, c_values: Series) -> dict[str, np.ndarray]:
    """
    Assign one colormap color to each distinct value in ``c_values``.

    Parameters
    ----------
    c_values : Series
        The labels to color; duplicates share a single entry.

    Returns
    -------
    dict
        Maps every value returned by ``np.unique(c_values)`` to the color
        obtained by sampling the colormap at evenly spaced positions
        between 0 and 1.
    """
    labels = np.unique(c_values)

    # passing `None` here will default to :rc:`image.cmap`
    palette = mpl.colormaps.get_cmap(self.colormap)

    # one sample position per distinct label, spread across the colormap
    positions = np.linspace(0, 1, len(labels))
    sampled = palette(positions)

    return dict(zip(labels, sampled))
1435+
13951436
def _get_norm_and_cmap(self, c_values, color_by_categorical: bool):
13961437
c = self.c
13971438
if self.colormap is not None:

pandas/tests/plotting/frame/test_frame_color.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,53 @@ def test_scatter_with_c_column_name_with_colors(self, cmap):
217217
ax = df.plot.scatter(x=0, y=1, cmap=cmap, c="species")
218218
else:
219219
ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap)
220+
221+
assert len(np.unique(ax.collections[0].get_facecolor(), axis=0)) == 3 # r/g/b
222+
assert (
223+
np.unique(ax.collections[0].get_facecolor(), axis=0)
224+
== np.array(
225+
[[0.0, 0.0, 1.0, 1.0], [0.0, 0.5, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0]]
226+
) # r/g/b
227+
).all()
220228
assert ax.collections[0].colorbar is None
221229

230+
def test_scatter_with_c_column_name_without_colors(self):
    """
    Scatter with ``c`` naming a column of arbitrary strings.

    Each distinct string must end up with its own face color, both when
    none of the strings are matplotlib colors and when only some are.
    """
    label_sets = (
        # no entry is a matplotlib color
        ["NY", "MD", "MA", "CA"],
        # since not all are mpl-colors, points matching 'r' or 'g'
        # are not necessarily red or green
        ["r", "g", "not-a-color"],
    )

    for labels in label_sets:
        # Given: 100 points cycling through the labels
        frame = DataFrame(
            {
                "dataX": range(100),
                "dataY": range(100),
                "color": (labels[row % len(labels)] for row in range(100)),
            }
        )

        # When
        ax = frame.plot.scatter("dataX", "dataY", c="color")

        # Then: exactly one distinct face color per distinct label
        facecolors = ax.collections[0].get_facecolor()
        assert len(np.unique(facecolors, axis=0)) == len(labels)
266+
222267
def test_scatter_colors(self):
223268
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
224269
with pytest.raises(TypeError, match="Specify exactly one of `c` and `color`"):
@@ -229,7 +274,14 @@ def test_scatter_colors_not_raising_warnings(self):
229274
# provided via 'c'. Parameter 'cmap' will be ignored
230275
df = DataFrame({"x": [1, 2, 3], "y": [1, 2, 3]})
231276
with tm.assert_produces_warning(None):
232-
df.plot.scatter(x="x", y="y", c="b")
277+
ax = df.plot.scatter(x="x", y="y", c="b")
278+
assert (
279+
len(np.unique(ax.collections[0].get_facecolor(), axis=0)) == 1
280+
) # blue
281+
assert (
282+
np.unique(ax.collections[0].get_facecolor(), axis=0)
283+
== np.array([[0.0, 0.0, 1.0, 1.0]])
284+
).all() # blue
233285

234286
def test_scatter_colors_default(self):
235287
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def test_frame_equal_shape_mismatch(df1, df2, frame_or_series):
7979
DataFrame.from_records(
8080
{"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"]
8181
),
82-
"MultiIndex level \\[0\\] are different",
82+
"DataFrame\\.index level \\[0\\] are different",
8383
),
8484
],
8585
)

pandas/tests/util/test_assert_interval_array_equal.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import pytest
22

3-
from pandas import interval_range
3+
from pandas import (
4+
Interval,
5+
interval_range,
6+
)
47
import pandas._testing as tm
8+
from pandas.arrays import IntervalArray
59

610

711
@pytest.mark.parametrize(
@@ -79,3 +83,18 @@ def test_interval_array_equal_start_mismatch():
7983

8084
with pytest.raises(AssertionError, match=msg):
8185
tm.assert_interval_array_equal(arr1, arr2)
86+
87+
88+
def test_interval_array_equal_end_mismatch_only():
    """
    Arrays that differ only in a right endpoint are reported under ``.right``.
    """
    # same left endpoints everywhere; only the last right endpoint differs
    first = IntervalArray([Interval(0, end) for end in (1, 5)])
    second = IntervalArray([Interval(0, end) for end in (1, 6)])

    expected = """\
IntervalArray.right are different

IntervalArray.right values are different \\(50.0 %\\)
\\[left\\]: \\[1, 5\\]
\\[right\\]: \\[1, 6\\]"""

    with pytest.raises(AssertionError, match=expected):
        tm.assert_interval_array_equal(first, second)

pandas/tests/util/test_assert_series_equal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def test_less_precise(data1, data2, any_float_dtype, decimals):
137137
DataFrame.from_records(
138138
{"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"]
139139
).c,
140-
"MultiIndex level \\[0\\] are different",
140+
"Series\\.index level \\[0\\] are different",
141141
),
142142
],
143143
)

0 commit comments

Comments
 (0)