More concise terminology and description of the supported input data formats.

timhoffm · timhoffm · commit 9de9eca9f233 · 2025-01-22T17:06:00.000+01:00
diff --git a/doc/_embedded_plots/grouped_bar.py b/doc/_embedded_plots/grouped_bar.py
@@ -1,14 +1,14 @@
 import matplotlib.pyplot as plt
 
-group_labels = ['group A', 'group B']
+categories = ['A', 'B']
 data0 = [1.0, 3.0]
 data1 = [1.4, 3.4]
 data2 = [1.8, 3.8]
 
 fig, ax = plt.subplots(figsize=(4, 2.2))
 ax.grouped_bar(
     [data0, data1, data2],
-    tick_labels=group_labels,
+    tick_labels=categories,
     labels=['dataset 0', 'dataset 1', 'dataset 2'],
     colors=['#1f77b4', '#58a1cf', '#abd0e6'],
 )
diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py
@@ -2506,6 +2506,7 @@ def bar(self, x, height, width=0.8, bottom=None, *, align="center",
         See Also
         --------
         barh : Plot a horizontal bar plot.
+        grouped_bar : Plot multiple datasets as a grouped bar plot.
 
         Notes
         -----
@@ -3073,17 +3074,27 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing
         Make a grouped bar plot.
 
         .. note::
-            This function is new in v3.10, and the API is still provisional.
+            This function is new in v3.11, and the API is still provisional.
             We may still fine-tune some aspects based on user-feedback.
 
         This is a convenience function to plot bar charts for multiple datasets
         into one Axes. In particular, it simplifies positioning of the bars
         compared to individual `~.Axes.bar` plots.
 
-        Terminology: A bar *group* is a set of bars drawn next to each other. They
-        can be associated with a group name, which is visualized as the tick label
-        below that group. A *dataset*  is a set of values, one for each bar group.
-        This means *dataset_0* will be rendered as the first bar in each bar group.
+        Bar plots present categorical data as a sequence of bars, one bar per category.
+        We call one set of such values a *dataset* and its bars all share the same
+        color. Grouped bar plots show multiple such datasets, where the values per
+        category are grouped together. The category names are drawn as tick labels
+        below the bar groups. Each dataset has a distinct bar color, and can optionally
+        get a label that is used for the legend.
+
+        Here is an example call structure and the corresponding plot:
+
+        .. code-block:: python
+
+           grouped_bar([dataset_1, dataset_2, dataset_3],
+                       tick_labels=['A', 'B'],
+                       labels=['dataset 1', 'dataset 2', 'dataset 3'])
 
         .. plot:: _embedded_plots/grouped_bar.py
 
@@ -3097,60 +3108,67 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing
 
               .. code-block:: none
 
-                  #           group_A group_B
-                  dataset_0 = [ds0_a, ds0_b]
-                  dataset_1 = [ds1_a, ds1_b]
-                  dataset_2 = [ds2_a, ds2_b]
-
-                  heights = [dataset_0, dataset_1, dataset_2]
+                  #           category_A,  category_B
+                  dataset_0 = [ds0_A, ds0_B]
+                  dataset_1 = [ds1_A, ds1_B]
+                  dataset_2 = [ds2_A, ds2_B]
 
               Example call::
 
                   grouped_bar([dataset_0, dataset_1, dataset_2])
 
-            - dict of array-like: A mapping names to datasets. Each dataset
-              (dict value) must have the same number of elements elements.
+            - dict of array-like: A mapping from names to datasets. Each dataset
+              (dict value) must have the same number of elements.
 
               This is similar to passing a list of array-like, with the addition that
               each dataset gets a name.
 
-              Example call::
+              Example call:
+
+              .. code-block:: python
 
-                grouped_bar({'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2]})
+                grouped_bar({'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2})
 
               The names are used as *labels*, i.e. the following two calls are
-              equivalent::
+              equivalent:
+
+              .. code-block:: python
 
-                data_dict = {'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2]}
+                data_dict = {'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2}
                 grouped_bar(data_dict)
                 grouped_bar(data_dict.values(), labels=data_dict.keys())
 
               When using a dict-like input, you must not pass *labels* explicitly.
 
-            - a 2D array: The columns are the different datasets.
+            - a 2D array: The rows are the categories, the columns are the different
+              datasets.
 
               .. code-block:: none
 
-                          dataset_0 dataset_1 dataset_2
-                 group_A    ds0_a     ds1_a     ds2_a
-                 group_B    ds0_b     ds1_b     ds2_b
+                             dataset_0 dataset_1 dataset_2
+                 category_A    ds0_A     ds1_A     ds2_A
+                 category_B    ds0_B     ds1_B     ds2_B
+
+              Example call:
 
-              .. code-block::
+              .. code-block:: python
 
-                  group_labels = ["group_A", "group_B"]
+                  categories = ["A", "B"]
                   dataset_labels = ["dataset_0", "dataset_1", "dataset_2"]
                   array = np.random.random((2, 3))
 
               Note that this is consistent with pandas. These two calls produce
-              the same bar plot structure::
+              the same bar plot structure:
 
-                  grouped_bar(array, tick_labels=group_labels, labels=dataset_labels)
-                  df = pd.DataFrame(array, index=group_labels, columns=dataset_labels)
+              .. code-block:: python
+
+                  grouped_bar(array, tick_labels=categories, labels=dataset_labels)
+                  df = pd.DataFrame(array, index=categories, columns=dataset_labels)
                   df.plot.bar()
 
             - a `pandas.DataFrame`.
 
-              .. code-block::
+              .. code-block:: python
 
                   df = pd.DataFrame(
                       np.random.random((2, 3))
@@ -3159,15 +3177,15 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing
                   )
                   grouped_bar(df)
 
-              Note that ``grouped_bar(df)`` produced a structurally equivalent plot like
-              ``df.plot.bar()`.
+              Note that ``grouped_bar(df)`` produces a plot that is structurally
+              equivalent to ``df.plot.bar()``.
 
         positions : array-like, optional
             The center positions of the bar groups. The values have to be equidistant.
             If not given, a sequence of integer positions 0, 1, 2, ... is used.
 
         tick_labels: list of str, optional
-            The group labels, which are placed on ticks at the center *positions*
+            The category labels, which are placed on ticks at the center *positions*
             of the bar groups.
 
             If not set, the axis ticks (positions and labels) are left unchanged.
@@ -3202,7 +3220,7 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing
             _GroupedBarReturn
 
                 A provisional result object. This will be refined in the future.
-                For now, the API is limited to
+                For now, the guaranteed API on the returned object is limited to
 
                 - the attribute ``bar_containers``, which is a list of
                   `.BarContainer`, i.e. the results of the individual `~.Axes.bar`
@@ -3211,6 +3229,65 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing
                 - a ``remove()`` method, that remove all bars from the Axes.
                   See also `.Artist.remove()`.
 
+        See Also
+        --------
+        bar : A lower-level API for bar plots, with more degrees of freedom like
+              individual bar sizes and colors.
+
+        Notes
+        -----
+        For a better understanding, we compare the `~.Axes.grouped_bar` API with
+        those of `~.Axes.bar` and `~.Axes.boxplot`.
+
+        **Comparison to ``bar()``**
+
+        ``grouped_bar`` intentionally deviates from the `~.Axes.bar()` API in some
+        aspects. ``bar(x, y)`` is a lower-level API and places bars with height *y*
+        at explicit positions *x*. It also allows specifying individual bar widths
+        and colors. This kind of detailed control and flexibility is difficult to
+        manage and often not needed when plotting multiple datasets as grouped bar
+        plots. Therefore, ``grouped_bar`` focusses on the abstraction of bar plots
+        as visualization of categorical data.
+
+        The following examples may help to transfer from ``bar()`` to
+        ``grouped_bar()``.
+
+        Positions are deemphasized due to categories, and default to integer values.
+        If you have used ``range(N)`` as positions, you can leave that value out::
+
+           bar(range(N), heights)
+           grouped_bar([heights])
+
+        If needed, positions can be passed as a keyword argument::
+
+           bar(x, heights)
+           grouped_bar([heights], positions=x)
+
+        To place category labels in ``bar()`` you could use the argument
+        *tick_label* or use a list of category names as *x*. ``grouped_bar()``
+        expects them in the argument *tick_labels*::
+
+           bar(range(N), heights, tick_label=["A", "B"])
+           bar(["A", "B"], heights)
+           grouped_bar([heights], tick_labels=["A", "B"])
+
+        Dataset labels that are shown in the legend are passed via the *label*
+        parameter of ``bar()`` and via the *labels* parameter of ``grouped_bar()``::
+
+           bar(..., label="dataset")
+           grouped_bar(..., labels=["dataset"])
+
+        **Comparison to ``boxplot()``**
+
+        Both `~.Axes.grouped_bar()` and `~.Axes.boxplot()` visualize categorical
+        data from multiple datasets. The basic API on *tick_labels* and *positions*
+        is the same, so that you can easily switch between plotting all
+        individual values as ``grouped_bar()`` or the statistical distribution
+        per category as ``boxplot()``::
+
+           grouped_bar(values, positions=..., tick_labels=...)
+           boxplot(values, positions=..., tick_labels=...)
+
         """
         if cbook._is_pandas_dataframe(heights):
             if labels is None: