@@ -490,8 +490,6 @@ def hist_frame(
490
490
"""
491
491
492
492
493
- @Substitution (data = "data : DataFrame\n The data to visualize.\n " , backend = "" )
494
- @Appender (_boxplot_doc )
495
493
def boxplot (
496
494
data : DataFrame ,
497
495
column : str | list [str ] | None = None ,
@@ -505,6 +503,171 @@ def boxplot(
505
503
return_type : str | None = None ,
506
504
** kwargs ,
507
505
):
506
+ """
507
+ Make a box plot from DataFrame columns.
508
+
509
+ Make a box-and-whisker plot from DataFrame columns, optionally grouped
510
+ by some other columns. A box plot is a method for graphically depicting
511
+ groups of numerical data through their quartiles.
512
+ The box extends from the Q1 to Q3 quartile values of the data,
513
+ with a line at the median (Q2). The whiskers extend from the edges
514
+ of the box to show the range of the data. By default, they extend no more than
515
+ `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
516
+ data point within that interval. Outliers are plotted as separate dots.
517
+
518
+ For further details see
519
+ Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
520
+
521
+ Parameters
522
+ ----------
523
+ data : DataFrame
524
+ The data to visualize.
525
+ column : str or list of str, optional
526
+ Column name or list of names, or vector.
527
+ Can be any valid input to :meth:`pandas.DataFrame.groupby`.
528
+ by : str or array-like, optional
529
+ Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
530
+ One box-plot will be done per value of columns in `by`.
531
+ ax : object of class matplotlib.axes.Axes, optional
532
+ The matplotlib axes to be used by boxplot.
533
+ fontsize : float or str
534
+ Tick label font size in points or as a string (e.g., `large`).
535
+ rot : float, default 0
536
+ The rotation angle of labels (in degrees)
537
+ with respect to the screen coordinate system.
538
+ grid : bool, default True
539
+ Setting this to True will show the grid.
540
+ figsize : A tuple (width, height) in inches
541
+ The size of the figure to create in matplotlib.
542
+ layout : tuple (rows, columns), optional
543
+ For example, (3, 5) will display the subplots
544
+ using 3 rows and 5 columns, starting from the top-left.
545
+ return_type : {'axes', 'dict', 'both'} or None, default 'axes'
546
+ The kind of object to return. The default is ``axes``.
547
+
548
+ * 'axes' returns the matplotlib axes the boxplot is drawn on.
549
+ * 'dict' returns a dictionary whose values are the matplotlib
550
+ Lines of the boxplot.
551
+ * 'both' returns a namedtuple with the axes and dict.
552
+ * when grouping with ``by``, a Series mapping columns to
553
+ ``return_type`` is returned.
554
+
555
+ If ``return_type`` is `None`, a NumPy array
556
+ of axes with the same shape as ``layout`` is returned.
557
+
558
+ **kwargs
559
+ All other plotting keyword arguments to be passed to
560
+ :func:`matplotlib.pyplot.boxplot`.
561
+
562
+ Returns
563
+ -------
564
+ result
565
+ See Notes.
566
+
567
+ See Also
568
+ --------
569
+ Series.plot.hist : Make a histogram.
570
+ matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
571
+
572
+ Notes
573
+ -----
574
+ The return type depends on the `return_type` parameter:
575
+
576
+ * 'axes' : object of class matplotlib.axes.Axes
577
+ * 'dict' : dict of matplotlib.lines.Line2D objects
578
+ * 'both' : a namedtuple with structure (ax, lines)
579
+
580
+ For data grouped with ``by``, return a Series of the above or a numpy
581
+ array:
582
+
583
+ * :class:`~pandas.Series`
584
+ * :class:`~numpy.ndarray` (for ``return_type = None``)
585
+
586
+ Use ``return_type='dict'`` when you want to tweak the appearance
587
+ of the lines after plotting. In this case a dict containing the Lines
588
+ making up the boxes, caps, fliers, medians, and whiskers is returned.
589
+
590
+ Examples
591
+ --------
592
+
593
+ Boxplots can be created for every column in the dataframe
594
+ by ``df.boxplot()`` or indicating the columns to be used:
595
+
596
+ .. plot::
597
+ :context: close-figs
598
+
599
+ >>> np.random.seed(1234)
600
+ >>> df = pd.DataFrame(np.random.randn(10, 4),
601
+ ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
602
+ >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP
603
+
604
+ Boxplots of variable distributions grouped by the values of a third
605
+ variable can be created using the option ``by``. For instance:
606
+
607
+ .. plot::
608
+ :context: close-figs
609
+
610
+ >>> df = pd.DataFrame(np.random.randn(10, 2),
611
+ ... columns=['Col1', 'Col2'])
612
+ >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
613
+ ... 'B', 'B', 'B', 'B', 'B'])
614
+ >>> boxplot = df.boxplot(by='X')
615
+
616
+ A list of strings (e.g. ``['X', 'Y']``) can be passed to boxplot
617
+ in order to group the data by combination of the variables in the x-axis:
618
+
619
+ .. plot::
620
+ :context: close-figs
621
+
622
+ >>> df = pd.DataFrame(np.random.randn(10, 3),
623
+ ... columns=['Col1', 'Col2', 'Col3'])
624
+ >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
625
+ ... 'B', 'B', 'B', 'B', 'B'])
626
+ >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
627
+ ... 'B', 'A', 'B', 'A', 'B'])
628
+ >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
629
+
630
+ The layout of the boxplot can be adjusted by giving a tuple to ``layout``:
631
+
632
+ .. plot::
633
+ :context: close-figs
634
+
635
+ >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
636
+ ... layout=(2, 1))
637
+
638
+ Additional formatting can be done to the boxplot, like suppressing the grid
639
+ (``grid=False``), rotating the labels on the x-axis (e.g. ``rot=45``)
640
+ or changing the fontsize (e.g. ``fontsize=15``):
641
+
642
+ .. plot::
643
+ :context: close-figs
644
+
645
+ >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP
646
+
647
+ The parameter ``return_type`` can be used to select the type of element
648
+ returned by `boxplot`. When ``return_type='axes'`` is selected,
649
+ the matplotlib axes on which the boxplot is drawn are returned:
650
+
651
+ >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
652
+ >>> type(boxplot)
653
+ <class 'matplotlib.axes._axes.Axes'>
654
+
655
+ When grouping with ``by``, a Series mapping columns to ``return_type``
656
+ is returned:
657
+
658
+ >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
659
+ ... return_type='axes')
660
+ >>> type(boxplot)
661
+ <class 'pandas.Series'>
662
+
663
+ If ``return_type`` is `None`, a NumPy array of axes with the same shape
664
+ as ``layout`` is returned:
665
+
666
+ >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
667
+ ... return_type=None)
668
+ >>> type(boxplot)
669
+ <class 'numpy.ndarray'>
670
+ """
508
671
plot_backend = _get_plot_backend ("matplotlib" )
509
672
return plot_backend .boxplot (
510
673
data ,
0 commit comments