Skip to content

Commit 59df367

Browse files
iampelle0verhead
andauthored
Release 3.1.0 (#152)
* Added boxplot feature (#151) * Add method for boxplot dataframe * Add method for plotting boxplot * Change default order to ascending * Add check for categorical_order_by * Remove show() from boxplot method * Fix wrong horizontal boxplot * Correctly set numeric axis range * Remove _sort_categories method and its usages * Add tests for boxplot * Change to not deprecated syntax * Improve formatting * Add doc to boxplot method * Add doc for _compute_boxplot_df * Rename test_standard to test_default * Updated version and added boxplot example to examples notebook --------- Co-authored-by: Quoc Duong Bui <35042166+vanHekthor@users.noreply.github.com>
1 parent 7af764a commit 59df367

File tree

6 files changed

+678
-152
lines changed

6 files changed

+678
-152
lines changed

HISTORY.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
History
33
=======
44

5+
3.1.0 (2023-03-22)
6+
------------------
7+
8+
* Added Boxplot Chart including example in examples notebook
9+
510
3.0.5 (2022-12-13)
611
------------------
712

chartify/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
__author__ = """Chris Halpert"""
2424
__email__ = 'chalpert@spotify.com'
25-
__version__ = '3.0.5'
25+
__version__ = '3.1.0'
2626

2727
_IPYTHON_INSTANCE = False
2828

chartify/_core/plot.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,64 @@ def _construct_source(self,
11071107

11081108
return source, factors, stack_values
11091109

1110+
@staticmethod
def _compute_boxplot_df(data_frame, categorical_columns, numeric_column):
    """Computes the data frames for a boxplot.

    Quartiles are computed per group of `categorical_columns`. The whisker
    bounds start from the 1.5 * IQR rule and are then pulled in to the
    most extreme observation of the group that still lies within them.

    Args:
        data_frame (pandas.DataFrame): Data source for the plot.
        categorical_columns (str or list): Column name(s) to group by.
        numeric_column (str): Column name holding the values to summarize.

    Returns:
        quantiles_and_bounds: data frame for the boxes and whiskers of a
            boxplot, one row per group with the columns
            'q1', 'q2', 'q3', 'lower' and 'upper'
        outliers: data frame with outliers, i.e. the input rows (plus the
            computed columns) whose value lies outside [lower, upper]
    """
    # Accept a single column name as well as a list: the list
    # concatenations below would raise TypeError on a bare string.
    if isinstance(categorical_columns, str):
        categorical_columns = [categorical_columns]
    else:
        categorical_columns = list(categorical_columns)

    # compute quantiles
    q_frame = data_frame.groupby(categorical_columns)[
        numeric_column].quantile([0.25, 0.5, 0.75])
    q_frame = q_frame.unstack().reset_index()
    q_frame.columns = categorical_columns + ['q1', 'q2', 'q3']
    df_with_quantiles = pd.merge(
        data_frame, q_frame, on=categorical_columns, how='left')

    # compute IQR outlier bounds
    iqr = df_with_quantiles['q3'] - df_with_quantiles['q1']
    df_with_quantiles['upper'] = df_with_quantiles['q3'] + 1.5 * iqr
    df_with_quantiles['lower'] = df_with_quantiles['q1'] - 1.5 * iqr

    # adjust outlier bounds to closest observations still within bounds
    # for upper bound
    le_upper = df_with_quantiles[
        df_with_quantiles[numeric_column].le(df_with_quantiles['upper'])]
    group_max_le_upper = le_upper.groupby(
        categorical_columns, as_index=False)[numeric_column].max()
    group_max_le_upper.columns = categorical_columns + ['upper']

    # swap the theoretical bound for the observed one
    df_with_quantiles = pd.merge(
        df_with_quantiles.drop(columns='upper'),
        group_max_le_upper,
        on=categorical_columns,
        how='left')

    # for lower bound
    ge_lower = df_with_quantiles[
        df_with_quantiles[numeric_column].ge(df_with_quantiles['lower'])]
    group_min_ge_lower = ge_lower.groupby(
        categorical_columns, as_index=False)[numeric_column].min()
    group_min_ge_lower.columns = categorical_columns + ['lower']

    df_with_quantiles = pd.merge(
        df_with_quantiles.drop(columns='lower'),
        group_min_ge_lower,
        on=categorical_columns,
        how='left')

    quantiles_and_bounds = df_with_quantiles.groupby(categorical_columns)[[
        'q1', 'q2', 'q3', 'lower', 'upper']].first().reset_index()

    # Explicit comparisons instead of ~between(): between() is False for
    # missing values, so negating it would report every row with a missing
    # value (or missing bounds) as an outlier.
    values = df_with_quantiles[numeric_column]
    is_outlier = (values.lt(df_with_quantiles['lower'])
                  | values.gt(df_with_quantiles['upper']))
    outliers = df_with_quantiles[is_outlier]

    return quantiles_and_bounds, outliers
1167+
11101168
def text(self,
11111169
data_frame,
11121170
categorical_columns,
@@ -2057,3 +2115,188 @@ def scatter(self,
20572115
self._chart.style._apply_settings('legend')
20582116

20592117
return self._chart
2118+
2119+
def boxplot(self,
            data_frame,
            categorical_columns,
            numeric_column,
            color_column=None,
            color_order=None,
            categorical_order_by='labels',
            categorical_order_ascending=True,
            outlier_marker='circle',
            outlier_color='black',
            outlier_alpha=0.3,
            outlier_size=15):
    """Box-and-whisker plot.

    Note:
        To change the orientation set x_axis_type or y_axis_type
        argument of the Chart object.

    Args:
        data_frame (pandas.DataFrame): Data source for the plot.
        categorical_columns (str or list): Column name to plot on
            the categorical axis.
        numeric_column (str): Column name to plot on the numerical axis.
        color_column (str, optional): Column name to group by on
            the color dimension.
        color_order (list, optional):
            List of values within the 'color_column' for
                specific color sort.
        categorical_order_by (str or array-like, optional):
            Dimension for ordering the categorical axis. Default 'labels'.
            - 'labels': Order categorical axis by the categorical labels.
            - array-like object (list, tuple, np.array): New labels
                to conform the categorical axis to.
        categorical_order_ascending (bool, optional):
            Sort order of the categorical axis. Default True.
        outlier_marker (str, optional): Outlier marker type. Valid types:
            'asterisk', 'circle', 'circle_cross', 'circle_x', 'cross',
            'diamond', 'diamond_cross', 'hex', 'inverted_triangle',
            'square', 'square_x', 'square_cross', 'triangle',
            'x', '*', '+', 'o', 'ox', 'o+' Default 'circle'
        outlier_color (str, optional): Color name or hex value.
            See chartify.color_palettes.show() for available color names.
            Default 'black'
        outlier_alpha (float, optional): Alpha value. Default 0.3
        outlier_size (float, optional): Size of outlier markers.
            Default 15

    Returns:
        The chart object this plot object is attached to.

    Raises:
        ValueError: If categorical_order_by is a string other than
            'labels', or is neither a string nor an object with a length.
    """

    # check categorical_order_by value
    is_string = isinstance(categorical_order_by, str)
    has_length = hasattr(categorical_order_by, '__len__')
    if ((not is_string and not has_length)
            or (is_string and categorical_order_by != 'labels')):
        raise ValueError(
            "Argument categorical_order_by must be 'labels',\n"
            "or a list of values.")

    df_intervals_and_floating_bars, outliers = self._compute_boxplot_df(
        data_frame, categorical_columns, numeric_column)

    # upper and lower bound
    self.interval(df_intervals_and_floating_bars,
                  categorical_columns,
                  'lower',
                  'upper',
                  categorical_order_by=categorical_order_by,
                  categorical_order_ascending=categorical_order_ascending)

    # boxes for q1 to q2 and q2 to q3
    vertical = self._chart.axes._vertical

    source_low, _, _ = self._construct_source(
        df_intervals_and_floating_bars,
        categorical_columns,
        ['q1', 'q2'],
        categorical_order_by=categorical_order_by,
        categorical_order_ascending=categorical_order_ascending,
        color_column=color_column)

    source_high, factors, _ = self._construct_source(
        df_intervals_and_floating_bars,
        categorical_columns,
        ['q2', 'q3'],
        categorical_order_by=categorical_order_by,
        categorical_order_ascending=categorical_order_ascending,
        color_column=color_column)

    colors, _ = self._get_color_and_order(df_intervals_and_floating_bars,
                                          color_column,
                                          color_order,
                                          categorical_columns)

    if color_column is None:
        colors = colors[0]

    self._set_categorical_axis_default_factors(vertical, factors)
    # The raw data_frame is passed here rather than the summarized frame;
    # presumably so the numeric range also covers the outliers - confirm
    # against _set_categorical_axis_default_range.
    self._set_categorical_axis_default_range(
        vertical, data_frame, numeric_column)

    bar_width = self._get_bar_width(factors)

    legend = 'color_column' if color_column else None

    # Both orientations draw the same two boxes; only the glyph method and
    # the names of its position/extent arguments differ.
    if vertical:
        glyph = self._chart.figure.vbar
        position = {'x': 'factors', 'width': bar_width}
        low_edge, high_edge = 'bottom', 'top'
    else:
        glyph = self._chart.figure.hbar
        position = {'y': 'factors', 'height': bar_width}
        low_edge, high_edge = 'left', 'right'

    # Only the upper box carries the legend group, presumably so that each
    # color shows up once in the legend.
    boxes = (
        (source_low, 'q1', 'q2', None),
        (source_high, 'q2', 'q3', legend),
    )
    for source, low_column, high_column, legend_group in boxes:
        self._plot_with_legend(
            glyph,
            legend_group=legend_group,
            line_color='white',
            source=source,
            fill_color=colors,
            **{low_edge: low_column, high_edge: high_column},
            **position)

    # outliers
    factors = outliers.set_index(categorical_columns).index
    outliers = outliers[[numeric_column]]

    source_outliers = self._named_column_data_source(
        outliers, series_name=None)
    source_outliers.add(factors, 'factors')

    if vertical:
        x_value, y_value = 'factors', numeric_column
    else:
        y_value, x_value = 'factors', numeric_column

    self._plot_with_legend(
        self._chart.figure.scatter,
        legend_label=None,
        x=x_value,
        y=y_value,
        size=outlier_size,
        fill_color=outlier_color,
        line_color=outlier_color,
        source=source_outliers,
        marker=outlier_marker,
        alpha=outlier_alpha
    )

    return self._chart

chartify/examples.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,38 @@ def _bar_example_4(quantity_by_fruit):
482482
plot_bar.__doc__ = _core.plot.PlotMixedTypeXY.bar.__doc__
483483

484484

485+
# Entry point of the boxplot example: loads the example data and hands it
# to _boxplot_example_1.
# NOTE(review): the @_print_source decorator and the "Print Break" marker
# string suggest this function's source text is read at runtime - confirm
# before restructuring the body.
@_print_source
def plot_boxplot():
    """
    Boxplot example
    """
    import chartify

    # Generate example data
    data = chartify.examples.example_data()
    """Print Break"""
    _boxplot_example_1(data)
496+
497+
498+
# Builds the boxplot example chart: the 'quantity' distribution grouped by
# 'fruit' and 'country', colored by 'country', on a categorical x axis, and
# shows it using the module's _OUTPUT_FORMAT.
# NOTE(review): decorated with @_print_source, so the body's source text is
# presumably displayed to the user - confirm before editing lines inside it.
@_print_source
def _boxplot_example_1(quantity_by_fruit_and_country):
    """# Plot the data with labels"""
    ch = chartify.Chart(x_axis_type='categorical')
    ch.plot.boxplot(
        data_frame=quantity_by_fruit_and_country,
        categorical_columns=['fruit', 'country'],
        numeric_column='quantity',
        color_column='country'
    )
    ch.set_title('Distribution of number of fruits by day')
    ch.set_subtitle('split by fruit type and country')
    ch.axes.set_xaxis_label('Fruit and country')
    ch.axes.set_yaxis_label('Distribution of number of fruits by day')
    ch.show(_OUTPUT_FORMAT)
513+
514+
# Replace the example's own docstring with the plotting method's docstring,
# so the example is documented with the method's argument reference.
plot_boxplot.__doc__ = _core.plot.PlotMixedTypeXY.boxplot.__doc__
515+
516+
485517
@_print_source
486518
def plot_interval():
487519
"""

examples/Examples.ipynb

Lines changed: 255 additions & 151 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)