@@ -1107,6 +1107,64 @@ def _construct_source(self,
11071107
11081108 return source , factors , stack_values
11091109
@staticmethod
def _compute_boxplot_df(data_frame, categorical_columns, numeric_column):
    """Computes the data frames for a boxplot.

    Quartiles are computed per group of ``categorical_columns``. The
    whisker ends are the most extreme observations that still lie within
    1.5 * IQR of the box; observations beyond them are outliers.

    Args:
        data_frame (pandas.DataFrame): Data source. It is not modified.
        categorical_columns (str or list): Column name(s) to group by.
        numeric_column (str): Column holding the values to summarise.

    Returns:
        quantiles_and_bounds: data frame for the boxes and whiskers of a
            boxplot; one row per group with the categorical columns
            followed by 'q1', 'q2', 'q3', 'lower' and 'upper'
        outliers: data frame with outliers; the rows of the input
            (extended by the quantile and bound columns) whose value
            lies outside ['lower', 'upper']
    """
    # A single column name is accepted as well as a list of names; the
    # list form is needed below to build the new column labels.
    if isinstance(categorical_columns, str):
        categorical_columns = [categorical_columns]
    else:
        categorical_columns = list(categorical_columns)

    # compute quantiles
    q_frame = data_frame.groupby(categorical_columns)[
        numeric_column].quantile([0.25, 0.5, 0.75])
    q_frame = q_frame.unstack().reset_index()
    q_frame.columns = categorical_columns + ['q1', 'q2', 'q3']
    df_with_quantiles = pd.merge(
        data_frame, q_frame, on=categorical_columns, how='left')

    # compute IQR outlier bounds
    iqr = df_with_quantiles['q3'] - df_with_quantiles['q1']
    df_with_quantiles['upper'] = df_with_quantiles['q3'] + 1.5 * iqr
    df_with_quantiles['lower'] = df_with_quantiles['q1'] - 1.5 * iqr

    # adjust outlier bounds to closest observations still within bounds
    # for upper bound
    le_upper = df_with_quantiles[df_with_quantiles[numeric_column].le(
        df_with_quantiles['upper'])]
    group_max_le_upper = le_upper.groupby(
        categorical_columns, as_index=False)[numeric_column].max()
    group_max_le_upper.columns = categorical_columns + ['upper']
    # replace the theoretical bound by the observed one
    df_with_quantiles = pd.merge(
        df_with_quantiles.drop('upper', axis=1),
        group_max_le_upper,
        on=categorical_columns,
        how='left')

    # for lower bound
    ge_lower = df_with_quantiles[df_with_quantiles[numeric_column].ge(
        df_with_quantiles['lower'])]
    group_min_ge_lower = ge_lower.groupby(
        categorical_columns, as_index=False)[numeric_column].min()
    group_min_ge_lower.columns = categorical_columns + ['lower']
    df_with_quantiles = pd.merge(
        df_with_quantiles.drop('lower', axis=1),
        group_min_ge_lower,
        on=categorical_columns,
        how='left')

    # all rows of a group carry the same statistics, so the first row of
    # each group is sufficient
    quantiles_and_bounds = df_with_quantiles.groupby(categorical_columns)[[
        'q1', 'q2', 'q3', 'lower', 'upper']].first().reset_index()

    # Explicit comparisons instead of a negated `between`: comparisons
    # with NaN are False, so rows with a missing value (or missing
    # bounds) are not reported as outliers.
    values = df_with_quantiles[numeric_column]
    is_outlier = (values.lt(df_with_quantiles['lower'])
                  | values.gt(df_with_quantiles['upper']))
    outliers = df_with_quantiles[is_outlier]

    return quantiles_and_bounds, outliers
1167+
11101168 def text (self ,
11111169 data_frame ,
11121170 categorical_columns ,
@@ -2057,3 +2115,188 @@ def scatter(self,
20572115 self ._chart .style ._apply_settings ('legend' )
20582116
20592117 return self ._chart
2118+
def boxplot(self,
            data_frame,
            categorical_columns,
            numeric_column,
            color_column=None,
            color_order=None,
            categorical_order_by='labels',
            categorical_order_ascending=True,
            outlier_marker='circle',
            outlier_color='black',
            outlier_alpha=0.3,
            outlier_size=15):
    """Box-and-whisker plot.

    Note:
        To change the orientation set x_axis_type or y_axis_type
        argument of the Chart object.

    Args:
        data_frame (pandas.DataFrame): Data source for the plot.
        categorical_columns (str or list): Column name to plot on
            the categorical axis.
        numeric_column (str): Column name to plot on the numerical axis.
        color_column (str, optional): Column name to group by on
            the color dimension.
        color_order (list, optional):
            List of values within the 'color_column' for
                specific color sort.
        categorical_order_by (str or array-like, optional):
            Dimension for ordering the categorical axis. Default 'labels'.
            - 'labels': Order categorical axis by the categorical labels.
            - array-like object (list, tuple, np.array): New labels
                to conform the categorical axis to.
        categorical_order_ascending (bool, optional):
            Sort order of the categorical axis. Default True.
        outlier_marker (str, optional): Outlier marker type. Valid types:
            'asterisk', 'circle', 'circle_cross', 'circle_x', 'cross',
            'diamond', 'diamond_cross', 'hex', 'inverted_triangle',
            'square', 'square_x', 'square_cross', 'triangle',
            'x', '*', '+', 'o', 'ox', 'o+' Default 'circle'
        outlier_color (str, optional): Color name or hex value.
            See chartify.color_palettes.show() for available color names.
            Default 'black'
        outlier_alpha (float, optional): Alpha value. Default 0.3
        outlier_size (float, optional): Size of outlier markers.
            Default 15

    Returns:
        The chart object this plot was added to (``self._chart``).

    Raises:
        ValueError: If categorical_order_by is neither 'labels' nor an
            object with a length.
    """

    # check categorical_order_by value: only the string 'labels' or a
    # sized, non-string object is accepted
    order_length = getattr(categorical_order_by, "__len__", None)
    is_string = isinstance(categorical_order_by, str)
    if ((not is_string and order_length is None)
            or (is_string and categorical_order_by != 'labels')):
        raise ValueError(
            "Argument categorical_order_by must be 'labels', "
            "or a list of values.")

    df_intervals_and_floating_bars, outliers = self._compute_boxplot_df(
        data_frame, categorical_columns, numeric_column)

    # upper and lower bound (whiskers)
    self.interval(df_intervals_and_floating_bars,
                  categorical_columns,
                  'lower',
                  'upper',
                  categorical_order_by=categorical_order_by,
                  categorical_order_ascending=categorical_order_ascending)

    # boxes for q1 to q2 and q2 to q3
    vertical = self._chart.axes._vertical

    # NOTE(review): the aggregated frame only carries the categorical
    # columns and the statistics, so color_column presumably has to be
    # one of the categorical columns -- confirm.
    source_low, _, _ = self._construct_source(
        df_intervals_and_floating_bars,
        categorical_columns,
        ['q1', 'q2'],
        categorical_order_by=categorical_order_by,
        categorical_order_ascending=categorical_order_ascending,
        color_column=color_column)

    source_high, factors, _ = self._construct_source(
        df_intervals_and_floating_bars,
        categorical_columns,
        ['q2', 'q3'],
        categorical_order_by=categorical_order_by,
        categorical_order_ascending=categorical_order_ascending,
        color_column=color_column)

    colors, _ = self._get_color_and_order(df_intervals_and_floating_bars,
                                          color_column,
                                          color_order,
                                          categorical_columns)

    if color_column is None:
        colors = colors[0]

    self._set_categorical_axis_default_factors(vertical, factors)
    self._set_categorical_axis_default_range(
        vertical, data_frame, numeric_column)

    bar_width = self._get_bar_width(factors)

    # Literal field name handed to the legend; presumably the name under
    # which _construct_source stores the color values -- confirm.
    legend = 'color_column' if color_column else None

    # Each box is drawn as two adjacent bars (q1-q2 and q2-q3). Only the
    # upper bar gets a legend group, so each color is listed once.
    boxes = (
        (source_low, 'q1', 'q2', None),
        (source_high, 'q2', 'q3', legend),
    )
    for box_source, box_start, box_end, legend_group in boxes:
        if vertical:
            self._plot_with_legend(
                self._chart.figure.vbar,
                legend_group=legend_group,
                x='factors',
                width=bar_width,
                top=box_end,
                bottom=box_start,
                line_color='white',
                source=box_source,
                fill_color=colors,
            )
        else:
            self._plot_with_legend(
                self._chart.figure.hbar,
                legend_group=legend_group,
                y='factors',
                height=bar_width,
                right=box_end,
                left=box_start,
                line_color='white',
                source=box_source,
                fill_color=colors,
            )

    # outliers: keep only the value column and attach the categorical
    # position of every outlier as 'factors'
    factors = outliers.set_index(categorical_columns).index
    outliers = outliers[[numeric_column]]

    source_outliers = self._named_column_data_source(
        outliers, series_name=None)
    source_outliers.add(factors, 'factors')

    if vertical:
        x_value, y_value = 'factors', numeric_column
    else:
        y_value, x_value = 'factors', numeric_column

    self._plot_with_legend(
        self._chart.figure.scatter,
        legend_label=None,
        x=x_value,
        y=y_value,
        size=outlier_size,
        fill_color=outlier_color,
        line_color=outlier_color,
        source=source_outliers,
        marker=outlier_marker,
        alpha=outlier_alpha
    )

    return self._chart
0 commit comments