@@ -105,12 +105,13 @@ defmodule Statistex do
105105
106106 ## Options
107107
108- * `percentiles`: percentiles to calculate (see `percentiles/2`).
108+ * `:percentiles`: percentiles to calculate (see `percentiles/2`).
109109 The percentiles 25th, 50th (median) and 75th are always calculated.
110-
111- * `exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
110+ * `:exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
112111 If this option is set to `true` the outliers are excluded from the calculation
113112 of the statistics.
113+ * `:sorted?`: indicates that the samples you're passing in are already sorted. Only set this
114+ if they are truly sorted - otherwise your results will be wrong.
114115
115116 ## Examples
116117
@@ -165,68 +166,44 @@ defmodule Statistex do
165166 end
166167
167168 def statistics ( samples , configuration ) do
168- sorted_samples = Enum . sort ( samples )
169-
170- # these statistics are required to do the outlier calculations
171- % { minimum: minimum , maximum: maximum , percentiles: percentiles } =
172- base_statistics ( sorted_samples , configuration )
169+ sorted_samples = maybe_sort ( samples , configuration )
173170
174- outlier_bounds =
175- do_outlier_bounds ( sorted_samples ,
176- percentiles: percentiles ,
177- minimum: minimum ,
178- maximum: maximum
179- )
171+ percentiles = calculate_percentiles ( sorted_samples , configuration )
172+ outlier_bounds = do_outlier_bounds ( sorted_samples , percentiles: percentiles )
180173
181- # make sure rest remains sorted and so can be used again to ok results
174+ # `rest` remains sorted here, which is an important property: it is reused below without re-sorting
182175 { outliers , rest } = do_outliers ( sorted_samples , outlier_bounds: outlier_bounds )
183176
184177 if exclude_outliers? ( configuration ) and Enum . any? ( outliers ) do
185- # figure out to avoid double sorting
186- rest = Enum . sort ( rest )
187178 # need to recalculate with the outliers removed
188- % { minimum: minimum , maximum: maximum , percentiles: percentiles } =
189- base_statistics ( rest , configuration )
179+ percentiles = calculate_percentiles ( rest , configuration )
190180
191- create_full_statistics ( rest , minimum , maximum , percentiles , outliers , outlier_bounds )
181+ create_full_statistics ( rest , percentiles , outliers , outlier_bounds )
192182 else
193- create_full_statistics (
194- sorted_samples ,
195- minimum ,
196- maximum ,
197- percentiles ,
198- outliers ,
199- outlier_bounds
200- )
183+ create_full_statistics ( sorted_samples , percentiles , outliers , outlier_bounds )
201184 end
202185 end
203186
204- defp base_statistics ( sorted_samples , configuration ) do
205- minimum = hd ( sorted_samples )
206- maximum = List . last ( sorted_samples )
207-
208- percentiles = calculate_percentiles ( sorted_samples , configuration )
209-
210- % { minimum: minimum , maximum: maximum , percentiles: percentiles }
211- end
212-
213187 defp exclude_outliers? ( configuration ) do
214188 Access . get ( configuration , :exclude_outliers ) == true
215189 end
216190
217191 # maybe make argument a map
218- defp create_full_statistics ( samples , minimum , maximum , percentiles , outliers , outlier_bounds ) do
219- total = total ( samples )
220- sample_size = length ( samples )
221- average = average ( samples , total: total , sample_size: sample_size )
222- variance = variance ( samples , average: average , sample_size: sample_size )
192+ defp create_full_statistics ( sorted_samples , percentiles , outliers , outlier_bounds ) do
193+ total = total ( sorted_samples )
194+ sample_size = length ( sorted_samples )
195+ minimum = hd ( sorted_samples )
196+ maximum = List . last ( sorted_samples )
197+
198+ average = average ( sorted_samples , total: total , sample_size: sample_size )
199+ variance = variance ( sorted_samples , average: average , sample_size: sample_size )
223200
224- frequency_distribution = frequency_distribution ( samples )
201+ frequency_distribution = frequency_distribution ( sorted_samples )
225202
226- standard_deviation = standard_deviation ( samples , variance: variance )
203+ standard_deviation = standard_deviation ( sorted_samples , variance: variance )
227204
228205 standard_deviation_ratio =
229- standard_deviation_ratio ( samples , standard_deviation: standard_deviation )
206+ standard_deviation_ratio ( sorted_samples , standard_deviation: standard_deviation )
230207
231208 { lower_outlier_bound , upper_outlier_bound } = outlier_bounds
232209
@@ -236,10 +213,10 @@ defmodule Statistex do
236213 variance: variance ,
237214 standard_deviation: standard_deviation ,
238215 standard_deviation_ratio: standard_deviation_ratio ,
239- median: median ( samples , percentiles: percentiles ) ,
216+ median: median ( sorted_samples , percentiles: percentiles ) ,
240217 percentiles: percentiles ,
241218 frequency_distribution: frequency_distribution ,
242- mode: mode ( samples , frequency_distribution: frequency_distribution ) ,
219+ mode: mode ( sorted_samples , frequency_distribution: frequency_distribution ) ,
243220 minimum: minimum ,
244221 maximum: maximum ,
245222 lower_outlier_bound: lower_outlier_bound ,
0 commit comments