@@ -96,18 +96,16 @@ defmodule Statistex do
9696
9797 The statistics themselves are described in the individual samples that can be used to calculate individual values.
9898
99- `Argumenterror ` is raised if the given list is empty.
99+ `ArgumentError ` is raised if the given list is empty.
100100
101101 ## Options
102102
103- In a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can
103+ With the `percentiles` option, arguments for the calculation of percentiles (see `percentiles/2`) can
104104 be given. The 25th, 50th (median) and 75th percentiles are always calculated.
105105
106- The option `exclude_outliers` can be set to `:once`, `:repeatedly` or `nil`,
107- `nil` is the default. If this option set to `:once` the outliers are excluded
108- and the statistics are calculated with the rest of the samples. The value
109- `:repeatedly` repeats the outlier exclusion until the samples no longer
110- contains outliers.
106+ The option `exclude_outliers` can be set to `true` or `false`; it defaults to `false`.
107+ If this option is set to `true`, the outliers are excluded
108+ and the statistics are calculated with the rest of the samples.
111109
112110 ## Examples
113111
@@ -167,62 +165,72 @@ defmodule Statistex do
def statistics(samples, configuration) do
  sorted = Enum.sort(samples)

  # Minimum, maximum and percentiles of the full sample set are the inputs
  # needed to compute the outlier bounds.
  base = base_statistics(sorted, configuration)

  outlier_bounds =
    do_outlier_bounds(sorted,
      percentiles: base.percentiles,
      minimum: base.minimum,
      maximum: base.maximum
    )

  {outliers, rest} = do_outliers(sorted, outlier_bounds: outlier_bounds)

  # Pick the sample set the final statistics are computed from. Outliers are
  # only dropped when the option is enabled and there is something to drop.
  {kept, base} =
    if exclude_outliers?(configuration) and Enum.any?(outliers) do
      # TODO: if do_outliers is guaranteed to keep `rest` in sorted order,
      # this second sort is redundant and can be removed.
      kept = Enum.sort(rest)
      # Minimum, maximum and percentiles must be recalculated without the outliers.
      {kept, base_statistics(kept, configuration)}
    else
      {sorted, base}
    end

  # The reported outliers and outlier bounds are always the ones determined
  # from the full (sorted) sample set above.
  create_full_statistics(
    kept,
    base.minimum,
    base.maximum,
    base.percentiles,
    outliers,
    outlier_bounds
  )
end
223190
# Computes the statistics that the outlier calculation depends on.
#
# Takes the first element of `samples` as the minimum and the last element as
# the maximum, so the caller is expected to pass an already sorted list.
# Returns a map with the keys `:minimum`, `:maximum` and `:percentiles`.
defp base_statistics(samples, configuration) do
  %{
    minimum: hd(samples),
    maximum: List.last(samples),
    percentiles: calculate_percentiles(samples, configuration)
  }
end
199+
# Returns `true` only when the `:exclude_outliers` option is exactly `true`;
# a missing key or any other value yields `false`.
defp exclude_outliers?(configuration) do
  case Access.get(configuration, :exclude_outliers) do
    true -> true
    _other -> false
  end
end
203+
# Builds the complete statistics struct for `samples`.
#
# `minimum`, `maximum`, `percentiles`, `outliers` and `outlier_bounds` are
# supplied by the caller and stored as given; every other field is computed
# here from `samples`, reusing intermediate results where a later statistic
# accepts them as an option.
#
# TODO: maybe accept the precomputed values as a single map argument.
defp create_full_statistics(samples, minimum, maximum, percentiles, outliers, outlier_bounds) do
  total = total(samples)
  sample_size = length(samples)
  average = average(samples, total: total, sample_size: sample_size)
  variance = variance(samples, average: average, sample_size: sample_size)

  frequency_distribution = frequency_distribution(samples)

  standard_deviation = standard_deviation(samples, variance: variance)

  standard_deviation_ratio =
    standard_deviation_ratio(samples, standard_deviation: standard_deviation)

  median = median(samples, percentiles: percentiles)
  mode = mode(samples, frequency_distribution: frequency_distribution)

  %__MODULE__{
    total: total,
    average: average,
    variance: variance,
    standard_deviation: standard_deviation,
    standard_deviation_ratio: standard_deviation_ratio,
    median: median,
    percentiles: percentiles,
    frequency_distribution: frequency_distribution,
    mode: mode,
    minimum: minimum,
    maximum: maximum,
    outlier_bounds: outlier_bounds,
    outliers: outliers,
    sample_size: sample_size
  }
end
227235
228236 @ doc """
0 commit comments