Skip to content

Commit 690773f

Browse files
committed
Simplify statistics/2 as min/max aren't needed
Since we changed the rules for outlier bounds, we no longer need min/max to calculate them. This simplifies the code quite a bit.
1 parent fee2065 commit 690773f

File tree

1 file changed

+24
-47
lines changed

1 file changed

+24
-47
lines changed

lib/statistex.ex

Lines changed: 24 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,13 @@ defmodule Statistex do
105105
106106
## Options
107107
108-
* `percentiles`: percentiles to calculate (see `percentiles/2`).
108+
* `:percentiles`: percentiles to calculate (see `percentiles/2`).
109109
The percentiles 25th, 50th (median) and 75th are always calculated.
110-
111-
* `exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
110+
* `:exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
112111
If this option is set to `true` the outliers are excluded from the calculation
113112
of the statistics.
113+
* `:sorted?`: indicates that the samples you're passing in are already sorted. Only set this
114+
if they are truly sorted - otherwise your results will be wrong.
114115
115116
## Examples
116117
@@ -165,68 +166,44 @@ defmodule Statistex do
165166
end
166167

167168
def statistics(samples, configuration) do
168-
sorted_samples = Enum.sort(samples)
169-
170-
# these statistics are required to do the outlier calculations
171-
%{minimum: minimum, maximum: maximum, percentiles: percentiles} =
172-
base_statistics(sorted_samples, configuration)
169+
sorted_samples = maybe_sort(samples, configuration)
173170

174-
outlier_bounds =
175-
do_outlier_bounds(sorted_samples,
176-
percentiles: percentiles,
177-
minimum: minimum,
178-
maximum: maximum
179-
)
171+
percentiles = calculate_percentiles(sorted_samples, configuration)
172+
outlier_bounds = do_outlier_bounds(sorted_samples, percentiles: percentiles)
180173

181-
# make sure rest remains sorted and so can be used again to ok results
174+
# rest remains sorted here - an important property, as it is reused below without re-sorting
182175
{outliers, rest} = do_outliers(sorted_samples, outlier_bounds: outlier_bounds)
183176

184177
if exclude_outliers?(configuration) and Enum.any?(outliers) do
185-
# figure out to avoid double sorting
186-
rest = Enum.sort(rest)
187178
# need to recalculate with the outliers removed
188-
%{minimum: minimum, maximum: maximum, percentiles: percentiles} =
189-
base_statistics(rest, configuration)
179+
percentiles = calculate_percentiles(rest, configuration)
190180

191-
create_full_statistics(rest, minimum, maximum, percentiles, outliers, outlier_bounds)
181+
create_full_statistics(rest, percentiles, outliers, outlier_bounds)
192182
else
193-
create_full_statistics(
194-
sorted_samples,
195-
minimum,
196-
maximum,
197-
percentiles,
198-
outliers,
199-
outlier_bounds
200-
)
183+
create_full_statistics(sorted_samples, percentiles, outliers, outlier_bounds)
201184
end
202185
end
203186

204-
defp base_statistics(sorted_samples, configuration) do
205-
minimum = hd(sorted_samples)
206-
maximum = List.last(sorted_samples)
207-
208-
percentiles = calculate_percentiles(sorted_samples, configuration)
209-
210-
%{minimum: minimum, maximum: maximum, percentiles: percentiles}
211-
end
212-
213187
defp exclude_outliers?(configuration) do
214188
Access.get(configuration, :exclude_outliers) == true
215189
end
216190

217191
# maybe make argument a map
218-
defp create_full_statistics(samples, minimum, maximum, percentiles, outliers, outlier_bounds) do
219-
total = total(samples)
220-
sample_size = length(samples)
221-
average = average(samples, total: total, sample_size: sample_size)
222-
variance = variance(samples, average: average, sample_size: sample_size)
192+
defp create_full_statistics(sorted_samples, percentiles, outliers, outlier_bounds) do
193+
total = total(sorted_samples)
194+
sample_size = length(sorted_samples)
195+
minimum = hd(sorted_samples)
196+
maximum = List.last(sorted_samples)
197+
198+
average = average(sorted_samples, total: total, sample_size: sample_size)
199+
variance = variance(sorted_samples, average: average, sample_size: sample_size)
223200

224-
frequency_distribution = frequency_distribution(samples)
201+
frequency_distribution = frequency_distribution(sorted_samples)
225202

226-
standard_deviation = standard_deviation(samples, variance: variance)
203+
standard_deviation = standard_deviation(sorted_samples, variance: variance)
227204

228205
standard_deviation_ratio =
229-
standard_deviation_ratio(samples, standard_deviation: standard_deviation)
206+
standard_deviation_ratio(sorted_samples, standard_deviation: standard_deviation)
230207

231208
{lower_outlier_bound, upper_outlier_bound} = outlier_bounds
232209

@@ -236,10 +213,10 @@ defmodule Statistex do
236213
variance: variance,
237214
standard_deviation: standard_deviation,
238215
standard_deviation_ratio: standard_deviation_ratio,
239-
median: median(samples, percentiles: percentiles),
216+
median: median(sorted_samples, percentiles: percentiles),
240217
percentiles: percentiles,
241218
frequency_distribution: frequency_distribution,
242-
mode: mode(samples, frequency_distribution: frequency_distribution),
219+
mode: mode(sorted_samples, frequency_distribution: frequency_distribution),
243220
minimum: minimum,
244221
maximum: maximum,
245222
lower_outlier_bound: lower_outlier_bound,

0 commit comments

Comments
 (0)