@@ -13,9 +13,10 @@ defmodule Statistex.StatistexTest do
1313 end
1414
1515 describe ".outlier_bounds/2" do
16+ # examples doubled up, maybe get rid of them?
1617 test "returns outlier bounds for samples without outliers" do
1718 assert Statistex . outlier_bounds ( [ 200 , 400 , 400 , 400 , 500 , 500 , 500 , 700 , 900 ] ) ==
18- { 200 , 900.0 }
19+ { 100.0 , 900.0 }
1920 end
2021
2122 test "returns outlier bounds for samples with outliers" do
@@ -30,7 +31,7 @@ defmodule Statistex.StatistexTest do
3031 % Statistex {
3132 total: 4500 ,
3233 average: 500.0 ,
33- variance: 40000 .0,
34+ variance: 40_000 .0,
3435 standard_deviation: 200.0 ,
3536 standard_deviation_ratio: 0.4 ,
3637 median: 500.0 ,
@@ -39,7 +40,7 @@ defmodule Statistex.StatistexTest do
3940 mode: [ 500 , 400 ] ,
4041 minimum: 200 ,
4142 maximum: 900 ,
42- outlier_bounds: { 200 , 900.0 } ,
43+ outlier_bounds: { 100.0 , 900.0 } ,
4344 outliers: [ ] ,
4445 sample_size: 9
4546 }
@@ -50,7 +51,7 @@ defmodule Statistex.StatistexTest do
5051 % Statistex {
5152 total: 4450 ,
5253 average: 445.0 ,
53- variance: 61361 .11111111111,
54+ variance: 61_361 .11111111111,
5455 standard_deviation: 247.71175004652304 ,
5556 standard_deviation_ratio: 0.5566556180820742 ,
5657 median: 475.0 ,
@@ -65,28 +66,77 @@ defmodule Statistex.StatistexTest do
6566 }
6667 end
6768
68- test "returns Statistex struct with excluded outliers once" do
69- assert Statistex . statistics ( [ 50 , 50 , 450 , 450 , 450 , 500 , 500 , 500 , 600 , 900 ] ,
70- exclude_outliers: true
71- ) ==
72- % Statistex {
73- total: 3450 ,
74- average: 492.85714285714283 ,
75- variance: 2857.142857142857 ,
76- standard_deviation: 53.452248382484875 ,
77- standard_deviation_ratio: 0.1084538372977954 ,
78- median: 500.0 ,
79- percentiles: % { 25 => 450.0 , 50 => 500.0 , 75 => 500.0 } ,
80- frequency_distribution: % { 450 => 3 , 500 => 3 , 600 => 1 } ,
81- mode: [ 500 , 450 ] ,
82- minimum: 450 ,
83- maximum: 600 ,
84- # check with other sources what is right and what isn't, I fear we may have calculated outliers twice before
85- outlier_bounds: { 450 , 575.0 } ,
86- # Either sort them or make the test ignorant of order
87- outliers: [ 600 , 50 , 50 , 900 ] ,
88- sample_size: 7
89- }
69+ # https://www.youtube.com/watch?v=rZJbj2I-_Ek
70+ test "gets outliers from the sample right" do
71+ # One could argue that this is controversial, as R comes up with different results by default:
72+ # > summary(c(9, 9, 10, 10, 10, 11, 12, 36))
73+ # Min. 1st Qu. Median Mean 3rd Qu. Max.
74+ # 9.00 9.75 10.00 13.38 11.25 36.00
75+ #
76+ # R uses type 7 interpolation by default, whereas we implemented type 6 interpolation, which
77+ # R can also use:
78+ # > quantile(c(9, 9, 10, 10, 10, 11, 12, 36), probs = c(0.25, 0.5, 0.75), type = 6)
79+ # 25% 50% 75%
80+ # 9.25 10.00 11.75
81+ # This matches our result.
82+
83+ assert % Statistex {
84+ median: 10.0 ,
85+ percentiles: % { 25 => 9.25 , 50 => 10.0 , 75 => 11.75 } ,
86+ minimum: 9 ,
87+ maximum: 36 ,
88+ outlier_bounds: { 5.5 , 15.5 } ,
89+ outliers: [ 36 ]
90+ } = Statistex . statistics ( [ 9 , 9 , 10 , 10 , 10 , 11 , 12 , 36 ] , exclude_outliers: false )
91+ end
92+
93+ # https://en.wikipedia.org/wiki/Box_plot#Example_with_outliers
94+ test "another example with outliers" do
95+ data = [
96+ 52 ,
97+ 57 ,
98+ 57 ,
99+ 58 ,
100+ 63 ,
101+ 66 ,
102+ 66 ,
103+ 67 ,
104+ 67 ,
105+ 68 ,
106+ 69 ,
107+ 70 ,
108+ 70 ,
109+ 70 ,
110+ 70 ,
111+ 72 ,
112+ 73 ,
113+ 75 ,
114+ 75 ,
115+ 76 ,
116+ 76 ,
117+ 78 ,
118+ 79 ,
119+ 89
120+ ]
121+
122+ assert % Statistex {
123+ median: 70.0 ,
124+ percentiles: % { 25 => 66.0 , 50 => 70.0 , 75 => 75.0 } ,
125+ # report interquartile range?
126+ outlier_bounds: { 52.5 , 88.5 } ,
127+ outliers: [ 52 , 89 ]
128+ } = Statistex . statistics ( data , exclude_outliers: false )
129+ end
130+
131+ # https://en.wikipedia.org/wiki/Interquartile_range#Data_set_in_a_table
132+ test "quartile example" do
133+ assert % Statistex {
134+ median: 87.0 ,
135+ percentiles: % { 25 => 31.0 , 50 => 87.0 , 75 => 119.0 }
136+ } =
137+ Statistex . statistics ( [ 7 , 7 , 31 , 31 , 47 , 75 , 87 , 115 , 116 , 119 , 119 , 155 , 177 ] ,
138+ exclude_outliers: false
139+ )
90140 end
91141 end
92142
0 commit comments