@@ -40,6 +40,68 @@ def self.empty_recursive_array
40
40
a
41
41
end
42
42
43
+ # Chi squared critical values for tests with n degrees of freedom at 99% confidence.
44
+ # Values obtained from NIST Engineering Statistic Handbook at
45
+ # https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
46
+
47
+ CHI_SQUARED_CRITICAL_VALUES = [
48
+ 0 ,
49
+ 6.635 , 9.210 , 11.345 , 13.277 , 15.086 , 16.812 , 18.475 , 20.090 , 21.666 , 23.209 ,
50
+ 24.725 , 26.217 , 27.688 , 29.141 , 30.578 , 32.000 , 33.409 , 34.805 , 36.191 , 37.566 ,
51
+ 38.932 , 40.289 , 41.638 , 42.980 , 44.314 , 45.642 , 46.963 , 48.278 , 49.588 , 50.892 ,
52
+ 52.191 , 53.486 , 54.776 , 56.061 , 57.342 , 58.619 , 59.893 , 61.162 , 62.428 , 63.691 ,
53
+ 64.950 , 66.206 , 67.459 , 68.710 , 69.957 , 71.201 , 72.443 , 73.683 , 74.919 , 76.154 ,
54
+ 77.386 , 78.616 , 79.843 , 81.069 , 82.292 , 83.513 , 84.733 , 85.950 , 87.166 , 88.379 ,
55
+ 89.591 , 90.802 , 92.010 , 93.217 , 94.422 , 95.626 , 96.828 , 98.028 , 99.228 , 100.425 ,
56
+ 101.621 , 102.816 , 104.010 , 105.202 , 106.393 , 107.583 , 108.771 , 109.958 , 111.144 , 112.329 ,
57
+ 113.512 , 114.695 , 115.876 , 117.057 , 118.236 , 119.414 , 120.591 , 121.767 , 122.942 , 124.116 ,
58
+ 125.289 , 126.462 , 127.633 , 128.803 , 129.973 , 131.141 , 132.309 , 133.476 , 134.642 , 135.807 ,
59
+ ]
60
+
61
+ def self . measure_sample_fairness ( size , samples , iters )
62
+ ary = Array . new ( size ) { |x | x }
63
+ ( samples ) . times do |i |
64
+ chi_results = [ ]
65
+ 3 . times do
66
+ counts = Array . new ( size ) { 0 }
67
+ expected = iters / size
68
+ iters . times do
69
+ x = ary . sample ( samples ) [ i ]
70
+ counts [ x ] += 1
71
+ end
72
+ chi_squared = 0.0
73
+ counts . each do |count |
74
+ chi_squared += ( ( ( count - expected ) ** 2 ) * 1.0 / expected )
75
+ end
76
+ chi_results << chi_squared
77
+ break if chi_squared <= CHI_SQUARED_CRITICAL_VALUES [ size ]
78
+ end
79
+
80
+ chi_results . min . should <= CHI_SQUARED_CRITICAL_VALUES [ size ]
81
+ end
82
+ end
83
+
84
+ def self . measure_sample_fairness_large_sample_size ( size , samples , iters )
85
+ ary = Array . new ( size ) { |x | x }
86
+ counts = Array . new ( size ) { 0 }
87
+ expected = iters * samples / size
88
+ iters . times do
89
+ ary . sample ( samples ) . each do |sample |
90
+ counts [ sample ] += 1
91
+ end
92
+ end
93
+ chi_squared = 0.0
94
+ counts . each do |count |
95
+ chi_squared += ( ( ( count - expected ) ** 2 ) * 1.0 / expected )
96
+ end
97
+
98
+ # Chi squared critical values for tests with 4 degrees of freedom
99
+ # Values obtained from NIST Engineering Statistic Handbook at
100
+ # https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
101
+
102
+ chi_squared . should <= CHI_SQUARED_CRITICAL_VALUES [ size ]
103
+ end
104
+
43
105
class MyArray < Array
44
106
# The #initialize method has a different signature than Array to help
45
107
# catch places in the specs that do not assert the #initialize is not
0 commit comments