@@ -40,6 +40,63 @@ def self.empty_recursive_array
40
40
a
41
41
end
42
42
43
+ # Chi squared critical values for tests with n degrees of freedom at 99% confidence.
44
+ # Values obtained from the NIST Engineering Statistics Handbook at
45
+ # https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
46
+
47
# Lookup table indexed by degrees of freedom: CHI_SQUARED_CRITICAL_VALUES[n]
# is the critical value for n degrees of freedom (n in 1..100). Index 0 is a
# placeholder so that indices line up with degrees of freedom.
# Frozen so that no spec can accidentally mutate the shared table.
CHI_SQUARED_CRITICAL_VALUES = [
  0,
  6.635, 9.210, 11.345, 13.277, 15.086, 16.812, 18.475, 20.090, 21.666, 23.209,
  24.725, 26.217, 27.688, 29.141, 30.578, 32.000, 33.409, 34.805, 36.191, 37.566,
  38.932, 40.289, 41.638, 42.980, 44.314, 45.642, 46.963, 48.278, 49.588, 50.892,
  52.191, 53.486, 54.776, 56.061, 57.342, 58.619, 59.893, 61.162, 62.428, 63.691,
  64.950, 66.206, 67.459, 68.710, 69.957, 71.201, 72.443, 73.683, 74.919, 76.154,
  77.386, 78.616, 79.843, 81.069, 82.292, 83.513, 84.733, 85.950, 87.166, 88.379,
  89.591, 90.802, 92.010, 93.217, 94.422, 95.626, 96.828, 98.028, 99.228, 100.425,
  101.621, 102.816, 104.010, 105.202, 106.393, 107.583, 108.771, 109.958, 111.144, 112.329,
  113.512, 114.695, 115.876, 117.057, 118.236, 119.414, 120.591, 121.767, 122.942, 124.116,
  125.289, 126.462, 127.633, 128.803, 129.973, 131.141, 132.309, 133.476, 134.642, 135.807,
].freeze
60
+
61
# Checks, position by position, that Array#sample picks every element of a
# +size+-element array about equally often, using a chi-squared statistic.
#
# size    - number of elements (the integers 0...size) in the array sampled
#           from; must be a valid index into CHI_SQUARED_CRITICAL_VALUES.
# samples - number of elements requested from each Array#sample call; every
#           position 0...samples of the result is tested separately.
# iters   - number of Array#sample calls made for each tested position.
#
# For each position the statistic is asserted (via #should) to be at most
# CHI_SQUARED_CRITICAL_VALUES[size].
#
# Raises ArgumentError when +size+ is outside the table or +samples+ exceeds
# +size+ (in which case some positions would not exist in the sample).
#
# NOTE(review): a goodness-of-fit test over +size+ categories conventionally
# uses size - 1 degrees of freedom; the lookup at index +size+ is kept as in
# the original, which makes the threshold slightly more lenient - confirm
# this is intentional.
def self.measure_sample_fairness(size, samples, iters)
  max_size = CHI_SQUARED_CRITICAL_VALUES.size - 1
  unless size.between?(1, max_size)
    raise ArgumentError, "size must be in 1..#{max_size}, got #{size}"
  end
  if samples > size
    raise ArgumentError, "samples (#{samples}) must not exceed size (#{size})"
  end

  critical_value = CHI_SQUARED_CRITICAL_VALUES[size]
  ary = Array.new(size) { |x| x }
  # Float division: integer division would truncate the expected count
  # whenever iters is not a multiple of size and skew the statistic.
  expected = iters.fdiv(size)

  samples.times do |i|
    counts = Array.new(size, 0)
    iters.times do
      # Tally which element landed at position i of this sample.
      counts[ary.sample(samples)[i]] += 1
    end

    chi_squared = counts.inject(0.0) do |sum, count|
      sum + ((count - expected)**2) / expected
    end

    chi_squared.should <= critical_value
  end
end
78
+
79
# Checks that Array#sample picks every element of a +size+-element array about
# equally often when counting occurrences across all positions of the sample.
#
# size    - number of elements (the integers 0...size) in the array sampled
#           from; must be a valid index into CHI_SQUARED_CRITICAL_VALUES.
# samples - number of elements requested from each Array#sample call.
# iters   - number of Array#sample calls made.
#
# Every element of every sample is tallied, the chi-squared statistic of the
# tallies against the uniform expectation is computed, and the result is
# asserted (via #should) to be at most CHI_SQUARED_CRITICAL_VALUES[size].
#
# Raises ArgumentError when +size+ is outside the table.
#
# NOTE(review): a goodness-of-fit test over +size+ categories conventionally
# uses size - 1 degrees of freedom; the lookup at index +size+ is kept as in
# the original - confirm this is intentional.
def self.measure_sample_fairness_large_sample_size(size, samples, iters)
  max_size = CHI_SQUARED_CRITICAL_VALUES.size - 1
  unless size.between?(1, max_size)
    raise ArgumentError, "size must be in 1..#{max_size}, got #{size}"
  end

  ary = Array.new(size) { |x| x }
  counts = Array.new(size, 0)
  # Float division: integer division would truncate the expected count
  # whenever iters * samples is not a multiple of size.
  expected = (iters * samples).fdiv(size)

  iters.times do
    ary.sample(samples).each do |sample|
      counts[sample] += 1
    end
  end

  chi_squared = counts.inject(0.0) do |sum, count|
    sum + ((count - expected)**2) / expected
  end

  chi_squared.should <= CHI_SQUARED_CRITICAL_VALUES[size]
end
99
+
43
100
class MyArray < Array
44
101
# The #initialize method has a different signature than Array to help
45
102
# catch places in the specs that do not assert the #initialize is not
0 commit comments