Skip to content

Commit 2e9cd15

Browse files
committed
Refactor sampler evenness spec and expand to more array and sample sizes.
1 parent 15f2cf5 commit 2e9cd15

File tree

2 files changed

+65
-21
lines changed

2 files changed

+65
-21
lines changed

spec/ruby/core/array/fixtures/classes.rb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,63 @@ def self.empty_recursive_array
4040
a
4141
end
4242

43+
# Chi-squared critical values for tests with n degrees of freedom at 99% confidence.
# Values obtained from the NIST Engineering Statistics Handbook at
# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
#
# The array is indexed by degrees of freedom: CHI_SQUARED_CRITICAL_VALUES[n] is
# the critical value for n degrees of freedom, for n in 1..100. Index 0 holds a
# placeholder 0 so that indices line up with degrees of freedom.
# Frozen so that specs sharing this table cannot modify it by accident.
CHI_SQUARED_CRITICAL_VALUES = [
  0,
  6.635, 9.210, 11.345, 13.277, 15.086, 16.812, 18.475, 20.090, 21.666, 23.209,
  24.725, 26.217, 27.688, 29.141, 30.578, 32.000, 33.409, 34.805, 36.191, 37.566,
  38.932, 40.289, 41.638, 42.980, 44.314, 45.642, 46.963, 48.278, 49.588, 50.892,
  52.191, 53.486, 54.776, 56.061, 57.342, 58.619, 59.893, 61.162, 62.428, 63.691,
  64.950, 66.206, 67.459, 68.710, 69.957, 71.201, 72.443, 73.683, 74.919, 76.154,
  77.386, 78.616, 79.843, 81.069, 82.292, 83.513, 84.733, 85.950, 87.166, 88.379,
  89.591, 90.802, 92.010, 93.217, 94.422, 95.626, 96.828, 98.028, 99.228, 100.425,
  101.621, 102.816, 104.010, 105.202, 106.393, 107.583, 108.771, 109.958, 111.144, 112.329,
  113.512, 114.695, 115.876, 117.057, 118.236, 119.414, 120.591, 121.767, 122.942, 124.116,
  125.289, 126.462, 127.633, 128.803, 129.973, 131.141, 132.309, 133.476, 134.642, 135.807,
].freeze
60+
61+
# Asserts that every position of the Array#sample result is evenly distributed.
#
# Builds the array [0, 1, ..., size - 1]. For each result position i in
# 0...samples it calls `ary.sample(samples)` `iters` times, tallies which
# element landed at position i, computes the chi-squared statistic of the
# tallies against a uniform expectation, and asserts that it does not exceed
# CHI_SQUARED_CRITICAL_VALUES[size].
#
# size    - number of elements in the sampled array. Must be a valid index of
#           CHI_SQUARED_CRITICAL_VALUES; otherwise IndexError is raised before
#           any sampling is done.
# samples - number of elements requested from each Array#sample call; one
#           assertion is made per position. Expected not to exceed size, since
#           position i must exist in every sample.
# iters   - number of Array#sample calls made per position.
#
# NOTE(review): a goodness-of-fit test over `size` categories conventionally has
# size - 1 degrees of freedom, so looking the limit up by `size` is slightly
# more lenient than the stated 99% level - confirm this is intentional.
def self.measure_sample_fairness(size, samples, iters)
  # Look the limit up first so an unsupported size fails immediately and clearly
  # instead of comparing against nil after all the sampling has run.
  critical_value = CHI_SQUARED_CRITICAL_VALUES.fetch(size)
  ary = (0...size).to_a
  # Float division: integer division would truncate the expected count whenever
  # iters is not a multiple of size and skew the statistic.
  expected = iters.to_f / size

  samples.times do |i|
    counts = Array.new(size, 0)
    iters.times { counts[ary.sample(samples)[i]] += 1 }

    chi_squared = counts.sum(0.0) { |count| ((count - expected)**2) / expected }

    chi_squared.should <= critical_value
  end
end
78+
79+
# Asserts that Array#sample picks every element about equally often when each
# call requests many elements.
#
# Builds the array [0, 1, ..., size - 1], calls `ary.sample(samples)` `iters`
# times, and tallies every returned element regardless of its position. It then
# computes the chi-squared statistic of the tallies against a uniform
# expectation of iters * samples / size hits per element and asserts that it
# does not exceed CHI_SQUARED_CRITICAL_VALUES[size].
#
# size    - number of elements in the sampled array. Must be a valid index of
#           CHI_SQUARED_CRITICAL_VALUES; otherwise IndexError is raised before
#           any sampling is done.
# samples - number of elements requested from each Array#sample call.
# iters   - number of Array#sample calls.
#
# NOTE(review): a goodness-of-fit test over `size` categories conventionally has
# size - 1 degrees of freedom, so looking the limit up by `size` is slightly
# more lenient than the stated 99% level - confirm this is intentional.
def self.measure_sample_fairness_large_sample_size(size, samples, iters)
  # Look the limit up first so an unsupported size fails immediately and clearly
  # instead of comparing against nil after all the sampling has run.
  critical_value = CHI_SQUARED_CRITICAL_VALUES.fetch(size)
  ary = (0...size).to_a
  counts = Array.new(size, 0)
  # Float division: integer division would truncate the expected count whenever
  # iters * samples is not a multiple of size and skew the statistic.
  expected = (iters * samples).to_f / size

  iters.times do
    ary.sample(samples).each { |sample| counts[sample] += 1 }
  end

  chi_squared = counts.sum(0.0) { |count| ((count - expected)**2) / expected }

  # The limit is the 99% critical value stored at index `size` of the table.
  chi_squared.should <= critical_value
end
99+
43100
class MyArray < Array
44101
# The #initialize method has a different signature than Array to help
45102
# catch places in the specs that do not assert the #initialize is not

spec/ruby/core/array/sample_spec.rb

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,14 @@
33

44
describe "Array#sample" do
55
it "samples evenly" do
6-
ary = [0, 1, 2, 3]
7-
3.times do |i|
8-
counts = [0, 0, 0, 0]
9-
iters = 4000
10-
expected = iters / counts.size
11-
iters.times do
12-
x = ary.sample(3)[i]
13-
counts[x] += 1
14-
end
15-
chi_squared = 0.0
16-
counts.each do |count|
17-
chi_squared += (((count - expected) ** 2) * 1.0 / expected)
18-
end
19-
20-
# Chi squared critical values for tests with 4 degrees of freedom
21-
# Values obtained from NIST Engineering Statistic Handbook at
22-
# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
23-
24-
chi_squared.should <= 9.488
25-
chi_squared.should >= 0.711
26-
end
6+
ArraySpecs.measure_sample_fairness(4, 1, 4000)
7+
ArraySpecs.measure_sample_fairness(4, 2, 4000)
8+
ArraySpecs.measure_sample_fairness(4, 3, 4000)
9+
ArraySpecs.measure_sample_fairness(40, 3, 4000)
10+
ArraySpecs.measure_sample_fairness(40, 4, 4000)
11+
ArraySpecs.measure_sample_fairness(40, 8, 4000)
12+
ArraySpecs.measure_sample_fairness(40, 16, 4000)
13+
ArraySpecs.measure_sample_fairness_large_sample_size(100, 80, 40000)
2714
end
2815

2916
it "returns nil for an empty Array" do

0 commit comments

Comments
 (0)