Skip to content

Commit 6274a92

Browse files
committed
[GR-17457] Array sample fix
PullRequest: truffleruby/3356
2 parents b0247d0 + 4c39a12 commit 6274a92

File tree

3 files changed

+74
-13
lines changed

3 files changed

+74
-13
lines changed

spec/ruby/core/array/fixtures/classes.rb

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,68 @@ def self.empty_recursive_array
4040
a
4141
end
4242

43+
# Chi squared critical values for tests with n degrees of freedom at 99% confidence.
44+
# Values obtained from NIST Engineering Statistic Handbook at
45+
# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
46+
47+
CHI_SQUARED_CRITICAL_VALUES = [
48+
0,
49+
6.635, 9.210, 11.345, 13.277, 15.086, 16.812, 18.475, 20.090, 21.666, 23.209,
50+
24.725, 26.217, 27.688, 29.141, 30.578, 32.000, 33.409, 34.805, 36.191, 37.566,
51+
38.932, 40.289, 41.638, 42.980, 44.314, 45.642, 46.963, 48.278, 49.588, 50.892,
52+
52.191, 53.486, 54.776, 56.061, 57.342, 58.619, 59.893, 61.162, 62.428, 63.691,
53+
64.950, 66.206, 67.459, 68.710, 69.957, 71.201, 72.443, 73.683, 74.919, 76.154,
54+
77.386, 78.616, 79.843, 81.069, 82.292, 83.513, 84.733, 85.950, 87.166, 88.379,
55+
89.591, 90.802, 92.010, 93.217, 94.422, 95.626, 96.828, 98.028, 99.228, 100.425,
56+
101.621, 102.816, 104.010, 105.202, 106.393, 107.583, 108.771, 109.958, 111.144, 112.329,
57+
113.512, 114.695, 115.876, 117.057, 118.236, 119.414, 120.591, 121.767, 122.942, 124.116,
58+
125.289, 126.462, 127.633, 128.803, 129.973, 131.141, 132.309, 133.476, 134.642, 135.807,
59+
]
60+
61+
def self.measure_sample_fairness(size, samples, iters)
62+
ary = Array.new(size) { |x| x }
63+
(samples).times do |i|
64+
chi_results = []
65+
3.times do
66+
counts = Array.new(size) { 0 }
67+
expected = iters / size
68+
iters.times do
69+
x = ary.sample(samples)[i]
70+
counts[x] += 1
71+
end
72+
chi_squared = 0.0
73+
counts.each do |count|
74+
chi_squared += (((count - expected) ** 2) * 1.0 / expected)
75+
end
76+
chi_results << chi_squared
77+
break if chi_squared <= CHI_SQUARED_CRITICAL_VALUES[size]
78+
end
79+
80+
chi_results.min.should <= CHI_SQUARED_CRITICAL_VALUES[size]
81+
end
82+
end
83+
84+
def self.measure_sample_fairness_large_sample_size(size, samples, iters)
85+
ary = Array.new(size) { |x| x }
86+
counts = Array.new(size) { 0 }
87+
expected = iters * samples / size
88+
iters.times do
89+
ary.sample(samples).each do |sample|
90+
counts[sample] += 1
91+
end
92+
end
93+
chi_squared = 0.0
94+
counts.each do |count|
95+
chi_squared += (((count - expected) ** 2) * 1.0 / expected)
96+
end
97+
98+
# Chi squared critical values for tests with 4 degrees of freedom
99+
# Values obtained from NIST Engineering Statistic Handbook at
100+
# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
101+
102+
chi_squared.should <= CHI_SQUARED_CRITICAL_VALUES[size]
103+
end
104+
43105
class MyArray < Array
44106
# The #initialize method has a different signature than Array to help
45107
# catch places in the specs that do not assert the #initialize is not

spec/ruby/core/array/sample_spec.rb

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,14 @@
33

44
describe "Array#sample" do
55
it "samples evenly" do
6-
ary = [0, 1, 2, 3]
7-
3.times do |i|
8-
counts = [0, 0, 0, 0]
9-
4000.times do
10-
counts[ary.sample(3)[i]] += 1
11-
end
12-
counts.each do |count|
13-
(800..1200).should include(count)
14-
end
15-
end
6+
ArraySpecs.measure_sample_fairness(4, 1, 4000)
7+
ArraySpecs.measure_sample_fairness(4, 2, 4000)
8+
ArraySpecs.measure_sample_fairness(4, 3, 4000)
9+
ArraySpecs.measure_sample_fairness(40, 3, 4000)
10+
ArraySpecs.measure_sample_fairness(40, 4, 4000)
11+
ArraySpecs.measure_sample_fairness(40, 8, 4000)
12+
ArraySpecs.measure_sample_fairness(40, 16, 4000)
13+
ArraySpecs.measure_sample_fairness_large_sample_size(100, 80, 40000)
1614
end
1715

1816
it "returns nil for an empty Array" do

src/main/ruby/truffleruby/core/array.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,11 +1073,12 @@ def sample(count = undefined, random: nil)
10731073
end
10741074

10751075
private def sample_many_swap(count, rng)
1076-
# linear dependence on array size, therefore very slow for small count / size
1076+
# linear dependence on count,
10771077
result = Array.new(self)
10781078

1079-
count.times do |c|
1080-
result.__send__ :swap, c, rng.rand(size)
1079+
count.times do |i|
1080+
r = i + rng.rand(result.size - i)
1081+
result.__send__ :swap, i, r
10811082
end
10821083

10831084
count == size ? result : result[0, count]

0 commit comments

Comments
 (0)