Skip to content

Commit 0919c2b

Browse files
committed
Adapt pivot sampling to relative sought rank.
1 parent e6b7b02 commit 0919c2b

File tree

2 files changed

+249
-71
lines changed

2 files changed

+249
-71
lines changed

src/sort.rs

Lines changed: 215 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ where
2020
/// No other assumptions should be made on the ordering of the
2121
/// elements after this computation.
2222
///
23-
/// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
24-
/// - average case: O(`n`);
25-
/// - worst case: O(`n`^2);
26-
/// where n is the number of elements in the array.
23+
/// This method performs a variant of [Sesquickselect] with pivot samples of 5 equally spaced
24+
/// elements around the center.
25+
///
26+
/// [Sesquickselect]: https://www.wild-inter.net/publications/martinez-nebel-wild-2019.pdf
2727
///
2828
/// **Panics** if `i` is greater than or equal to `n`.
2929
fn get_from_sorted_mut(&mut self, i: usize) -> A
@@ -48,9 +48,65 @@ where
4848
S: DataMut,
4949
S2: Data<Elem = usize>;
5050

51-
/// Partitions the array in increasing order at two skewed pivot values as 1st and 3rd element
52-
/// of a sorted sample of 5 equally spaced elements around the center and returns their indexes.
53-
/// For arrays of less than 42 elements the outermost elements serve as sample for pivot values.
51+
/// Sorts a sample of 5 equally spaced elements around the center and returns their indexes.
52+
///
53+
/// **Panics** for arrays of less than 7 elements.
54+
fn sample_mut(&mut self) -> [usize; 5]
55+
where
56+
A: Ord + Clone,
57+
S: DataMut;
58+
59+
/// Partitions the array in increasing order based on the value initially
60+
/// located at `pivot_index` and returns the new index of the value.
61+
///
62+
/// The elements are rearranged in such a way that the value initially
63+
/// located at `pivot_index` is moved to the position it would be in an
64+
/// array sorted in increasing order. The return value is the new index of
65+
/// the value after rearrangement. All elements smaller than the value are
66+
/// moved to its left and all elements equal or greater than the value are
67+
/// moved to its right. The ordering of the elements in the two partitions
68+
/// is undefined.
69+
///
70+
/// `self` is shuffled **in place** to operate the desired partition:
71+
/// no copy of the array is allocated.
72+
///
73+
/// The method uses Hoare's partition algorithm.
74+
/// Complexity: O(`n`), where `n` is the number of elements in the array.
75+
/// Average number of element swaps: n/6 - 1/3 (see
76+
/// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550))
77+
///
78+
/// **Panics** if `pivot_index` is greater than or equal to `n`.
79+
///
80+
/// # Example
81+
///
82+
/// ```
83+
/// use ndarray::array;
84+
/// use ndarray_stats::Sort1dExt;
85+
///
86+
/// let mut data = array![3, 1, 4, 5, 2];
87+
/// let pivot_index = 2;
88+
/// let pivot_value = data[pivot_index];
89+
///
90+
/// // Partition by the value located at `pivot_index`.
91+
/// let new_index = data.partition_mut(pivot_index);
92+
/// // The pivot value is now located at `new_index`.
93+
/// assert_eq!(data[new_index], pivot_value);
94+
/// // Elements less than that value are moved to the left.
95+
/// for i in 0..new_index {
96+
/// assert!(data[i] < pivot_value);
97+
/// }
98+
/// // Elements greater than or equal to that value are moved to the right.
99+
/// for i in (new_index + 1)..data.len() {
100+
/// assert!(data[i] >= pivot_value);
101+
/// }
102+
/// ```
103+
fn partition_mut(&mut self, pivot_index: usize) -> usize
104+
where
105+
A: Ord + Clone,
106+
S: DataMut;
107+
108+
/// Partitions the array in increasing order based on the values initially located at the two
109+
/// pivot indexes `lower` and `upper` and returns the new indexes of their values.
54110
///
55111
/// The elements are rearranged in such a way that the two pivot values are moved to the indexes
56112
/// they would be in an array sorted in increasing order. The return values are the new indexes
@@ -60,22 +116,24 @@ where
60116
///
61117
/// The array is shuffled **in place**, no copy of the array is allocated.
62118
///
63-
/// This method performs [dual-pivot partitioning] with skewed pivot sampling.
119+
/// This method performs [dual-pivot partitioning].
64120
///
65121
/// [dual-pivot partitioning]: https://www.wild-inter.net/publications/wild-2018b.pdf
66122
///
123+
/// **Panics** if `lower` or `upper` is out of bound.
124+
///
67125
/// # Example
68126
///
69127
/// ```
70128
/// use ndarray::array;
71129
/// use ndarray_stats::Sort1dExt;
72130
///
73131
/// let mut data = array![3, 1, 4, 5, 2];
74-
/// // Sorted pivot values.
75-
/// let (lower_value, upper_value) = (data[data.len() - 1], data[0]);
132+
/// // Skewed pivot values.
133+
/// let (lower_value, upper_value) = (1, 5);
76134
///
77-
/// // Partitions by the values located at `0` and `data.len() - 1`.
78-
/// let (lower_index, upper_index) = data.partition_mut();
135+
/// // Partitions by the values located at `1` and `3`.
136+
/// let (lower_index, upper_index) = data.dual_partition_mut(1, 3);
79137
/// // The pivot values are now located at `lower_index` and `upper_index`.
80138
/// assert_eq!(data[lower_index], lower_value);
81139
/// assert_eq!(data[upper_index], upper_value);
@@ -94,7 +152,7 @@ where
94152
/// assert!(upper_value <= data[i]);
95153
/// }
96154
/// ```
97-
fn partition_mut(&mut self) -> (usize, usize)
155+
fn dual_partition_mut(&mut self, lower: usize, upper: usize) -> (usize, usize)
98156
where
99157
A: Ord + Clone,
100158
S: DataMut;
@@ -112,23 +170,67 @@ where
112170
S: DataMut,
113171
{
114172
let n = self.len();
115-
if n == 1 {
116-
self[0].clone()
173+
// Recursion cutoff at integer multiple of sample space divider of 7 elements.
174+
if n < 21 {
175+
for mut index in 1..n {
176+
while index > 0 && self[index - 1] > self[index] {
177+
self.swap(index - 1, index);
178+
index -= 1;
179+
}
180+
}
181+
self[i].clone()
117182
} else {
118-
let (lower_index, upper_index) = self.partition_mut();
119-
if i < lower_index {
120-
self.slice_axis_mut(Axis(0), Slice::from(..lower_index))
121-
.get_from_sorted_mut(i)
122-
} else if i == lower_index {
123-
self[i].clone()
124-
} else if i < upper_index {
125-
self.slice_axis_mut(Axis(0), Slice::from(lower_index + 1..upper_index))
126-
.get_from_sorted_mut(i - (lower_index + 1))
127-
} else if i == upper_index {
128-
self[i].clone()
183+
// Adapt pivot sampling to relative sought rank and switch from dual-pivot to
184+
// single-pivot partitioning for extreme sought ranks.
185+
let sought_rank = i as f64 / n as f64;
186+
if (0.036..=0.964).contains(&sought_rank) {
187+
let (lower_index, upper_index) = if sought_rank <= 0.5 {
188+
if sought_rank <= 0.153 {
189+
(0, 1) // (0, 0, 3)
190+
} else {
191+
(0, 2) // (0, 1, 2)
192+
}
193+
} else {
194+
if sought_rank <= 0.847 {
195+
(2, 4) // (2, 1, 0)
196+
} else {
197+
(3, 4) // (3, 0, 0)
198+
}
199+
};
200+
let sample = self.sample_mut();
201+
let (lower_index, upper_index) =
202+
self.dual_partition_mut(sample[lower_index], sample[upper_index]);
203+
if i < lower_index {
204+
self.slice_axis_mut(Axis(0), Slice::from(..lower_index))
205+
.get_from_sorted_mut(i)
206+
} else if i == lower_index {
207+
self[i].clone()
208+
} else if i < upper_index {
209+
self.slice_axis_mut(Axis(0), Slice::from(lower_index + 1..upper_index))
210+
.get_from_sorted_mut(i - (lower_index + 1))
211+
} else if i == upper_index {
212+
self[i].clone()
213+
} else {
214+
self.slice_axis_mut(Axis(0), Slice::from(upper_index + 1..))
215+
.get_from_sorted_mut(i - (upper_index + 1))
216+
}
129217
} else {
130-
self.slice_axis_mut(Axis(0), Slice::from(upper_index + 1..))
131-
.get_from_sorted_mut(i - (upper_index + 1))
218+
let sample = self.sample_mut();
219+
let pivot_index = if sought_rank <= 0.5 {
220+
0 // (0, 4)
221+
} else {
222+
4 // (4, 0)
223+
};
224+
let pivot_index = self.partition_mut(sample[pivot_index]);
225+
if i < pivot_index {
226+
self.slice_axis_mut(Axis(0), Slice::from(..pivot_index))
227+
.get_from_sorted_mut(i)
228+
} else if i == pivot_index {
229+
self[i].clone()
230+
} else {
231+
self.slice_axis_mut(Axis(0), Slice::from(pivot_index + 1..))
232+
.get_from_sorted_mut(i - (pivot_index + 1))
233+
}
132234
}
133235
}
134236
}
@@ -146,38 +248,80 @@ where
146248
get_many_from_sorted_mut_unchecked(self, &deduped_indexes)
147249
}
148250

149-
fn partition_mut(&mut self) -> (usize, usize)
251+
fn sample_mut(&mut self) -> [usize; 5]
150252
where
151253
A: Ord + Clone,
152254
S: DataMut,
153255
{
154-
let lowermost = 0;
155-
let uppermost = self.len() - 1;
156-
if self.len() < 42 {
157-
// Sort outermost elements and use them as pivots.
158-
if self[lowermost] > self[uppermost] {
159-
self.swap(lowermost, uppermost);
256+
// Space between sample indexes.
257+
let space = self.len() / 7;
258+
assert!(space > 0, "Cannot sample array of less then 7 elements");
259+
// Initialize sample indexes with their lowermost index.
260+
let mut samples = [self.len() / 2 - 2 * space; 5];
261+
// Equally space sample indexes and sort by their values by looking up their indexes.
262+
for mut index in 1..samples.len() {
263+
// Equally space sample indexes based on their lowermost index.
264+
samples[index] += index * space;
265+
// Insertion sort looking up only the already equally spaced sample indexes.
266+
while index > 0 && self[samples[index - 1]] > self[samples[index]] {
267+
self.swap(samples[index - 1], samples[index]);
268+
index -= 1;
160269
}
161-
} else {
162-
// Sample indexes of 5 evenly spaced elements around the center element.
163-
let mut samples = [0; 5];
164-
// Assume array of at least 7 elements.
165-
let seventh = self.len() / (samples.len() + 2);
166-
samples[2] = self.len() / 2;
167-
samples[1] = samples[2] - seventh;
168-
samples[0] = samples[1] - seventh;
169-
samples[3] = samples[2] + seventh;
170-
samples[4] = samples[3] + seventh;
171-
// Use insertion sort for sample elements by looking up their indexes.
172-
for mut index in 1..samples.len() {
173-
while index > 0 && self[samples[index - 1]] > self[samples[index]] {
174-
self.swap(samples[index - 1], samples[index]);
175-
index -= 1;
270+
}
271+
samples
272+
}
273+
274+
fn partition_mut(&mut self, pivot_index: usize) -> usize
275+
where
276+
A: Ord + Clone,
277+
S: DataMut,
278+
{
279+
let pivot_value = self[pivot_index].clone();
280+
self.swap(pivot_index, 0);
281+
let n = self.len();
282+
let mut i = 1;
283+
let mut j = n - 1;
284+
loop {
285+
loop {
286+
if i > j {
287+
break;
176288
}
289+
if self[i] >= pivot_value {
290+
break;
291+
}
292+
i += 1;
293+
}
294+
while pivot_value <= self[j] {
295+
if j == 1 {
296+
break;
297+
}
298+
j -= 1;
299+
}
300+
if i >= j {
301+
break;
302+
} else {
303+
self.swap(i, j);
304+
i += 1;
305+
j -= 1;
177306
}
178-
// Use 1st and 3rd element of sorted sample as skewed pivots.
179-
self.swap(lowermost, samples[0]);
180-
self.swap(uppermost, samples[2]);
307+
}
308+
self.swap(0, i - 1);
309+
i - 1
310+
}
311+
312+
fn dual_partition_mut(&mut self, lower: usize, upper: usize) -> (usize, usize)
313+
where
314+
A: Ord + Clone,
315+
S: DataMut,
316+
{
317+
let lowermost = 0;
318+
let uppermost = self.len() - 1;
319+
// Swap pivots with outermost elements.
320+
self.swap(lowermost, lower);
321+
self.swap(uppermost, upper);
322+
if self[lowermost] > self[uppermost] {
323+
// Sort pivots instead of panicking via assertion.
324+
self.swap(lowermost, uppermost);
181325
}
182326
// Increasing running and partition index starting after lower pivot.
183327
let mut index = lowermost + 1;
@@ -274,16 +418,26 @@ fn _get_many_from_sorted_mut_unchecked<A>(
274418
return;
275419
}
276420

277-
// At this point, `n >= 1` since `indexes.len() >= 1`.
278-
if n == 1 {
279-
// We can only reach this point if `indexes.len() == 1`, so we only
280-
// need to assign the single value, and then we're done.
281-
debug_assert_eq!(indexes.len(), 1);
282-
values[0] = array[0].clone();
421+
// Recursion cutoff at integer multiple of sample space divider of 7 elements.
422+
if n < 21 {
423+
for mut index in 1..n {
424+
while index > 0 && array[index - 1] > array[index] {
425+
array.swap(index - 1, index);
426+
index -= 1;
427+
}
428+
}
429+
for (value, index) in values.iter_mut().zip(indexes.iter()) {
430+
*value = array[*index].clone();
431+
}
283432
return;
284433
}
285434

286-
// We partition the array with respect to the two pivot values. The pivot values move to
435+
// Since there is no single sought rank to adapt pivot sampling to, the recommended skewed pivot
436+
// sampling of dual-pivot Quicksort is used.
437+
let sample = array.sample_mut();
438+
let (lower_index, upper_index) = (sample[0], sample[2]); // (0, 1, 2)
439+
440+
// We partition the array with respect to the two pivot values. The pivot values move to the new
287441
// `lower_index` and `upper_index`.
288442
//
289443
// Elements strictly less than the lower pivot value have indexes < `lower_index`.
@@ -292,7 +446,7 @@ fn _get_many_from_sorted_mut_unchecked<A>(
292446
// value have indexes > `lower_index` and < `upper_index`.
293447
//
294448
// Elements less than or equal the upper pivot value have indexes > `upper_index`.
295-
let (lower_index, upper_index) = array.partition_mut();
449+
let (lower_index, upper_index) = array.dual_partition_mut(lower_index, upper_index);
296450

297451
// We use a divide-and-conquer strategy, splitting the indexes we are searching for (`indexes`)
298452
// and the corresponding portions of the output slice (`values`) into partitions with respect to

0 commit comments

Comments
 (0)