@@ -20,10 +20,10 @@ where
20
20
/// No other assumptions should be made on the ordering of the
21
21
/// elements after this computation.
22
22
///
23
- /// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
24
- /// - average case: O(`n`);
25
- /// - worst case: O(`n`^2);
26
- /// where n is the number of elements in the array.
23
+ /// This method performs a variant of [Sesquickselect] with pivot samples of 5 equally spaced
24
+ /// elements around the center.
25
+ ///
26
+ /// [Sesquickselect]: https://www.wild-inter.net/publications/martinez-nebel-wild-2019.pdf
27
27
///
28
28
/// **Panics** if `i` is greater than or equal to `n`.
29
29
fn get_from_sorted_mut ( & mut self , i : usize ) -> A
48
48
S : DataMut ,
49
49
S2 : Data < Elem = usize > ;
50
50
51
- /// Partitions the array in increasing order at two skewed pivot values as 1st and 3rd element
52
- /// of a sorted sample of 5 equally spaced elements around the center and returns their indexes.
53
- /// For arrays of less than 42 elements the outermost elements serve as sample for pivot values.
51
+ /// Sorts a sample of 5 equally spaced elements around the center and returns their indexes.
52
+ ///
53
+ /// **Panics** for arrays of less than 7 elements.
54
+ fn sample_mut ( & mut self ) -> [ usize ; 5 ]
55
+ where
56
+ A : Ord + Clone ,
57
+ S : DataMut ;
58
+
59
+ /// Partitions the array in increasing order based on the value initially
60
+ /// located at `pivot_index` and returns the new index of the value.
61
+ ///
62
+ /// The elements are rearranged in such a way that the value initially
63
+ /// located at `pivot_index` is moved to the position it would be in an
64
+ /// array sorted in increasing order. The return value is the new index of
65
+ /// the value after rearrangement. All elements smaller than the value are
66
+ /// moved to its left and all elements equal or greater than the value are
67
+ /// moved to its right. The ordering of the elements in the two partitions
68
+ /// is undefined.
69
+ ///
70
+ /// `self` is shuffled **in place** to operate the desired partition:
71
+ /// no copy of the array is allocated.
72
+ ///
73
+ /// The method uses Hoare's partition algorithm.
74
+ /// Complexity: O(`n`), where `n` is the number of elements in the array.
75
+ /// Average number of element swaps: n/6 - 1/3 (see
76
+ /// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550))
77
+ ///
78
+ /// **Panics** if `pivot_index` is greater than or equal to `n`.
79
+ ///
80
+ /// # Example
81
+ ///
82
+ /// ```
83
+ /// use ndarray::array;
84
+ /// use ndarray_stats::Sort1dExt;
85
+ ///
86
+ /// let mut data = array![3, 1, 4, 5, 2];
87
+ /// let pivot_index = 2;
88
+ /// let pivot_value = data[pivot_index];
89
+ ///
90
+ /// // Partition by the value located at `pivot_index`.
91
+ /// let new_index = data.partition_mut(pivot_index);
92
+ /// // The pivot value is now located at `new_index`.
93
+ /// assert_eq!(data[new_index], pivot_value);
94
+ /// // Elements less than that value are moved to the left.
95
+ /// for i in 0..new_index {
96
+ /// assert!(data[i] < pivot_value);
97
+ /// }
98
+ /// // Elements greater than or equal to that value are moved to the right.
99
+ /// for i in (new_index + 1)..data.len() {
100
+ /// assert!(data[i] >= pivot_value);
101
+ /// }
102
+ /// ```
103
+ fn partition_mut ( & mut self , pivot_index : usize ) -> usize
104
+ where
105
+ A : Ord + Clone ,
106
+ S : DataMut ;
107
+
108
+ /// Partitions the array in increasing order based on the values initially located at the two
109
+ /// pivot indexes `lower` and `upper` and returns the new indexes of their values.
54
110
///
55
111
/// The elements are rearranged in such a way that the two pivot values are moved to the indexes
56
112
/// they would be in an array sorted in increasing order. The return values are the new indexes
@@ -60,22 +116,24 @@ where
60
116
///
61
117
/// The array is shuffled **in place**, no copy of the array is allocated.
62
118
///
63
- /// This method performs [dual-pivot partitioning] with skewed pivot sampling .
119
+ /// This method performs [dual-pivot partitioning].
64
120
///
65
121
/// [dual-pivot partitioning]: https://www.wild-inter.net/publications/wild-2018b.pdf
66
122
///
123
+ /// **Panics** if `lower` or `upper` is out of bound.
124
+ ///
67
125
/// # Example
68
126
///
69
127
/// ```
70
128
/// use ndarray::array;
71
129
/// use ndarray_stats::Sort1dExt;
72
130
///
73
131
/// let mut data = array![3, 1, 4, 5, 2];
74
- /// // Sorted pivot values.
75
- /// let (lower_value, upper_value) = (data[data.len() - 1], data[0] );
132
+ /// // Skewed pivot values.
133
+ /// let (lower_value, upper_value) = (1, 5 );
76
134
///
77
- /// // Partitions by the values located at `0 ` and `data.len() - 1 `.
78
- /// let (lower_index, upper_index) = data.partition_mut( );
135
+ /// // Partitions by the values located at `1 ` and `3 `.
136
+ /// let (lower_index, upper_index) = data.dual_partition_mut(1, 3 );
79
137
/// // The pivot values are now located at `lower_index` and `upper_index`.
80
138
/// assert_eq!(data[lower_index], lower_value);
81
139
/// assert_eq!(data[upper_index], upper_value);
94
152
/// assert!(upper_value <= data[i]);
95
153
/// }
96
154
/// ```
97
- fn partition_mut ( & mut self ) -> ( usize , usize )
155
+ fn dual_partition_mut ( & mut self , lower : usize , upper : usize ) -> ( usize , usize )
98
156
where
99
157
A : Ord + Clone ,
100
158
S : DataMut ;
@@ -112,23 +170,67 @@ where
112
170
S : DataMut ,
113
171
{
114
172
let n = self . len ( ) ;
115
- if n == 1 {
116
- self [ 0 ] . clone ( )
173
+ // Recursion cutoff at integer multiple of sample space divider of 7 elements.
174
+ if n < 21 {
175
+ for mut index in 1 ..n {
176
+ while index > 0 && self [ index - 1 ] > self [ index] {
177
+ self . swap ( index - 1 , index) ;
178
+ index -= 1 ;
179
+ }
180
+ }
181
+ self [ i] . clone ( )
117
182
} else {
118
- let ( lower_index, upper_index) = self . partition_mut ( ) ;
119
- if i < lower_index {
120
- self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( ..lower_index) )
121
- . get_from_sorted_mut ( i)
122
- } else if i == lower_index {
123
- self [ i] . clone ( )
124
- } else if i < upper_index {
125
- self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( lower_index + 1 ..upper_index) )
126
- . get_from_sorted_mut ( i - ( lower_index + 1 ) )
127
- } else if i == upper_index {
128
- self [ i] . clone ( )
183
+ // Adapt pivot sampling to relative sought rank and switch from dual-pivot to
184
+ // single-pivot partitioning for extreme sought ranks.
185
+ let sought_rank = i as f64 / n as f64 ;
186
+ if ( 0.036 ..=0.964 ) . contains ( & sought_rank) {
187
+ let ( lower_index, upper_index) = if sought_rank <= 0.5 {
188
+ if sought_rank <= 0.153 {
189
+ ( 0 , 1 ) // (0, 0, 3)
190
+ } else {
191
+ ( 0 , 2 ) // (0, 1, 2)
192
+ }
193
+ } else {
194
+ if sought_rank <= 0.847 {
195
+ ( 2 , 4 ) // (2, 1, 0)
196
+ } else {
197
+ ( 3 , 4 ) // (3, 0, 0)
198
+ }
199
+ } ;
200
+ let sample = self . sample_mut ( ) ;
201
+ let ( lower_index, upper_index) =
202
+ self . dual_partition_mut ( sample[ lower_index] , sample[ upper_index] ) ;
203
+ if i < lower_index {
204
+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( ..lower_index) )
205
+ . get_from_sorted_mut ( i)
206
+ } else if i == lower_index {
207
+ self [ i] . clone ( )
208
+ } else if i < upper_index {
209
+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( lower_index + 1 ..upper_index) )
210
+ . get_from_sorted_mut ( i - ( lower_index + 1 ) )
211
+ } else if i == upper_index {
212
+ self [ i] . clone ( )
213
+ } else {
214
+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( upper_index + 1 ..) )
215
+ . get_from_sorted_mut ( i - ( upper_index + 1 ) )
216
+ }
129
217
} else {
130
- self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( upper_index + 1 ..) )
131
- . get_from_sorted_mut ( i - ( upper_index + 1 ) )
218
+ let sample = self . sample_mut ( ) ;
219
+ let pivot_index = if sought_rank <= 0.5 {
220
+ 0 // (0, 4)
221
+ } else {
222
+ 4 // (4, 0)
223
+ } ;
224
+ let pivot_index = self . partition_mut ( sample[ pivot_index] ) ;
225
+ if i < pivot_index {
226
+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( ..pivot_index) )
227
+ . get_from_sorted_mut ( i)
228
+ } else if i == pivot_index {
229
+ self [ i] . clone ( )
230
+ } else {
231
+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( pivot_index + 1 ..) )
232
+ . get_from_sorted_mut ( i - ( pivot_index + 1 ) )
233
+ }
132
234
}
133
235
}
134
236
}
@@ -146,38 +248,80 @@ where
146
248
get_many_from_sorted_mut_unchecked ( self , & deduped_indexes)
147
249
}
148
250
149
- fn partition_mut ( & mut self ) -> ( usize , usize )
251
+ fn sample_mut ( & mut self ) -> [ usize ; 5 ]
150
252
where
151
253
A : Ord + Clone ,
152
254
S : DataMut ,
153
255
{
154
- let lowermost = 0 ;
155
- let uppermost = self . len ( ) - 1 ;
156
- if self . len ( ) < 42 {
157
- // Sort outermost elements and use them as pivots.
158
- if self [ lowermost] > self [ uppermost] {
159
- self . swap ( lowermost, uppermost) ;
256
+ // Space between sample indexes.
257
+ let space = self . len ( ) / 7 ;
258
+ assert ! ( space > 0 , "Cannot sample array of less then 7 elements" ) ;
259
+ // Initialize sample indexes with their lowermost index.
260
+ let mut samples = [ self . len ( ) / 2 - 2 * space; 5 ] ;
261
+ // Equally space sample indexes and sort by their values by looking up their indexes.
262
+ for mut index in 1 ..samples. len ( ) {
263
+ // Equally space sample indexes based on their lowermost index.
264
+ samples[ index] += index * space;
265
+ // Insertion sort looking up only the already equally spaced sample indexes.
266
+ while index > 0 && self [ samples[ index - 1 ] ] > self [ samples[ index] ] {
267
+ self . swap ( samples[ index - 1 ] , samples[ index] ) ;
268
+ index -= 1 ;
160
269
}
161
- } else {
162
- // Sample indexes of 5 evenly spaced elements around the center element.
163
- let mut samples = [ 0 ; 5 ] ;
164
- // Assume array of at least 7 elements.
165
- let seventh = self . len ( ) / ( samples. len ( ) + 2 ) ;
166
- samples[ 2 ] = self . len ( ) / 2 ;
167
- samples[ 1 ] = samples[ 2 ] - seventh;
168
- samples[ 0 ] = samples[ 1 ] - seventh;
169
- samples[ 3 ] = samples[ 2 ] + seventh;
170
- samples[ 4 ] = samples[ 3 ] + seventh;
171
- // Use insertion sort for sample elements by looking up their indexes.
172
- for mut index in 1 ..samples. len ( ) {
173
- while index > 0 && self [ samples[ index - 1 ] ] > self [ samples[ index] ] {
174
- self . swap ( samples[ index - 1 ] , samples[ index] ) ;
175
- index -= 1 ;
270
+ }
271
+ samples
272
+ }
273
+
274
+ fn partition_mut ( & mut self , pivot_index : usize ) -> usize
275
+ where
276
+ A : Ord + Clone ,
277
+ S : DataMut ,
278
+ {
279
+ let pivot_value = self [ pivot_index] . clone ( ) ;
280
+ self . swap ( pivot_index, 0 ) ;
281
+ let n = self . len ( ) ;
282
+ let mut i = 1 ;
283
+ let mut j = n - 1 ;
284
+ loop {
285
+ loop {
286
+ if i > j {
287
+ break ;
176
288
}
289
+ if self [ i] >= pivot_value {
290
+ break ;
291
+ }
292
+ i += 1 ;
293
+ }
294
+ while pivot_value <= self [ j] {
295
+ if j == 1 {
296
+ break ;
297
+ }
298
+ j -= 1 ;
299
+ }
300
+ if i >= j {
301
+ break ;
302
+ } else {
303
+ self . swap ( i, j) ;
304
+ i += 1 ;
305
+ j -= 1 ;
177
306
}
178
- // Use 1st and 3rd element of sorted sample as skewed pivots.
179
- self . swap ( lowermost, samples[ 0 ] ) ;
180
- self . swap ( uppermost, samples[ 2 ] ) ;
307
+ }
308
+ self . swap ( 0 , i - 1 ) ;
309
+ i - 1
310
+ }
311
+
312
+ fn dual_partition_mut ( & mut self , lower : usize , upper : usize ) -> ( usize , usize )
313
+ where
314
+ A : Ord + Clone ,
315
+ S : DataMut ,
316
+ {
317
+ let lowermost = 0 ;
318
+ let uppermost = self . len ( ) - 1 ;
319
+ // Swap pivots with outermost elements.
320
+ self . swap ( lowermost, lower) ;
321
+ self . swap ( uppermost, upper) ;
322
+ if self [ lowermost] > self [ uppermost] {
323
+ // Sort pivots instead of panicking via assertion.
324
+ self . swap ( lowermost, uppermost) ;
181
325
}
182
326
// Increasing running and partition index starting after lower pivot.
183
327
let mut index = lowermost + 1 ;
@@ -274,16 +418,26 @@ fn _get_many_from_sorted_mut_unchecked<A>(
274
418
return ;
275
419
}
276
420
277
- // At this point, `n >= 1` since `indexes.len() >= 1`.
278
- if n == 1 {
279
- // We can only reach this point if `indexes.len() == 1`, so we only
280
- // need to assign the single value, and then we're done.
281
- debug_assert_eq ! ( indexes. len( ) , 1 ) ;
282
- values[ 0 ] = array[ 0 ] . clone ( ) ;
421
+ // Recursion cutoff at integer multiple of sample space divider of 7 elements.
422
+ if n < 21 {
423
+ for mut index in 1 ..n {
424
+ while index > 0 && array[ index - 1 ] > array[ index] {
425
+ array. swap ( index - 1 , index) ;
426
+ index -= 1 ;
427
+ }
428
+ }
429
+ for ( value, index) in values. iter_mut ( ) . zip ( indexes. iter ( ) ) {
430
+ * value = array[ * index] . clone ( ) ;
431
+ }
283
432
return ;
284
433
}
285
434
286
- // We partition the array with respect to the two pivot values. The pivot values move to
435
+ // Since there is no single sought rank to adapt pivot sampling to, the recommended skewed pivot
436
+ // sampling of dual-pivot Quicksort is used.
437
+ let sample = array. sample_mut ( ) ;
438
+ let ( lower_index, upper_index) = ( sample[ 0 ] , sample[ 2 ] ) ; // (0, 1, 2)
439
+
440
+ // We partition the array with respect to the two pivot values. The pivot values move to the new
287
441
// `lower_index` and `upper_index`.
288
442
//
289
443
// Elements strictly less than the lower pivot value have indexes < `lower_index`.
@@ -292,7 +446,7 @@ fn _get_many_from_sorted_mut_unchecked<A>(
292
446
// value have indexes > `lower_index` and < `upper_index`.
293
447
//
294
448
// Elements less than or equal the upper pivot value have indexes > `upper_index`.
295
- let ( lower_index, upper_index) = array. partition_mut ( ) ;
449
+ let ( lower_index, upper_index) = array. dual_partition_mut ( lower_index , upper_index ) ;
296
450
297
451
// We use a divide-and-conquer strategy, splitting the indexes we are searching for (`indexes`)
298
452
// and the corresponding portions of the output slice (`values`) into partitions with respect to
0 commit comments