@@ -131,8 +131,8 @@ static inline zmm_t cmp_merge(zmm_t in1, zmm_t in2, opmask_t mask)
131
131
return vtype::mask_mov (min, mask, max); // 0 -> min, 1 -> max
132
132
}
133
133
/*
134
- * Parition one ZMM register based on the pivot and returns the index of the
135
- * last element that is less than equal to the pivot.
134
+ * Partition one ZMM register based on the pivot and return the
135
+ * number of elements that are greater than or equal to the pivot.
136
136
*/
137
137
template <typename vtype, typename type_t , typename zmm_t >
138
138
static inline int32_t partition_vec (type_t *arr,
@@ -143,20 +143,20 @@ static inline int32_t partition_vec(type_t *arr,
143
143
zmm_t *smallest_vec,
144
144
zmm_t *biggest_vec)
145
145
{
146
- /* which elements are larger than the pivot */
147
- typename vtype::opmask_t gt_mask = vtype::ge (curr_vec, pivot_vec);
148
- int32_t amount_gt_pivot = _mm_popcnt_u32 ((int32_t )gt_mask );
146
+ /* which elements are larger than or equal to the pivot */
147
+ typename vtype::opmask_t ge_mask = vtype::ge (curr_vec, pivot_vec);
148
+ int32_t amount_ge_pivot = _mm_popcnt_u32 ((int32_t )ge_mask );
149
149
vtype::mask_compressstoreu (
150
- arr + left, vtype::knot_opmask (gt_mask ), curr_vec);
150
+ arr + left, vtype::knot_opmask (ge_mask ), curr_vec);
151
151
vtype::mask_compressstoreu (
152
- arr + right - amount_gt_pivot, gt_mask , curr_vec);
152
+ arr + right - amount_ge_pivot, ge_mask , curr_vec);
153
153
*smallest_vec = vtype::min (curr_vec, *smallest_vec);
154
154
*biggest_vec = vtype::max (curr_vec, *biggest_vec);
155
- return amount_gt_pivot ;
155
+ return amount_ge_pivot ;
156
156
}
157
157
/*
158
158
* Partition an array based on the pivot and return the index of the
159
- * last element that is less than equal to the pivot.
159
+ * first element that is greater than or equal to the pivot.
160
160
*/
161
161
template <typename vtype, typename type_t >
162
162
static inline int64_t partition_avx512 (type_t *arr,
@@ -188,7 +188,7 @@ static inline int64_t partition_avx512(type_t *arr,
188
188
189
189
if (right - left == vtype::numlanes) {
190
190
zmm_t vec = vtype::loadu (arr + left);
191
- int32_t amount_gt_pivot = partition_vec<vtype>(arr,
191
+ int32_t amount_ge_pivot = partition_vec<vtype>(arr,
192
192
left,
193
193
left + vtype::numlanes,
194
194
vec,
@@ -197,7 +197,7 @@ static inline int64_t partition_avx512(type_t *arr,
197
197
&max_vec);
198
198
*smallest = vtype::reducemin (min_vec);
199
199
*biggest = vtype::reducemax (max_vec);
200
- return left + (vtype::numlanes - amount_gt_pivot );
200
+ return left + (vtype::numlanes - amount_ge_pivot );
201
201
}
202
202
203
203
// first and last vtype::numlanes values are partitioned at the end
@@ -225,7 +225,7 @@ static inline int64_t partition_avx512(type_t *arr,
225
225
left += vtype::numlanes;
226
226
}
227
227
// partition the current vector and save it on both sides of the array
228
- int32_t amount_gt_pivot
228
+ int32_t amount_ge_pivot
229
229
= partition_vec<vtype>(arr,
230
230
l_store,
231
231
r_store + vtype::numlanes,
@@ -234,27 +234,27 @@ static inline int64_t partition_avx512(type_t *arr,
234
234
&min_vec,
235
235
&max_vec);
236
236
;
237
- r_store -= amount_gt_pivot ;
238
- l_store += (vtype::numlanes - amount_gt_pivot );
237
+ r_store -= amount_ge_pivot ;
238
+ l_store += (vtype::numlanes - amount_ge_pivot );
239
239
}
240
240
241
241
/* partition and save vec_left and vec_right */
242
- int32_t amount_gt_pivot = partition_vec<vtype>(arr,
242
+ int32_t amount_ge_pivot = partition_vec<vtype>(arr,
243
243
l_store,
244
244
r_store + vtype::numlanes,
245
245
vec_left,
246
246
pivot_vec,
247
247
&min_vec,
248
248
&max_vec);
249
- l_store += (vtype::numlanes - amount_gt_pivot );
250
- amount_gt_pivot = partition_vec<vtype>(arr,
249
+ l_store += (vtype::numlanes - amount_ge_pivot );
250
+ amount_ge_pivot = partition_vec<vtype>(arr,
251
251
l_store,
252
252
l_store + vtype::numlanes,
253
253
vec_right,
254
254
pivot_vec,
255
255
&min_vec,
256
256
&max_vec);
257
- l_store += (vtype::numlanes - amount_gt_pivot );
257
+ l_store += (vtype::numlanes - amount_ge_pivot );
258
258
*smallest = vtype::reducemin (min_vec);
259
259
*biggest = vtype::reducemax (max_vec);
260
260
return l_store;
0 commit comments