@@ -427,6 +427,14 @@ static inline xfarray_idx_t *xfarray_sortinfo_hi(struct xfarray_sortinfo *si)
427
427
return xfarray_sortinfo_lo (si ) + si -> max_stack_depth ;
428
428
}
429
429
430
+ /* Size of each element in the quicksort pivot array: the record size (obj_size) rounded up to a multiple of 8, plus room for one xfarray_idx_t. */
431
+ static inline size_t
432
+ xfarray_pivot_rec_sz (
433
+ struct xfarray * array )
434
+ {
435
+ return round_up (array -> obj_size , 8 ) + sizeof (xfarray_idx_t );
436
+ }
437
+
430
438
/* Allocate memory to handle the sort. */
431
439
static inline int
432
440
xfarray_sortinfo_alloc (
@@ -437,8 +445,16 @@ xfarray_sortinfo_alloc(
437
445
{
438
446
struct xfarray_sortinfo * si ;
439
447
size_t nr_bytes = sizeof (struct xfarray_sortinfo );
448
+ size_t pivot_rec_sz = xfarray_pivot_rec_sz (array );
440
449
int max_stack_depth ;
441
450
451
+ /*
452
+ * The median-of-nine pivot algorithm doesn't work if a subset has
453
+ * fewer than 9 items. Make sure the in-memory sort will always take
454
+ * over for any subset that small.
455
+ */
456
+ BUILD_BUG_ON (XFARRAY_QSORT_PIVOT_NR >= XFARRAY_ISORT_NR );
457
+
442
458
/*
443
459
* Tail-call recursion during the partitioning phase means that
444
460
* quicksort will never recurse more than log2(nr) times. We need one
@@ -453,8 +469,10 @@ xfarray_sortinfo_alloc(
453
469
/* Each level of quicksort uses a lo and a hi index */
454
470
nr_bytes += max_stack_depth * sizeof (xfarray_idx_t ) * 2 ;
455
471
456
- /* Scratchpad for in-memory sort, or one record for the pivot */
457
- nr_bytes += (XFARRAY_ISORT_NR * array -> obj_size );
472
+ /* Scratchpad for in-memory sort, or finding the pivot */
473
+ nr_bytes += max_t (size_t ,
474
+ (XFARRAY_QSORT_PIVOT_NR + 1 ) * pivot_rec_sz ,
475
+ XFARRAY_ISORT_NR * array -> obj_size );
458
476
459
477
si = kvzalloc (nr_bytes , XCHK_GFP_FLAGS );
460
478
if (!si )
@@ -632,91 +650,143 @@ static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
632
650
return xfarray_sortinfo_hi (si ) + si -> max_stack_depth ;
633
651
}
634
652
653
+ /* Return a pointer to the start of the pivot array, which begins obj_size bytes past the pointer returned by xfarray_sortinfo_pivot(). */
654
+ static inline void *
655
+ xfarray_sortinfo_pivot_array (
656
+ struct xfarray_sortinfo * si )
657
+ {
658
+ return xfarray_sortinfo_pivot (si ) + si -> array -> obj_size ;
659
+ }
660
+
661
+ /* The xfarray record is stored at the start of each pivot array element; return a pointer to element @pa_idx of the array at @pa, whose elements are @pa_recsz bytes apiece. */
662
+ static inline void *
663
+ xfarray_pivot_array_rec (
664
+ void * pa ,
665
+ size_t pa_recsz ,
666
+ unsigned int pa_idx )
667
+ {
668
+ return pa + (pa_recsz * pa_idx );
669
+ }
670
+
671
+ /* The xfarray index is stored at the end of each pivot array element; return a pointer to the last sizeof(xfarray_idx_t) bytes of element @pa_idx, found by stepping back from the start of element @pa_idx + 1. */
672
+ static inline xfarray_idx_t *
673
+ xfarray_pivot_array_idx (
674
+ void * pa ,
675
+ size_t pa_recsz ,
676
+ unsigned int pa_idx )
677
+ {
678
+ return xfarray_pivot_array_rec (pa , pa_recsz , pa_idx + 1 ) -
679
+ sizeof (xfarray_idx_t );
680
+ }
681
+
635
682
/*
636
683
* Find a pivot value for quicksort partitioning, swap it with a[lo], and save
637
684
* the cached pivot record for the next step.
638
685
*
639
- * Select the median value from a[lo], a[mid], and a[hi]. Put the median in
640
- * a[lo], the lowest in a[mid], and the highest in a[hi] . Using the median of
641
- * the three reduces the chances that we pick the worst case pivot value, since
642
- * it's likely that our array values are nearly sorted.
686
+ * Load evenly-spaced records within the given range into memory, sort them,
687
+ * and choose the pivot from the median record.  Using multiple points will
688
+ * improve the quality of the pivot selection, and hopefully avoid the worst
689
+ * quicksort behavior, since our array values are often already nearly sorted.
643
690
*/
644
691
STATIC int
645
692
xfarray_qsort_pivot (
646
693
struct xfarray_sortinfo * si ,
647
694
xfarray_idx_t lo ,
648
695
xfarray_idx_t hi )
649
696
{
650
- void * a = xfarray_sortinfo_pivot (si );
651
- void * b = xfarray_scratch (si -> array );
652
- xfarray_idx_t mid = lo + ((hi - lo ) / 2 );
697
+ void * pivot = xfarray_sortinfo_pivot (si );
698
+ void * parray = xfarray_sortinfo_pivot_array (si );
699
+ void * recp ;
700
+ xfarray_idx_t * idxp ;
701
+ xfarray_idx_t step = (hi - lo ) / (XFARRAY_QSORT_PIVOT_NR - 1 );
702
+ size_t pivot_rec_sz = xfarray_pivot_rec_sz (si -> array );
703
+ int i , j ;
653
704
int error ;
654
705
655
- /* if a[mid] < a[lo], swap a[mid] and a[lo]. */
656
- error = xfarray_sort_load (si , mid , a );
657
- if (error )
658
- return error ;
659
- error = xfarray_sort_load (si , lo , b );
660
- if (error )
661
- return error ;
662
- if (xfarray_sort_cmp (si , a , b ) < 0 ) {
663
- error = xfarray_sort_store (si , lo , a );
664
- if (error )
665
- return error ;
666
- error = xfarray_sort_store (si , mid , b );
667
- if (error )
668
- return error ;
669
- }
706
+ ASSERT (step > 0 );
670
707
671
- /* if a[hi] < a[mid], swap a[mid] and a[hi]. */
672
- error = xfarray_sort_load (si , hi , a );
673
- if (error )
674
- return error ;
675
- error = xfarray_sort_load (si , mid , b );
676
- if (error )
677
- return error ;
678
- if (xfarray_sort_cmp (si , a , b ) < 0 ) {
679
- error = xfarray_sort_store (si , mid , a );
680
- if (error )
681
- return error ;
682
- error = xfarray_sort_store (si , hi , b );
683
- if (error )
684
- return error ;
685
- } else {
686
- goto move_front ;
708
+ /*
709
+ * Load the xfarray indexes of the records we intend to sample into the
710
+ * pivot array.
711
+ */
712
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , 0 );
713
+ * idxp = lo ;
714
+ for (i = 1 ; i < XFARRAY_QSORT_PIVOT_NR - 1 ; i ++ ) {
715
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , i );
716
+ * idxp = lo + (i * step );
687
717
}
718
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz ,
719
+ XFARRAY_QSORT_PIVOT_NR - 1 );
720
+ * idxp = hi ;
688
721
689
- /* if a[mid] < a[lo], swap a[mid] and a[lo]. */
690
- error = xfarray_sort_load (si , mid , a );
691
- if (error )
692
- return error ;
693
- error = xfarray_sort_load (si , lo , b );
694
- if (error )
695
- return error ;
696
- if (xfarray_sort_cmp (si , a , b ) < 0 ) {
697
- error = xfarray_sort_store (si , lo , a );
698
- if (error )
699
- return error ;
700
- error = xfarray_sort_store (si , mid , b );
722
+ /* Load the selected xfarray records into the pivot array. */
723
+ for (i = 0 ; i < XFARRAY_QSORT_PIVOT_NR ; i ++ ) {
724
+ xfarray_idx_t idx ;
725
+
726
+ recp = xfarray_pivot_array_rec (parray , pivot_rec_sz , i );
727
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , i );
728
+
729
+ /* No unset records; load directly into the array. */
730
+ if (likely (si -> array -> unset_slots == 0 )) {
731
+ error = xfarray_sort_load (si , * idxp , recp );
732
+ if (error )
733
+ return error ;
734
+ continue ;
735
+ }
736
+
737
+ /*
738
+ * Load non-null records into the scratchpad without changing
739
+ * the xfarray_idx_t in the pivot array.
740
+ */
741
+ idx = * idxp ;
742
+ xfarray_sort_bump_loads (si );
743
+ error = xfarray_load_next (si -> array , & idx , recp );
701
744
if (error )
702
745
return error ;
703
746
}
704
747
705
- move_front :
748
+ xfarray_sort_bump_heapsorts (si );
749
+ sort (parray , XFARRAY_QSORT_PIVOT_NR , pivot_rec_sz , si -> cmp_fn , NULL );
750
+
706
751
/*
707
- * Move our selected pivot to a[lo]. Recall that a == si->pivot, so
708
- * this leaves us with the pivot cached in the sortinfo structure.
752
+ * We sorted the pivot array records (which includes the xfarray
753
+ * indices) in xfarray record order. The median element of the pivot
754
+ * array contains the xfarray record that we will use as the pivot.
755
+ * Copy that xfarray record to the designated space.
709
756
*/
710
- error = xfarray_sort_load (si , lo , b );
711
- if (error )
712
- return error ;
713
- error = xfarray_sort_load (si , mid , a );
714
- if (error )
715
- return error ;
716
- error = xfarray_sort_store (si , mid , b );
757
+ recp = xfarray_pivot_array_rec (parray , pivot_rec_sz ,
758
+ XFARRAY_QSORT_PIVOT_NR / 2 );
759
+ memcpy (pivot , recp , si -> array -> obj_size );
760
+
761
+ /* If the pivot record we chose was already in a[lo] then we're done. */
762
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz ,
763
+ XFARRAY_QSORT_PIVOT_NR / 2 );
764
+ if (* idxp == lo )
765
+ return 0 ;
766
+
767
+ /*
768
+ * Find the cached copy of a[lo] in the pivot array so that we can swap
769
+ * a[lo] and a[pivot].
770
+ */
771
+ for (i = 0 , j = -1 ; i < XFARRAY_QSORT_PIVOT_NR ; i ++ ) {
772
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , i );
773
+ if (* idxp == lo )
774
+ j = i ;
775
+ }
776
+ if (j < 0 ) {
777
+ ASSERT (j >= 0 );
778
+ return - EFSCORRUPTED ;
779
+ }
780
+
781
+ /* Swap a[lo] and a[pivot]. */
782
+ error = xfarray_sort_store (si , lo , pivot );
717
783
if (error )
718
784
return error ;
719
- return xfarray_sort_store (si , lo , a );
785
+
786
+ recp = xfarray_pivot_array_rec (parray , pivot_rec_sz , j );
787
+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz ,
788
+ XFARRAY_QSORT_PIVOT_NR / 2 );
789
+ return xfarray_sort_store (si , * idxp , recp );
720
790
}
721
791
722
792
/*
@@ -828,7 +898,7 @@ xfarray_sort_load_cached(
828
898
* particularly expensive in the kernel.
829
899
*
830
900
* 2. For arrays with records in arbitrary or user-controlled order, choose the
831
- * pivot element using a median-of-three decision tree. This reduces the
901
+ * pivot element using the median of nine sampled records. This reduces the
832
902
* probability of selecting a bad pivot value which causes worst case
833
903
* behavior (i.e. partition sizes of 1).
834
904
*
0 commit comments