@@ -545,6 +545,87 @@ xfarray_isort(
545
545
return xfile_obj_store (si -> array -> xfile , scratch , len , lo_pos );
546
546
}
547
547
548
+ /* Grab a page for sorting records. */
549
+ static inline int
550
+ xfarray_sort_get_page (
551
+ struct xfarray_sortinfo * si ,
552
+ loff_t pos ,
553
+ uint64_t len )
554
+ {
555
+ int error ;
556
+
557
+ error = xfile_get_page (si -> array -> xfile , pos , len , & si -> xfpage );
558
+ if (error )
559
+ return error ;
560
+
561
+ /*
562
+ * xfile pages must never be mapped into userspace, so we skip the
563
+ * dcache flush when mapping the page.
564
+ */
565
+ si -> page_kaddr = kmap_local_page (si -> xfpage .page );
566
+ return 0 ;
567
+ }
568
+
569
+ /* Release a page we grabbed for sorting records. */
570
+ static inline int
571
+ xfarray_sort_put_page (
572
+ struct xfarray_sortinfo * si )
573
+ {
574
+ if (!si -> page_kaddr )
575
+ return 0 ;
576
+
577
+ kunmap_local (si -> page_kaddr );
578
+ si -> page_kaddr = NULL ;
579
+
580
+ return xfile_put_page (si -> array -> xfile , & si -> xfpage );
581
+ }
582
+
583
+ /* Decide if these records are eligible for in-page sorting. */
584
+ static inline bool
585
+ xfarray_want_pagesort (
586
+ struct xfarray_sortinfo * si ,
587
+ xfarray_idx_t lo ,
588
+ xfarray_idx_t hi )
589
+ {
590
+ pgoff_t lo_page ;
591
+ pgoff_t hi_page ;
592
+ loff_t end_pos ;
593
+
594
+ /* We can only map one page at a time. */
595
+ lo_page = xfarray_pos (si -> array , lo ) >> PAGE_SHIFT ;
596
+ end_pos = xfarray_pos (si -> array , hi ) + si -> array -> obj_size - 1 ;
597
+ hi_page = end_pos >> PAGE_SHIFT ;
598
+
599
+ return lo_page == hi_page ;
600
+ }
601
+
602
+ /* Sort a bunch of records that all live in the same memory page. */
603
+ STATIC int
604
+ xfarray_pagesort (
605
+ struct xfarray_sortinfo * si ,
606
+ xfarray_idx_t lo ,
607
+ xfarray_idx_t hi )
608
+ {
609
+ void * startp ;
610
+ loff_t lo_pos = xfarray_pos (si -> array , lo );
611
+ uint64_t len = xfarray_pos (si -> array , hi - lo );
612
+ int error = 0 ;
613
+
614
+ trace_xfarray_pagesort (si , lo , hi );
615
+
616
+ xfarray_sort_bump_loads (si );
617
+ error = xfarray_sort_get_page (si , lo_pos , len );
618
+ if (error )
619
+ return error ;
620
+
621
+ xfarray_sort_bump_heapsorts (si );
622
+ startp = si -> page_kaddr + offset_in_page (lo_pos );
623
+ sort (startp , hi - lo + 1 , si -> array -> obj_size , si -> cmp_fn , NULL );
624
+
625
+ xfarray_sort_bump_stores (si );
626
+ return xfarray_sort_put_page (si );
627
+ }
628
+
548
629
/* Return a pointer to the xfarray pivot record within the sortinfo struct. */
549
630
static inline void * xfarray_sortinfo_pivot (struct xfarray_sortinfo * si )
550
631
{
@@ -699,6 +780,10 @@ xfarray_qsort_push(
699
780
* 4. For small sets, load the records into the scratchpad and run heapsort on
700
781
* them because that is very fast. In the author's experience, this yields
701
782
* a ~10% reduction in runtime.
783
+ *
784
+ * If a small set is contained entirely within a single xfile memory page,
785
+ * map the page and run heap sort directly on the xfile page
786
+ * instead of using the load/store interface. This halves the runtime.
702
787
*/
703
788
704
789
/*
@@ -744,6 +829,18 @@ xfarray_sort(
744
829
continue ;
745
830
}
746
831
832
+ /*
833
+ * If directly mapping the page and sorting can solve our
834
+ * problems, we're done.
835
+ */
836
+ if (xfarray_want_pagesort (si , lo , hi )) {
837
+ error = xfarray_pagesort (si , lo , hi );
838
+ if (error )
839
+ goto out_free ;
840
+ si -> stack_depth -- ;
841
+ continue ;
842
+ }
843
+
747
844
/* If insertion sort can solve our problems, we're done. */
748
845
if (xfarray_want_isort (si , lo , hi )) {
749
846
error = xfarray_isort (si , lo , hi );
0 commit comments