Skip to content

Commit cf36f4f

Browse files
author
Darrick J. Wong
committed
xfs: cache pages used for xfarray quicksort convergence
After quicksort picks a pivot item for a particular subsort, it walks the records in that subset from the outside in, rearranging them so that every record less than the pivot comes before it, and every record greater than the pivot comes after it. This scan has a lot of locality, so we can speed it up quite a bit by grabbing the xfile backing page and holding onto it as long as we possibly can. Doing so reduces the runtime by another 5% on the author's computer.

Signed-off-by: Darrick J. Wong <[email protected]>
Reviewed-by: Kent Overstreet <[email protected]>
Reviewed-by: Dave Chinner <[email protected]>
1 parent e5b46c7 commit cf36f4f

File tree

2 files changed

+86
-10
lines changed

2 files changed

+86
-10
lines changed

fs/xfs/scrub/xfarray.c

Lines changed: 76 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,66 @@ xfarray_qsort_push(
759759
return 0;
760760
}
761761

762+
/*
763+
* Load an element from the array into the first scratchpad and cache the page,
764+
* if possible.
765+
*/
766+
static inline int
767+
xfarray_sort_load_cached(
768+
struct xfarray_sortinfo *si,
769+
xfarray_idx_t idx,
770+
void *ptr)
771+
{
772+
loff_t idx_pos = xfarray_pos(si->array, idx);
773+
pgoff_t startpage;
774+
pgoff_t endpage;
775+
int error = 0;
776+
777+
/*
778+
* If this load would split a page, release the cached page, if any,
779+
* and perform a traditional read.
780+
*/
781+
startpage = idx_pos >> PAGE_SHIFT;
782+
endpage = (idx_pos + si->array->obj_size - 1) >> PAGE_SHIFT;
783+
if (startpage != endpage) {
784+
error = xfarray_sort_put_page(si);
785+
if (error)
786+
return error;
787+
788+
if (xfarray_sort_terminated(si, &error))
789+
return error;
790+
791+
return xfile_obj_load(si->array->xfile, ptr,
792+
si->array->obj_size, idx_pos);
793+
}
794+
795+
/* If the cached page is not the one we want, release it. */
796+
if (xfile_page_cached(&si->xfpage) &&
797+
xfile_page_index(&si->xfpage) != startpage) {
798+
error = xfarray_sort_put_page(si);
799+
if (error)
800+
return error;
801+
}
802+
803+
/*
804+
* If we don't have a cached page (and we know the load is contained
805+
* in a single page) then grab it.
806+
*/
807+
if (!xfile_page_cached(&si->xfpage)) {
808+
if (xfarray_sort_terminated(si, &error))
809+
return error;
810+
811+
error = xfarray_sort_get_page(si, startpage << PAGE_SHIFT,
812+
PAGE_SIZE);
813+
if (error)
814+
return error;
815+
}
816+
817+
memcpy(ptr, si->page_kaddr + offset_in_page(idx_pos),
818+
si->array->obj_size);
819+
return 0;
820+
}
821+
762822
/*
763823
* Sort the array elements via quicksort. This implementation incorporates
764824
* four optimizations discussed in Sedgewick:
@@ -784,6 +844,10 @@ xfarray_qsort_push(
784844
* If a small set is contained entirely within a single xfile memory page,
785845
* map the page directly and run heap sort directly on the xfile page
786846
* instead of using the load/store interface. This halves the runtime.
847+
*
848+
* 5. This optimization is specific to the implementation. When converging lo
849+
* and hi after selecting a pivot, we will try to retain the xfile memory
850+
* page between load calls, which reduces run time by 50%.
787851
*/
788852

789853
/*
@@ -865,19 +929,20 @@ xfarray_sort(
865929
* Decrement hi until it finds an a[hi] less than the
866930
* pivot value.
867931
*/
868-
error = xfarray_sort_load(si, hi, scratch);
932+
error = xfarray_sort_load_cached(si, hi, scratch);
869933
if (error)
870934
goto out_free;
871935
while (xfarray_sort_cmp(si, scratch, pivot) >= 0 &&
872936
lo < hi) {
873-
if (xfarray_sort_terminated(si, &error))
874-
goto out_free;
875-
876937
hi--;
877-
error = xfarray_sort_load(si, hi, scratch);
938+
error = xfarray_sort_load_cached(si, hi,
939+
scratch);
878940
if (error)
879941
goto out_free;
880942
}
943+
error = xfarray_sort_put_page(si);
944+
if (error)
945+
goto out_free;
881946

882947
if (xfarray_sort_terminated(si, &error))
883948
goto out_free;
@@ -893,19 +958,20 @@ xfarray_sort(
893958
* Increment lo until it finds an a[lo] greater than
894959
* the pivot value.
895960
*/
896-
error = xfarray_sort_load(si, lo, scratch);
961+
error = xfarray_sort_load_cached(si, lo, scratch);
897962
if (error)
898963
goto out_free;
899964
while (xfarray_sort_cmp(si, scratch, pivot) <= 0 &&
900965
lo < hi) {
901-
if (xfarray_sort_terminated(si, &error))
902-
goto out_free;
903-
904966
lo++;
905-
error = xfarray_sort_load(si, lo, scratch);
967+
error = xfarray_sort_load_cached(si, lo,
968+
scratch);
906969
if (error)
907970
goto out_free;
908971
}
972+
error = xfarray_sort_put_page(si);
973+
if (error)
974+
goto out_free;
909975

910976
if (xfarray_sort_terminated(si, &error))
911977
goto out_free;

fs/xfs/scrub/xfile.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@ struct xfile_page {
1212
loff_t pos;
1313
};
1414

15+
static inline bool xfile_page_cached(const struct xfile_page *xfpage)
16+
{
17+
return xfpage->page != NULL;
18+
}
19+
20+
static inline pgoff_t xfile_page_index(const struct xfile_page *xfpage)
21+
{
22+
return xfpage->page->index;
23+
}
24+
1525
struct xfile {
1626
struct file *file;
1727
};

0 commit comments

Comments
 (0)