@@ -61,7 +61,7 @@ Indicates that a sorting function should use the paged merge sort
6161algorithm. Paged merge sort is a merge sort that uses different
6262merge routines to achieve stable sorting with a scratch space of size O(√n).
6363The merge routine for merging large subarrays merges
64- blocks/ pages of size O(√n) almost in place, before reordering them using a page table.
64+ pages of size O(√n) almost in place, before reordering them using a page table.
6565At deeper recursion levels, where the scratch space is big enough,
6666normal merging is used, where one input is copied into the scratch space.
6767When the scratch space is large enough to hold the complete subarray,
@@ -776,25 +776,28 @@ function merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::O
776776 end
777777end
778778
779- # macro used for block management in pagedMerge!
780- # use next block in A (left subarray) if it is free,
781- # otherwise use next block in B
782- macro getNextBlock! ()
783- quote
784- if a > nextBlockA * blocksize + lo
785- currentBlock = nextBlockA
786- nextBlockA += 1
787- else
788- currentBlock = nextBlockB
789- nextBlockB += 1
790- end
791- blockLocation[currentBlockIdx] = currentBlock
792- currentBlockIdx += 1
793- end |> esc
779+ struct Pages
780+ current:: Int # current page being merged into
781+ nextA:: Int # next possible page in A
782+ nextB:: Int # next possible page in B
794783end
795784
796- # merge v[lo:m] and v[m+1:hi] using buffer buf in O(sqrt(n)) space
797- function pagedMerge! (v:: AbstractVector{T} , lo:: Integer , m:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , blockLocation:: AbstractVector{<:Integer} ) where T
785+ next_page_A (pages:: Pages ) = Pages (pages. nextA, pages. nextA + 1 , pages. nextB)
786+ next_page_B (pages:: Pages ) = Pages (pages. nextB, pages. nextA, pages. nextB + 1 )
787+
788+ function next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
789+ if a > pages. nextA * pagesize + lo
790+ pages = next_page_A (pages)
791+ else
792+ pages = next_page_B (pages)
793+ end
794+ pageLocations[currentPageIndex] = pages. current
795+ currentPageIndex += 1
796+ pages, currentPageIndex
797+ end
798+
799+ # merge v[lo:m] (A) and v[m+1:hi] (B) using buffer buf in O(sqrt(n)) space
800+ function paged_merge! (v:: AbstractVector{T} , lo:: Integer , m:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , pageLocations:: AbstractVector{<:Integer} ) where T
798801 @assert lo < m < hi
799802 a = lo
800803 b = m + 1
@@ -813,142 +816,144 @@ function pagedMerge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer,
813816 end
814817
815818 len = lenA + lenB
816- blocksize = isqrt (len)
817- nBlocks = len ÷ blocksize
818- @assert length (buf) >= 3 blocksize
819- @assert length (blockLocation ) >= nBlocks + 1
819+ pagesize = isqrt (len)
820+ nPages = len ÷ pagesize
821+ @assert length (buf) >= 3 pagesize
822+ @assert length (pageLocations ) >= nPages + 1
820823
821- @inline getBlockOffset (block ) = (block - 1 )* blocksize + lo - 1
824+ @inline page_offset (page ) = (page - 1 )* pagesize + lo - 1
822825
823826 @inbounds begin
824827 # #################
825828 # merge
826829 # #################
827- # merge into buf until full
828- a,b,k = merge! ((_,_,k) -> k<= 3 blocksize,buf,v,v,o,a,b,1 )
829-
830- nextBlockA = 1
831- nextBlockB = (m + blocksize- lo) ÷ blocksize + 1
832- blockLocation .= 0
833- blockLocation[1 : 3 ] = - 1 : - 1 : - 3
834-
830+ # merge the first 3 pages into buf
831+ a,b,k = merge! ((_,_,k) -> k<= 3 pagesize,buf,v,v,o,a,b,1 )
832+ # initialize variable for merging into pages
833+ pageLocations .= 0
834+ pageLocations[1 : 3 ] = - 1 : - 1 : - 3
835+ currentPageIndex = 4
836+ currentPage = 0
837+ nextPageA = 1
838+ nextPageB = (m + pagesize- lo) ÷ pagesize + 1
839+ pages = Pages (currentPage, nextPageA, nextPageB)
835840 k = 1
836- currentBlock = 0
837- currentBlockIdx = 4
838- # more efficient loop while more than blocksize elements of A and B are remaining
839- while_condition1 (offset) = (_,_,k) -> k <= offset + blocksize
840- while a < m- blocksize && b < hi- blocksize
841- @getNextBlock!
842- offset = getBlockOffset (currentBlock)
841+ # more efficient loop while more than pagesize elements of A and B are remaining
842+ while_condition1 (offset) = (_,_,k) -> k <= offset + pagesize
843+ while a < m- pagesize && b < hi- pagesize
844+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
845+ offset = page_offset (pages. current)
843846 a,b,k = merge! (while_condition1 (offset),v,v,v,o,a,b,offset+ 1 )
844847 end
845848 # merge until either A or B is empty
846- while_condition2 (offset) = (a,b,k) -> k <= offset + blocksize && a <= m && b <= hi
849+ while_condition2 (offset) = (a,b,k) -> k <= offset + pagesize && a <= m && b <= hi
847850 while a <= m && b <= hi
848- @getNextBlock!
849- offset = getBlockOffset (currentBlock )
851+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
852+ offset = page_offset (pages . current )
850853 a,b,k = merge! (while_condition2 (offset),v,v,v,o,a,b,offset+ 1 )
851854 end
852- k_block = k - getBlockOffset (currentBlock )
855+ k_page = k - page_offset (pages . current )
853856 # copy remaining elements
854857 # either A or B is empty
855858 # copy rest of A
856859 while a <= m
857- if k_block > blocksize
858- @getNextBlock!
859- k_block = 1
860+ if k_page > pagesize
861+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
862+ k_page = 1
860863 end
861- offset = getBlockOffset (currentBlock )
862- while k_block <= blocksize && a <= m
863- v[offset + k_block ] = v[a]
864+ offset = page_offset (pages . current )
865+ while k_page <= pagesize && a <= m
866+ v[offset + k_page ] = v[a]
864867 a += 1
865- k_block += 1
868+ k_page += 1
866869 end
867870 end
868871 # copy rest of B
869872 while b <= hi
870- if k_block > blocksize
871- @getNextBlock!
872- k_block = 1
873+ if k_page > pagesize
874+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
875+ k_page = 1
873876 end
874- offset = getBlockOffset (currentBlock )
875- while k_block <= blocksize && b <= hi
876- v[offset + k_block ] = v[b]
877+ offset = page_offset (pages . current )
878+ while k_page <= pagesize && b <= hi
879+ v[offset + k_page ] = v[b]
877880 b += 1
878- k_block += 1
881+ k_page += 1
879882 end
880883 end
881- # copy last partial block to end
882- partialBlockPresent = k_block <= blocksize
883- if partialBlockPresent
884- offset = getBlockOffset (currentBlock )
885- offset2 = nBlocks * blocksize + lo - 1
886- for j = 1 : k_block - 1
884+ # copy last partial page to end
885+ partialPagePresent = k_page <= pagesize
886+ if partialPagePresent
887+ offset = page_offset (pages . current )
888+ offset2 = nPages * pagesize + lo - 1
889+ for j = 1 : k_page - 1
887890 v[offset2 + j] = v[offset + j]
888891 end
889- blockLocation[currentBlockIdx - 1 ] = 0
892+ pageLocations[currentPageIndex - 1 ] = 0
890893 end
891894 # ########################################
892- # calculate location of the 3 free blocks
895+ # calculate location of the 3 free pages
893896 # ########################################
894- nFreeBlocksB = nBlocks + 1 - nextBlockB
895- nFreeBlocksA = 3 - nFreeBlocksB - Int (partialBlockPresent )
896- freeBlocks = MVector {3,Int} (undef)
897+ nFreePagesB = nPages + 1 - pages . nextB
898+ nFreePagesA = 3 - nFreePagesB - Int (partialPagePresent )
899+ freePages = MVector {3,Int} (undef)
897900 i = 1
898- for j = 0 : nFreeBlocksA - 1
899- freeBlocks [i] = nextBlockA + j
901+ for j = 0 : nFreePagesA - 1
902+ freePages [i] = pages . nextA + j
900903 i += 1
901904 end
902- for j = 0 : nFreeBlocksB - 1
903- freeBlocks [i] = nextBlockB + j
905+ for j = 0 : nFreePagesB - 1
906+ freePages [i] = pages . nextB + j
904907 i += 1
905908 end
906- if partialBlockPresent
907- freeBlocks [i] = currentBlock
909+ if partialPagePresent
910+ freePages [i] = pages . current
908911 end
909- freeBlocksIdx = 3
910- doneBlockIdx = 1
911- currentBlock = freeBlocks[end ]
912+ freePagesIndex = 3
913+ donePageIndex = 1
914+ # use currentPage instead of pages.current because
915+ # pages.nextA and pages.nextB are no longer needed
916+ currentPage = freePages[end ]
912917 # #################
913- # rearrange blocks
918+ # rearrange pages
914919 # #################
915920 while true
916- blc = blockLocation[currentBlock ] # index of block with data belonging to currentBlock
917- if blc > 0
918- # data for currentBlock is in v
919- offset = getBlockOffset (currentBlock )
920- offset2 = getBlockOffset (blc )
921- for j = 1 : blocksize
921+ plc = pageLocations[currentPage ] # page with data belonging to currentPage
922+ if plc > 0
923+ # data for currentPage is in v
924+ offset = page_offset (currentPage )
925+ offset2 = page_offset (plc )
926+ for j = 1 : pagesize
922927 v[offset + j] = v[offset2 + j]
923928 end
924- blockLocation[currentBlock ] = 0
925- currentBlock = blc
929+ pageLocations[currentPage ] = 0
930+ currentPage = plc
926931 else
927- # data for currentBlock is in buf
928- offset = getBlockOffset (currentBlock )
929- offset2 = (- blc - 1 )* blocksize
930- for j = 1 : blocksize
932+ # data for currentPage is in buf
933+ offset = page_offset (currentPage )
934+ offset2 = (- plc - 1 )* pagesize
935+ for j = 1 : pagesize
931936 v[offset + j] = buf[offset2 + j]
932937 end
933- blockLocation[currentBlock ] = 0
934- if freeBlocksIdx > 1
935- # get next free block
936- freeBlocksIdx -= 1
937- currentBlock = freeBlocks[freeBlocksIdx ]
938+ pageLocations[currentPage ] = 0
939+ if freePagesIndex > 1
940+ # get next free page
941+ freePagesIndex -= 1
942+ currentPage = freePages[freePagesIndex ]
938943 else
939- # no free block remains
940- # make sure that all blocks are done
941- while blockLocation[doneBlockIdx ] == 0 || blockLocation[doneBlockIdx ] == doneBlockIdx
942- doneBlockIdx += 1
943- doneBlockIdx == nBlocks && return
944+ # no free page remains
945+ # make sure that all pages are done
946+ while pageLocations[donePageIndex ] == 0 || pageLocations[donePageIndex ] == donePageIndex
947+ donePageIndex += 1
948+ donePageIndex == nPages && return
944949 end
945- # copy misplaced block into buf and continue
946- currentBlock = blockLocation[doneBlockIdx ]
947- offset = getBlockOffset (currentBlock )
948- for j = 1 : blocksize
950+ # copy misplaced page into buf and continue
951+ currentPage = pageLocations[donePageIndex ]
952+ offset = page_offset (currentPage )
953+ for j = 1 : pagesize
949954 buf[j] = v[offset + j]
950955 end
951- blockLocation[doneBlockIdx ] = - 1
956+ pageLocations[donePageIndex ] = - 1
952957 end
953958 end
954959 end
@@ -959,59 +964,59 @@ end
959964# -> redefine for compatibility with earlier versions
960965midpoint (lo:: Integer , hi:: Integer ) = lo + ((hi - lo) >>> 0x01 )
961966
962- function pagedmergesort! (v:: AbstractVector{T} , lo:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , blockLocation ) where T
967+ function pagedmergesort! (v:: AbstractVector{T} , lo:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , pageLocations ) where T
963968 len = hi + 1 - lo
964969 if len <= Base. SMALL_THRESHOLD
965970 return Base. Sort. sort! (v, lo, hi, Base. Sort. InsertionSortAlg (), o)
966971 end
967972 m = midpoint (lo, hi- 1 ) # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
968- pagedmergesort! (v, lo, m, o, buf, blockLocation )
969- pagedmergesort! (v, m+ 1 , hi, o, buf, blockLocation )
973+ pagedmergesort! (v, lo, m, o, buf, pageLocations )
974+ pagedmergesort! (v, m+ 1 , hi, o, buf, pageLocations )
970975 if len <= length (buf)
971976 twoended_merge! (v, lo, m, hi, o, buf)
972977 else
973- pagedMerge ! (v, lo, m, hi, o, buf, blockLocation )
978+ paged_merge ! (v, lo, m, hi, o, buf, pageLocations )
974979 end
975980 return v
976981end
977982
978983function sort! (v:: AbstractVector , lo:: Integer , hi:: Integer , :: PagedMergeSortAlg , o:: Ordering )
979984 lo >= hi && return v
980985 n = hi + 1 - lo
981- blocksize = isqrt (n)
982- buf = Vector {eltype(v)} (undef, 3 blocksize )
983- nBlocks = n ÷ blocksize
984- blockLocation = Vector {Int} (undef, nBlocks + 1 )
985- pagedmergesort! (v, lo, hi, o, buf, blockLocation )
986+ pagesize = isqrt (n)
987+ buf = Vector {eltype(v)} (undef, 3 pagesize )
988+ nPages = n ÷ pagesize
989+ pageLocations = Vector {Int} (undef, nPages + 1 )
990+ pagedmergesort! (v, lo, hi, o, buf, pageLocations )
986991 return v
987992end
988993
989994Base. @static if VERSION >= v " 1.3"
990995const PAGEDMERGESORT_THREADING_THRESHOLD = 2 ^ 13
991- function threaded_pagedmergesort! (v:: AbstractVector , lo:: Integer , hi:: Integer , o:: Ordering , bufs, blockLocations , c:: Channel , threadingThreshold:: Integer )
996+ function threaded_pagedmergesort! (v:: AbstractVector , lo:: Integer , hi:: Integer , o:: Ordering , bufs, pageLocations , c:: Channel , threadingThreshold:: Integer )
992997 len = hi + 1 - lo
993998 if len <= Base. SMALL_THRESHOLD
994999 return Base. Sort. sort! (v, lo, hi, Base. Sort. InsertionSortAlg (), o)
9951000 end
9961001 m = midpoint (lo, hi- 1 ) # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
9971002 if len > threadingThreshold
998- thr = Threads. @spawn threaded_pagedmergesort! (v, lo, m, o, bufs, blockLocations , c, threadingThreshold)
999- threaded_pagedmergesort! (v, m+ 1 , hi, o, bufs, blockLocations , c, threadingThreshold)
1003+ thr = Threads. @spawn threaded_pagedmergesort! (v, lo, m, o, bufs, pageLocations , c, threadingThreshold)
1004+ threaded_pagedmergesort! (v, m+ 1 , hi, o, bufs, pageLocations , c, threadingThreshold)
10001005 wait (thr)
10011006 id = take! (c)
10021007 buf = bufs[id]
1003- blockLocation = blockLocations [id]
1008+ pageLocations = pageLocations [id]
10041009 else
10051010 id = take! (c)
10061011 buf = bufs[id]
1007- blockLocation = blockLocations [id]
1008- pagedmergesort! (v, lo, m, o, buf, blockLocation )
1009- pagedmergesort! (v, m+ 1 , hi, o, buf, blockLocation )
1012+ pageLocations = pageLocations [id]
1013+ pagedmergesort! (v, lo, m, o, buf, pageLocations )
1014+ pagedmergesort! (v, m+ 1 , hi, o, buf, pageLocations )
10101015 end
10111016 if len <= length (buf)
10121017 twoended_merge! (v, lo, m, hi, o, buf)
10131018 else
1014- pagedMerge ! (v, lo, m, hi, o, buf, blockLocation )
1019+ paged_merge ! (v, lo, m, hi, o, buf, pageLocations )
10151020 end
10161021 put! (c, id)
10171022 return v
@@ -1022,15 +1027,15 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::ThreadedPagedMerge
10221027 nThreads= Threads. nthreads ()
10231028 (n < PAGEDMERGESORT_THREADING_THRESHOLD || nThreads < 2 ) && return sort! (v, lo, hi, PagedMergeSortAlg (), o)
10241029 threadingThreshold = max (n ÷ 4 nThreads, PAGEDMERGESORT_THREADING_THRESHOLD)
1025- blocksize = isqrt (n)
1026- nBlocks = n ÷ blocksize
1027- bufs = [Vector {eltype(v)} (undef, 3 blocksize ) for _ in 1 : nThreads] # allocate buffer for each thread
1028- blockLocation = [Vector {Int} (undef, nBlocks + 1 ) for _ in 1 : nThreads]
1030+ pagesize = isqrt (n)
1031+ nPages = n ÷ pagesize
1032+ bufs = [Vector {eltype(v)} (undef, 3 pagesize ) for _ in 1 : nThreads] # allocate buffer for each thread
1033+ pageLocations = [Vector {Int} (undef, nPages + 1 ) for _ in 1 : nThreads]
10291034 c = Channel {Int} (nThreads) # channel holds indices of available buffers
10301035 for i= 1 : nThreads
10311036 put! (c, i)
10321037 end
1033- threaded_pagedmergesort! (v, lo, hi, o, bufs, blockLocation , c, threadingThreshold)
1038+ threaded_pagedmergesort! (v, lo, hi, o, bufs, pageLocations , c, threadingThreshold)
10341039 return v
10351040end
10361041else
0 commit comments