From c49a7e833377f64c7005f0c56fa95eb9d3270f66 Mon Sep 17 00:00:00 2001 From: Dingding Date: Thu, 26 Jun 2025 14:49:23 +0800 Subject: [PATCH 1/2] [MemDep] Optimize SortNonLocalDepInfoCache sorting strategy for large caches with few unsorted entries --- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index d6f490cb69a52..d43ba41a22669 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -983,33 +983,41 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock( static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { - switch (Cache.size() - NumSortedEntries) { - case 0: - // done, no new entries. - break; - case 2: { - // Two new entries, insert the last one into place. - NonLocalDepEntry Val = Cache.back(); - Cache.pop_back(); - MemoryDependenceResults::NonLocalDepInfo::iterator Entry = - std::upper_bound(Cache.begin(), Cache.end() - 1, Val); - Cache.insert(Entry, Val); - [[fallthrough]]; + + // Output number of sorted entries and size of cache for each sort. + LLVM_DEBUG(dbgs() << "NumSortedEntries: " << NumSortedEntries + << ", Cache.size: " << Cache.size() << "\n"); + + // If only one entry, don't sort. + if (Cache.size() < 2) + return; + + unsigned s = Cache.size() - NumSortedEntries; + + // If the cache is already sorted, don't sort it again. + if (s == 0) + return; + + // If no entry is sorted, sort the whole cache. + if (NumSortedEntries == 0) { + llvm::sort(Cache); + return; } - case 1: - // One new entry, Just insert the new value at the appropriate position. - if (Cache.size() != 1) { + + // If the number of unsorted entires is small and the cache size is big, use + // insertion sort is faster. Here use Log2_32 to quickly choose the sort + // method. 
+ if (s < Log2_32(Cache.size())) { + while (s > 0) { NonLocalDepEntry Val = Cache.back(); Cache.pop_back(); MemoryDependenceResults::NonLocalDepInfo::iterator Entry = - llvm::upper_bound(Cache, Val); + std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val); Cache.insert(Entry, Val); + s--; } - break; - default: - // Added many values, do a full scale sort. + } else { llvm::sort(Cache); - break; } } From fec951d5270c84612235ddb98d75489aa8c7517e Mon Sep 17 00:00:00 2001 From: DingdWang Date: Fri, 25 Jul 2025 18:50:57 +0800 Subject: [PATCH 2/2] fix comments from nikic --- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index d43ba41a22669..de1bce054865f 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -984,10 +984,6 @@ static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { - // Output number of sorted entries and size of cache for each sort. - LLVM_DEBUG(dbgs() << "NumSortedEntries: " << NumSortedEntries - << ", Cache.size: " << Cache.size() << "\n"); - // If only one entry, don't sort. if (Cache.size() < 2) return; @@ -1004,7 +1000,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, return; } - // If the number of unsorted entires is small and the cache size is big, use + // If the number of unsorted entries is small and the cache size is big, using // insertion sort is faster. Here use Log2_32 to quickly choose the sort // method. if (s < Log2_32(Cache.size())) {