Skip to content

Commit 40be8d5

Browse files
committed
add AMDGPU_VECTOR_IDIOM_MAX_TRANSFORMATIONS
1 parent 1e18937 commit 40be8d5

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUVectorIdiom.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
#include "llvm/Support/Debug.h"
6666
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
6767
#include "llvm/Transforms/Utils/Local.h"
68+
#include <atomic>
69+
#include <cstdlib>
6870

6971
using namespace llvm;
7072
using namespace PatternMatch;
@@ -78,6 +80,42 @@ static cl::opt<bool>
7880
cl::desc("Enable pass AMDGPUVectorIdiom"),
7981
cl::init(true));
8082

83+
// File-scope atomic counter of the transformations performed so far; it is
// shared by every invocation of this pass within the process.
84+
static std::atomic<unsigned> TransformationCounter{0};
85+
86+
// Get maximum transformations from environment variable
87+
static unsigned getMaxTransformationsFromEnv() {
88+
const char *envVar = std::getenv("AMDGPU_VECTOR_IDIOM_MAX_TRANSFORMATIONS");
89+
if (!envVar)
90+
return 0; // Default: unlimited
91+
92+
char *endPtr;
93+
unsigned long value = std::strtoul(envVar, &endPtr, 10);
94+
95+
// Check for conversion errors
96+
if (endPtr == envVar || *endPtr != '\0') {
97+
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Invalid AMDGPU_VECTOR_IDIOM_MAX_TRANSFORMATIONS value: "
98+
<< envVar << ", using unlimited\n");
99+
return 0;
100+
}
101+
102+
return static_cast<unsigned>(value);
103+
}
104+
105+
// Helper function to check if transformations should be performed
106+
static bool shouldPerformTransformation() {
107+
unsigned maxTransformations = getMaxTransformationsFromEnv();
108+
if (maxTransformations == 0)
109+
return true; // Unlimited transformations
110+
111+
return TransformationCounter.load() < maxTransformations;
112+
}
113+
114+
// Helper function to increment transformation counter
115+
static void incrementTransformationCounter() {
116+
TransformationCounter.fetch_add(1);
117+
}
118+
81119
// Selects an integer or integer-vector element type matching NBytes, using the
82120
// minimum proven alignment to decide the widest safe element width.
83121
// Assumptions:
@@ -181,6 +219,15 @@ struct AMDGPUVectorIdiomImpl {
181219
AssumptionCache *AC) {
182220
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Considering memcpy(select-src): "
183221
<< MT << '\n');
222+
223+
if (!shouldPerformTransformation()) {
224+
unsigned maxTransformations = getMaxTransformationsFromEnv();
225+
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Skip: transformation limit reached ("
226+
<< TransformationCounter.load() << "/"
227+
<< maxTransformations << ")\n");
228+
return false;
229+
}
230+
184231
IRBuilder<> B(&MT);
185232
Value *Dst = MT.getRawDest();
186233
Value *A = Sel.getTrueValue();
@@ -202,6 +249,7 @@ struct AMDGPUVectorIdiomImpl {
202249
(isa<ConstantPointerNull>(ICmp->getOperand(0)) ||
203250
isa<ConstantPointerNull>(ICmp->getOperand(1)))) {
204251
splitCFGForMemcpy(MT, Sel.getCondition(), A, Bv, true);
252+
incrementTransformationCounter();
205253
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Null check pattern - "
206254
"using CFG split\n");
207255
return true;
@@ -266,13 +314,15 @@ struct AMDGPUVectorIdiomImpl {
266314
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Rewrote memcpy(select-src) to "
267315
"value-select loads/stores: "
268316
<< MT << '\n');
317+
incrementTransformationCounter();
269318
MT.eraseFromParent();
270319
return true;
271320
}
272321

273322
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Falling back to CFG split for "
274323
<< "memcpy(select-src); speculation unsafe\n");
275324
splitCFGForMemcpy(MT, Sel.getCondition(), A, Bv, true);
325+
incrementTransformationCounter();
276326
LLVM_DEBUG(
277327
dbgs()
278328
<< "[AMDGPUVectorIdiom] Rewrote memcpy(select-src) by CFG split\n");
@@ -283,13 +333,22 @@ struct AMDGPUVectorIdiomImpl {
283333
// speculative stores, always splits the CFG and emits a memcpy per branch.
284334
// Assumptions mirror the source case.
285335
bool transformSelectMemcpyDest(MemCpyInst &MT, SelectInst &Sel) {
336+
if (!shouldPerformTransformation()) {
337+
unsigned maxTransformations = getMaxTransformationsFromEnv();
338+
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Skip: transformation limit reached ("
339+
<< TransformationCounter.load() << "/"
340+
<< maxTransformations << ")\n");
341+
return false;
342+
}
343+
286344
Value *DA = Sel.getTrueValue();
287345
Value *DB = Sel.getFalseValue();
288346
LLVM_DEBUG(dbgs() << "[AMDGPUVectorIdiom] Rewriting memcpy(select-dst) via "
289347
<< "CFG split to avoid speculative stores: " << MT
290348
<< '\n');
291349

292350
splitCFGForMemcpy(MT, Sel.getCondition(), DA, DB, false);
351+
incrementTransformationCounter();
293352
LLVM_DEBUG(
294353
dbgs()
295354
<< "[AMDGPUVectorIdiom] Rewrote memcpy(select-dst) by CFG split\n");
@@ -360,6 +419,19 @@ AMDGPUVectorIdiomCombinePass::run(Function &F, FunctionAnalysisManager &FAM) {
360419
if (!AMDGPUVectorIdiomEnable)
361420
return PreservedAnalyses::all();
362421

422+
LLVM_DEBUG({
423+
unsigned currentCount = TransformationCounter.load();
424+
unsigned maxTransformations = getMaxTransformationsFromEnv();
425+
if (maxTransformations > 0) {
426+
dbgs() << "[AMDGPUVectorIdiom] Starting pass on function " << F.getName()
427+
<< " (transformations: " << currentCount << "/"
428+
<< maxTransformations << ")\n";
429+
} else {
430+
dbgs() << "[AMDGPUVectorIdiom] Starting pass on function " << F.getName()
431+
<< " (transformations: " << currentCount << "/unlimited)\n";
432+
}
433+
});
434+
363435
SmallVector<MemCpyInst *, 8> Worklist;
364436
for (Instruction &I : instructions(F)) {
365437
if (auto *MC = dyn_cast<MemCpyInst>(&I))

0 commit comments

Comments
 (0)