6565#include " llvm/Support/Debug.h"
6666#include " llvm/Transforms/Utils/BasicBlockUtils.h"
6767#include " llvm/Transforms/Utils/Local.h"
#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <limits>
6870
6971using namespace llvm ;
7072using namespace PatternMatch ;
@@ -78,6 +80,42 @@ static cl::opt<bool>
7880 cl::desc (" Enable pass AMDGPUVectorIdiom" ),
7981 cl::init(true ));
8082
// Running total of rewrites performed by this pass. It has static storage
// duration, so the count is shared by every pass instance and every function
// processed in this process; it is atomic so concurrent updates are safe.
static std::atomic<unsigned> TransformationCounter(0);
85+
86+ // Get maximum transformations from environment variable
87+ static unsigned getMaxTransformationsFromEnv () {
88+ const char *envVar = std::getenv (" AMDGPU_VECTOR_IDIOM_MAX_TRANSFORMATIONS" );
89+ if (!envVar)
90+ return 0 ; // Default: unlimited
91+
92+ char *endPtr;
93+ unsigned long value = std::strtoul (envVar, &endPtr, 10 );
94+
95+ // Check for conversion errors
96+ if (endPtr == envVar || *endPtr != ' \0 ' ) {
97+ LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Invalid AMDGPU_VECTOR_IDIOM_MAX_TRANSFORMATIONS value: "
98+ << envVar << " , using unlimited\n " );
99+ return 0 ;
100+ }
101+
102+ return static_cast <unsigned >(value);
103+ }
104+
105+ // Helper function to check if transformations should be performed
106+ static bool shouldPerformTransformation () {
107+ unsigned maxTransformations = getMaxTransformationsFromEnv ();
108+ if (maxTransformations == 0 )
109+ return true ; // Unlimited transformations
110+
111+ return TransformationCounter.load () < maxTransformations;
112+ }
113+
114+ // Helper function to increment transformation counter
115+ static void incrementTransformationCounter () {
116+ TransformationCounter.fetch_add (1 );
117+ }
118+
81119// Selects an integer or integer-vector element type matching NBytes, using the
82120// minimum proven alignment to decide the widest safe element width.
83121// Assumptions:
@@ -181,6 +219,15 @@ struct AMDGPUVectorIdiomImpl {
181219 AssumptionCache *AC) {
182220 LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Considering memcpy(select-src): "
183221 << MT << ' \n ' );
222+
223+ if (!shouldPerformTransformation ()) {
224+ unsigned maxTransformations = getMaxTransformationsFromEnv ();
225+ LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Skip: transformation limit reached ("
226+ << TransformationCounter.load () << " /"
227+ << maxTransformations << " )\n " );
228+ return false ;
229+ }
230+
184231 IRBuilder<> B (&MT);
185232 Value *Dst = MT.getRawDest ();
186233 Value *A = Sel.getTrueValue ();
@@ -202,6 +249,7 @@ struct AMDGPUVectorIdiomImpl {
202249 (isa<ConstantPointerNull>(ICmp->getOperand (0 )) ||
203250 isa<ConstantPointerNull>(ICmp->getOperand (1 )))) {
204251 splitCFGForMemcpy (MT, Sel.getCondition (), A, Bv, true );
252+ incrementTransformationCounter ();
205253 LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Null check pattern - "
206254 " using CFG split\n " );
207255 return true ;
@@ -266,13 +314,15 @@ struct AMDGPUVectorIdiomImpl {
266314 LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Rewrote memcpy(select-src) to "
267315 " value-select loads/stores: "
268316 << MT << ' \n ' );
317+ incrementTransformationCounter ();
269318 MT.eraseFromParent ();
270319 return true ;
271320 }
272321
273322 LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Falling back to CFG split for "
274323 << " memcpy(select-src); speculation unsafe\n " );
275324 splitCFGForMemcpy (MT, Sel.getCondition (), A, Bv, true );
325+ incrementTransformationCounter ();
276326 LLVM_DEBUG (
277327 dbgs ()
278328 << " [AMDGPUVectorIdiom] Rewrote memcpy(select-src) by CFG split\n " );
@@ -283,13 +333,22 @@ struct AMDGPUVectorIdiomImpl {
283333 // speculative stores, always splits the CFG and emits a memcpy per branch.
284334 // Assumptions mirror the source case.
285335 bool transformSelectMemcpyDest (MemCpyInst &MT, SelectInst &Sel) {
336+ if (!shouldPerformTransformation ()) {
337+ unsigned maxTransformations = getMaxTransformationsFromEnv ();
338+ LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Skip: transformation limit reached ("
339+ << TransformationCounter.load () << " /"
340+ << maxTransformations << " )\n " );
341+ return false ;
342+ }
343+
286344 Value *DA = Sel.getTrueValue ();
287345 Value *DB = Sel.getFalseValue ();
288346 LLVM_DEBUG (dbgs () << " [AMDGPUVectorIdiom] Rewriting memcpy(select-dst) via "
289347 << " CFG split to avoid speculative stores: " << MT
290348 << ' \n ' );
291349
292350 splitCFGForMemcpy (MT, Sel.getCondition (), DA, DB, false );
351+ incrementTransformationCounter ();
293352 LLVM_DEBUG (
294353 dbgs ()
295354 << " [AMDGPUVectorIdiom] Rewrote memcpy(select-dst) by CFG split\n " );
@@ -360,6 +419,19 @@ AMDGPUVectorIdiomCombinePass::run(Function &F, FunctionAnalysisManager &FAM) {
360419 if (!AMDGPUVectorIdiomEnable)
361420 return PreservedAnalyses::all ();
362421
422+ LLVM_DEBUG ({
423+ unsigned currentCount = TransformationCounter.load ();
424+ unsigned maxTransformations = getMaxTransformationsFromEnv ();
425+ if (maxTransformations > 0 ) {
426+ dbgs () << " [AMDGPUVectorIdiom] Starting pass on function " << F.getName ()
427+ << " (transformations: " << currentCount << " /"
428+ << maxTransformations << " )\n " ;
429+ } else {
430+ dbgs () << " [AMDGPUVectorIdiom] Starting pass on function " << F.getName ()
431+ << " (transformations: " << currentCount << " /unlimited)\n " ;
432+ }
433+ });
434+
363435 SmallVector<MemCpyInst *, 8 > Worklist;
364436 for (Instruction &I : instructions (F)) {
365437 if (auto *MC = dyn_cast<MemCpyInst>(&I))
0 commit comments