Skip to content

Commit 6965a0d

Browse files
Bump LLVM to 4ac4726d00644f6c6b0e2de1df0d00deed0015bf (#21175)
Carrying reverts from #21162. Also added a revert for [4d21da002a056c64231fb89ee9e4eba90080e9bb](http://github.com/llvm/llvm-project/pull/144158) (not a hard fix; it is just a load-bearing change that should be done as a separate cherry-pick). Adds a local commit to stablehlo to allow compiling with the new LLVM patch. Co-authored-by: Kunwar Grover <[email protected]>
1 parent 3c81533 commit 6965a0d

File tree

20 files changed

+37
-37
lines changed

20 files changed

+37
-37
lines changed

compiler/plugins/input/StableHLO/Conversion/MapStableHLOToScalarOp.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ inline Value mapStableHloOpToStdScalarOp<stablehlo::ReducePrecisionOp>(
489489
expBitsMask = ((expBitsMask << srcExponentBits) - 1) << srcMantissaBits;
490490

491491
auto createConstant = [&](const APInt &v) {
492-
return b.create<arith::ConstantIntOp>(v.getZExtValue(), intType)
492+
return b.create<arith::ConstantIntOp>(intType, v.getZExtValue())
493493
.getResult();
494494
};
495495

@@ -510,7 +510,7 @@ inline Value mapStableHloOpToStdScalarOp<stablehlo::ReducePrecisionOp>(
510510
APInt baseRoundingBias = lastMantissaBitMask.lshr(1) - 1;
511511

512512
Value mantissaDiff = b.create<arith::ConstantIntOp>(
513-
srcMantissaBits - destMantissaBits, intType);
513+
intType, srcMantissaBits - destMantissaBits);
514514
Value highestMantissaMaskVal = createConstant(lastMantissaBitMask);
515515
Value baseRoundingBiasVal = createConstant(baseRoundingBias);
516516
Value xLastMantissaBit = b.create<arith::ShRUIOp>(

compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ struct LowerInnerTiledMmaToUKernelPattern
157157
return rewriter.create<arith::IndexCastOp>(loc, I32Type, val);
158158
};
159159
auto constI32 = [&](int val) {
160-
return rewriter.create<arith::ConstantIntOp>(loc, val, I32Type);
160+
return rewriter.create<arith::ConstantIntOp>(loc, I32Type, val);
161161
};
162162
int64_t sharedMemoryBytes = ukernelAttr.getSharedMemoryBytes();
163163
auto sharedMemory = createSharedMemory(rewriter, loc, sharedMemoryBytes);

compiler/src/iree/compiler/Codegen/Common/SpecializeExports.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ static void specializeExportedFunction(
284284
builder.setInsertionPointToStart(newCondition);
285285

286286
Value exportCondition =
287-
builder.create<arith::ConstantIntOp>(loc, 1, builder.getI1Type());
287+
builder.create<arith::ConstantIntOp>(loc, builder.getI1Type(), 1);
288288

289289
for (auto [range, assumedSize] :
290290
llvm::zip(specializationRange, workloadMapping)) {

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,7 @@ static bool isNonMatvecContraction(Operation *op) {
551551
if (!linalgOp) {
552552
return false;
553553
}
554-
SmallVector<int64_t, 4> bounds = linalgOp.getStaticLoopRanges();
554+
SmallVector<int64_t> bounds = linalgOp.getStaticLoopRanges();
555555
FailureOr<mlir::linalg::ContractionDimensions> contractionDims =
556556
mlir::linalg::inferContractionDims(linalgOp);
557557
if (failed(contractionDims)) {

compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ getVectorDistributeReductionConfig(
352352
op.getParallelDims(parallelDims);
353353
op.getReductionDims(reductionDims);
354354

355-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
355+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
356356

357357
SmallVector<int64_t> workgroupTileSizes(op.getNumLoops(), 0);
358358
SmallVector<int64_t> threadTileSizes(op.getNumLoops(), 0);
@@ -724,7 +724,7 @@ setReductionVectorDistributionConfig(IREE::GPU::TargetAttr target,
724724
op.getParallelDims(parallelDims);
725725
op.getReductionDims(reductionDims);
726726

727-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
727+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
728728
IREE::GPU::TargetWgpAttr wgp = target.getWgp();
729729
int64_t reductionSize = bounds[reductionDims.back()];
730730

@@ -836,7 +836,7 @@ setConvolutionVectorDistributionConfig(IREE::GPU::TargetAttr target,
836836

837837
const int64_t targetSubgroupSize = target.getPreferredSubgroupSize();
838838

839-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
839+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
840840
FailureOr<mlir::linalg::ConvolutionDimensions> convolutionDims =
841841
mlir::linalg::inferConvolutionDims(op);
842842
if (failed(convolutionDims)) {
@@ -1040,7 +1040,7 @@ setMatmulVectorDistributionConfig(IREE::GPU::TargetAttr target,
10401040

10411041
const int64_t targetSubgroupSize = target.getPreferredSubgroupSize();
10421042

1043-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
1043+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
10441044
FailureOr<mlir::linalg::ContractionDimensions> contractionDims =
10451045
mlir::linalg::inferContractionDims(op);
10461046
if (failed(contractionDims)) {
@@ -1881,7 +1881,7 @@ static LogicalResult setContractConfig(IREE::GPU::TargetAttr target,
18811881
// They should go down different pipelines.
18821882
// Currently dynamic dimensions are tiled with size=1 in codegen.
18831883
int staticNonUnitParallelDimCount = 0;
1884-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
1884+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
18851885
FailureOr<mlir::linalg::ContractionDimensions> contractionDims =
18861886
mlir::linalg::inferContractionDims(op);
18871887
assert(succeeded(contractionDims) && "Could not infer contraction dims");
@@ -2424,7 +2424,7 @@ setWarpReductionConfig(IREE::GPU::TargetAttr target,
24242424
op.getParallelDims(parallelDims);
24252425
op.getReductionDims(reductionDims);
24262426

2427-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
2427+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
24282428
int64_t numParallelDims = op.getNumParallelLoops();
24292429

24302430
if (reductionDims.empty())
@@ -2714,7 +2714,7 @@ static LogicalResult setArgmaxUkernelConfig(
27142714
return failure();
27152715

27162716
// Make sure reduction dimensions are static and innermost ones.
2717-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
2717+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
27182718
int64_t numParallelDims = op.getNumParallelLoops();
27192719
int64_t numDynamicReductionDims = 0;
27202720
for (unsigned dim : reductionDims) {

compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLKernelConfig.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ static bool isMatvecLike(linalg::LinalgOp linalgOp) {
5555

5656
// Check if the first parallel dimension has bound 1, indicating we found a
5757
// vector shape.
58-
SmallVector<int64_t, 4> bounds = linalgOp.getStaticLoopRanges();
58+
SmallVector<int64_t> bounds = linalgOp.getStaticLoopRanges();
5959
if (bounds[dims->m.front()] != 1)
6060
return false;
6161

@@ -74,7 +74,7 @@ setWarpReductionConfig(IREE::GPU::TargetAttr target,
7474
op.getParallelDims(parallelDims);
7575
op.getReductionDims(reductionDims);
7676

77-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
77+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
7878
int64_t numParallelDims = op.getNumParallelLoops();
7979

8080
if (reductionDims.empty())

compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1062,7 +1062,7 @@ static LogicalResult setReductionConfig(IREE::GPU::TargetAttr target,
10621062
op.getParallelDims(parallelDims);
10631063
op.getReductionDims(reductionDims);
10641064

1065-
SmallVector<int64_t, 4> bounds = op.getStaticLoopRanges();
1065+
SmallVector<int64_t> bounds = op.getStaticLoopRanges();
10661066
int64_t numParallelDims = op.getNumParallelLoops();
10671067

10681068
// We should have reduction dimensions.

compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ bool isMatmulOrBatchMatmul(linalg::LinalgOp linalgOp) {
133133
// Also exclude the case of matvec, which has only one non-unit parallel dim.
134134
// They should go down different pipelines.
135135
int nonUnitParallelDimCount = 0;
136-
SmallVector<int64_t, 4> bounds = linalgOp.getStaticLoopRanges();
136+
SmallVector<int64_t> bounds = linalgOp.getStaticLoopRanges();
137137
FailureOr<mlir::linalg::ContractionDimensions> contractionDims =
138138
mlir::linalg::inferContractionDims(linalgOp);
139139
assert(succeeded(contractionDims) && "Could not infer contraction dims");

compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ static bool isMatvecLike(linalg::LinalgOp linalgOp) {
196196
return false;
197197

198198
// One of the input should have all the parallel dimensions with size one.
199-
SmallVector<int64_t, 4> bounds = linalgOp.getStaticLoopRanges();
199+
SmallVector<int64_t> bounds = linalgOp.getStaticLoopRanges();
200200
SmallVector<AffineMap> maps = linalgOp.getIndexingMapsArray();
201201
SmallVector<utils::IteratorType> iterators = linalgOp.getIteratorTypesArray();
202202

compiler/src/iree/compiler/Dialect/LinalgExt/IR/TilingInterfaceImpl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ void FftOp::generateScalarImplWithoutCoeffBuf(OpBuilder &b, Location loc,
764764
// We will need exp(-2 * PI * j / m * I), compute "-2 * PI / m" for imag part
765765
// first.
766766
Value coeff = b.create<arith::ConstantFloatOp>(
767-
loc, llvm::APFloat(static_cast<float>(-2 * acos(-1))), f32Type);
767+
loc, f32Type, llvm::APFloat(static_cast<float>(-2 * acos(-1))));
768768
coeff = b.create<arith::DivFOp>(loc, coeff, indexToF32(b, loc, wholeSize));
769769

770770
b.create<linalg::GenericOp>(

0 commit comments

Comments
 (0)