Bump llvm to llvm/llvm-project@95d993a (#19811)

nirvedhmeshram · AWoloszyn · commit 1d97492f4932 · 2025-11-20T10:44:40.000-08:00
There are no llvm reverts/cherry-picks. Bump llvm to llvm/llvm-project@95d993a Bumps stablehlo to openxla/stablehlo@c27ba67 torch-mlir carries forward fixes from llvm/torch-mlir#3982 Additional forward fixes at iree-org/torch-mlir@fd34bc5 Some C++ API changes to `getStridesAndOffset` from llvm/llvm-project#123465 --------- Signed-off-by: Nirvedh Meshram <nirvedh@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
@@ -96,7 +96,7 @@ struct FlattenMemRefTypeConverter final : public TypeConverter {
     addConversion([](MemRefType type) -> std::optional<Type> {
       int64_t offset;
       SmallVector<int64_t> strides;
-      if (failed(getStridesAndOffset(type, strides, offset))) {
+      if (failed(type.getStridesAndOffset(strides, offset))) {
         return nullptr;
       }
       // Since the memref gets linearized, use a stride 1, offset 0.
@@ -354,7 +354,7 @@ static Value linearizeIndices(Value sourceValue, ValueRange indices,
   // dynamic.
   SmallVector<int64_t> strides;
   int64_t offset;
-  if (succeeded(getStridesAndOffset(sourceType, strides, offset))) {
+  if (succeeded(sourceType.getStridesAndOffset(strides, offset))) {
     // The memref itself might have an offset, but we should not account for it
     // when computing the linearization. The original memref might be
     // `memref<?x?xf32, strided<[?, ?], offset: ?>`
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -781,7 +781,7 @@ MemRefDescriptor HALDispatchABI::loadBinding(Operation *forOp, int64_t ordinal,
   // Construct the MemRefDescriptor type based on the information we have.
   // NOTE: we could use the binding length to clamp this/check that the
   // requested range is valid.
-  auto [strides, offset] = getStridesAndOffset(memRefType);
+  auto [strides, offset] = memRefType.getStridesAndOffset();
   if (memRefType.hasStaticShape() &&
       !llvm::any_of(strides, ShapedType::isDynamic) &&
       !ShapedType::isDynamic(offset)) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
@@ -392,7 +392,7 @@ class ConvertIREEBindingSubspanOp : public ConvertToLLVMPattern {
     // Add the byte offset.
     Value llvmBufferBasePtr = llvmBufferArg;
 
-    auto [strides, offset] = getStridesAndOffset(memrefType);
+    auto [strides, offset] = memrefType.getStridesAndOffset();
     if (memrefType.hasStaticShape() &&
         !llvm::any_of(strides, ShapedType::isDynamic) &&
         !ShapedType::isDynamic(offset)) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir
@@ -71,7 +71,7 @@ hal.executable @mma_fused_fp16 {
 //          CHECK:   llvm.br
 //  CHECK-COUNT-2:   nvvm.ldmatrix {{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)>
 //  CHECK-COUNT-2:   nvvm.mma.sync {{.*}} {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, shape = #nvvm.shape<m = 16, n = 8, k = 16>} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
-//  CHECK-COUNT-2:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
+//  CHECK-COUNT-2:   nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg, %{{.*}} : !llvm.ptr<3>, !llvm.ptr<1>, i32
 //          CHECK:   nvvm.cp.async.commit.group
 //          CHECK:   nvvm.cp.async.wait.group 2
 //  CHECK-COUNT-2:   nvvm.ldmatrix {{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)>
@@ -158,7 +158,7 @@ hal.executable @mma_fused_f32 {
 //          CHECK:   nvvm.ldmatrix{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)>
 //  CHECK-COUNT-4:   llvm.extractvalue{{.*}} : !llvm.struct<(i32, i32, i32, i32)>
 //  CHECK-COUNT-2:   nvvm.mma.sync {{.*}} {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<tf32>, multiplicandBPtxType = #nvvm.mma_type<tf32>, shape = #nvvm.shape<m = 16, n = 8, k = 8>} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)>
-//  CHECK-COUNT-2:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
+//  CHECK-COUNT-2:   nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg, %{{.*}} : !llvm.ptr<3>, !llvm.ptr<1>, i32
 //          CHECK:   nvvm.cp.async.commit.group
 //          CHECK:   nvvm.cp.async.wait.group 2
 //          CHECK:   nvvm.ldmatrix{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
@@ -451,7 +451,7 @@ hal.executable @mma_fused {
 //           SM80:   nvvm.cp.async.wait.group 3
 //   SM80-COUNT-4:   nvvm.wmma.load{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)
 //   SM80-COUNT-2:   nvvm.wmma.mma
-//   SM80-COUNT-2:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
+//   SM80-COUNT-2:   nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg, %{{.*}} : !llvm.ptr<3>, !llvm.ptr<1>, i32
 //           SM80:   nvvm.cp.async.commit.group
 //           SM80:   llvm.br
 //       SM80-NOT:   nvvm.wmma.mma
@@ -529,7 +529,7 @@ hal.executable @mma_fused_fp16 {
 //           SM80:   nvvm.cp.async.wait.group 3
 //   SM80-COUNT-2:   nvvm.wmma.load{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)
 //   SM80-COUNT-1:   nvvm.wmma.mma
-//   SM80-COUNT-2:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
+//   SM80-COUNT-2:   nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg, %{{.*}} : !llvm.ptr<3>, !llvm.ptr<1>, i32
 //           SM80:   nvvm.cp.async.commit.group
 //           SM80:   llvm.br
 //       SM80-NOT:   nvvm.wmma.mma
@@ -602,7 +602,7 @@ hal.executable @mma_fused_fp16 {
 //           SM80:   nvvm.cp.async.wait.group 3
 //   SM80-COUNT-4:   nvvm.wmma.load{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)
 //   SM80-COUNT-2:   nvvm.wmma.mma
-//   SM80-COUNT-2:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
+//   SM80-COUNT-2:   nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg, %{{.*}} : !llvm.ptr<3>, !llvm.ptr<1>, i32
 //           SM80:   nvvm.cp.async.commit.group
 //           SM80:   llvm.br
 //       SM80-NOT:   nvvm.wmma.mma
@@ -670,7 +670,7 @@ hal.executable @mma_fused_fp16 {
 //           SM80:   nvvm.cp.async.wait.group 3
 //   SM80-COUNT-4:   nvvm.wmma.load{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)
 //   SM80-COUNT-2:   nvvm.wmma.mma
-//           SM80:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
+//           SM80:   nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg, %{{.*}} : !llvm.ptr<3>, !llvm.ptr<1>, i32
 //           SM80:   nvvm.cp.async.commit.group
 //           SM80:   llvm.br
 //       SM80-NOT:   nvvm.wmma.mma
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
@@ -183,9 +183,9 @@ hal.executable @ceildiv_expand_dispatch {
 //   CDNA3-LABEL: hal.executable public @ceildiv_expand_dispatch
 //         CDNA3:   hal.executable.variant public @rocm
 //     CDNA3-NOT:     arith.ceildivsi
-// CDNA3-COUNT-1:     llvm.select {{.*}} : i1, i32
-// CDNA3-COUNT-2:     llvm.sdiv {{.*}} : i32
-// CDNA3-COUNT-4:     llvm.icmp {{.*}} : i32
-// CDNA3-COUNT-2:     llvm.and {{.*}} : i1
-// CDNA3-COUNT-1:     llvm.or {{.*}} : i1
-// CDNA3-COUNT-1:     llvm.select {{.*}} : i1, i32
+// CDNA3-COUNT-1:     llvm.select {{.*}} : vector<1xi1>, vector<1xi32>
+// CDNA3-COUNT-2:     llvm.sdiv {{.*}} : vector<1xi32>
+// CDNA3-COUNT-4:     llvm.icmp {{.*}} : vector<1xi32>
+// CDNA3-COUNT-2:     llvm.and {{.*}} : vector<1xi1>
+// CDNA3-COUNT-1:     llvm.or {{.*}} : vector<1xi1>
+// CDNA3-COUNT-1:     llvm.select {{.*}} : vector<1xi1>, vector<1xi32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_lowering.mlir
@@ -12,7 +12,8 @@ module {
 // CHECK-LABEL: func.func @broadcast_read_lowering
 //  CHECK-SAME: (%[[ARG0:.+]]: memref<4096x32xf16>)
 //  CHECK: %[[INIT:.+]] = arith.constant dense<0.000000e+00> : vector<1x8xf16>
-//  CHECK: %[[ELEM:.+]] = memref.load %[[ARG0]]{{.*}} : memref<4096x32xf16>
+//  CHECK: %[[LOAD:.+]] = vector.load %[[ARG0]]{{.*}} : memref<4096x32xf16>
+//  CHECK: %[[ELEM:.+]] = vector.extract %[[LOAD]][0] : f16 from vector<1xf16>
 //  CHECK: %[[SPLAT:.+]] = vector.splat %[[ELEM]] : vector<8xf16>
 //  CHECK: %[[INSERT:.+]] = vector.insert %[[SPLAT]], %[[INIT]] [0] : vector<8xf16> into vector<1x8xf16>
 //  CHECK: return %[[INSERT]]
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerLinalgMicrokernels.cpp b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerLinalgMicrokernels.cpp
@@ -94,7 +94,7 @@ bool verifyMemRefInnerDimsContiguousRowMajor(MemRefType type) {
     return true;
   }
 
-  if (failed(mlir::getStridesAndOffset(type, strides, offset))) {
+  if (failed(type.getStridesAndOffset(strides, offset))) {
     return false;
   }
 
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.cpp
@@ -58,7 +58,7 @@ static Value getByteOffsetForIndices(OpBuilder &builder, Location loc,
   }
   SmallVector<int64_t> strides;
   int64_t offset;
-  if (failed(getStridesAndOffset(memrefType, strides, offset)) ||
+  if (failed(memrefType.getStridesAndOffset(strides, offset)) ||
       strides[0] != 1) {
     emitError(loc, "expected memref stride 1");
     return {};
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
@@ -346,12 +346,12 @@ def Util_AlignOp : Util_PureOp<"align", [
   }];
 
   let arguments = (ins
-    SignlessIntegerLike:$value,
-    SignlessIntegerLike:$alignment
+    SignlessIntegerOrIndexLike:$value,
+    SignlessIntegerOrIndexLike:$alignment
   );
 
   let results = (outs
-    SignlessIntegerLike:$result
+    SignlessIntegerOrIndexLike:$result
   );
 
   let assemblyFormat = [{
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputOps.td b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputOps.td
@@ -873,12 +873,12 @@ def IREEInput_AlignOp : IREEInput_PureOp<"align", [
   }];
 
   let arguments = (ins
-    SignlessIntegerLike:$value,
-    SignlessIntegerLike:$alignment
+    SignlessIntegerOrIndexLike:$value,
+    SignlessIntegerOrIndexLike:$alignment
   );
 
   let results = (outs
-    SignlessIntegerLike:$result
+    SignlessIntegerOrIndexLike:$result
   );
 
   let assemblyFormat = [{
diff --git a/tests/e2e/tosa_ops/pad.mlir b/tests/e2e/tosa_ops/pad.mlir
@@ -1,23 +1,23 @@
 func.func @pad_1D_test() {
     %0 = util.unfoldable_constant dense<42> : tensor<2xi32>
-    %1 = "tosa.const"() { value = dense<[3, 2]> : tensor<2xi32> } : ()  -> (tensor<2xi32>)
-    %result = tosa.pad %0, %1 : (tensor<2xi32>, tensor<2xi32>) -> (tensor<7xi32>)
+    %1 = tosa.const_shape { value = dense<[3, 2]> : tensor<2xindex> } : ()  -> !tosa.shape<2>
+    %result = tosa.pad %0, %1 : (tensor<2xi32>, !tosa.shape<2>) -> (tensor<7xi32>)
     check.expect_eq_const(%result, dense<[0, 0, 0, 42, 42, 0, 0]> : tensor<7xi32>) : tensor<7xi32>
     return
 }
 
 func.func @pad_2D_test() {
     %0 = util.unfoldable_constant dense<42> : tensor<2x2xi32>
-    %1 = "tosa.const"() { value = dense<[1, 1, 1, 1]> : tensor<4xi32> } : ()  -> (tensor<4xi32>)
-    %result = tosa.pad %0, %1 : (tensor<2x2xi32>, tensor<4xi32>) -> (tensor<4x4xi32>)
+    %1 = tosa.const_shape { value = dense<[1, 1, 1, 1]> : tensor<4xindex> } : ()  -> !tosa.shape<4>
+    %result = tosa.pad %0, %1 : (tensor<2x2xi32>, !tosa.shape<4>) -> (tensor<4x4xi32>)
     check.expect_eq_const(%result, dense<[[0, 0, 0, 0], [0, 42, 42, 0], [0, 42, 42, 0], [0, 0, 0, 0]]> : tensor<4x4xi32>) : tensor<4x4xi32>
     return
 }
 
 func.func @pad_3D_test() {
     %0 = util.unfoldable_constant dense<42> : tensor<1x1x2xi32>
-    %1 = "tosa.const"() { value = dense<[0, 1, 1, 0, 0, 0]> : tensor<6xi32> } : ()  -> (tensor<6xi32>)
-    %result = tosa.pad %0, %1 : (tensor<1x1x2xi32>, tensor<6xi32>) -> (tensor<2x2x2xi32>)
+    %1 = tosa.const_shape { value = dense<[0, 1, 1, 0, 0, 0]> : tensor<6xindex> } : ()  -> !tosa.shape<6>
+    %result = tosa.pad %0, %1 : (tensor<1x1x2xi32>, !tosa.shape<6>) -> (tensor<2x2x2xi32>)
     check.expect_eq_const(%result, dense<[[[0, 0], [42, 42]], [[0, 0], [0, 0]]]> : tensor<2x2x2xi32>) : tensor<2x2x2xi32>
     return
 }
diff --git a/third_party/llvm-project b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 47f99c8d32e71911d9d2a880bb66d8e98fa1b0cf
+Subproject commit 95d993a838863269dc1b90de3808c1e40ac6d5f2
diff --git a/third_party/stablehlo b/third_party/stablehlo
@@ -1 +1 @@
-Subproject commit 23d7f60f4f604f9d0ee89023e333422d996537fa
+Subproject commit c27ba678a712a401e4a6db75ec0ef9e1ce9e1777
diff --git a/third_party/torch-mlir b/third_party/torch-mlir
@@ -1 +1 @@
-Subproject commit ba00913d21df03a2c269ef74e682441a2569aca2
+Subproject commit fd34bc575c7b6e88d3283b74e83842aae3f16c13

Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ bool verifyMemRefInnerDimsContiguousRowMajor(MemRefType type) {`
`94`	`94`	`return true;`
`95`	`95`	`}`
`96`	`96`
`97`		`- if (failed(mlir::getStridesAndOffset(type, strides, offset))) {`
	`97`	`+ if (failed(type.getStridesAndOffset(strides, offset))) {`
`98`	`98`	`return false;`
`99`	`99`	`}`
`100`	`100`
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ static Value getByteOffsetForIndices(OpBuilder &builder, Location loc,`
`58`	`58`	`}`
`59`	`59`	`SmallVector<int64_t> strides;`
`60`	`60`	`int64_t offset;`
`61`		`- if (failed(getStridesAndOffset(memrefType, strides, offset)) \|\|`
	`61`	`+ if (failed(memrefType.getStridesAndOffset(strides, offset)) \|\|`
`62`	`62`	`strides[0] != 1) {`
`63`	`63`	`emitError(loc, "expected memref stride 1");`
`64`	`64`	`return {};`