Integrate llvm/llvm-project@ab10f08 (#23683)

RattataKing · web-flow · commit 07fb538f5fa3 · 2026-03-06T19:22:32.000-05:00
Integrate up to llvm/llvm-project@ab10f08. Reapply: llvm/llvm-project#183395, which was reverted upstream in llvm/llvm-project@f2cdf3f.
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp
@@ -229,7 +229,7 @@ struct VectorReductionToGPUPass final
     // TODO: Remove once MultiDimReduce is supported by distribute patterns.
     {
       RewritePatternSet patterns(ctx);
-      vector::populateVectorMultiReductionReorderAndExpandPatterns(
+      vector::populateVectorMultiReductionReorderPatterns(
           patterns, vector::VectorMultiReductionLowering::InnerReduction);
       vector::populateVectorMultiReductionFlatteningPatterns(
           patterns, vector::VectorMultiReductionLowering::InnerReduction);
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution_multi_reduce.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution_multi_reduce.mlir
@@ -155,18 +155,16 @@ builtin.module attributes { transform.with_named_sequence } {
 // Thread reduction
 // CHECK: %[[THREAD_RED0:.+]] = gpu.subgroup_reduce  maximumf %{{.*}} cluster(size = 4, stride = 16) : (f32) -> f32
 // CHECK: %[[THREAD_RED2:.+]] = gpu.subgroup_reduce  maximumf %{{.*}} cluster(size = 4, stride = 16) : (f32) -> f32
-// CHECK: %[[THREAD_RED3:.+]] = vector.from_elements %[[THREAD_RED0]], %[[THREAD_RED2]] : vector<2xf32>
-// CHECK: %[[THREAD_RED4:.+]] = vector.shape_cast %[[THREAD_RED3]] : vector<2xf32> to vector<2x1x1xf32>
 // Subgroup reduction
 // CHECK-DAG: %[[ALLOC:.+]] = memref.alloc() : memref<32x2xf32, #gpu.address_space<workgroup>>
 // CHECK: gpu.barrier memfence [#gpu.address_space<workgroup>]
 // CHECK-DAG: %[[SGID:.+]]:3 = affine.delinearize_index %thread_id_x into (2, 64)
 // CHECK-DAG: %[[TIDX:.+]]:2 = affine.delinearize_index %thread_id_x into (16)
-// CHECK-DAG: %[[EXTRACT0:.+]] = vector.extract %[[THREAD_RED4]][0] : vector<1x1xf32> from vector<2x1x1xf32>
-// CHECK-DAG: %[[EXTRACT1:.+]] = vector.extract %[[THREAD_RED4]][1] : vector<1x1xf32> from vector<2x1x1xf32>
+// CHECK-DAG: %[[BROADCAST0:.+]] = vector.broadcast %[[THREAD_RED0]] : f32 to vector<1x1xf32>
+// CHECK-DAG: vector.transfer_write %[[BROADCAST0]], %[[ALLOC]][%[[TIDX]]#1, %[[SGID]]#1]
 // CHECK-DAG: %[[TIDX1:.+]] = affine.linearize_index disjoint [%c1, %[[TIDX]]#1] by (2, 16) : index
-// CHECK-DAG: vector.transfer_write %[[EXTRACT0]], %[[ALLOC]][%[[TIDX]]#1, %[[SGID]]#1]
-// CHECK-DAG: vector.transfer_write %[[EXTRACT1]], %[[ALLOC]][%[[TIDX1]], %[[SGID]]#1]
+// CHECK-DAG: %[[BROADCAST1:.+]] = vector.broadcast %[[THREAD_RED2]] : f32 to vector<1x1xf32>
+// CHECK-DAG: vector.transfer_write %[[BROADCAST1]], %[[ALLOC]][%[[TIDX1]], %[[SGID]]#1]
 // CHECK: gpu.barrier memfence [#gpu.address_space<workgroup>]
 // CHECK-DAG: %[[BATCH0:.+]]:3 = affine.delinearize_index %thread_id_x into (2, 16) : index, index, index
 // CHECK-DAG: %[[SG_READ0:.+]] = vector.transfer_read %alloc[%[[BATCH0]]#2, %[[BATCH0]]#1], %{{.*}} : memref<32x2xf32, #gpu.address_space<workgroup>>, vector<1x1xf32>
@@ -177,9 +175,8 @@ builtin.module attributes { transform.with_named_sequence } {
 // CHECK-DAG: %[[RED0:.+]] = gpu.subgroup_reduce  maximumf %[[DISTR0]] cluster(size = 2, stride = 16) : (f32) -> f32
 // CHECK-DAG: %[[DISTR1:.+]] = vector.extract %[[SG_READ1]][0, 0] : f32 from vector<1x1xf32>
 // CHECK-DAG: %[[RED1:.+]] = gpu.subgroup_reduce  maximumf %[[DISTR1]] cluster(size = 2, stride = 16) : (f32) -> f32
-// CHECK-DAG: %[[INS:.+]] = vector.from_elements %[[RED0]], %[[RED1]] : vector<2xf32>
-// CHECK-DAG: %[[CAST:.+]] = vector.shape_cast %[[INS]] : vector<2xf32> to vector<2x1x1xf32>
-// CHECK-DAG: arith.maximumf %[[CAST]], %[[ACC]] : vector<2x1x1xf32>
+// CHECK-DAG: %[[INS:.+]] = vector.from_elements %[[RED0]], %[[RED1]] : vector<2x1x1xf32>
+// CHECK-DAG: arith.maximumf %[[INS]], %[[ACC]] : vector<2x1x1xf32>
 
 // -----
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVirtualVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVirtualVectorLowering.cpp
@@ -90,7 +90,7 @@ void LLVMCPUVirtualVectorLoweringPass::runOnOperation() {
     vector::populateScalarVectorTransferLoweringPatterns(
         patterns, /*benefit=*/1, /*allowMultipleUses=*/true);
     vector::populateVectorTransferPermutationMapLoweringPatterns(patterns);
-    vector::populateVectorMultiReductionReorderAndExpandPatterns(
+    vector::populateVectorMultiReductionReorderPatterns(
         patterns, vectorMultiReductionLowering);
     vector::populateVectorMultiReductionFlatteningPatterns(
         patterns, vectorMultiReductionLowering);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp
@@ -607,7 +607,7 @@ struct LLVMGPUVectorLoweringPass final
       vector::populateVectorGatherLoweringPatterns(contractLoweringPatterns);
       vector::populateVectorMaskOpLoweringPatterns(contractLoweringPatterns);
       vector::populateVectorShapeCastLoweringPatterns(contractLoweringPatterns);
-      vector::populateVectorMultiReductionReorderAndExpandPatterns(
+      vector::populateVectorMultiReductionReorderPatterns(
           contractLoweringPatterns,
           vector::VectorMultiReductionLowering::InnerReduction);
       vector::populateVectorMultiReductionFlatteningPatterns(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
@@ -500,7 +500,7 @@ static void populateMultiReductionLoweringPatterns(Operation *target,
                                                    PatternBenefit benefit) {
   assert(target->hasTrait<OpTrait::IsIsolatedFromAbove>());
 
-  vector::populateVectorMultiReductionReorderAndExpandPatterns(
+  vector::populateVectorMultiReductionReorderPatterns(
       patterns, vector::VectorMultiReductionLowering::InnerReduction, benefit);
   vector::populateVectorMultiReductionFlatteningPatterns(
       patterns, vector::VectorMultiReductionLowering::InnerReduction, benefit);
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVFinalVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVFinalVectorLowering.cpp
@@ -80,7 +80,7 @@ class SPIRVFinalVectorLoweringPass final
       vector::populateVectorBroadcastLoweringPatterns(patterns);
       vector::populateVectorContractLoweringPatterns(
           patterns, options.vectorContractLowering);
-      vector::populateVectorMultiReductionReorderAndExpandPatterns(
+      vector::populateVectorMultiReductionReorderPatterns(
           patterns, vector::VectorMultiReductionLowering::InnerParallel);
       vector::populateVectorMultiReductionFlatteningPatterns(
           patterns, vector::VectorMultiReductionLowering::InnerParallel);
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVInitialVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVInitialVectorLowering.cpp
@@ -391,7 +391,7 @@ class SPIRVInitialLoweringPass final
         return WalkResult::advance();
       });
       RewritePatternSet patterns(context);
-      vector::populateVectorMultiReductionReorderAndExpandPatterns(
+      vector::populateVectorMultiReductionReorderPatterns(
           patterns, vector::VectorMultiReductionLowering::InnerParallel);
       vector::populateVectorMultiReductionFlatteningPatterns(
           patterns, vector::VectorMultiReductionLowering::InnerParallel);
diff --git a/compiler/src/iree/compiler/Dialect/LinalgExt/IR/test/invalid.mlir b/compiler/src/iree/compiler/Dialect/LinalgExt/IR/test/invalid.mlir
@@ -1058,7 +1058,7 @@ func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf
 // -----
 
 // duplicate element in `outer_dims_perm`, fail.
-func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<256x128xf32> {
   // expected-error@+1 {{invalid outer_dims_perm vector}}
   %0 = iree_linalg_ext.unpack %output outer_dims_perm = [1, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %input : (tensor<8x8x32x16xf32> tensor<256x128xf32>) -> tensor<256x128xf32>
   return %0 : tensor<256x128xf32>
@@ -1067,7 +1067,7 @@ func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf
 // -----
 
 // `outer_dims_perm` is out of bound.
-func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<256x128xf32> {
   // expected-error@+1 {{invalid outer_dims_perm vector}}
   %0 = iree_linalg_ext.unpack %output outer_dims_perm = [2, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %input : (tensor<8x8x32x16xf32> tensor<256x128xf32>) -> tensor<256x128xf32>
   return %0 : tensor<256x128xf32>
diff --git a/third_party/llvm-project b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 0a75a21945e1b981dc223abcd8a8ade3e34d983d
+Subproject commit ab10f0848b22417afc6352051b6e26c7704084d2

Original file line number	Diff line number	Diff line change
`@@ -229,7 +229,7 @@ struct VectorReductionToGPUPass final`
`229`	`229`	`// TODO: Remove once MultiDimReduce is supported by distribute patterns.`
`230`	`230`	`{`
`231`	`231`	`RewritePatternSet patterns(ctx);`
`232`		`- vector::populateVectorMultiReductionReorderAndExpandPatterns(`
	`232`	`+ vector::populateVectorMultiReductionReorderPatterns(`
`233`	`233`	`patterns, vector::VectorMultiReductionLowering::InnerReduction);`
`234`	`234`	`vector::populateVectorMultiReductionFlatteningPatterns(`
`235`	`235`	`patterns, vector::VectorMultiReductionLowering::InnerReduction);`