Skip to content

Commit 94b8804

Browse files
Groverkss and giacs-epic
authored and committed
Integrate LLVM at 2f925d75dee8b4012d747d889ac4bb1d8a31d5a0 (iree-org#19184)
Still carrying a revert for 1004865f1ca41a9581da8747f34b29862d3ebc3d. Dropped the reverts on 3ad0148020ca91cc288bffd8ad36e25f7555a3bb and c02b8a01b7caf2e4ffe17a123f1bcf59192e4b39 after fixes upstream. Also carries a cherry-pick for llvm/llvm-project#116650. Signed-off-by: Giacomo Serafini <[email protected]>
1 parent 737fcf5 commit 94b8804

File tree

6 files changed

+14
-16
lines changed

6 files changed

+14
-16
lines changed

compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,12 @@ moveScalarAndBindingUniformCode(vector::WarpExecuteOnLane0Op warpOp) {
139139
op->moveBefore(warpOp);
140140
}
141141

142-
/// Pattern to convert InsertElement to broadcast, this is a workaround until
143-
/// MultiDimReduction distribution is supported.
144-
struct InsertElementToBroadcast final
145-
: OpRewritePattern<vector::InsertElementOp> {
146-
using OpRewritePattern<vector::InsertElementOp>::OpRewritePattern;
142+
/// Pattern to convert single element vector.insert to broadcast, this is a
143+
/// workaround until MultiDimReduction distribution is supported.
144+
struct InsertToBroadcast final : OpRewritePattern<vector::InsertOp> {
145+
using OpRewritePattern::OpRewritePattern;
147146

148-
LogicalResult matchAndRewrite(vector::InsertElementOp insertOp,
147+
LogicalResult matchAndRewrite(vector::InsertOp insertOp,
149148
PatternRewriter &rewriter) const override {
150149
if (insertOp.getDestVectorType().getNumElements() != 1)
151150
return failure();
@@ -209,7 +208,7 @@ struct VectorReductionToGPUPass final
209208
vector::populateVectorMultiReductionLoweringPatterns(
210209
patterns, vector::VectorMultiReductionLowering::InnerReduction);
211210
// Add clean up patterns after lowering of multidimreduce lowering.
212-
patterns.add<InsertElementToBroadcast>(ctx);
211+
patterns.add<InsertToBroadcast>(ctx);
213212
vector::ShapeCastOp::getCanonicalizationPatterns(patterns, ctx);
214213
vector::BroadcastOp::getCanonicalizationPatterns(patterns, ctx);
215214
vector::ExtractOp::getCanonicalizationPatterns(patterns, ctx);

compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ module {
4444
// CHECK-DAG: %[[TID:.*]] = gpu.thread_id x
4545
// CHECK-DAG: %[[VCST:.*]] = arith.constant dense<0.000000e+00> : vector<1xf32>
4646
// CHECK: %[[F:.*]] = scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[V0:.*]] = %[[VCST]]) -> (vector<1xf32>) {
47-
// CHECK-DAG: %[[E:.*]] = vector.extractelement %[[V0]][%[[C0]] : index] : vector<1xf32>
47+
// CHECK-DAG: %[[E:.*]] = vector.extract %[[V0]][0] : f32 from vector<1xf32>
4848
// CHECK-DAG: %[[ID:.*]] = affine.apply
4949
// CHECK-DAG: %[[V1:.*]] = vector.transfer_read %{{.*}}[%{{.*}}, %[[ID]]], %{{.*}} {in_bounds = [true]} : memref<128x384xf32>, vector<1xf32>
5050
// CHECK: %[[S:.*]] = vector.extract %[[V1]][0] : f32 from vector<1xf32>

compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x
1212
// CHECK-RESHAPE: %[[EXPANDED:.+]] = tensor.expand_shape %[[IN]] {{\[}}[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 1, 32, 1, 8] : tensor<1x1x32x8xf32> into tensor<1x1x1x32x1x8xf32>
1313
// CHECK-RESHAPE: %[[RESULT:.+]] = linalg.transpose ins(%[[EXPANDED]] : tensor<1x1x1x32x1x8xf32>) outs(%[[OUT]] : tensor<1x1x1x1x8x32xf32>) permutation = [0, 1, 2, 4, 5, 3]
1414

15-
// CHECK: %[[TILE:.+]] = tensor.extract_slice %[[IN]][0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> to tensor<32x8xf32>
16-
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
17-
// CHECK: %[[TRANS:.+]] = linalg.transpose ins(%[[TILE]] : tensor<32x8xf32>) outs(%[[EMPTY]] : tensor<8x32xf32>) permutation = [1, 0]
15+
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x1x8x32xf32>
16+
// CHECK: %[[TRANS:.+]] = linalg.transpose ins(%[[IN]] : tensor<1x1x32x8xf32>) outs(%[[EMPTY]] : tensor<1x1x8x32xf32>) permutation = [0, 1, 3, 2]
1817
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[TRANS]] into %[[OUT]][0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 8, 32] [1, 1, 1, 1, 1, 1]
1918

2019
// CHECK-ALL: return %[[RESULT]]

compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,5 +276,5 @@ func.func @split_reduction_double_reduction_unsupported() attributes {hal.execut
276276
}
277277

278278
// CHECK-LABEL: func.func @split_reduction_double_reduction_unsupported()
279-
// CHECK: vector.insertelement %{{.+}}, %{{.+}} : vector<4xi32>
280-
// CHECK-NOT: vector.insertelement %{{.+}}, %{{.+}} : vector<1xi32>
279+
// CHECK: vector.insert %{{.+}}, %{{.+}} : i32 into vector<4xi32>
280+
// CHECK-NOT: vector.insert %{{.+}}, %{{.+}} : i32 into vector<1xi32>

compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ hal.executable @i4_dequant_unit_matmul_f16 {
106106
// CHECK: %[[VS1:.+]] = spirv.VectorShuffle [2 : i32, 3 : i32] %[[LD]]
107107
// CHECK: spirv.Bitcast %[[VS1]] : vector<2xi32> to vector<4xf16>
108108

109-
// CHECK: spirv.GroupNonUniformFAdd "Subgroup" "Reduce" {{.*}} : f16
109+
// CHECK: spirv.GroupNonUniformFAdd <Subgroup> <Reduce> {{.*}} : f16
110110

111111
// CHECK: spirv.mlir.selection
112112

@@ -223,6 +223,6 @@ hal.executable @i4_dequant_matvec_f16_subgroup_64 {
223223
// CHECK: %[[LD:.+]] = spirv.Load "Function" {{.*}} : vector<4xf16>
224224
// CHECK: %[[RES:.+]] = spirv.Dot %[[LD]], %[[CSTVEC4XF16_1]] : vector<4xf16> -> f16
225225

226-
// CHECK: spirv.GroupNonUniformFAdd "Subgroup" "Reduce" %[[RES]] : f16
226+
// CHECK: spirv.GroupNonUniformFAdd <Subgroup> <Reduce> %[[RES]] : f16
227227

228228
// CHECK: spirv.mlir.selection

third_party/llvm-project

Submodule llvm-project updated 3863 files

0 commit comments

Comments (0)