Skip to content

Commit a95a408

Browse files
authored
[LLVM Pulldown] Another llvm version bump (#1072)
* [LLVM Pulldown] Another llvm version bump * fix pre-commit
1 parent 422c2a5 commit a95a408

File tree

9 files changed

+42
-47
lines changed

9 files changed

+42
-47
lines changed

build_tools/llvm_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
-bc1198719e206b2b6928ac437cb59107b45662d6
+c82e2f5c9ed08a270a1ec60bf7313af9c236ab98

lib/Dialect/XeTile/Transforms/BlockOpFallback.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -580,10 +580,10 @@ class XeTileBlockOpFallbackPass final
580580
analyzeAtomicRMWOp(op, convertToScatteredType);
581581
mlir::RewritePatternSet patterns(context);
582582
mlir::GreedyRewriteConfig config;
583-
config.enableRegionSimplification =
584-
mlir::GreedySimplifyRegionLevel::Disabled;
585-
config.useTopDownTraversal = true;
586-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
583+
config.setRegionSimplificationLevel(
584+
mlir::GreedySimplifyRegionLevel::Disabled);
585+
config.setUseTopDownTraversal(true);
586+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
587587
patterns.add<InitTileOpPattern>(context, uArchInterface,
588588
convertToScatteredType);
589589
patterns.add<LoadTileOpPattern, StoreTileOpPattern,

lib/Dialect/XeTile/Transforms/Blocking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,14 +1638,14 @@ class XeTileBlockingPass : public impl::XeTileBlockingBase<XeTileBlockingPass> {
16381638
MLIRContext &context = getContext();
16391639

16401640
GreedyRewriteConfig config;
1641-
config.strictMode = GreedyRewriteStrictness::ExistingOps;
1641+
config.setStrictness(GreedyRewriteStrictness::ExistingOps);
16421642
// ops inside regions, e.g., body of scf.for, needs to be processed
16431643
// before the op (e.g., scf.for) containing the region; otherwise
16441644
// the blocking analysis result for region args will be destroyed
16451645
// after scf.for is updated, leading to their users cannot be updated
16461646
// correctly.
16471647
mod.walk([&](Region *region) {
1648-
config.scope = region;
1648+
config.setScope(region);
16491649
RewritePatternSet patterns(&context);
16501650
populateXeTileBlockingPatterns(patterns, analysis);
16511651
llvm::SmallVector<Operation *> ops;

lib/Dialect/XeTile/Transforms/Canonicalization.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,10 @@ struct XeTileCanonicalizationPass final
428428
{
429429
mlir::RewritePatternSet patterns(context);
430430
mlir::GreedyRewriteConfig config;
431-
config.enableRegionSimplification =
432-
mlir::GreedySimplifyRegionLevel::Disabled;
433-
config.useTopDownTraversal = true;
434-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
431+
config.setRegionSimplificationLevel(
432+
mlir::GreedySimplifyRegionLevel::Disabled);
433+
config.setUseTopDownTraversal(true);
434+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
435435
patterns.add<VectorTransposeToXetileTransposeOpPattern,
436436
VectorBroadcastToXetileBroadcastOpPattern,
437437
VectorMultiReductionToXeTileReduce>(context);
@@ -543,10 +543,10 @@ struct XeTileCanonicalizationPass final
543543
{
544544
mlir::RewritePatternSet patterns(context);
545545
mlir::GreedyRewriteConfig config;
546-
config.enableRegionSimplification =
547-
mlir::GreedySimplifyRegionLevel::Disabled;
548-
config.useTopDownTraversal = true;
549-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
546+
config.setRegionSimplificationLevel(
547+
mlir::GreedySimplifyRegionLevel::Disabled);
548+
config.setUseTopDownTraversal(true);
549+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
550550
patterns.add<RemoveRedundantTransposeOpPattern>(context);
551551

552552
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns),

lib/Transforms/HoistTranspose.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,10 @@ struct HoistTransposePass final
169169

170170
mlir::RewritePatternSet patterns(context);
171171
mlir::GreedyRewriteConfig config;
172-
config.enableRegionSimplification =
173-
mlir::GreedySimplifyRegionLevel::Disabled;
174-
config.useTopDownTraversal = true;
175-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
172+
config.setRegionSimplificationLevel(
173+
mlir::GreedySimplifyRegionLevel::Disabled);
174+
config.setUseTopDownTraversal(true);
175+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
176176
// TODO: Currently we only support hoisting TransposeOps before
177177
// ExtractStridedSliceOp. We may also want to support hoisting TransposeOps
178178
// before element-wise ops.

lib/Transforms/OptimizeTranspose.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -953,9 +953,9 @@ struct OptimizeTransposePass final
953953
auto *context = &getContext();
954954
RewritePatternSet patterns(context);
955955
GreedyRewriteConfig config;
956-
config.enableRegionSimplification = GreedySimplifyRegionLevel::Disabled;
957-
config.useTopDownTraversal = true;
958-
config.strictMode = GreedyRewriteStrictness::ExistingAndNewOps;
956+
config.setRegionSimplificationLevel(GreedySimplifyRegionLevel::Disabled);
957+
config.setUseTopDownTraversal(true);
958+
config.setStrictness(GreedyRewriteStrictness::ExistingAndNewOps);
959959
patterns.add<TransposeRewritePattern>(context, analysis, uArchInterface);
960960
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns),
961961
config))) {

test/Transforms/lit.local.cfg

Lines changed: 0 additions & 6 deletions
This file was deleted.

test/Transforms/vector-linearize.mlir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -258,21 +258,21 @@ func.func @test_vector_transpose_16x16(%arg: vector<16x16xf32>) -> vector<16x16x
258258
// CHECK: %[[L3:.*]] = vector.load %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
259259
// CHECK: %[[T3:.*]] = vector.shuffle %[[L3]], %[[L3]] [0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<4xf32>, vector<4xf32>
260260
// CHECK: %[[R4:.*]] = vector.shuffle %[[R3]], %[[T3]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19] : vector<16xf32>, vector<16xf32>
261-
//
262-
// CHECK: %[[S0:.*]] = vector.shuffle %[[R4]], %[[R4]] [0, 1, 2, 3] : vector<16xf32>, vector<16xf32>
261+
// CHECK: %[[R5:.*]] = vector.shape_cast %[[R4]] : vector<16xf32> to vector<16xf32>
262+
// CHECK: %[[S0:.*]] = vector.shuffle %[[R5]], %[[R5]] [0, 1, 2, 3] : vector<16xf32>, vector<16xf32>
263263
// CHECK: vector.store %[[S0]], %{{.*}}[%[[C0]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
264264
//
265-
// CHECK: %[[S1:.*]] = vector.shuffle %[[R4]], %[[R4]] [4, 5, 6, 7] : vector<16xf32>, vector<16xf32>
265+
// CHECK: %[[S1:.*]] = vector.shuffle %[[R5]], %[[R5]] [4, 5, 6, 7] : vector<16xf32>, vector<16xf32>
266266
// CHECK: %[[C1:.*]] = arith.constant 1 : index
267267
// CHECK: %[[I1:.*]] = arith.addi %[[C0]], %[[C1]] : index
268268
// CHECK: vector.store %[[S1]], %{{.*}}[%[[I1]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
269269
//
270-
// CHECK: %[[S2:.*]] = vector.shuffle %[[R4]], %[[R4]] [8, 9, 10, 11] : vector<16xf32>, vector<16xf32>
270+
// CHECK: %[[S2:.*]] = vector.shuffle %[[R5]], %[[R5]] [8, 9, 10, 11] : vector<16xf32>, vector<16xf32>
271271
// CHECK: %[[C2:.*]] = arith.constant 2 : index
272272
// CHECK: %[[I2:.*]] = arith.addi %[[C0]], %[[C2]] : index
273273
// CHECK: vector.store %[[S2]], %{{.*}}[%[[I2]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
274274
//
275-
// CHECK: %[[S3:.*]] = vector.shuffle %[[R4]], %[[R4]] [12, 13, 14, 15] : vector<16xf32>, vector<16xf32>
275+
// CHECK: %[[S3:.*]] = vector.shuffle %[[R5]], %[[R5]] [12, 13, 14, 15] : vector<16xf32>, vector<16xf32>
276276
// CHECK: %[[C3:.*]] = arith.constant 3 : index
277277
// CHECK: %[[I3:.*]] = arith.addi %[[C0]], %[[C3]] : index
278278
// CHECK: vector.store %[[S3]], %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
@@ -316,21 +316,21 @@ func.func @test_vector_store_load_4x4_f16(%buffer: memref<4x4xf16>) {
316316
// CHECK: %[[L3:.*]] = vector.load %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
317317
// CHECK: %[[T3:.*]] = vector.shuffle %[[L3]], %[[L3]] [0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<4xf16>, vector<4xf16>
318318
// CHECK: %[[R4:.*]] = vector.shuffle %[[R3]], %[[T3]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19] : vector<16xf16>, vector<16xf16>
319-
//
320-
// CHECK: %[[S0:.*]] = vector.shuffle %[[R4]], %[[R4]] [0, 1, 2, 3] : vector<16xf16>, vector<16xf16>
319+
// CHECK: %[[R5:.*]] = vector.shape_cast %[[R4]] : vector<16xf16> to vector<16xf16>
320+
// CHECK: %[[S0:.*]] = vector.shuffle %[[R5]], %[[R5]] [0, 1, 2, 3] : vector<16xf16>, vector<16xf16>
321321
// CHECK: vector.store %[[S0]], %{{.*}}[%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
322322
//
323-
// CHECK: %[[S1:.*]] = vector.shuffle %[[R4]], %[[R4]] [4, 5, 6, 7] : vector<16xf16>, vector<16xf16>
323+
// CHECK: %[[S1:.*]] = vector.shuffle %[[R5]], %[[R5]] [4, 5, 6, 7] : vector<16xf16>, vector<16xf16>
324324
// CHECK: %[[C1:.*]] = arith.constant 1 : index
325325
// CHECK: %[[I1:.*]] = arith.addi %[[C0]], %[[C1]] : index
326326
// CHECK: vector.store %[[S1]], %{{.*}}[%[[I1]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
327327
//
328-
// CHECK: %[[S2:.*]] = vector.shuffle %[[R4]], %[[R4]] [8, 9, 10, 11] : vector<16xf16>, vector<16xf16>
328+
// CHECK: %[[S2:.*]] = vector.shuffle %[[R5]], %[[R5]] [8, 9, 10, 11] : vector<16xf16>, vector<16xf16>
329329
// CHECK: %[[C2:.*]] = arith.constant 2 : index
330330
// CHECK: %[[I2:.*]] = arith.addi %[[C0]], %[[C2]] : index
331331
// CHECK: vector.store %[[S2]], %{{.*}}[%[[I2]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
332332
//
333-
// CHECK: %[[S3:.*]] = vector.shuffle %[[R4]], %[[R4]] [12, 13, 14, 15] : vector<16xf16>, vector<16xf16>
333+
// CHECK: %[[S3:.*]] = vector.shuffle %[[R5]], %[[R5]] [12, 13, 14, 15] : vector<16xf16>, vector<16xf16>
334334
// CHECK: %[[C3:.*]] = arith.constant 3 : index
335335
// CHECK: %[[I3:.*]] = arith.addi %[[C0]], %[[C3]] : index
336336
// CHECK: vector.store %[[S3]], %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>

test/Transforms/xegpu-optimize-transpose.mlir

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -123,17 +123,18 @@ func.func @test_scf_for_array_len(%arg0 : memref<64x64xf16>, %arg1 : vector<8x16
123123
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<64x64xf16>, %[[ARG1:[a-zA-Z0-9]+]]: vector<8x16xf16>, %[[ARG2:[a-zA-Z0-9]+]]: memref<64x64xf32>) {
124124
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
125125
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
126+
// CHECK-DAG: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32>
126127
// CHECK: scf.for {{.*}} {
127128
// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}, %[[C0]]] : memref<64x64xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
128129
// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}, %[[C16]]] : memref<64x64xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
129-
// CHECK: %[[T3:.*]]:3 = scf.for {{.*}} iter_args(%[[ARG5:.*]] = %[[T2]], %[[ARG6:.*]] = %[[T0]], %[[ARG7:.*]] = %[[T1]]) -> (vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>) {
130-
// CHECK-DAG: %[[T5:.*]] = xegpu.load_nd %[[ARG6]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
131-
// CHECK-DAG: %[[T7:.*]] = xegpu.load_nd %[[ARG7]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
132-
// CHECK-DAG: %[[T9:.*]] = xegpu.update_nd_offset %[[ARG6]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
133-
// CHECK-DAG: %[[T10:.*]] = xegpu.update_nd_offset %[[ARG7]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
134-
// CHECK: scf.yield %{{.*}}, %[[T9]], %[[T10]] : vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
135-
// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG2]][{{.*}}] : memref<64x64xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
136-
// CHECK: xegpu.store_nd %[[T3]]#0, %{{.*}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
130+
// CHECK: %[[T2:.*]]:3 = scf.for {{.*}} iter_args(%[[ARG5:.*]] = %[[CST]], %[[ARG6:.*]] = %[[T0]], %[[ARG7:.*]] = %[[T1]]) -> (vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>) {
131+
// CHECK-DAG: %[[T4:.*]] = xegpu.load_nd %[[ARG6]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
132+
// CHECK-DAG: %[[T6:.*]] = xegpu.load_nd %[[ARG7]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
133+
// CHECK-DAG: %[[T8:.*]] = xegpu.update_nd_offset %[[ARG6]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
134+
// CHECK-DAG: %[[T9:.*]] = xegpu.update_nd_offset %[[ARG7]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
135+
// CHECK: scf.yield %{{.*}}, %[[T8]], %[[T9]] : vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
136+
// CHECK: %[[T3:.*]] = xegpu.create_nd_tdesc %[[ARG2]][{{.*}}] : memref<64x64xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
137+
// CHECK: xegpu.store_nd %[[T2]]#0, %{{.*}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
137138
func.func @test_nested_scf_for_array_len(%arg0: memref<64x64xf16>, %arg1: vector<8x16xf16>, %arg2: memref<64x64xf32>) {
138139
%c0 = arith.constant 0 : index
139140
%c8 = arith.constant 8 : index
@@ -536,7 +537,7 @@ module attributes {gpu.container_module} {
536537
gpu.launch_func @add_bf16_EC831D15_4614D61C_861::@add_bf16_EC831D15_4614D61C_861 blocks in (%c48, %c1, %c1) threads in (%c48, %c1, %c1) args(%arg0 : memref<2x16x384x384xbf16>, %arg1 : memref<2x1x384x384xbf16>, %arg2 : memref<2x16x384x384xbf16>)
537538
return
538539
}
539-
gpu.module @add_bf16_EC831D15_4614D61C_861 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Bfloat16ConversionINTEL, BFloat16TypeKHR, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR, VectorAnyINTEL, VectorComputeINTEL, RegionGroupINTEL], [SPV_EXT_shader_atomic_float_add, SPV_KHR_bfloat16, SPV_KHR_expect_assume, SPV_INTEL_bfloat16_conversion, SPV_INTEL_vector_compute, SPV_INTEL_region_group]>, api=OpenCL, #spirv.resource_limits<>>} {
540+
gpu.module @add_bf16_EC831D15_4614D61C_861 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Bfloat16ConversionINTEL, BFloat16TypeKHR, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR, VectorAnyINTEL, VectorComputeINTEL], [SPV_EXT_shader_atomic_float_add, SPV_KHR_bfloat16, SPV_KHR_expect_assume, SPV_INTEL_bfloat16_conversion, SPV_INTEL_vector_compute]>, api=OpenCL, #spirv.resource_limits<>>} {
540541
gpu.func @add_bf16_EC831D15_4614D61C_861(%arg0: memref<2x16x384x384xbf16>, %arg1: memref<2x1x384x384xbf16>, %arg2: memref<2x16x384x384xbf16>) kernel attributes {VectorComputeFunctionINTEL, known_block_size = array<i32: 48, 1, 1>, known_grid_size = array<i32: 48, 1, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
541542
%c8 = arith.constant 8 : index
542543
%c2 = arith.constant 2 : index

0 commit comments

Comments (0)