Skip to content

Commit a95a408

Browse files
authored
[LLVM Pulldown] Another llvm version bump (#1072)
* [LLVM Pulldown] Another llvm version bump * fix pre-commit
1 parent 422c2a5 commit a95a408

File tree

9 files changed

+42
-47
lines changed

9 files changed

+42
-47
lines changed

build_tools/llvm_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
-bc1198719e206b2b6928ac437cb59107b45662d6
+c82e2f5c9ed08a270a1ec60bf7313af9c236ab98

lib/Dialect/XeTile/Transforms/BlockOpFallback.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -580,10 +580,10 @@ class XeTileBlockOpFallbackPass final
580580
analyzeAtomicRMWOp(op, convertToScatteredType);
581581
mlir::RewritePatternSet patterns(context);
582582
mlir::GreedyRewriteConfig config;
583-
config.enableRegionSimplification =
584-
mlir::GreedySimplifyRegionLevel::Disabled;
585-
config.useTopDownTraversal = true;
586-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
583+
config.setRegionSimplificationLevel(
584+
mlir::GreedySimplifyRegionLevel::Disabled);
585+
config.setUseTopDownTraversal(true);
586+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
587587
patterns.add<InitTileOpPattern>(context, uArchInterface,
588588
convertToScatteredType);
589589
patterns.add<LoadTileOpPattern, StoreTileOpPattern,

lib/Dialect/XeTile/Transforms/Blocking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,14 +1638,14 @@ class XeTileBlockingPass : public impl::XeTileBlockingBase<XeTileBlockingPass> {
16381638
MLIRContext &context = getContext();
16391639

16401640
GreedyRewriteConfig config;
1641-
config.strictMode = GreedyRewriteStrictness::ExistingOps;
1641+
config.setStrictness(GreedyRewriteStrictness::ExistingOps);
16421642
// ops inside regions, e.g., body of scf.for, needs to be processed
16431643
// before the op (e.g., scf.for) containing the region; otherwise
16441644
// the blocking analysis result for region args will be destroyed
16451645
// after scf.for is updated, leading to their users cannot be updated
16461646
// correctly.
16471647
mod.walk([&](Region *region) {
1648-
config.scope = region;
1648+
config.setScope(region);
16491649
RewritePatternSet patterns(&context);
16501650
populateXeTileBlockingPatterns(patterns, analysis);
16511651
llvm::SmallVector<Operation *> ops;

lib/Dialect/XeTile/Transforms/Canonicalization.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,10 @@ struct XeTileCanonicalizationPass final
428428
{
429429
mlir::RewritePatternSet patterns(context);
430430
mlir::GreedyRewriteConfig config;
431-
config.enableRegionSimplification =
432-
mlir::GreedySimplifyRegionLevel::Disabled;
433-
config.useTopDownTraversal = true;
434-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
431+
config.setRegionSimplificationLevel(
432+
mlir::GreedySimplifyRegionLevel::Disabled);
433+
config.setUseTopDownTraversal(true);
434+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
435435
patterns.add<VectorTransposeToXetileTransposeOpPattern,
436436
VectorBroadcastToXetileBroadcastOpPattern,
437437
VectorMultiReductionToXeTileReduce>(context);
@@ -543,10 +543,10 @@ struct XeTileCanonicalizationPass final
543543
{
544544
mlir::RewritePatternSet patterns(context);
545545
mlir::GreedyRewriteConfig config;
546-
config.enableRegionSimplification =
547-
mlir::GreedySimplifyRegionLevel::Disabled;
548-
config.useTopDownTraversal = true;
549-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
546+
config.setRegionSimplificationLevel(
547+
mlir::GreedySimplifyRegionLevel::Disabled);
548+
config.setUseTopDownTraversal(true);
549+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
550550
patterns.add<RemoveRedundantTransposeOpPattern>(context);
551551

552552
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns),

lib/Transforms/HoistTranspose.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,10 @@ struct HoistTransposePass final
169169

170170
mlir::RewritePatternSet patterns(context);
171171
mlir::GreedyRewriteConfig config;
172-
config.enableRegionSimplification =
173-
mlir::GreedySimplifyRegionLevel::Disabled;
174-
config.useTopDownTraversal = true;
175-
config.strictMode = mlir::GreedyRewriteStrictness::ExistingAndNewOps;
172+
config.setRegionSimplificationLevel(
173+
mlir::GreedySimplifyRegionLevel::Disabled);
174+
config.setUseTopDownTraversal(true);
175+
config.setStrictness(mlir::GreedyRewriteStrictness::ExistingAndNewOps);
176176
// TODO: Currently we only support hoisting TransposeOps before
177177
// ExtractStridedSliceOp. We may also want to support hoisting TransposeOps
178178
// before element-wise ops.

lib/Transforms/OptimizeTranspose.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -953,9 +953,9 @@ struct OptimizeTransposePass final
953953
auto *context = &getContext();
954954
RewritePatternSet patterns(context);
955955
GreedyRewriteConfig config;
956-
config.enableRegionSimplification = GreedySimplifyRegionLevel::Disabled;
957-
config.useTopDownTraversal = true;
958-
config.strictMode = GreedyRewriteStrictness::ExistingAndNewOps;
956+
config.setRegionSimplificationLevel(GreedySimplifyRegionLevel::Disabled);
957+
config.setUseTopDownTraversal(true);
958+
config.setStrictness(GreedyRewriteStrictness::ExistingAndNewOps);
959959
patterns.add<TransposeRewritePattern>(context, analysis, uArchInterface);
960960
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns),
961961
config))) {

test/Transforms/lit.local.cfg

Lines changed: 0 additions & 6 deletions
This file was deleted.

test/Transforms/vector-linearize.mlir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -258,21 +258,21 @@ func.func @test_vector_transpose_16x16(%arg: vector<16x16xf32>) -> vector<16x16x
258258
// CHECK: %[[L3:.*]] = vector.load %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
259259
// CHECK: %[[T3:.*]] = vector.shuffle %[[L3]], %[[L3]] [0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<4xf32>, vector<4xf32>
260260
// CHECK: %[[R4:.*]] = vector.shuffle %[[R3]], %[[T3]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19] : vector<16xf32>, vector<16xf32>
261-
//
262-
// CHECK: %[[S0:.*]] = vector.shuffle %[[R4]], %[[R4]] [0, 1, 2, 3] : vector<16xf32>, vector<16xf32>
261+
// CHECK: %[[R5:.*]] = vector.shape_cast %[[R4]] : vector<16xf32> to vector<16xf32>
262+
// CHECK: %[[S0:.*]] = vector.shuffle %[[R5]], %[[R5]] [0, 1, 2, 3] : vector<16xf32>, vector<16xf32>
263263
// CHECK: vector.store %[[S0]], %{{.*}}[%[[C0]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
264264
//
265-
// CHECK: %[[S1:.*]] = vector.shuffle %[[R4]], %[[R4]] [4, 5, 6, 7] : vector<16xf32>, vector<16xf32>
265+
// CHECK: %[[S1:.*]] = vector.shuffle %[[R5]], %[[R5]] [4, 5, 6, 7] : vector<16xf32>, vector<16xf32>
266266
// CHECK: %[[C1:.*]] = arith.constant 1 : index
267267
// CHECK: %[[I1:.*]] = arith.addi %[[C0]], %[[C1]] : index
268268
// CHECK: vector.store %[[S1]], %{{.*}}[%[[I1]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
269269
//
270-
// CHECK: %[[S2:.*]] = vector.shuffle %[[R4]], %[[R4]] [8, 9, 10, 11] : vector<16xf32>, vector<16xf32>
270+
// CHECK: %[[S2:.*]] = vector.shuffle %[[R5]], %[[R5]] [8, 9, 10, 11] : vector<16xf32>, vector<16xf32>
271271
// CHECK: %[[C2:.*]] = arith.constant 2 : index
272272
// CHECK: %[[I2:.*]] = arith.addi %[[C0]], %[[C2]] : index
273273
// CHECK: vector.store %[[S2]], %{{.*}}[%[[I2]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
274274
//
275-
// CHECK: %[[S3:.*]] = vector.shuffle %[[R4]], %[[R4]] [12, 13, 14, 15] : vector<16xf32>, vector<16xf32>
275+
// CHECK: %[[S3:.*]] = vector.shuffle %[[R5]], %[[R5]] [12, 13, 14, 15] : vector<16xf32>, vector<16xf32>
276276
// CHECK: %[[C3:.*]] = arith.constant 3 : index
277277
// CHECK: %[[I3:.*]] = arith.addi %[[C0]], %[[C3]] : index
278278
// CHECK: vector.store %[[S3]], %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf32>, vector<4xf32>
@@ -316,21 +316,21 @@ func.func @test_vector_store_load_4x4_f16(%buffer: memref<4x4xf16>) {
316316
// CHECK: %[[L3:.*]] = vector.load %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
317317
// CHECK: %[[T3:.*]] = vector.shuffle %[[L3]], %[[L3]] [0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<4xf16>, vector<4xf16>
318318
// CHECK: %[[R4:.*]] = vector.shuffle %[[R3]], %[[T3]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19] : vector<16xf16>, vector<16xf16>
319-
//
320-
// CHECK: %[[S0:.*]] = vector.shuffle %[[R4]], %[[R4]] [0, 1, 2, 3] : vector<16xf16>, vector<16xf16>
319+
// CHECK: %[[R5:.*]] = vector.shape_cast %[[R4]] : vector<16xf16> to vector<16xf16>
320+
// CHECK: %[[S0:.*]] = vector.shuffle %[[R5]], %[[R5]] [0, 1, 2, 3] : vector<16xf16>, vector<16xf16>
321321
// CHECK: vector.store %[[S0]], %{{.*}}[%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
322322
//
323-
// CHECK: %[[S1:.*]] = vector.shuffle %[[R4]], %[[R4]] [4, 5, 6, 7] : vector<16xf16>, vector<16xf16>
323+
// CHECK: %[[S1:.*]] = vector.shuffle %[[R5]], %[[R5]] [4, 5, 6, 7] : vector<16xf16>, vector<16xf16>
324324
// CHECK: %[[C1:.*]] = arith.constant 1 : index
325325
// CHECK: %[[I1:.*]] = arith.addi %[[C0]], %[[C1]] : index
326326
// CHECK: vector.store %[[S1]], %{{.*}}[%[[I1]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
327327
//
328-
// CHECK: %[[S2:.*]] = vector.shuffle %[[R4]], %[[R4]] [8, 9, 10, 11] : vector<16xf16>, vector<16xf16>
328+
// CHECK: %[[S2:.*]] = vector.shuffle %[[R5]], %[[R5]] [8, 9, 10, 11] : vector<16xf16>, vector<16xf16>
329329
// CHECK: %[[C2:.*]] = arith.constant 2 : index
330330
// CHECK: %[[I2:.*]] = arith.addi %[[C0]], %[[C2]] : index
331331
// CHECK: vector.store %[[S2]], %{{.*}}[%[[I2]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
332332
//
333-
// CHECK: %[[S3:.*]] = vector.shuffle %[[R4]], %[[R4]] [12, 13, 14, 15] : vector<16xf16>, vector<16xf16>
333+
// CHECK: %[[S3:.*]] = vector.shuffle %[[R5]], %[[R5]] [12, 13, 14, 15] : vector<16xf16>, vector<16xf16>
334334
// CHECK: %[[C3:.*]] = arith.constant 3 : index
335335
// CHECK: %[[I3:.*]] = arith.addi %[[C0]], %[[C3]] : index
336336
// CHECK: vector.store %[[S3]], %{{.*}}[%[[I3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>

test/Transforms/xegpu-optimize-transpose.mlir

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -123,17 +123,18 @@ func.func @test_scf_for_array_len(%arg0 : memref<64x64xf16>, %arg1 : vector<8x16
123123
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<64x64xf16>, %[[ARG1:[a-zA-Z0-9]+]]: vector<8x16xf16>, %[[ARG2:[a-zA-Z0-9]+]]: memref<64x64xf32>) {
124124
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
125125
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
126+
// CHECK-DAG: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32>
126127
// CHECK: scf.for {{.*}} {
127128
// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}, %[[C0]]] : memref<64x64xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
128129
// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}, %[[C16]]] : memref<64x64xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
129-
// CHECK: %[[T3:.*]]:3 = scf.for {{.*}} iter_args(%[[ARG5:.*]] = %[[T2]], %[[ARG6:.*]] = %[[T0]], %[[ARG7:.*]] = %[[T1]]) -> (vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>) {
130-
// CHECK-DAG: %[[T5:.*]] = xegpu.load_nd %[[ARG6]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
131-
// CHECK-DAG: %[[T7:.*]] = xegpu.load_nd %[[ARG7]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
132-
// CHECK-DAG: %[[T9:.*]] = xegpu.update_nd_offset %[[ARG6]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
133-
// CHECK-DAG: %[[T10:.*]] = xegpu.update_nd_offset %[[ARG7]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
134-
// CHECK: scf.yield %{{.*}}, %[[T9]], %[[T10]] : vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
135-
// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG2]][{{.*}}] : memref<64x64xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
136-
// CHECK: xegpu.store_nd %[[T3]]#0, %{{.*}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
130+
// CHECK: %[[T2:.*]]:3 = scf.for {{.*}} iter_args(%[[ARG5:.*]] = %[[CST]], %[[ARG6:.*]] = %[[T0]], %[[ARG7:.*]] = %[[T1]]) -> (vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>) {
131+
// CHECK-DAG: %[[T4:.*]] = xegpu.load_nd %[[ARG6]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
132+
// CHECK-DAG: %[[T6:.*]] = xegpu.load_nd %[[ARG7]] <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>> -> vector<8x16x2xf16>
133+
// CHECK-DAG: %[[T8:.*]] = xegpu.update_nd_offset %[[ARG6]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
134+
// CHECK-DAG: %[[T9:.*]] = xegpu.update_nd_offset %[[ARG7]], [{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
135+
// CHECK: scf.yield %{{.*}}, %[[T8]], %[[T9]] : vector<8x16xf32>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<memory_space = global, array_length = 1 : i64, boundary_check = true>>
136+
// CHECK: %[[T3:.*]] = xegpu.create_nd_tdesc %[[ARG2]][{{.*}}] : memref<64x64xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
137+
// CHECK: xegpu.store_nd %[[T2]]#0, %{{.*}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<array_length = 1 : i64>>
137138
func.func @test_nested_scf_for_array_len(%arg0: memref<64x64xf16>, %arg1: vector<8x16xf16>, %arg2: memref<64x64xf32>) {
138139
%c0 = arith.constant 0 : index
139140
%c8 = arith.constant 8 : index
@@ -536,7 +537,7 @@ module attributes {gpu.container_module} {
536537
gpu.launch_func @add_bf16_EC831D15_4614D61C_861::@add_bf16_EC831D15_4614D61C_861 blocks in (%c48, %c1, %c1) threads in (%c48, %c1, %c1) args(%arg0 : memref<2x16x384x384xbf16>, %arg1 : memref<2x1x384x384xbf16>, %arg2 : memref<2x16x384x384xbf16>)
537538
return
538539
}
539-
gpu.module @add_bf16_EC831D15_4614D61C_861 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Bfloat16ConversionINTEL, BFloat16TypeKHR, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR, VectorAnyINTEL, VectorComputeINTEL, RegionGroupINTEL], [SPV_EXT_shader_atomic_float_add, SPV_KHR_bfloat16, SPV_KHR_expect_assume, SPV_INTEL_bfloat16_conversion, SPV_INTEL_vector_compute, SPV_INTEL_region_group]>, api=OpenCL, #spirv.resource_limits<>>} {
540+
gpu.module @add_bf16_EC831D15_4614D61C_861 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Bfloat16ConversionINTEL, BFloat16TypeKHR, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR, VectorAnyINTEL, VectorComputeINTEL], [SPV_EXT_shader_atomic_float_add, SPV_KHR_bfloat16, SPV_KHR_expect_assume, SPV_INTEL_bfloat16_conversion, SPV_INTEL_vector_compute]>, api=OpenCL, #spirv.resource_limits<>>} {
540541
gpu.func @add_bf16_EC831D15_4614D61C_861(%arg0: memref<2x16x384x384xbf16>, %arg1: memref<2x1x384x384xbf16>, %arg2: memref<2x16x384x384xbf16>) kernel attributes {VectorComputeFunctionINTEL, known_block_size = array<i32: 48, 1, 1>, known_grid_size = array<i32: 48, 1, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
541542
%c8 = arith.constant 8 : index
542543
%c2 = arith.constant 2 : index

0 commit comments

Comments (0)