Skip to content

Commit 12c653b

Browse files
authored
Integrate LLVM at bfde1783 (#18635)
Cherry-picks: 1. Cherry-picking llvm/llvm-project#110518. Carrying two local reverts: 1. Revert of llvm/llvm-project#100667 - As noted by @hanhanW on #18619, that PR "breaks the stablehlo build. We need to wait stablehlo bumping LLVM ahead of it and fix the issue. Then we can bump stablehlo and drop the local commit together." 2. Revert of llvm/llvm-project#110170 - That is just the Bazel change accompanying 1. Signed-off-by: Benoit Jacob <[email protected]>
1 parent f5dc573 commit 12c653b

File tree

3 files changed

+16
-21
lines changed

3 files changed

+16
-21
lines changed

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,15 @@ func.func @distribute_thread_forall_single_thread(%out : memref<?xi32>)
102102
}
103103

104104
// CHECK-LABEL: func @distribute_thread_forall_single_thread
105+
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
105106
// CHECK-DAG: %[[TX:.+]] = gpu.thread_id x
106107
// CHECK-DAG: %[[TY:.+]] = gpu.thread_id y
107108
// CHECK-DAG: %[[TZ:.+]] = gpu.thread_id z
108109
// CHECK: %[[LINID:.+]] = affine.apply
109110
// CHECK-SAME: affine_map<()[s0, s1, s2] -> (s0 + s1 * 64 + s2 * 128)>
110111
// CHECK-SAME: [%[[TX]], %[[TY]], %[[TZ]]]
111112
// CHECK: scf.for %[[I:.+]] = %[[LINID]] to %c1 step %c128 {
112-
// CHECK: %[[DELIN:.+]] = affine.delinearize_index %[[I]] into (%c1) : index
113-
// CHECK: memref.store {{.*}}[%[[DELIN]]]
113+
// CHECK: memref.store {{.*}}[%[[C0]]]
114114

115115
// -----
116116

compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_mma_to_lanes.mlir

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -393,22 +393,20 @@ func.func @data_tiled_1x1x1_tensor_multi_mma(%lhs: tensor<1x1x4x16xf32>, %rhs: t
393393
// CHECK-SAME: %[[LHS:[A-Za-z0-9]+]]
394394
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]
395395
// CHECK-SAME: %[[ACC:[A-Za-z0-9]+]]
396-
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
397396
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
398397
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
399398
// CHECK: scf.forall (%[[THREAD_ID:.+]]) in (64) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x4x16x4xf32>)
400399
// CHECK: %[[ID_CLAMPED:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]])
401400
// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]])
402401
// CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]][0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1] [1, 1, 1, 1] [1, 1, 1, 1]
403402
// CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]][0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1] [1, 1, 1, 1] [1, 1, 1, 1]
404-
// CHECK-DAG: %[[ACC_IDS:.+]]:3 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]], %[[C1]])
405403
// CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]]
406-
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2] [1, 1, 1, 1, 4] [1, 1, 1, 1, 1]
404+
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 1, 1, 4] [1, 1, 1, 1, 1]
407405
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS_SLICE]], %[[RHS_SLICE]], %[[ACC_SLICE]]
408406
// CHECK-SAME: kind = #iree_gpu.data_tiled_mma_layout<intrinsic = MFMA_F32_16x16x4_F32>
409407
// CHECK-SAME: : tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32> into tensor<1x1x1x1x4xf32>
410408
// CHECK: tensor.parallel_insert_slice %[[MMA]] into %[[ACC_ARG]]
411-
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2] [1, 1, 1, 1, 4] [1, 1, 1, 1, 1]
409+
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 1, 1, 4] [1, 1, 1, 1, 1]
412410
// CHECK: mapping = [#gpu.thread<linear_dim_0>]
413411

414412
// -----
@@ -434,24 +432,22 @@ func.func @data_tiled_2x2x4_tensor_multi_mma_unrolled(%lhs: tensor<1x1x2x4x16x4x
434432
// CHECK-SAME: %[[LHS:[A-Za-z0-9]+]]
435433
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]
436434
// CHECK-SAME: %[[ACC:[A-Za-z0-9]+]]
437-
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
438435
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
439436
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
440437
// CHECK: scf.forall (%[[THREAD_ID:.+]]) in (64) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x2x2x4x16x4xf32>)
441438
// CHECK: %[[ID_CLAMPED:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]])
442-
// CHECK-DAG: %[[IN_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C1]], %[[C4]], %[[C16]], %[[C1]])
439+
// CHECK-DAG: %[[IN_IDS:.+]]:2 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C4]], %[[C16]])
443440
// CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]]
444-
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, %[[IN_IDS]]#3] [1, 1, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1]
441+
// CHECK-SAME: [0, 0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1]
445442
// CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]]
446-
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, %[[IN_IDS]]#3] [1, 1, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1]
447-
// CHECK-DAG: %[[ACC_IDS:.+]]:5 = affine.delinearize_index %[[ID_CLAMPED]] into (%[[C1]], %[[C1]], %[[C4]], %[[C16]], %[[C1]])
443+
// CHECK-SAME: [0, 0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1]
448444
// CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]]
449-
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, %[[ACC_IDS]]#4] [1, 1, 2, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
445+
// CHECK-SAME: [0, 0, 0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 2, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
450446
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS_SLICE]], %[[RHS_SLICE]], %[[ACC_SLICE]]
451447
// CHECK-SAME: kind = #iree_gpu.data_tiled_mma_layout<intrinsic = MFMA_F32_16x16x4_F32, unroll_m = 2, unroll_n = 2, unroll_k = 4>
452448
// CHECK-SAME: : tensor<1x1x2x1x1x4xf32>, tensor<1x1x2x1x1x4xf32> into tensor<1x1x2x2x1x1x4xf32>
453449
// CHECK: tensor.parallel_insert_slice %[[MMA]] into %[[ACC_ARG]]
454-
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, %[[ACC_IDS]]#4] [1, 1, 2, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
450+
// CHECK-SAME: [0, 0, 0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, 0] [1, 1, 2, 2, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
455451
// CHECK: mapping = [#gpu.thread<linear_dim_0>]
456452

457453
// -----
@@ -478,25 +474,24 @@ func.func @data_tiled_2x2x4_tensor_multi_mma_unrolled_to_subgroups(%lhs: tensor<
478474
// CHECK-SAME: %[[LHS:[A-Za-z0-9]+]]
479475
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]
480476
// CHECK-SAME: %[[ACC:[A-Za-z0-9]+]]
481-
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
482477
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
483478
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
484479
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
485480
// CHECK: scf.forall (%[[THREAD_ID:.+]]) in (256) shared_outs(%[[ACC_ARG:.+]] = %[[ACC]]) -> (tensor<1x1x2x2x4x16x4xf32>)
486481
// CHECK: %[[ID_CLAMPED_128:.+]] = affine.apply #[[$MAP]](%[[THREAD_ID]])
487-
// CHECK-DAG: %[[IN_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED_128]] into (%[[C2]], %[[C4]], %[[C16]], %[[C1]])
482+
// CHECK-DAG: %[[IN_IDS:.+]]:3 = affine.delinearize_index %[[ID_CLAMPED_128]] into (%[[C2]], %[[C4]], %[[C16]])
488483
// CHECK-DAG: %[[LHS_SLICE:.+]] = tensor.extract_slice %[[LHS]]
489-
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, %[[IN_IDS]]#3] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1]
484+
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, 0] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1]
490485
// CHECK-DAG: %[[RHS_SLICE:.+]] = tensor.extract_slice %[[RHS]]
491-
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, %[[IN_IDS]]#3] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1]
486+
// CHECK-SAME: [0, 0, %[[IN_IDS]]#0, %[[IN_IDS]]#1, %[[IN_IDS]]#2, 0] [1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1]
492487
// CHECK: %[[ID_CLAMPED_256:.+]] = affine.apply #[[$MAP1]](%[[THREAD_ID]])
493-
// CHECK-DAG: %[[ACC_IDS:.+]]:5 = affine.delinearize_index %[[ID_CLAMPED_256]] into (%[[C2]], %[[C2]], %[[C4]], %[[C16]], %[[C1]])
488+
// CHECK-DAG: %[[ACC_IDS:.+]]:4 = affine.delinearize_index %[[ID_CLAMPED_256]] into (%[[C2]], %[[C2]], %[[C4]], %[[C16]])
494489
// CHECK-DAG: %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC_ARG]]
495-
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, %[[ACC_IDS]]#4] [1, 1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
490+
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, 0] [1, 1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
496491
// CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS_SLICE]], %[[RHS_SLICE]], %[[ACC_SLICE]]
497492
// CHECK-SAME: kind = #iree_gpu.data_tiled_mma_layout<intrinsic = MFMA_F32_16x16x4_F32,
498493
// CHECK-SAME: unroll_m_to_subgroups = 2, unroll_n_to_subgroups = 2, unroll_k = 4>}
499494
// CHECK-SAME: : tensor<1x1x1x1x1x4xf32>, tensor<1x1x1x1x1x4xf32> into tensor<1x1x1x1x1x1x4xf32>
500495
// CHECK: tensor.parallel_insert_slice %[[MMA]] into %[[ACC_ARG]]
501-
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, %[[ACC_IDS]]#4] [1, 1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
496+
// CHECK-SAME: [0, 0, %[[ACC_IDS]]#0, %[[ACC_IDS]]#1, %[[ACC_IDS]]#2, %[[ACC_IDS]]#3, 0] [1, 1, 1, 1, 1, 1, 4] [1, 1, 1, 1, 1, 1, 1]
502497
// CHECK: mapping = [#gpu.thread<linear_dim_0>]

third_party/llvm-project

Submodule llvm-project updated 481 files

0 commit comments

Comments
 (0)