Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions mlir/docs/Dialects/Affine.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,7 @@ immediately enclosed by the latter),
3. a value that dominates the `AffineScope` op enclosing the value's
use,
4. the result of a constant operation,
5. the result of an
[`affine.apply` operation](#affineapply-mliraffineapplyop) that recursively takes as
arguments any valid symbolic identifiers, or
5. the result of a `Pure` operation that its operands are the valid symbolic identifiers.
6. the result of a
[`dim` operation](MemRef.md/#memrefdim-mlirmemrefdimop) on either a memref that
is an argument to a `AffineScope` op or a memref where the corresponding
Expand Down
15 changes: 11 additions & 4 deletions mlir/lib/Dialect/Affine/IR/AffineOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ bool mlir::affine::isValidSymbol(Value value) {
/// A value can be used as a symbol for `region` iff it meets one of the
/// following conditions:
/// *) It is a constant.
/// *) It is the result of an affine apply operation with symbol arguments.
/// *) It is a result of a `Pure` operation that its operands are the valid
/// *) symbolic identifiers.
/// *) It is a result of the dim op on a memref whose corresponding size is
/// a valid symbol.
/// *) It is defined at the top level of 'region' or is its argument.
Expand Down Expand Up @@ -443,9 +444,15 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) {
if (matchPattern(defOp, m_Constant(&operandCst)))
return true;

// Affine apply operation is ok if all of its operands are ok.
if (auto applyOp = dyn_cast<AffineApplyOp>(defOp))
return applyOp.isValidSymbol(region);
// `Pure` operation that its operands are the valid symbolic identifiers.
if (isPure(defOp)) {
bool operandVaildSymbol =
llvm::all_of(defOp->getOperands(), [&](Value operand) {
return affine::isValidSymbol(operand, region);
});
if (operandVaildSymbol)
return true;
}

// Dim op results could be valid symbols at any level.
if (auto dimOp = dyn_cast<ShapedDimOpInterface>(defOp))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -638,13 +638,13 @@ func.func @vecdim_reduction_complex_ub(%in: memref<256x512xf32>, %out: memref<25
return
}

// CHECK: #[[$map3:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]], [[d1]] * 2)>
// CHECK: #[[$map3_sub:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]] - [[d1]])>
// CHECK: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1 * 2)>
// CHECK: #[[$map3_sub:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK-LABEL: @vecdim_reduction_complex_ub
// CHECK: %[[vzero:.*]] = arith.constant dense<0.000000e+00> : vector<128xf32>
// CHECK: %{{.*}} = affine.for %[[iv:.*]] = 0 to min #[[$map3]](%[[M:.*]], %[[N:.*]]) step 128 iter_args(%[[red_iter:.*]] = {{.*}}) -> (vector<128xf32>) {
// CHECK: %[[ub:.*]] = affine.min #[[$map3]](%[[M]], %[[N]])
// CHECK: %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[ub]], %[[iv]])
// CHECK: %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[iv]])[%[[ub]]]
// CHECK: %[[mask:.*]] = vector.create_mask %[[elems_left]] : vector<128xi1>
// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
// CHECK: %[[select:.*]] = arith.select %[[mask]], %[[ld]], %[[vzero]] : vector<128xi1>, vector<128xf32>
Expand Down
44 changes: 0 additions & 44 deletions mlir/test/Dialect/Affine/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,6 @@ func.func @affine_apply_resul_non_index(%arg0 : index) {
return
}

// -----

#map = affine_map<(d0)[s0] -> (d0 + s0)>

func.func @affine_for_lower_bound_invalid_dim(%arg : index) {
affine.for %n0 = 0 to 7 {
%dim = arith.addi %arg, %arg : index

// expected-error@+1 {{operand cannot be used as a dimension id}}
affine.for %n1 = 0 to #map(%dim)[%arg] {
}
}
return
}

// -----

#map = affine_map<(d0)[s0] -> (d0 + s0)>

func.func @affine_for_upper_bound_invalid_dim(%arg : index) {
affine.for %n0 = 0 to 7 {
%dim = arith.addi %arg, %arg : index

// expected-error@+1 {{operand cannot be used as a dimension id}}
affine.for %n1 = #map(%dim)[%arg] to 7 {
}
}
return
}

// -----
func.func @affine_load_invalid_dim(%M : memref<10xi32>) {
"unknown"() ({
Expand Down Expand Up @@ -93,20 +63,6 @@ func.func @affine_for_upper_bound_invalid_sym() {

#set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)>

func.func @affine_if_invalid_dim(%arg : index) {
affine.for %n0 = 0 to 7 {
%dim = arith.addi %arg, %arg : index

// expected-error@+1 {{operand cannot be used as a dimension id}}
affine.if #set0(%dim)[%n0] {}
}
return
}

// -----

#set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)>

func.func @affine_if_invalid_sym() {
affine.for %i0 = 0 to 7 {
// expected-error@+1 {{operand cannot be used as a symbol}}
Expand Down
37 changes: 37 additions & 0 deletions mlir/test/Dialect/Affine/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,40 @@ module attributes {gpu.container_module} {
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
// CHECK: }
// CHECK: gpu.return

// -----

#map = affine_map<()[s0] -> (s0 mod 32)>

// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)>

// CHECK-LABEL: @affine_thread_id

module {
gpu.module @gpu {
gpu.func @affine_thread_id(%arg0: memref<?x?xf32>) kernel {
%c3 = arith.constant 3 : index
%dim = memref.dim %arg0, %c3 : memref<?x?xf32>
%c0 = arith.constant 0 : index
affine.for %arg3 = %c0 to %dim step 32 {
%thread_id_x = gpu.thread_id x
%0 = affine.apply #map()[%thread_id_x]
%c128 = arith.constant 128 : index
affine.for %arg4 = %0 to %c128 step 8 {
%c32 = arith.constant 32 : index
}
}
gpu.return
}
}
}

// CHECK-SAME: (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
// CHECK: %[[VAL_5:.*]] = gpu.thread_id x
// CHECK: %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
// CHECK: %[[VAL_7:.*]] = arith.constant 128 : index
// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step 8 {
80 changes: 40 additions & 40 deletions mlir/test/Dialect/GPU/transform-gpu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)>

// CHECK-LABEL: func.func @warpgroup_3d(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
Expand All @@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
// CHECK: gpu.launch
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
Expand Down Expand Up @@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
// CHECK-DAG: #map = affine_map<()[s0] -> (s0 floordiv 16)>

// CHECK-LABEL: func.func @warp_3d(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
Expand All @@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
// CHECK: gpu.launch
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
Expand Down Expand Up @@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>

// CHECK-LABEL: func.func @warpgroup_linear(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
Expand All @@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
// CHECK: scf.if %[[CMPLIN]]
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
Expand Down Expand Up @@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>

// CHECK-LABEL: func.func @warp_linear(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
Expand All @@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
// CHECK: scf.if %[[CMPLIN]]
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
Expand Down Expand Up @@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>

// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>

// CHECK-LABEL: func.func @map_multi_level_linear(
func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type {
Expand Down Expand Up @@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
memref.store %6, %y[%i, %j] : !type
} { mapping = [#gpu.thread<y>, #gpu.thread<x>]}

// CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
// CHECK: scf.if %[[CMPLIN]]
scf.forall (%i, %j, %k) in (%c3, %c2, %c1) {
Expand All @@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
memref.store %8, %y[%i, %j] : !type
} {mapping = [#gpu.warp<linear_dim_0>, #gpu.warp<linear_dim_1>, #gpu.warp<linear_dim_2>] }

// CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
// CHECK: scf.if %[[COND]]
// CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
Expand Down Expand Up @@ -545,9 +545,9 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 12 + d2 * 108)>
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) floordiv 7)>
// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 12 + s2 * 108)>
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) floordiv 7)>

// CHECK-LABEL: func.func @block_linear_existing_launch(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
Expand All @@ -566,9 +566,9 @@ func.func @block_linear_existing_launch(
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
// CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[BIDLIN]], %[[C63]] : index
// CHECK: scf.if %[[CMPLIN]]
// CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
Expand Down Expand Up @@ -600,8 +600,8 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0) -> (d0 mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> (d1 + d2 * 9 + d0 floordiv 7)>
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0] -> (s0 mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * 9 + s0 floordiv 7)>

// CHECK-LABEL: func.func @block_linear_generate_launch(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
Expand All @@ -620,8 +620,8 @@ func.func @block_linear_generate_launch(
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
// CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]])
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]]]
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
// CHECK: memref.load %[[ARGY]][%[[BLX]], %[[BLY]]]
scf.forall (%i, %j) in (%c7, %c9) {
Expand All @@ -647,8 +647,8 @@ module attributes {transform.with_named_sequence} {
#map = affine_map<(d0) -> (d0 * 128)>
#map1 = affine_map<(d0) -> (d0 * 32)>

// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
// CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
// CHECK-DAG: #[[$MAPB:.*]] = affine_map<()[s0] -> (s0 * 128)>
// CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>

// CHECK-LABEL: func.func @simple_fill(
func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
Expand All @@ -660,14 +660,14 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
// CHECK: gpu.launch
scf.forall (%arg1) in (1) {
// CHECK: %[[BIDX:.*]] = gpu.block_id x
// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]](%[[BIDX]])
// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]]()[%[[BIDX]]]
%0 = affine.apply #map(%arg1)
%subview = memref.subview %arg0[%0] [128] [1] : memref<128xf32> to memref<128xf32, strided<[1], offset: ?>>
scf.forall (%arg2) in (4) {
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
// CHECK: %[[TIDZ:.*]] = gpu.thread_id z
// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-NOT: scf.if
// CHECK: memref.subview %{{.*}}[%[[THX]]]
%1 = affine.apply #map1(%arg2)
Expand Down
Loading
Loading