@@ -29,7 +29,7 @@ builtin.module attributes { transform.with_named_sequence } {
  }
}

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[YX:.+]]:3 = affine.delinearize_index %[[IDX]] into (4, 8)
// CHECK: %[[Y_SCALED:.+]] = affine.linearize_index disjoint [%[[YX]]#1, %c0] by (4, 4)
// CHECK: %[[RD00:.+]] = vector.transfer_read %arg0[%[[Y_SCALED]], %[[YX]]#2], {{.*}} : memref<32x32xf16>, vector<4x1xf16>
@@ -73,7 +73,7 @@ builtin.module attributes { transform.with_named_sequence } {
// CHECK-LABEL: @distribute_transfer_read_row_major_with_nontrivial_index
// CHECK-SAME: %[[I0:.+]]: index, %[[I1:.+]]: index

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[X:.+]]:2 = affine.delinearize_index %[[IDX]] into (8) : index, index
// CHECK: %[[OFF0:.+]] = affine.linearize_index [%[[X]]#1, %[[I0]]] by (8, 1)
// CHECK: vector.transfer_read %{{.*}}[%c0, %c0, %[[OFF0]], %[[I1]]]
@@ -161,7 +161,7 @@ builtin.module attributes { transform.with_named_sequence } {
// CHECK-LABEL: @distribute_transfer_read_row_major_transpose
// CHECK-SAME: %[[I0:.+]]: index, %[[I1:.+]]: index

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[X:.+]]:2 = affine.delinearize_index %[[IDX]] into (8) : index, index
// CHECK: %[[LIN_ID0:.+]] = affine.linearize_index [%[[X]]#1, %[[I1]]] by (8, 1)
// CHECK: vector.transfer_read %{{.*}}[%c0, %c0, %[[I0]], %[[LIN_ID0]]], {{.*}} permutation_map = #[[$PERM]]
@@ -278,7 +278,7 @@ builtin.module attributes { transform.with_named_sequence } {
  }
}

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[YX:.+]]:3 = affine.delinearize_index %[[IDX]] into (4, 16)
// CHECK: %[[LANEY:.+]] = affine.linearize_index disjoint [%[[YX]]#1, %c0] by (4, 4)
// CHECK: %[[RD:.+]] = vector.transfer_read %{{.*}}[%c0, %[[LANEY:.+]]], {{.*}} : memref<32x32xf16>, vector<4xf16>
@@ -314,7 +314,7 @@ builtin.module attributes { transform.with_named_sequence } {
  }
}

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[YX:.+]]:3 = affine.delinearize_index %[[IDX]] into (2, 64)
// CHECK: %[[SUBGROUP:.+]]:2 = affine.delinearize_index %[[IDX]] into (16)
// CHECK: %[[LANEY:.+]] = affine.linearize_index disjoint [%[[YX]]#1, %[[SUBGROUP]]#1, %c0] by (2, 16, 4)
@@ -387,7 +387,7 @@ builtin.module attributes { transform.with_named_sequence } {
  }
}

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[LANEX:.+]]:2 = affine.delinearize_index %[[IDX]] into (8)
// CHECK: %[[SLICE:.+]] = vector.extract %{{.*}}[0, 0, 0, 0] : vector<1x8xf16> from vector<2x2x1x1x1x8xf16>
// CHECK: vector.transfer_write %[[SLICE]], %{{.*}}[%[[LANEX]]#1, %c0] {in_bounds = [true, true]} : vector<1x8xf16>, memref<64x64xf16>
@@ -430,7 +430,7 @@ builtin.module attributes { transform.with_named_sequence } {
  }
}

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[YX:.+]]:3 = affine.delinearize_index %[[IDX]] into (4, 8)
// CHECK: %[[LANEY:.+]] = affine.linearize_index disjoint [%[[YX]]#1, %c0] by (4, 4)
// CHECK: vector.extract %{{.*}}[0, 0, 0, 0]
@@ -475,7 +475,7 @@ builtin.module attributes { transform.with_named_sequence } {
// CHECK-LABEL: @distribute_transfer_write_row_major_with_nontrivial_index
// CHECK-SAME: vector<16x16xf16>, %[[I0:.+]]: index, %[[I1:.+]]: index

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK: %[[LANE:.+]]:2 = affine.delinearize_index %[[IDX]] into (8)
// CHECK: %[[LIN_ID0:.+]] = affine.linearize_index [%[[LANE]]#1, %[[I1]]] by (8, 1)
// CHECK: vector.extract %{{.*}}[0, 0, 0, 0]
@@ -585,7 +585,7 @@ func.func @mfma_64x128x8_read(%mem: memref<128x8xf16>,
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.0 : f16

-  // CHECK: %[[IDX:.+]] = gpu.thread_id x
+  // CHECK: %[[IDX:.+]] = gpu.thread_id x
  // CHECK-DAG: %[[WG:.+]]:4 = affine.delinearize_index %[[IDX]] into (4, 2, 64)
  // CHECK-DAG: %[[LANE:.+]]:3 = affine.delinearize_index %[[IDX]] into (2, 32)
  // This doesn't canonicalize away currently, but could be equivalent to %WG
@@ -675,7 +675,7 @@ builtin.module attributes { transform.with_named_sequence } {

// CHECK-LABEL: @transposed_read_64x8

-// CHECK: %[[IDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDX:.+]] = gpu.thread_id x
// CHECK-DAG: %[[WG:.+]]:4 = affine.delinearize_index %[[IDX]] into (2, 2, 64)
// CHECK-DAG: %[[LANE:.+]]:3 = affine.delinearize_index %[[IDX]] into (2, 32)
// CHECK-DAG: %[[M:.+]] = affine.linearize_index disjoint [%[[WG]]#1, %[[LANE]]#2] by (2, 32)
@@ -934,7 +934,7 @@ builtin.module attributes { transform.with_named_sequence } {
}

// CHECK-LABEL: func @transpose_3d
-// CHECK-DAG: %[[IDX:.+]] = gpu.thread_id x
+// CHECK-DAG: %[[IDX:.+]] = gpu.thread_id x
// CHECK-DAG: %[[WG:.+]]:3 = affine.delinearize_index %[[IDX]] into (2, 64)
// CHECK-DAG: %[[LANE:.+]]:4 = affine.delinearize_index %[[IDX]] into (4, 8, 2)
// CHECK-DAG: %[[DIM:.+]] = affine.linearize_index disjoint [%[[WG]]#1, %[[LANE]]#1, %c0] by (2, 4, 4)
@@ -1373,7 +1373,7 @@ builtin.module attributes { transform.with_named_sequence } {
}

// CHECK-LABEL: @distribute_map_store_row_major
-// CHECK-DAG: %[[IDX:.+]] = gpu.thread_id x
+// CHECK-DAG: %[[IDX:.+]] = gpu.thread_id x
// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
// CHECK-DAG: %[[LANEX:.+]]:2 = affine.delinearize_index %[[IDX]] into (8)
// CHECK-DAG: %[[SLICE0:.+]] = vector.extract %{{.*}}[0, 0, 0, 0]
@@ -1411,7 +1411,7 @@ builtin.module attributes { transform.with_named_sequence } {
// CHECK-LABEL: @undistributed_write
func.func @undistributed_write(%out: memref<f32, #amdgpu.address_space<fat_raw_buffer>>, %v: vector<f32>) {
  // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[TID:.*]] = gpu.thread_id x
+  // CHECK-DAG: %[[TID:.*]] = gpu.thread_id x
  // CHECK-DAG: %[[COND:.+]] = arith.cmpi eq, %[[TID]], %[[ZERO]] : index
  // CHECK-NEXT: scf.if %[[COND]] {
  // CHECK: vector.transfer_write
@@ -1446,7 +1446,7 @@ builtin.module attributes { transform.with_named_sequence } {
// across all threads (note the thread strides). This test checks if we account
// for such broadcasts when generating conditional writes.
// CHECK-LABEL: @partially_distributed_write
-// CHECK-DAG: %[[TID:.+]] = gpu.thread_id x
+// CHECK-DAG: %[[TID:.+]] = gpu.thread_id x
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DELIN:.*]]:5 = affine.delinearize_index %[[TID:.+]] into (4, 2, 4, 8)
// CHECK-DAG: %[[SUBGROUP_COND:.+]] = arith.cmpi eq, %[[DELIN]]#0, %[[C0]] : index