@@ -8,8 +8,14 @@ func.func @memref_copy(%source: memref<2x2xf32>, %dest: memref<2x2xf32>) {
8 8 // CHECK-LABEL: func.func @memref_copy
9 9 // CHECK-SAME: %[[SOURCE:[A-Za-z0-9]+]]: memref<2x2xf32>
10 10 // CHECK-SAME: %[[DEST:[A-Za-z0-9]+]]: memref<2x2xf32>
11- // CHECK: %[[RD:.+]] = vector.transfer_read %[[SOURCE]]
12- // CHECK: vector.transfer_write %[[RD]], %[[DEST]]
11+ // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
12+ // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
13+ // CHECK: scf.for %[[ARG2:.+]] = %[[C0]] to %[[C2]] step %[[C2]]
14+ // CHECK: scf.for %[[ARG3:.+]] = %[[C0]] to %[[C2]] step %[[C2]]
15+ // CHECK: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[ARG2]], %[[ARG3]]] [2, 2] [1, 1]
16+ // CHECK: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[ARG2]], %[[ARG3]]] [2, 2] [1, 1]
17+ // CHECK: %[[RD:.+]] = vector.transfer_read %[[SOURCE_SUBVIEW]]
18+ // CHECK: vector.transfer_write %[[RD]], %[[DEST_SUBVIEW]]
13 19
14 20 // -----
15 21
@@ -21,5 +27,95 @@ func.func @linalg_copy(%source: memref<2x2xf32>, %dest: memref<2x2xf32>) {
21 27 // CHECK-LABEL: func.func @linalg_copy
22 28 // CHECK-SAME: %[[SOURCE:[A-Za-z0-9]+]]: memref<2x2xf32>
23 29 // CHECK-SAME: %[[DEST:[A-Za-z0-9]+]]: memref<2x2xf32>
24- // CHECK: %[[RD:.+]] = vector.transfer_read %[[SOURCE]]
25- // CHECK: vector.transfer_write %[[RD]], %[[DEST]]
30+ // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
31+ // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
32+ // CHECK: scf.for %[[ARG2:.+]] = %[[C0]] to %[[C2]] step %[[C2]]
33+ // CHECK: scf.for %[[ARG3:.+]] = %[[C0]] to %[[C2]] step %[[C2]]
34+ // CHECK: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[ARG2]], %[[ARG3]]] [2, 2] [1, 1]
35+ // CHECK: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[ARG2]], %[[ARG3]]] [2, 2] [1, 1]
36+ // CHECK: %[[RD:.+]] = vector.transfer_read %[[SOURCE_SUBVIEW]]
37+ // CHECK: vector.transfer_write %[[RD]], %[[DEST_SUBVIEW]]
38+
39+ // -----
40+
41+ // Test with the last dimension larger than, and not a multiple of, the preferred number of copy elements.
42+
43+ func.func @memref_copy_not_multiple_of_preferred(%source: memref<2x6xf32>, %dest: memref<2x6xf32>) {
44+   memref.copy %source, %dest : memref<2x6xf32> to memref<2x6xf32>
45+   return
46+ }
47+ // CHECK-LABEL: func.func @memref_copy_not_multiple_of_preferred
48+ // CHECK-SAME: %[[SOURCE:[A-Za-z0-9]+]]: memref<2x6xf32>
49+ // CHECK-SAME: %[[DEST:[A-Za-z0-9]+]]: memref<2x6xf32>
50+ // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
51+ // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
52+ // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
53+ // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
54+ // CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index
55+ // CHECK: scf.for %[[ARG2:.+]] = %[[C0]] to %[[C2]] step %[[C1]]
56+ // CHECK: scf.for %[[ARG3:.+]] = %[[C0]] to %[[C6]] step %[[C4]]
57+ // CHECK: %[[MIN:.+]] = affine.min affine_map<(d0) -> (-d0 + 6, 4)>(%[[ARG3]])
58+ // CHECK: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[ARG2]], %[[ARG3]]] [1, %[[MIN]]] [1, 1]
59+ // CHECK: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[ARG2]], %[[ARG3]]] [1, %[[MIN]]] [1, 1]
60+ // CHECK: memref.copy %[[SOURCE_SUBVIEW]], %[[DEST_SUBVIEW]]
61+
62+ // -----
63+
64+ // Test with the penultimate dimension larger than, and not a multiple of, the preferred number of copy elements on that dimension.
65+
66+ func.func @memref_copy_not_multiple_on_penultimate_dim(%source: memref<3x2xf32>, %dest: memref<3x2xf32>) {
67+   memref.copy %source, %dest : memref<3x2xf32> to memref<3x2xf32>
68+   return
69+ }
70+ // CHECK-LABEL: func.func @memref_copy_not_multiple_on_penultimate_dim
71+ // CHECK-SAME: %[[SOURCE:[A-Za-z0-9]+]]: memref<3x2xf32>
72+ // CHECK-SAME: %[[DEST:[A-Za-z0-9]+]]: memref<3x2xf32>
73+ // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
74+ // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
75+ // CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
76+ // CHECK: scf.for %[[ARG2:.+]] = %[[C0]] to %[[C3]] step %[[C2]]
77+ // CHECK: scf.for %[[ARG3:.+]] = %[[C0]] to %[[C2]] step %[[C2]]
78+ // CHECK: %[[MIN:.+]] = affine.min affine_map<(d0) -> (-d0 + 3, 2)>(%[[ARG2]])
79+ // CHECK: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[ARG2]], %[[ARG3]]] [%[[MIN]], 2] [1, 1]
80+ // CHECK: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[ARG2]], %[[ARG3]]] [%[[MIN]], 2] [1, 1]
81+ // CHECK: memref.copy %[[SOURCE_SUBVIEW]], %[[DEST_SUBVIEW]]
82+
83+ // -----
84+
85+ func.func @memref_copy_dynamic(%source: memref<?x4xf32>, %dest: memref<?x4xf32>) {
86+   memref.copy %source, %dest : memref<?x4xf32> to memref<?x4xf32>
87+   return
88+ }
89+ // CHECK-LABEL: func.func @memref_copy_dynamic
90+ // CHECK-SAME: %[[SOURCE:[A-Za-z0-9]+]]: memref<?x4xf32>
91+ // CHECK-SAME: %[[DEST:[A-Za-z0-9]+]]: memref<?x4xf32>
92+ // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
93+ // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
94+ // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
95+ // CHECK-DAG: %[[DIM:.+]] = memref.dim %[[SOURCE]], %[[C0]] : memref<?x4xf32>
96+ // CHECK: scf.for %[[ARG2:.+]] = %[[C0]] to %[[DIM]] step %[[C1]]
97+ // CHECK: scf.for %[[ARG3:.+]] = %[[C0]] to %[[C4]] step %[[C4]]
98+ // CHECK: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[ARG2]], %[[ARG3]]] [1, 4] [1, 1]
99+ // CHECK: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[ARG2]], %[[ARG3]]] [1, 4] [1, 1]
100+ // CHECK: %[[RD:.+]] = vector.transfer_read %[[SOURCE_SUBVIEW]]
101+ // CHECK: vector.transfer_write %[[RD]], %[[DEST_SUBVIEW]]
102+
103+ // -----
104+
105+ func.func @memref_copy_dynamic_inner_dim(%source: memref<4x?xf32>, %dest: memref<4x?xf32>) {
106+   memref.copy %source, %dest : memref<4x?xf32> to memref<4x?xf32>
107+   return
108+ }
109+ // CHECK-LABEL: func.func @memref_copy_dynamic_inner_dim
110+ // CHECK-SAME: %[[SOURCE:[A-Za-z0-9]+]]: memref<4x?xf32>
111+ // CHECK-SAME: %[[DEST:[A-Za-z0-9]+]]: memref<4x?xf32>
112+ // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
113+ // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
114+ // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
115+ // CHECK-DAG: %[[DIM:.+]] = memref.dim %[[SOURCE]], %[[C1]] : memref<4x?xf32>
116+ // CHECK: scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]]
117+ // CHECK: scf.for %[[ARG3:.+]] = %[[C0]] to %[[DIM]] step %[[C4]]
118+ // CHECK: %[[MIN:.+]] = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 4)>(%[[ARG3]])[%[[DIM]]]
119+ // CHECK: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[ARG2]], %[[ARG3]]] [1, %[[MIN]]] [1, 1]
120+ // CHECK: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[ARG2]], %[[ARG3]]] [1, %[[MIN]]] [1, 1]
121+ // CHECK: memref.copy %[[SOURCE_SUBVIEW]], %[[DEST_SUBVIEW]]
0 commit comments