@@ -47,6 +47,68 @@ func.func @pad_alloc_expand_shape(%a: memref<1024x1024xf32>) {
   return
 }
 
+// -----
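+// Padding must be skipped when the alloc is consumed by a memref.collapse_shape:
+// padding the inner dims would break the contiguity the collapse requires.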
+// CHECK-LABEL: func.func @no_pad_alloc_collapse_shape
+// CHECK: %[[A:.*]] = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C:.*]] = memref.collapse_shape %[[A]] {{\[}}[0], [1, 2], [3, 4]]
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into
+// CHECK-SAME: memref<4x32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VEC_READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST_0]] {in_bounds = [true]} :
+// CHECK-SAME: memref<1024x1024xf32>, vector<4xf32>
+// CHECK: vector.transfer_write %[[VEC_READ]], %[[C]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} :
+// CHECK-SAME: vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+
+
+func.func @no_pad_alloc_collapse_shape(%a: memref<1024x1024xf32>) {
+  %0 = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+  %1 = memref.collapse_shape %0 [[0], [1, 2], [3, 4]]
+    : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  %c0 = arith.constant 0 : index
+  %cst_0 = arith.constant 0.000000e+00 : f32
+  %3 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} :
+    memref<1024x1024xf32>, vector<4xf32>
+  vector.transfer_write %3, %1[%c0, %c0, %c0] {in_bounds = [true]} :
+    vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  return
+}
+
+// -----
+
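+// As above, but the collapse_shape reaches the alloc through an intervening
+// memref.subview; padding must still be skipped.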
+// CHECK-LABEL: func.func @no_pad_alloc_collapse_shape_throughsubview
+// CHECK: %[[A:.*]] = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[S:.*]] = memref.subview %[[A]][0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1] :
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> to
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C:.*]] = memref.collapse_shape %[[S]] {{\[}}[0], [1, 2], [3, 4]]
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into
+// CHECK-SAME: memref<4x32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VEC_READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true]} :
+// CHECK-SAME: memref<1024x1024xf32>, vector<4xf32>
+// CHECK: vector.transfer_write %[[VEC_READ]], %[[C]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} :
+// CHECK-SAME: vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+
+
+func.func @no_pad_alloc_collapse_shape_throughsubview(%a: memref<1024x1024xf32>) {
+  %0 = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+  %subview = memref.subview %0[0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1]
+    : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> to memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+  %1 = memref.collapse_shape %subview [[0], [1, 2], [3, 4]]
+    : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  %c0 = arith.constant 0 : index
+  %cst_0 = arith.constant 0.000000e+00 : f32
+  %3 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} :
+    memref<1024x1024xf32>, vector<4xf32>
+  vector.transfer_write %3, %1[%c0, %c0, %c0] {in_bounds = [true]} :
+    vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  return
+}
+
 // -----
 
 // CHECK-LABEL: func.func @pad_alloc_negative