
Commit f2f146f

use llvm.column.major.store for shape info
1 parent 3779c5b commit f2f146f

File tree: 1 file changed (+39, -59 lines)
  • llvm/test/Transforms/LowerMatrixIntrinsics


llvm/test/Transforms/LowerMatrixIntrinsics/unary.ll

Lines changed: 39 additions & 59 deletions
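Every unary-op test previously forced the lowering pass to treat the result as a 2x2 matrix by round-tripping it through two llvm.matrix.transpose calls before a plain store; this change conveys the shape directly via llvm.matrix.column.major.store (stride 2, non-volatile, 2 rows, 2 columns). The snippet below, lifted from the fneg_2x2 test in this diff, shows the before/after pattern:

  ; Before: shape information only reached the pass through a transpose round trip.
  %opt = call <4 x float> @llvm.matrix.transpose(<4 x float> %op, i32 2, i32 2)
  %optt = call <4 x float> @llvm.matrix.transpose(<4 x float> %opt, i32 2, i32 2)
  store <4 x float> %optt, ptr %out

  ; After: the store intrinsic itself carries the shape (stride 2, non-volatile, 2x2).
  call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
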
@@ -8,16 +8,14 @@ define void @fneg_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[COL_LOAD]]
 ; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x float> [[COL_LOAD1]]
-; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr float, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x float>, ptr %in
   %op = fneg <4 x float> %inv
-  %opt = call <4 x float> @llvm.matrix.transpose(<4 x float> %op, i32 2, i32 2)
-  %optt = call <4 x float> @llvm.matrix.transpose(<4 x float> %opt, i32 2, i32 2)
-  store <4 x float> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -28,16 +26,14 @@ define void @trunc_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i64>, ptr [[VEC_GEP]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[COL_LOAD]] to <2 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[COL_LOAD1]] to <2 x i32>
-; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x i64>, ptr %in
   %op = trunc <4 x i64> %inv to <4 x i32>
-  %opt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %op, i32 2, i32 2)
-  %optt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %opt, i32 2, i32 2)
-  store <4 x i32> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x i32> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -48,16 +44,14 @@ define void @zext_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i16>, ptr [[VEC_GEP]], align 4
 ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[COL_LOAD]] to <2 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[COL_LOAD1]] to <2 x i32>
-; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x i16>, ptr %in
   %op = zext <4 x i16> %inv to <4 x i32>
-  %opt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %op, i32 2, i32 2)
-  %optt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %opt, i32 2, i32 2)
-  store <4 x i32> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x i32> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -68,16 +62,14 @@ define void @sext_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i8>, ptr [[VEC_GEP]], align 2
 ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[COL_LOAD]] to <2 x i16>
 ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[COL_LOAD1]] to <2 x i16>
-; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[OUT:%.*]], align 8
+; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[OUT:%.*]], align 2
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i16, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[VEC_GEP2]], align 4
+; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[VEC_GEP2]], align 2
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x i8>, ptr %in
   %op = sext <4 x i8> %inv to <4 x i16>
-  %opt = call <4 x i16> @llvm.matrix.transpose(<4 x i16> %op, i32 2, i32 2)
-  %optt = call <4 x i16> @llvm.matrix.transpose(<4 x i16> %opt, i32 2, i32 2)
-  store <4 x i16> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x i16> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -88,16 +80,14 @@ define void @fptoui_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = fptoui <2 x float> [[COL_LOAD]] to <2 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = fptoui <2 x float> [[COL_LOAD1]] to <2 x i32>
-; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x float>, ptr %in
   %op = fptoui <4 x float> %inv to <4 x i32>
-  %opt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %op, i32 2, i32 2)
-  %optt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %opt, i32 2, i32 2)
-  store <4 x i32> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x i32> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -108,16 +98,14 @@ define void @fptosi_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[COL_LOAD]] to <2 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = fptosi <2 x float> [[COL_LOAD1]] to <2 x i32>
-; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x float>, ptr %in
   %op = fptosi <4 x float> %inv to <4 x i32>
-  %opt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %op, i32 2, i32 2)
-  %optt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %opt, i32 2, i32 2)
-  store <4 x i32> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x i32> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -128,16 +116,14 @@ define void @uitofp_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i64>, ptr [[VEC_GEP]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = uitofp <2 x i64> [[COL_LOAD]] to <2 x double>
 ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <2 x i64> [[COL_LOAD1]] to <2 x double>
-; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 8
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 8
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x i64>, ptr %in
   %op = uitofp <4 x i64> %inv to <4 x double>
-  %opt = call <4 x double> @llvm.matrix.transpose(<4 x double> %op, i32 2, i32 2)
-  %optt = call <4 x double> @llvm.matrix.transpose(<4 x double> %opt, i32 2, i32 2)
-  store <4 x double> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x double> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -148,16 +134,14 @@ define void @sitofp_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i64>, ptr [[VEC_GEP]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <2 x i64> [[COL_LOAD]] to <2 x double>
 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[COL_LOAD1]] to <2 x double>
-; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 8
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 8
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x i64>, ptr %in
   %op = sitofp <4 x i64> %inv to <4 x double>
-  %opt = call <4 x double> @llvm.matrix.transpose(<4 x double> %op, i32 2, i32 2)
-  %optt = call <4 x double> @llvm.matrix.transpose(<4 x double> %opt, i32 2, i32 2)
-  store <4 x double> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x double> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -168,16 +152,14 @@ define void @fptrunc_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = fptrunc nnan <2 x double> [[COL_LOAD]] to <2 x float>
 ; CHECK-NEXT: [[TMP2:%.*]] = fptrunc nnan <2 x double> [[COL_LOAD1]] to <2 x float>
-; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr float, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x double>, ptr %in
   %op = fptrunc nnan <4 x double> %inv to <4 x float>
-  %opt = call <4 x float> @llvm.matrix.transpose(<4 x float> %op, i32 2, i32 2)
-  %optt = call <4 x float> @llvm.matrix.transpose(<4 x float> %opt, i32 2, i32 2)
-  store <4 x float> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -188,16 +170,14 @@ define void @fpext_2x2(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x float> [[COL_LOAD]] to <2 x double>
 ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[COL_LOAD1]] to <2 x double>
-; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 8
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 8
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x float>, ptr %in
   %op = fpext <4 x float> %inv to <4 x double>
-  %opt = call <4 x double> @llvm.matrix.transpose(<4 x double> %op, i32 2, i32 2)
-  %optt = call <4 x double> @llvm.matrix.transpose(<4 x double> %opt, i32 2, i32 2)
-  store <4 x double> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x double> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }

@@ -208,30 +188,30 @@ define void @bitcast_2x2_v4f64_to_v4i64(ptr %in, ptr %out) {
 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[COL_LOAD]] to <2 x i64>
 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[COL_LOAD1]] to <2 x i64>
-; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i64, ptr [[OUT]], i64 2
-; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[VEC_GEP2]], align 4
 ; CHECK-NEXT: ret void
 ;
   %inv = load <4 x double>, ptr %in
   %op = bitcast <4 x double> %inv to <4 x i64>
-  %opt = call <4 x i64> @llvm.matrix.transpose(<4 x i64> %op, i32 2, i32 2)
-  %optt = call <4 x i64> @llvm.matrix.transpose(<4 x i64> %opt, i32 2, i32 2)
-  store <4 x i64> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x i64> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }
 
 define void @bitcast_2x2_i256_to_v4i64(ptr %in, ptr %out) {
 ; CHECK-LABEL: @bitcast_2x2_i256_to_v4i64(
 ; CHECK-NEXT: [[INV:%.*]] = load i256, ptr [[IN:%.*]], align 4
 ; CHECK-NEXT: [[OP:%.*]] = bitcast i256 [[INV]] to <4 x double>
-; CHECK-NEXT: store <4 x double> [[OP]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: store <2 x double> [[SPLIT]], ptr [[OUT:%.*]], align 8
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT: store <2 x double> [[SPLIT1]], ptr [[VEC_GEP]], align 8
 ; CHECK-NEXT: ret void
 ;
   %inv = load i256, ptr %in
   %op = bitcast i256 %inv to <4 x double>
-  %opt = call <4 x double> @llvm.matrix.transpose(<4 x double> %op, i32 2, i32 2)
-  %optt = call <4 x double> @llvm.matrix.transpose(<4 x double> %opt, i32 2, i32 2)
-  store <4 x double> %optt, ptr %out
+  call void @llvm.matrix.column.major.store(<4 x double> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }
