Skip to content

Commit b1ceac4

Browse files
committed
disable extload/truncstore for v2i8/v2i16
1 parent 5e0991c commit b1ceac4

File tree

2 files changed

+33
-75
lines changed

2 files changed

+33
-75
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
731731
setTruncStoreAction(VT, MVT::i1, Expand);
732732
}
733733

734+
// Disable generation of extload/truncstore for v2i16/v2i8. The generic
735+
// expansion for these nodes when they are unaligned is incorrect if the
736+
// type is a vector.
737+
//
738+
// TODO: Fix the generic expansion for these nodes found in
739+
// TargetLowering::expandUnalignedLoad/Store.
740+
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
741+
MVT::v2i8, Expand);
742+
setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
743+
734744
// Register custom handling for illegal type loads/stores. We'll try to custom
735745
// lower almost all illegal types and logic in the lowering will discard cases
736746
// we can't handle.

llvm/test/CodeGen/NVPTX/i8x2-instructions.ll

Lines changed: 23 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -137,35 +137,27 @@ define void @test_store_i8x2_unaligned(ptr %ptr, <2 x i8> %a) {
137137
; O0-LABEL: test_store_i8x2_unaligned(
138138
; O0: {
139139
; O0-NEXT: .reg .b16 %rs<3>;
140-
; O0-NEXT: .reg .b32 %r<3>;
140+
; O0-NEXT: .reg .b32 %r<2>;
141141
; O0-NEXT: .reg .b64 %rd<2>;
142142
; O0-EMPTY:
143143
; O0-NEXT: // %bb.0:
144144
; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0];
145145
; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1];
146146
; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
147-
; O0-NEXT: st.b8 [%rd1], %r1;
148-
; O0-NEXT: shr.u32 %r2, %r1, 16;
149-
; O0-NEXT: st.b8 [%rd1+2], %r2;
150-
; O0-NEXT: st.b8 [%rd1+1], 0;
151-
; O0-NEXT: st.b8 [%rd1+3], 0;
147+
; O0-NEXT: st.b8 [%rd1+1], %rs2;
148+
; O0-NEXT: st.b8 [%rd1], %rs1;
152149
; O0-NEXT: ret;
153150
;
154151
; O3-LABEL: test_store_i8x2_unaligned(
155152
; O3: {
156153
; O3-NEXT: .reg .b16 %rs<3>;
157-
; O3-NEXT: .reg .b32 %r<3>;
158154
; O3-NEXT: .reg .b64 %rd<2>;
159155
; O3-EMPTY:
160156
; O3-NEXT: // %bb.0:
161157
; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0];
162158
; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1];
163-
; O3-NEXT: mov.b32 %r1, {%rs1, %rs2};
164-
; O3-NEXT: st.b8 [%rd1], %r1;
165-
; O3-NEXT: shr.u32 %r2, %r1, 16;
166-
; O3-NEXT: st.b8 [%rd1+2], %r2;
167-
; O3-NEXT: st.b8 [%rd1+3], 0;
168-
; O3-NEXT: st.b8 [%rd1+1], 0;
159+
; O3-NEXT: st.b8 [%rd1+1], %rs2;
160+
; O3-NEXT: st.b8 [%rd1], %rs1;
169161
; O3-NEXT: ret;
170162
store <2 x i8> %a, ptr %ptr, align 1
171163
ret void
@@ -178,9 +170,7 @@ define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
178170
; O0-EMPTY:
179171
; O0-NEXT: // %bb.0:
180172
; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0];
181-
; O0-NEXT: st.b8 [%rd1+3], 0;
182-
; O0-NEXT: st.b8 [%rd1+2], 2;
183-
; O0-NEXT: st.b8 [%rd1+1], 0;
173+
; O0-NEXT: st.b8 [%rd1+1], 2;
184174
; O0-NEXT: st.b8 [%rd1], 1;
185175
; O0-NEXT: ret;
186176
;
@@ -190,9 +180,7 @@ define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
190180
; O3-EMPTY:
191181
; O3-NEXT: // %bb.0:
192182
; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0];
193-
; O3-NEXT: st.b8 [%rd1+3], 0;
194-
; O3-NEXT: st.b8 [%rd1+2], 2;
195-
; O3-NEXT: st.b8 [%rd1+1], 0;
183+
; O3-NEXT: st.b8 [%rd1+1], 2;
196184
; O3-NEXT: st.b8 [%rd1], 1;
197185
; O3-NEXT: ret;
198186
store <2 x i8> <i8 1, i8 2>, ptr %ptr, align 1
@@ -202,46 +190,26 @@ define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
202190
define i32 @test_zext_load_i8x2_unaligned(ptr %ptr) {
203191
; O0-LABEL: test_zext_load_i8x2_unaligned(
204192
; O0: {
205-
; O0-NEXT: .local .align 2 .b8 __local_depot6[2];
206-
; O0-NEXT: .reg .b64 %SP;
207-
; O0-NEXT: .reg .b64 %SPL;
208-
; O0-NEXT: .reg .b16 %rs<5>;
209-
; O0-NEXT: .reg .b32 %r<2>;
193+
; O0-NEXT: .reg .b16 %rs<3>;
210194
; O0-NEXT: .reg .b64 %rd<2>;
211195
; O0-EMPTY:
212196
; O0-NEXT: // %bb.0:
213-
; O0-NEXT: mov.b64 %SPL, __local_depot6;
214-
; O0-NEXT: cvta.local.u64 %SP, %SPL;
215197
; O0-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0];
216-
; O0-NEXT: ld.b8 %rs1, [%rd1];
217-
; O0-NEXT: ld.b8 %rs2, [%rd1+1];
218-
; O0-NEXT: shl.b16 %rs3, %rs2, 8;
219-
; O0-NEXT: or.b16 %rs4, %rs3, %rs1;
220-
; O0-NEXT: st.b16 [%SP], %rs4;
221-
; O0-NEXT: ld.b16 %r1, [%SP];
222-
; O0-NEXT: st.param.b32 [func_retval0], %r1;
198+
; O0-NEXT: ld.b8 %rs1, [%rd1+1];
199+
; O0-NEXT: ld.b8 %rs2, [%rd1];
200+
; O0-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
223201
; O0-NEXT: ret;
224202
;
225203
; O3-LABEL: test_zext_load_i8x2_unaligned(
226204
; O3: {
227-
; O3-NEXT: .local .align 2 .b8 __local_depot6[2];
228-
; O3-NEXT: .reg .b64 %SP;
229-
; O3-NEXT: .reg .b64 %SPL;
230-
; O3-NEXT: .reg .b16 %rs<5>;
231-
; O3-NEXT: .reg .b32 %r<2>;
205+
; O3-NEXT: .reg .b16 %rs<3>;
232206
; O3-NEXT: .reg .b64 %rd<2>;
233207
; O3-EMPTY:
234208
; O3-NEXT: // %bb.0:
235-
; O3-NEXT: mov.b64 %SPL, __local_depot6;
236-
; O3-NEXT: cvta.local.u64 %SP, %SPL;
237209
; O3-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0];
238-
; O3-NEXT: ld.b8 %rs1, [%rd1];
239-
; O3-NEXT: ld.b8 %rs2, [%rd1+1];
240-
; O3-NEXT: shl.b16 %rs3, %rs2, 8;
241-
; O3-NEXT: or.b16 %rs4, %rs3, %rs1;
242-
; O3-NEXT: st.b16 [%SP], %rs4;
243-
; O3-NEXT: ld.b16 %r1, [%SP];
244-
; O3-NEXT: st.param.b32 [func_retval0], %r1;
210+
; O3-NEXT: ld.b8 %rs1, [%rd1+1];
211+
; O3-NEXT: ld.b8 %rs2, [%rd1];
212+
; O3-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
245213
; O3-NEXT: ret;
246214
%a = load <2 x i8>, ptr %ptr, align 1
247215
%b = zext <2 x i8> %a to <2 x i16>
@@ -252,46 +220,26 @@ define i32 @test_zext_load_i8x2_unaligned(ptr %ptr) {
252220
define i32 @test_sext_load_i8x2_unaligned(ptr %ptr) {
253221
; O0-LABEL: test_sext_load_i8x2_unaligned(
254222
; O0: {
255-
; O0-NEXT: .local .align 2 .b8 __local_depot7[2];
256-
; O0-NEXT: .reg .b64 %SP;
257-
; O0-NEXT: .reg .b64 %SPL;
258-
; O0-NEXT: .reg .b16 %rs<5>;
259-
; O0-NEXT: .reg .b32 %r<2>;
223+
; O0-NEXT: .reg .b16 %rs<3>;
260224
; O0-NEXT: .reg .b64 %rd<2>;
261225
; O0-EMPTY:
262226
; O0-NEXT: // %bb.0:
263-
; O0-NEXT: mov.b64 %SPL, __local_depot7;
264-
; O0-NEXT: cvta.local.u64 %SP, %SPL;
265227
; O0-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0];
266-
; O0-NEXT: ld.b8 %rs1, [%rd1];
267-
; O0-NEXT: ld.b8 %rs2, [%rd1+1];
268-
; O0-NEXT: shl.b16 %rs3, %rs2, 8;
269-
; O0-NEXT: or.b16 %rs4, %rs3, %rs1;
270-
; O0-NEXT: st.b16 [%SP], %rs4;
271-
; O0-NEXT: ld.s16 %r1, [%SP];
272-
; O0-NEXT: st.param.b32 [func_retval0], %r1;
228+
; O0-NEXT: ld.s8 %rs1, [%rd1+1];
229+
; O0-NEXT: ld.s8 %rs2, [%rd1];
230+
; O0-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
273231
; O0-NEXT: ret;
274232
;
275233
; O3-LABEL: test_sext_load_i8x2_unaligned(
276234
; O3: {
277-
; O3-NEXT: .local .align 2 .b8 __local_depot7[2];
278-
; O3-NEXT: .reg .b64 %SP;
279-
; O3-NEXT: .reg .b64 %SPL;
280-
; O3-NEXT: .reg .b16 %rs<5>;
281-
; O3-NEXT: .reg .b32 %r<2>;
235+
; O3-NEXT: .reg .b16 %rs<3>;
282236
; O3-NEXT: .reg .b64 %rd<2>;
283237
; O3-EMPTY:
284238
; O3-NEXT: // %bb.0:
285-
; O3-NEXT: mov.b64 %SPL, __local_depot7;
286-
; O3-NEXT: cvta.local.u64 %SP, %SPL;
287239
; O3-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0];
288-
; O3-NEXT: ld.b8 %rs1, [%rd1];
289-
; O3-NEXT: ld.b8 %rs2, [%rd1+1];
290-
; O3-NEXT: shl.b16 %rs3, %rs2, 8;
291-
; O3-NEXT: or.b16 %rs4, %rs3, %rs1;
292-
; O3-NEXT: st.b16 [%SP], %rs4;
293-
; O3-NEXT: ld.s16 %r1, [%SP];
294-
; O3-NEXT: st.param.b32 [func_retval0], %r1;
240+
; O3-NEXT: ld.s8 %rs1, [%rd1+1];
241+
; O3-NEXT: ld.s8 %rs2, [%rd1];
242+
; O3-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
295243
; O3-NEXT: ret;
296244
%a = load <2 x i8>, ptr %ptr, align 1
297245
%b = sext <2 x i8> %a to <2 x i16>

0 commit comments

Comments
 (0)