Skip to content

Commit 05e8600

Browse files
authored
[NVPTX] fix truncating/extending loads/stores for v2i32 (#163838)
1 parent 9be6744 commit 05e8600

File tree

2 files changed

+163
-10
lines changed

2 files changed

+163
-10
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,15 +749,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
749749
setTruncStoreAction(VT, MVT::i1, Expand);
750750
}
751751

752-
// Disable generations of extload/truncstore for v2i16/v2i8. The generic
752+
// Disable generation of extload/truncstore for v2i32/v2i16/v2i8. The generic
753753
// expansion for these nodes when they are unaligned is incorrect if the
754754
// type is a vector.
755755
//
756756
// TODO: Fix the generic expansion for these nodes found in
757757
// TargetLowering::expandUnalignedLoad/Store.
758758
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
759759
MVT::v2i8, Expand);
760+
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
761+
{MVT::v2i8, MVT::v2i16}, Expand);
760762
setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
763+
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
764+
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
761765

762766
// Register custom handling for illegal type loads/stores. We'll try to custom
763767
// lower almost all illegal types and logic in the lowering will discard cases

llvm/test/CodeGen/NVPTX/i32x2-instructions.ll

Lines changed: 158 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,29 +1141,88 @@ define <2 x i32> @test_select_cc(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
11411141
ret <2 x i32> %r
11421142
}
11431143

1144-
define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
1145-
; CHECK-NOI32X2-LABEL: test_trunc_2xi32(
1144+
define <2 x i16> @test_trunc_2xi32_to_2xi16(<2 x i32> %a) #0 {
1145+
; CHECK-NOI32X2-LABEL: test_trunc_2xi32_to_2xi16(
11461146
; CHECK-NOI32X2: {
11471147
; CHECK-NOI32X2-NEXT: .reg .b32 %r<4>;
11481148
; CHECK-NOI32X2-EMPTY:
11491149
; CHECK-NOI32X2-NEXT: // %bb.0:
1150-
; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_2xi32_param_0];
1150+
; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_2xi32_to_2xi16_param_0];
11511151
; CHECK-NOI32X2-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
11521152
; CHECK-NOI32X2-NEXT: st.param.b32 [func_retval0], %r3;
11531153
; CHECK-NOI32X2-NEXT: ret;
11541154
;
1155-
; CHECK-I32X2-LABEL: test_trunc_2xi32(
1155+
; CHECK-I32X2-LABEL: test_trunc_2xi32_to_2xi16(
11561156
; CHECK-I32X2: {
1157+
; CHECK-I32X2-NEXT: .reg .b32 %r<4>;
11571158
; CHECK-I32X2-NEXT: .reg .b64 %rd<2>;
11581159
; CHECK-I32X2-EMPTY:
11591160
; CHECK-I32X2-NEXT: // %bb.0:
1160-
; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_trunc_2xi32_param_0];
1161-
; CHECK-I32X2-NEXT: st.param.b32 [func_retval0], %rd1;
1161+
; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_trunc_2xi32_to_2xi16_param_0];
1162+
; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1;
1163+
; CHECK-I32X2-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
1164+
; CHECK-I32X2-NEXT: st.param.b32 [func_retval0], %r3;
11621165
; CHECK-I32X2-NEXT: ret;
11631166
%r = trunc <2 x i32> %a to <2 x i16>
11641167
ret <2 x i16> %r
11651168
}
11661169

1170+
define <2 x i8> @test_trunc_2xi32_to_2xi8(<2 x i32> %a) #0 {
1171+
; CHECK-NOI32X2-LABEL: test_trunc_2xi32_to_2xi8(
1172+
; CHECK-NOI32X2: {
1173+
; CHECK-NOI32X2-NEXT: .reg .b16 %rs<3>;
1174+
; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>;
1175+
; CHECK-NOI32X2-EMPTY:
1176+
; CHECK-NOI32X2-NEXT: // %bb.0:
1177+
; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_2xi32_to_2xi8_param_0];
1178+
; CHECK-NOI32X2-NEXT: cvt.u16.u32 %rs1, %r2;
1179+
; CHECK-NOI32X2-NEXT: cvt.u16.u32 %rs2, %r1;
1180+
; CHECK-NOI32X2-NEXT: st.param.v2.b8 [func_retval0], {%rs2, %rs1};
1181+
; CHECK-NOI32X2-NEXT: ret;
1182+
;
1183+
; CHECK-I32X2-LABEL: test_trunc_2xi32_to_2xi8(
1184+
; CHECK-I32X2: {
1185+
; CHECK-I32X2-NEXT: .reg .b16 %rs<3>;
1186+
; CHECK-I32X2-NEXT: .reg .b32 %r<3>;
1187+
; CHECK-I32X2-NEXT: .reg .b64 %rd<2>;
1188+
; CHECK-I32X2-EMPTY:
1189+
; CHECK-I32X2-NEXT: // %bb.0:
1190+
; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_trunc_2xi32_to_2xi8_param_0];
1191+
; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1;
1192+
; CHECK-I32X2-NEXT: cvt.u16.u32 %rs1, %r2;
1193+
; CHECK-I32X2-NEXT: cvt.u16.u32 %rs2, %r1;
1194+
; CHECK-I32X2-NEXT: st.param.v2.b8 [func_retval0], {%rs2, %rs1};
1195+
; CHECK-I32X2-NEXT: ret;
1196+
%r = trunc <2 x i32> %a to <2 x i8>
1197+
ret <2 x i8> %r
1198+
}
1199+
1200+
define <2 x i1> @test_trunc_2xi32_to_2xi1(<2 x i32> %a) #0 {
1201+
; CHECK-NOI32X2-LABEL: test_trunc_2xi32_to_2xi1(
1202+
; CHECK-NOI32X2: {
1203+
; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>;
1204+
; CHECK-NOI32X2-EMPTY:
1205+
; CHECK-NOI32X2-NEXT: // %bb.0:
1206+
; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_2xi32_to_2xi1_param_0];
1207+
; CHECK-NOI32X2-NEXT: st.param.b8 [func_retval0], %r1;
1208+
; CHECK-NOI32X2-NEXT: st.param.b8 [func_retval0+1], %r2;
1209+
; CHECK-NOI32X2-NEXT: ret;
1210+
;
1211+
; CHECK-I32X2-LABEL: test_trunc_2xi32_to_2xi1(
1212+
; CHECK-I32X2: {
1213+
; CHECK-I32X2-NEXT: .reg .b32 %r<3>;
1214+
; CHECK-I32X2-NEXT: .reg .b64 %rd<2>;
1215+
; CHECK-I32X2-EMPTY:
1216+
; CHECK-I32X2-NEXT: // %bb.0:
1217+
; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_trunc_2xi32_to_2xi1_param_0];
1218+
; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1;
1219+
; CHECK-I32X2-NEXT: st.param.b8 [func_retval0], %r1;
1220+
; CHECK-I32X2-NEXT: st.param.b8 [func_retval0+1], %r2;
1221+
; CHECK-I32X2-NEXT: ret;
1222+
%r = trunc <2 x i32> %a to <2 x i1>
1223+
ret <2 x i1> %r
1224+
}
1225+
11671226
define <2 x i32> @test_trunc_2xi64(<2 x i64> %a) #0 {
11681227
; CHECK-LABEL: test_trunc_2xi64(
11691228
; CHECK: {
@@ -1180,14 +1239,14 @@ define <2 x i32> @test_trunc_2xi64(<2 x i64> %a) #0 {
11801239
ret <2 x i32> %r
11811240
}
11821241

1183-
define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
1184-
; CHECK-LABEL: test_zext_2xi32(
1242+
define <2 x i32> @test_zext_2xi16_to_2xi32(<2 x i16> %a) #0 {
1243+
; CHECK-LABEL: test_zext_2xi16_to_2xi32(
11851244
; CHECK: {
11861245
; CHECK-NEXT: .reg .b16 %rs<3>;
11871246
; CHECK-NEXT: .reg .b32 %r<4>;
11881247
; CHECK-EMPTY:
11891248
; CHECK-NEXT: // %bb.0:
1190-
; CHECK-NEXT: ld.param.b32 %r1, [test_zext_2xi32_param_0];
1249+
; CHECK-NEXT: ld.param.b32 %r1, [test_zext_2xi16_to_2xi32_param_0];
11911250
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
11921251
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
11931252
; CHECK-NEXT: cvt.u32.u16 %r3, %rs1;
@@ -1197,6 +1256,47 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
11971256
ret <2 x i32> %r
11981257
}
11991258

1259+
define <2 x i32> @test_zext_2xi8_to_2xi32(<2 x i8> %a) #0 {
1260+
; CHECK-LABEL: test_zext_2xi8_to_2xi32(
1261+
; CHECK: {
1262+
; CHECK-NEXT: .reg .b16 %rs<3>;
1263+
; CHECK-NEXT: .reg .b32 %r<4>;
1264+
; CHECK-EMPTY:
1265+
; CHECK-NEXT: // %bb.0:
1266+
; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_zext_2xi8_to_2xi32_param_0];
1267+
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
1268+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
1269+
; CHECK-NEXT: cvt.u32.u16 %r3, %rs1;
1270+
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2};
1271+
; CHECK-NEXT: ret;
1272+
%r = zext <2 x i8> %a to <2 x i32>
1273+
ret <2 x i32> %r
1274+
}
1275+
1276+
define <2 x i32> @test_zext_2xi1_to_2xi32(<2 x i1> %a) #0 {
1277+
; CHECK-LABEL: test_zext_2xi1_to_2xi32(
1278+
; CHECK: {
1279+
; CHECK-NEXT: .reg .pred %p<3>;
1280+
; CHECK-NEXT: .reg .b16 %rs<5>;
1281+
; CHECK-NEXT: .reg .b32 %r<5>;
1282+
; CHECK-EMPTY:
1283+
; CHECK-NEXT: // %bb.0:
1284+
; CHECK-NEXT: ld.param.b8 %rs1, [test_zext_2xi1_to_2xi32_param_0+1];
1285+
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
1286+
; CHECK-NEXT: setp.ne.b16 %p2, %rs2, 0;
1287+
; CHECK-NEXT: ld.param.b8 %rs3, [test_zext_2xi1_to_2xi32_param_0];
1288+
; CHECK-NEXT: and.b16 %rs4, %rs3, 1;
1289+
; CHECK-NEXT: setp.ne.b16 %p1, %rs4, 0;
1290+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
1291+
; CHECK-NEXT: and.b32 %r2, %r1, 1;
1292+
; CHECK-NEXT: cvt.u32.u16 %r3, %rs3;
1293+
; CHECK-NEXT: and.b32 %r4, %r3, 1;
1294+
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r2};
1295+
; CHECK-NEXT: ret;
1296+
%r = zext <2 x i1> %a to <2 x i32>
1297+
ret <2 x i32> %r
1298+
}
1299+
12001300
define <2 x i64> @test_zext_2xi64(<2 x i32> %a) #0 {
12011301
; CHECK-NOI32X2-LABEL: test_zext_2xi64(
12021302
; CHECK-NOI32X2: {
@@ -1566,6 +1666,55 @@ entry:
15661666
ret void
15671667
}
15681668

1669+
define <2 x i32> @test_sext_v2i8_to_v2i32 (<2 x i8> %a) {
1670+
; CHECK-LABEL: test_sext_v2i8_to_v2i32(
1671+
; CHECK: {
1672+
; CHECK-NEXT: .reg .b16 %rs<3>;
1673+
; CHECK-NEXT: .reg .b32 %r<6>;
1674+
; CHECK-EMPTY:
1675+
; CHECK-NEXT: // %bb.0:
1676+
; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_sext_v2i8_to_v2i32_param_0];
1677+
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
1678+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
1679+
; CHECK-NEXT: cvt.s32.s8 %r3, %r2;
1680+
; CHECK-NEXT: cvt.u32.u16 %r4, %rs1;
1681+
; CHECK-NEXT: cvt.s32.s8 %r5, %r4;
1682+
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r3};
1683+
; CHECK-NEXT: ret;
1684+
%r = sext <2 x i8> %a to <2 x i32>
1685+
ret <2 x i32> %r
1686+
}
1687+
1688+
define <2 x i32> @test_sext_v2i16_to_v2i32 (<2 x i16> %a) {
1689+
; CHECK-NOI32X2-LABEL: test_sext_v2i16_to_v2i32(
1690+
; CHECK-NOI32X2: {
1691+
; CHECK-NOI32X2-NEXT: .reg .b16 %rs<2>;
1692+
; CHECK-NOI32X2-NEXT: .reg .b32 %r<4>;
1693+
; CHECK-NOI32X2-EMPTY:
1694+
; CHECK-NOI32X2-NEXT: // %bb.0:
1695+
; CHECK-NOI32X2-NEXT: ld.param.b32 %r1, [test_sext_v2i16_to_v2i32_param_0];
1696+
; CHECK-NOI32X2-NEXT: cvt.s32.s16 %r2, %r1;
1697+
; CHECK-NOI32X2-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; }
1698+
; CHECK-NOI32X2-NEXT: cvt.s32.s16 %r3, %rs1;
1699+
; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r3};
1700+
; CHECK-NOI32X2-NEXT: ret;
1701+
;
1702+
; CHECK-I32X2-LABEL: test_sext_v2i16_to_v2i32(
1703+
; CHECK-I32X2: {
1704+
; CHECK-I32X2-NEXT: .reg .b16 %rs<2>;
1705+
; CHECK-I32X2-NEXT: .reg .b32 %r<4>;
1706+
; CHECK-I32X2-EMPTY:
1707+
; CHECK-I32X2-NEXT: // %bb.0:
1708+
; CHECK-I32X2-NEXT: ld.param.b32 %r1, [test_sext_v2i16_to_v2i32_param_0];
1709+
; CHECK-I32X2-NEXT: cvt.s32.s16 %r2, %r1;
1710+
; CHECK-I32X2-NEXT: mov.b32 {_, %rs1}, %r1;
1711+
; CHECK-I32X2-NEXT: cvt.s32.s16 %r3, %rs1;
1712+
; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r3};
1713+
; CHECK-I32X2-NEXT: ret;
1714+
%r = sext <2 x i16> %a to <2 x i32>
1715+
ret <2 x i32> %r
1716+
}
1717+
15691718
define <2 x float> @test_uitofp_v2i32(<2 x i32> %a) {
15701719
; CHECK-NOI32X2-LABEL: test_uitofp_v2i32(
15711720
; CHECK-NOI32X2: {

0 commit comments

Comments
 (0)