@@ -1015,29 +1015,33 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
10151015
10161016// Helper function that reduces the amount of boilerplate code needed for
10171017// opcode selection.
1018- static std::optional<unsigned >
1019- pickOpcodeForVT ( MVT::SimpleValueType VT, std::optional<unsigned > Opcode_i8,
1020- std::optional<unsigned > Opcode_i16 ,
1021- std::optional<unsigned > Opcode_i32 ,
1022- std::optional<unsigned > Opcode_i64 ) {
1018+ static std::optional<unsigned > pickOpcodeForVT (
1019+ MVT::SimpleValueType VT, std::optional<unsigned > Opcode_i8,
1020+ std::optional< unsigned > Opcode_i16, std::optional<unsigned > Opcode_i32 ,
1021+ std::optional< unsigned > Opcode_i64, std::optional<unsigned > Opcode_f32 ,
1022+ std::optional<unsigned > Opcode_f64 ) {
10231023 switch (VT) {
10241024 case MVT::i1:
10251025 case MVT::i8 :
10261026 return Opcode_i8;
1027- case MVT::f16 :
10281027 case MVT::i16 :
1028+ return Opcode_i16;
1029+ case MVT::i32 :
1030+ return Opcode_i32;
1031+ case MVT::i64 :
1032+ return Opcode_i64;
1033+ case MVT::f16 :
10291034 case MVT::bf16 :
10301035 return Opcode_i16;
10311036 case MVT::v2f16:
10321037 case MVT::v2bf16:
10331038 case MVT::v2i16:
10341039 case MVT::v4i8:
1035- case MVT::i32 :
1036- case MVT::f32 :
10371040 return Opcode_i32;
1038- case MVT::i64 :
1041+ case MVT::f32 :
1042+ return Opcode_f32;
10391043 case MVT::f64 :
1040- return Opcode_i64 ;
1044+ return Opcode_f64 ;
10411045 default :
10421046 return std::nullopt ;
10431047 }
@@ -1097,8 +1101,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
10971101 Chain};
10981102
10991103 const MVT::SimpleValueType TargetVT = LD->getSimpleValueType (0 ).SimpleTy ;
1100- const std::optional<unsigned > Opcode = pickOpcodeForVT (
1101- TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32, NVPTX::LD_i64);
1104+ const std::optional<unsigned > Opcode =
1105+ pickOpcodeForVT (TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32,
1106+ NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64);
11021107 if (!Opcode)
11031108 return false ;
11041109
@@ -1198,19 +1203,22 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
11981203 default :
11991204 return false ;
12001205 case NVPTXISD::LoadV2:
1201- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::LDV_i8_v2,
1202- NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2,
1203- NVPTX::LDV_i64_v2);
1206+ Opcode =
1207+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::LDV_i8_v2,
1208+ NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2,
1209+ NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2);
12041210 break ;
12051211 case NVPTXISD::LoadV4:
1206- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::LDV_i8_v4,
1207- NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4,
1208- NVPTX::LDV_i64_v4);
1212+ Opcode =
1213+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::LDV_i8_v4,
1214+ NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, NVPTX::LDV_i64_v4,
1215+ NVPTX::LDV_f32_v4, NVPTX::LDV_f64_v4);
12091216 break ;
12101217 case NVPTXISD::LoadV8:
12111218 Opcode =
12121219 pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1213- {/* no v8i16 */ }, NVPTX::LDV_i32_v8, {/* no v8i64 */ });
1220+ {/* no v8i16 */ }, NVPTX::LDV_i32_v8, {/* no v8i64 */ },
1221+ NVPTX::LDV_f32_v8, {/* no v8f64 */ });
12141222 break ;
12151223 }
12161224 if (!Opcode)
@@ -1278,42 +1286,48 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
12781286 Opcode = pickOpcodeForVT (
12791287 EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_GLOBAL_i8,
12801288 NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
1281- NVPTX::INT_PTX_LDG_GLOBAL_i64);
1289+ NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
1290+ NVPTX::INT_PTX_LDG_GLOBAL_f64);
12821291 break ;
12831292 case ISD::INTRINSIC_W_CHAIN:
12841293 Opcode = pickOpcodeForVT (
12851294 EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_GLOBAL_i8,
12861295 NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
1287- NVPTX::INT_PTX_LDU_GLOBAL_i64);
1296+ NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
1297+ NVPTX::INT_PTX_LDU_GLOBAL_f64);
12881298 break ;
12891299 case NVPTXISD::LoadV2:
12901300 Opcode = pickOpcodeForVT (
12911301 EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v2i8_ELE,
12921302 NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
1293- NVPTX::INT_PTX_LDG_G_v2i64_ELE);
1303+ NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
1304+ NVPTX::INT_PTX_LDG_G_v2f64_ELE);
12941305 break ;
12951306 case NVPTXISD::LDUV2:
12961307 Opcode = pickOpcodeForVT (
12971308 EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v2i8_ELE,
12981309 NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
1299- NVPTX::INT_PTX_LDU_G_v2i64_ELE);
1310+ NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
1311+ NVPTX::INT_PTX_LDU_G_v2f64_ELE);
13001312 break ;
13011313 case NVPTXISD::LoadV4:
13021314 Opcode = pickOpcodeForVT (
13031315 EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v4i8_ELE,
13041316 NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
1305- NVPTX::INT_PTX_LDG_G_v4i64_ELE);
1317+ NVPTX::INT_PTX_LDG_G_v4i64_ELE, NVPTX::INT_PTX_LDG_G_v4f32_ELE,
1318+ NVPTX::INT_PTX_LDG_G_v4f64_ELE);
13061319 break ;
13071320 case NVPTXISD::LDUV4:
1308- Opcode = pickOpcodeForVT (EltVT. getSimpleVT (). SimpleTy ,
1309- NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1310- NVPTX::INT_PTX_LDU_G_v4i16_ELE,
1311- NVPTX::INT_PTX_LDU_G_v4i32_ELE , {/* no v4i64 */ });
1321+ Opcode = pickOpcodeForVT (
1322+ EltVT. getSimpleVT (). SimpleTy , NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1323+ NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE ,
1324+ { /* no v4i64 */ }, NVPTX::INT_PTX_LDU_G_v4f32_ELE , {/* no v4f64 */ });
13121325 break ;
13131326 case NVPTXISD::LoadV8:
13141327 Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
13151328 {/* no v8i16 */ }, NVPTX::INT_PTX_LDG_G_v8i32_ELE,
1316- {/* no v8i64 */ });
1329+ {/* no v8i64 */ }, NVPTX::INT_PTX_LDG_G_v8f32_ELE,
1330+ {/* no v8f64 */ });
13171331 break ;
13181332 }
13191333 if (!Opcode)
@@ -1407,8 +1421,9 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
14071421
14081422 const MVT::SimpleValueType SourceVT =
14091423 Value.getNode ()->getSimpleValueType (0 ).SimpleTy ;
1410- const std::optional<unsigned > Opcode = pickOpcodeForVT (
1411- SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
1424+ const std::optional<unsigned > Opcode =
1425+ pickOpcodeForVT (SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32,
1426+ NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64);
14121427 if (!Opcode)
14131428 return false ;
14141429
@@ -1471,19 +1486,22 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
14711486 default :
14721487 return false ;
14731488 case NVPTXISD::StoreV2:
1474- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::STV_i8_v2,
1475- NVPTX::STV_i16_v2, NVPTX::STV_i32_v2,
1476- NVPTX::STV_i64_v2);
1489+ Opcode =
1490+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::STV_i8_v2,
1491+ NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2,
1492+ NVPTX::STV_f32_v2, NVPTX::STV_f64_v2);
14771493 break ;
14781494 case NVPTXISD::StoreV4:
1479- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::STV_i8_v4,
1480- NVPTX::STV_i16_v4, NVPTX::STV_i32_v4,
1481- NVPTX::STV_i64_v4);
1495+ Opcode =
1496+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::STV_i8_v4,
1497+ NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, NVPTX::STV_i64_v4,
1498+ NVPTX::STV_f32_v4, NVPTX::STV_f64_v4);
14821499 break ;
14831500 case NVPTXISD::StoreV8:
14841501 Opcode =
14851502 pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1486- {/* no v8i16 */ }, NVPTX::STV_i32_v8, {/* no v8i64 */ });
1503+ {/* no v8i16 */ }, NVPTX::STV_i32_v8, {/* no v8i64 */ },
1504+ NVPTX::STV_f32_v8, {/* no v8f64 */ });
14871505 break ;
14881506 }
14891507
@@ -1532,18 +1550,21 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
15321550 case 1 :
15331551 Opcode = pickOpcodeForVT (MemVT.getSimpleVT ().SimpleTy ,
15341552 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
1535- NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64);
1553+ NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
1554+ NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
15361555 break ;
15371556 case 2 :
15381557 Opcode =
15391558 pickOpcodeForVT (MemVT.getSimpleVT ().SimpleTy , NVPTX::LoadParamMemV2I8,
15401559 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
1541- NVPTX::LoadParamMemV2I64);
1560+ NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
1561+ NVPTX::LoadParamMemV2F64);
15421562 break ;
15431563 case 4 :
15441564 Opcode = pickOpcodeForVT (MemVT.getSimpleVT ().SimpleTy ,
15451565 NVPTX::LoadParamMemV4I8, NVPTX::LoadParamMemV4I16,
1546- NVPTX::LoadParamMemV4I32, {/* no v4i64 */ });
1566+ NVPTX::LoadParamMemV4I32, {/* no v4i64 */ },
1567+ NVPTX::LoadParamMemV4F32, {/* no v4f64 */ });
15471568 break ;
15481569 }
15491570 if (!Opcode)
@@ -1607,7 +1628,8 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
16071628 case 1 :
16081629 Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
16091630 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
1610- NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64);
1631+ NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
1632+ NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
16111633 if (Opcode == NVPTX::StoreRetvalI8) {
16121634 // Fine tune the opcode depending on the size of the operand.
16131635 // This helps to avoid creating redundant COPY instructions in
@@ -1627,12 +1649,14 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
16271649 case 2 :
16281650 Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
16291651 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
1630- NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64);
1652+ NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
1653+ NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
16311654 break ;
16321655 case 4 :
16331656 Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
16341657 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
1635- NVPTX::StoreRetvalV4I32, {/* no v4i64 */ });
1658+ NVPTX::StoreRetvalV4I32, {/* no v4i64 */ },
1659+ NVPTX::StoreRetvalV4F32, {/* no v4f64 */ });
16361660 break ;
16371661 }
16381662 if (!Opcode)
@@ -1803,12 +1827,14 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
18031827 // Use immediate version of store param
18041828 Opcode = pickOpcodeForVT (MemTy, NVPTX::StoreParamI8_i,
18051829 NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
1806- NVPTX::StoreParamI64_i);
1830+ NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
1831+ NVPTX::StoreParamF64_i);
18071832 } else
18081833 Opcode =
18091834 pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
18101835 NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
1811- NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r);
1836+ NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
1837+ NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
18121838 if (Opcode == NVPTX::StoreParamI8_r) {
18131839 // Fine tune the opcode depending on the size of the operand.
18141840 // This helps to avoid creating redundant COPY instructions in
0 commit comments