@@ -129,6 +129,7 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
129
129
return ;
130
130
case NVPTXISD::LoadV2:
131
131
case NVPTXISD::LoadV4:
132
+ case NVPTXISD::LoadV8:
132
133
if (tryLoadVector (N))
133
134
return ;
134
135
break ;
@@ -139,6 +140,7 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
139
140
break ;
140
141
case NVPTXISD::StoreV2:
141
142
case NVPTXISD::StoreV4:
143
+ case NVPTXISD::StoreV8:
142
144
if (tryStoreVector (N))
143
145
return ;
144
146
break ;
@@ -1012,11 +1014,11 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
1012
1014
1013
1015
// Helper function template to reduce amount of boilerplate code for
1014
1016
// opcode selection.
1015
- static std::optional<unsigned >
1016
- pickOpcodeForVT ( MVT::SimpleValueType VT, unsigned Opcode_i8,
1017
- unsigned Opcode_i16, unsigned Opcode_i32,
1018
- std::optional<unsigned > Opcode_i64, unsigned Opcode_f32,
1019
- std::optional<unsigned > Opcode_f64) {
1017
+ static std::optional<unsigned > pickOpcodeForVT (
1018
+ MVT::SimpleValueType VT, std::optional< unsigned > Opcode_i8,
1019
+ std::optional< unsigned > Opcode_i16, std::optional< unsigned > Opcode_i32,
1020
+ std::optional<unsigned > Opcode_i64, std::optional< unsigned > Opcode_f32,
1021
+ std::optional<unsigned > Opcode_f64) {
1020
1022
switch (VT) {
1021
1023
case MVT::i1:
1022
1024
case MVT::i8 :
@@ -1091,7 +1093,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
1091
1093
SDValue Ops[] = {getI32Imm (Ordering, DL),
1092
1094
getI32Imm (Scope, DL),
1093
1095
getI32Imm (CodeAddrSpace, DL),
1094
- getI32Imm (NVPTX::PTXLdStInstCode::Scalar, DL),
1095
1096
getI32Imm (FromType, DL),
1096
1097
getI32Imm (FromTypeWidth, DL),
1097
1098
Base,
@@ -1128,6 +1129,22 @@ static bool isSubVectorPackedInI32(EVT EltVT) {
1128
1129
return Isv2x16VT (EltVT) || EltVT == MVT::v4i8;
1129
1130
}
1130
1131
1132
+ static unsigned getLoadStoreVectorNumElts (SDNode *N) { // Maps a LoadV2/V4/V8 or StoreV2/V4/V8 node to its element count (2, 4 or 8).
1133
+ switch (N->getOpcode ()) { // The count is derived from the opcode alone; no operands are inspected.
1134
+ case NVPTXISD::LoadV2:
1135
+ case NVPTXISD::StoreV2:
1136
+ return 2 ; // Load and store variants of the same width share one result.
1137
+ case NVPTXISD::LoadV4:
1138
+ case NVPTXISD::StoreV4:
1139
+ return 4 ;
1140
+ case NVPTXISD::LoadV8:
1141
+ case NVPTXISD::StoreV8:
1142
+ return 8 ;
1143
+ default : // Any opcode other than the six listed above is treated as a caller error.
1144
+ llvm_unreachable (" Unexpected opcode" );
1145
+ }
1146
+ }
1147
+
1131
1148
bool NVPTXDAGToDAGISel::tryLoadVector (SDNode *N) {
1132
1149
MemSDNode *MemSD = cast<MemSDNode>(N);
1133
1150
const EVT MemEVT = MemSD->getMemoryVT ();
@@ -1159,35 +1176,21 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
1159
1176
? NVPTX::PTXLdStInstCode::Signed
1160
1177
: NVPTX::PTXLdStInstCode::Untyped;
1161
1178
1162
- unsigned VecType;
1163
- unsigned FromTypeWidth;
1164
- switch (N->getOpcode ()) {
1165
- case NVPTXISD::LoadV2:
1166
- FromTypeWidth = TotalWidth / 2 ;
1167
- VecType = NVPTX::PTXLdStInstCode::V2;
1168
- break ;
1169
- case NVPTXISD::LoadV4:
1170
- FromTypeWidth = TotalWidth / 4 ;
1171
- VecType = NVPTX::PTXLdStInstCode::V4;
1172
- break ;
1173
- default :
1174
- return false ;
1175
- }
1179
+ unsigned FromTypeWidth = TotalWidth / getLoadStoreVectorNumElts (N);
1176
1180
1177
1181
if (isSubVectorPackedInI32 (EltVT)) {
1178
1182
assert (ExtensionType == ISD::NON_EXTLOAD);
1179
1183
EltVT = MVT::i32 ;
1180
1184
}
1181
1185
1182
1186
assert (isPowerOf2_32 (FromTypeWidth) && FromTypeWidth >= 8 &&
1183
- FromTypeWidth <= 128 && TotalWidth <= 128 && " Invalid width for load" );
1187
+ FromTypeWidth <= 128 && TotalWidth <= 256 && " Invalid width for load" );
1184
1188
1185
1189
SDValue Offset, Base;
1186
1190
SelectADDR (N->getOperand (1 ), Base, Offset);
1187
1191
SDValue Ops[] = {getI32Imm (Ordering, DL),
1188
1192
getI32Imm (Scope, DL),
1189
1193
getI32Imm (CodeAddrSpace, DL),
1190
- getI32Imm (VecType, DL),
1191
1194
getI32Imm (FromType, DL),
1192
1195
getI32Imm (FromTypeWidth, DL),
1193
1196
Base,
@@ -1205,9 +1208,16 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
1205
1208
NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2);
1206
1209
break ;
1207
1210
case NVPTXISD::LoadV4:
1208
- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::LDV_i8_v4,
1209
- NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, std::nullopt,
1210
- NVPTX::LDV_f32_v4, std::nullopt);
1211
+ Opcode =
1212
+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::LDV_i8_v4,
1213
+ NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, NVPTX::LDV_i64_v4,
1214
+ NVPTX::LDV_f32_v4, NVPTX::LDV_f64_v4);
1215
+ break ;
1216
+ case NVPTXISD::LoadV8:
1217
+ Opcode =
1218
+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1219
+ {/* no v8i16 */ }, NVPTX::LDV_i32_v8, {/* no v8i64 */ },
1220
+ NVPTX::LDV_f32_v8, {/* no v8f64 */ });
1211
1221
break ;
1212
1222
}
1213
1223
if (!Opcode)
@@ -1303,13 +1313,20 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1303
1313
Opcode = pickOpcodeForVT (
1304
1314
EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v4i8_ELE,
1305
1315
NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
1306
- std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt);
1316
+ NVPTX::INT_PTX_LDG_G_v4i64_ELE, NVPTX::INT_PTX_LDG_G_v4f32_ELE,
1317
+ NVPTX::INT_PTX_LDG_G_v4f64_ELE);
1307
1318
break ;
1308
1319
case NVPTXISD::LDUV4:
1309
1320
Opcode = pickOpcodeForVT (
1310
1321
EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1311
1322
NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
1312
- std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt);
1323
+ {/* no v4i64 */ }, NVPTX::INT_PTX_LDU_G_v4f32_ELE, {/* no v4f64 */ });
1324
+ break ;
1325
+ case NVPTXISD::LoadV8:
1326
+ Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1327
+ {/* no v8i16 */ }, NVPTX::INT_PTX_LDG_G_v8i32_ELE,
1328
+ {/* no v8i64 */ }, NVPTX::INT_PTX_LDG_G_v8f32_ELE,
1329
+ {/* no v8f64 */ });
1313
1330
break ;
1314
1331
}
1315
1332
if (!Opcode)
@@ -1395,7 +1412,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1395
1412
getI32Imm (Ordering, DL),
1396
1413
getI32Imm (Scope, DL),
1397
1414
getI32Imm (CodeAddrSpace, DL),
1398
- getI32Imm (NVPTX::PTXLdStInstCode::Scalar, DL),
1399
1415
getI32Imm (NVPTX::PTXLdStInstCode::Untyped, DL),
1400
1416
getI32Imm (ToTypeWidth, DL),
1401
1417
Base,
@@ -1443,41 +1459,24 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1443
1459
// - for integer type, always use 'u'
1444
1460
const unsigned TotalWidth = StoreVT.getSimpleVT ().getSizeInBits ();
1445
1461
1446
- SmallVector<SDValue, 12 > Ops;
1447
- SDValue N2;
1448
- unsigned VecType;
1449
- unsigned ToTypeWidth;
1462
+ unsigned NumElts = getLoadStoreVectorNumElts (N);
1450
1463
1451
- switch (N->getOpcode ()) {
1452
- case NVPTXISD::StoreV2:
1453
- VecType = NVPTX::PTXLdStInstCode::V2;
1454
- Ops.append ({N->getOperand (1 ), N->getOperand (2 )});
1455
- N2 = N->getOperand (3 );
1456
- ToTypeWidth = TotalWidth / 2 ;
1457
- break ;
1458
- case NVPTXISD::StoreV4:
1459
- VecType = NVPTX::PTXLdStInstCode::V4;
1460
- Ops.append ({N->getOperand (1 ), N->getOperand (2 ), N->getOperand (3 ),
1461
- N->getOperand (4 )});
1462
- N2 = N->getOperand (5 );
1463
- ToTypeWidth = TotalWidth / 4 ;
1464
- break ;
1465
- default :
1466
- return false ;
1467
- }
1464
+ SmallVector<SDValue, 16 > Ops (N->ops ().slice (1 , NumElts));
1465
+ SDValue N2 = N->getOperand (NumElts + 1 );
1466
+ unsigned ToTypeWidth = TotalWidth / NumElts;
1468
1467
1469
1468
if (isSubVectorPackedInI32 (EltVT)) {
1470
1469
EltVT = MVT::i32 ;
1471
1470
}
1472
1471
1473
1472
assert (isPowerOf2_32 (ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
1474
- TotalWidth <= 128 && " Invalid width for store" );
1473
+ TotalWidth <= 256 && " Invalid width for store" );
1475
1474
1476
1475
SDValue Offset, Base;
1477
1476
SelectADDR (N2, Base, Offset);
1478
1477
1479
1478
Ops.append ({getI32Imm (Ordering, DL), getI32Imm (Scope, DL),
1480
- getI32Imm (CodeAddrSpace, DL), getI32Imm (VecType, DL),
1479
+ getI32Imm (CodeAddrSpace, DL),
1481
1480
getI32Imm (NVPTX::PTXLdStInstCode::Untyped, DL),
1482
1481
getI32Imm (ToTypeWidth, DL), Base, Offset, Chain});
1483
1482
@@ -1492,9 +1491,16 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1492
1491
NVPTX::STV_f32_v2, NVPTX::STV_f64_v2);
1493
1492
break ;
1494
1493
case NVPTXISD::StoreV4:
1495
- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::STV_i8_v4,
1496
- NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, std::nullopt,
1497
- NVPTX::STV_f32_v4, std::nullopt);
1494
+ Opcode =
1495
+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , NVPTX::STV_i8_v4,
1496
+ NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, NVPTX::STV_i64_v4,
1497
+ NVPTX::STV_f32_v4, NVPTX::STV_f64_v4);
1498
+ break ;
1499
+ case NVPTXISD::StoreV8:
1500
+ Opcode =
1501
+ pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1502
+ {/* no v8i16 */ }, NVPTX::STV_i32_v8, {/* no v8i64 */ },
1503
+ NVPTX::STV_f32_v8, {/* no v8f64 */ });
1498
1504
break ;
1499
1505
}
1500
1506
@@ -1554,10 +1560,10 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
1554
1560
NVPTX::LoadParamMemV2F64);
1555
1561
break ;
1556
1562
case 4 :
1557
- Opcode =
1558
- pickOpcodeForVT (MemVT. getSimpleVT (). SimpleTy , NVPTX::LoadParamMemV4I8,
1559
- NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
1560
- std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt );
1563
+ Opcode = pickOpcodeForVT (MemVT. getSimpleVT (). SimpleTy ,
1564
+ NVPTX::LoadParamMemV4I8, NVPTX::LoadParamMemV4I16 ,
1565
+ NVPTX::LoadParamMemV4I32, { /* no v4i64 */ } ,
1566
+ NVPTX::LoadParamMemV4F32, { /* no v4f64 */ } );
1561
1567
break ;
1562
1568
}
1563
1569
if (!Opcode)
@@ -1648,8 +1654,8 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
1648
1654
case 4 :
1649
1655
Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
1650
1656
NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
1651
- NVPTX::StoreRetvalV4I32, std::nullopt ,
1652
- NVPTX::StoreRetvalV4F32, std::nullopt );
1657
+ NVPTX::StoreRetvalV4I32, { /* no v4i64 */ } ,
1658
+ NVPTX::StoreRetvalV4F32, { /* no v4f64 */ } );
1653
1659
break ;
1654
1660
}
1655
1661
if (!Opcode)
0 commit comments