@@ -107,6 +107,8 @@ class DpasFuncsResolution : public FunctionPass, public InstVisitor<DpasFuncsRes
107
107
// /
108
108
static const StringRef SG_PREFIX_IDPAS16;
109
109
static const StringRef SG_PREFIX_FDPAS16;
110
+ static const StringRef SG_PREFIX_IDPAS32N16;
111
+ static const StringRef SG_PREFIX_FDPAS32N16;
110
112
// PVC+: pure hf/bf dpas builtins
111
113
static const StringRef WI_PREFIX_HFDPAS;
112
114
static const StringRef WI_PREFIX_BFDPAS;
@@ -200,6 +202,8 @@ const StringRef DpasFuncsResolution::WI_PREFIX_IDPAS = "__builtin_IB_idpas";
200
202
// NOTE(review): the prefix literals below appear with a leading space in this view, unlike
// WI_PREFIX_IDPAS ("__builtin_IB_idpas") — confirm that is a rendering artifact, because
// startswith() would not match builtin names against a space-prefixed literal.
const StringRef DpasFuncsResolution::WI_PREFIX_FDPAS = " __builtin_IB_fdpas" ;
201
203
const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS16 = " __builtin_IB_sub_group16_idpas" ;
202
204
const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS16 = " __builtin_IB_sub_group16_fdpas" ;
205
// "sub_group32n16" variants: when a callee name starts with one of these two prefixes,
// visitCallInst() sets IsDoubleSubgroup and resolves the call to GenISA_sub_group_dpas.
+ const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS32N16 = " __builtin_IB_sub_group32n16_idpas" ;
206
+ const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS32N16 = " __builtin_IB_sub_group32n16_fdpas" ;
203
207
// PVC+: pure hf/bf dpas builtins
204
208
const StringRef DpasFuncsResolution::WI_PREFIX_HFDPAS = " __builtin_IB_hfdpas" ;
205
209
const StringRef DpasFuncsResolution::WI_PREFIX_BFDPAS = " __builtin_IB_bfdpas" ;
@@ -263,6 +267,11 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
263
267
264
268
bool IsDpasw = false ;
265
269
bool IsIDpas = false ;
270
+ // Dimension N is platform-specific and is directly correlated to the minimum subgroup-size for
271
+ // a given platform. If DPAS with the same M, N, K dimensions is executed within a subgroup
272
+ // twice the size of the minimum subgroup-size, each work item must contain half of the data
273
+ // compared to the minimum subgroup-size.
274
+ bool IsDoubleSubgroup = false ;
266
275
int DstTy, AccTy, PA, PB, SD, RC;
267
276
GenISAIntrinsic::ID iid = GenISAIntrinsic::no_intrinsic;
268
277
bool doVerify = false ;
@@ -277,12 +286,26 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
277
286
if (!demangleSuffix (funcName, SG_PREFIX_LEN, false , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
278
287
return ;
279
288
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
289
+ } else if (funcName.startswith (DpasFuncsResolution::SG_PREFIX_IDPAS32N16)) {
290
+ const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_IDPAS32N16.size ();
291
+ IsIDpas = true ;
292
+ IsDoubleSubgroup = true ;
293
+ if (!demangleSuffix (funcName, SG_PREFIX_LEN, false , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
294
+ return ;
295
+ iid = GenISAIntrinsic::GenISA_sub_group_dpas;
280
296
} else if (funcName.startswith (DpasFuncsResolution::SG_PREFIX_FDPAS16)) {
281
297
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS16.size ();
282
298
IsIDpas = false ;
283
299
if (!demangleSuffix (funcName, SG_PREFIX_LEN, true , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
284
300
return ;
285
301
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
302
+ } else if (funcName.startswith (DpasFuncsResolution::SG_PREFIX_FDPAS32N16)) {
303
+ const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS32N16.size ();
304
+ IsIDpas = false ;
305
+ IsDoubleSubgroup = true ;
306
+ if (!demangleSuffix (funcName, SG_PREFIX_LEN, true , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
307
+ return ;
308
+ iid = GenISAIntrinsic::GenISA_sub_group_dpas;
286
309
}
287
310
else {
288
311
return ;
@@ -363,6 +386,14 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
363
386
Type *A_BaseTy = ATy->getScalarType ();
364
387
Type *B_BaseTy = BTy->getScalarType ();
365
388
389
+ if (IsDoubleSubgroup) {
390
+ IGC_ASSERT_MESSAGE (RC >= 2 , " ICE: repeat count of DPAS for double subgroup-size must be >= 2!" );
391
+ D_nelts *= 2 ;
392
+ ACC_nelts *= 2 ;
393
+ A_nelts *= 2 ;
394
+ B_nelts *= 2 ;
395
+ }
396
+
366
397
if (IsIDpas) {
367
398
uint32_t Abits = getPrecisionInBits ((PrecisionType)PA);
368
399
uint32_t Bbits = getPrecisionInBits ((PrecisionType)PB);
0 commit comments