@@ -107,6 +107,8 @@ class DpasFuncsResolution : public FunctionPass, public InstVisitor<DpasFuncsRes
107107 // /
108108 static const StringRef SG_PREFIX_IDPAS16;
109109 static const StringRef SG_PREFIX_FDPAS16;
110+ static const StringRef SG_PREFIX_IDPAS32N16;
111+ static const StringRef SG_PREFIX_FDPAS32N16;
110112 // PVC+: pure hf/bf dpas builtins
111113 static const StringRef WI_PREFIX_HFDPAS;
112114 static const StringRef WI_PREFIX_BFDPAS;
@@ -200,6 +202,8 @@ const StringRef DpasFuncsResolution::WI_PREFIX_IDPAS = "__builtin_IB_idpas";
200202const StringRef DpasFuncsResolution::WI_PREFIX_FDPAS = " __builtin_IB_fdpas" ;
201203const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS16 = " __builtin_IB_sub_group16_idpas" ;
202204const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS16 = " __builtin_IB_sub_group16_fdpas" ;
205+ const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS32N16 = " __builtin_IB_sub_group32n16_idpas" ;
206+ const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS32N16 = " __builtin_IB_sub_group32n16_fdpas" ;
203207// PVC+: pure hf/bf dpas builtins
204208const StringRef DpasFuncsResolution::WI_PREFIX_HFDPAS = " __builtin_IB_hfdpas" ;
205209const StringRef DpasFuncsResolution::WI_PREFIX_BFDPAS = " __builtin_IB_bfdpas" ;
@@ -263,6 +267,11 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
263267
264268 bool IsDpasw = false ;
265269 bool IsIDpas = false ;
270+ // Dimension N is platform-specific and is directly correlated with the minimum subgroup size
271+ // for a given platform. If a DPAS with the same M, N, K dimensions is executed within a subgroup
272+ // twice the minimum subgroup size, each work item must contain half of the data it would
273+ // contain at the minimum subgroup size.
274+ bool IsDoubleSubgroup = false ;
266275 int DstTy, AccTy, PA, PB, SD, RC;
267276 GenISAIntrinsic::ID iid = GenISAIntrinsic::no_intrinsic;
268277 bool doVerify = false ;
@@ -277,12 +286,26 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
277286 if (!demangleSuffix (funcName, SG_PREFIX_LEN, false , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
278287 return ;
279288 iid = GenISAIntrinsic::GenISA_sub_group_dpas;
289+ } else if (funcName.startswith (DpasFuncsResolution::SG_PREFIX_IDPAS32N16)) {
290+ const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_IDPAS32N16.size ();
291+ IsIDpas = true ;
292+ IsDoubleSubgroup = true ;
293+ if (!demangleSuffix (funcName, SG_PREFIX_LEN, false , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
294+ return ;
295+ iid = GenISAIntrinsic::GenISA_sub_group_dpas;
280296 } else if (funcName.startswith (DpasFuncsResolution::SG_PREFIX_FDPAS16)) {
281297 const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS16.size ();
282298 IsIDpas = false ;
283299 if (!demangleSuffix (funcName, SG_PREFIX_LEN, true , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
284300 return ;
285301 iid = GenISAIntrinsic::GenISA_sub_group_dpas;
302+ } else if (funcName.startswith (DpasFuncsResolution::SG_PREFIX_FDPAS32N16)) {
303+ const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS32N16.size ();
304+ IsIDpas = false ;
305+ IsDoubleSubgroup = true ;
306+ if (!demangleSuffix (funcName, SG_PREFIX_LEN, true , IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr ))
307+ return ;
308+ iid = GenISAIntrinsic::GenISA_sub_group_dpas;
286309 }
287310 else {
288311 return ;
@@ -363,6 +386,14 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
363386 Type *A_BaseTy = ATy->getScalarType ();
364387 Type *B_BaseTy = BTy->getScalarType ();
365388
389+ if (IsDoubleSubgroup) {
390+ IGC_ASSERT_MESSAGE (RC >= 2 , " ICE: repeat count of DPAS for double subgroup-size must be >= 2!" );
391+ D_nelts *= 2 ;
392+ ACC_nelts *= 2 ;
393+ A_nelts *= 2 ;
394+ B_nelts *= 2 ;
395+ }
396+
366397 if (IsIDpas) {
367398 uint32_t Abits = getPrecisionInBits ((PrecisionType)PA);
368399 uint32_t Bbits = getPrecisionInBits ((PrecisionType)PB);
0 commit comments