@@ -5678,6 +5678,130 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
                        AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
 }
 
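+// Check a single cluster-dimension argument: it must be an integer constant
+// expression that fits in 4 bits (value-dependent expressions are deferred
+// until instantiation). Returns the possibly-converted expression and its
+// value, or {nullptr, 0} on error.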
+static std::pair<Expr *, int>
+makeClusterDimsArgExpr(Sema &S, Expr *E, const CUDAClusterDimsAttr &AL,
+                       const unsigned Idx) {
+  if (S.DiagnoseUnexpandedParameterPack(E))
+    return {nullptr, 0};
+
+  // Accept template arguments for now as they depend on something else.
+  // We'll get to check them when they eventually get instantiated.
+  if (E->isValueDependent())
+    return {E, 1};
+
+  std::optional<llvm::APSInt> I = llvm::APSInt(64);
+  if (!(I = E->getIntegerConstantExpr(S.Context))) {
+    S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
+        << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+    return {nullptr, 0};
+  }
+  // Make sure we can fit it in 4 bits.
+  if (!I->isIntN(4)) {
+    S.Diag(E->getExprLoc(), diag::err_ice_too_large)
+        << toString(*I, 10, false) << 4 << /* Unsigned */ 1;
+    return {nullptr, 0};
+  }
+  if (*I < 0)
+    S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
+        << &AL << Idx << E->getSourceRange();
+
+  // We may need to perform implicit conversion of the argument.
+  InitializedEntity Entity = InitializedEntity::InitializeParameter(
+      S.Context, S.Context.getConstType(S.Context.IntTy), /*consume*/ false);
+  ExprResult ValArg = S.PerformCopyInitialization(Entity, SourceLocation(), E);
+  assert(!ValArg.isInvalid() &&
+         "Unexpected PerformCopyInitialization() failure.");
+
+  return {ValArg.getAs<Expr>(), I->getZExtValue()};
+}
+
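+// Build a CUDAClusterDimsAttr from up to three dimension expressions and
+// check that their product does not exceed the target's cluster-size limit.
+// Returns nullptr on failure.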
+CUDAClusterDimsAttr *Sema::createClusterDimsAttr(const AttributeCommonInfo &CI,
+                                                 Expr *X, Expr *Y, Expr *Z) {
+  CUDAClusterDimsAttr TmpAttr(Context, CI, X, Y, Z);
+
+  int ValX = 1;
+  int ValY = 1;
+  int ValZ = 1;
+
+  std::tie(X, ValX) = makeClusterDimsArgExpr(*this, X, TmpAttr, /*Idx=*/0);
+  if (!X)
+    return nullptr;
+
+  if (Y) {
+    std::tie(Y, ValY) = makeClusterDimsArgExpr(*this, Y, TmpAttr, /*Idx=*/1);
+    if (!Y)
+      return nullptr;
+  }
+
+  if (Z) {
+    std::tie(Z, ValZ) = makeClusterDimsArgExpr(*this, Z, TmpAttr, /*Idx=*/2);
+    if (!Z)
+      return nullptr;
+  }
+
+  int FlatDim = ValX * ValY * ValZ;
+  auto TT = (!Context.getLangOpts().CUDAIsDevice && Context.getAuxTargetInfo())
+                ? Context.getAuxTargetInfo()->getTriple()
+                : Context.getTargetInfo().getTriple();
+  int MaxDim = 1;
+  if (TT.isNVPTX())
+    MaxDim = 8;
+  else if (TT.isAMDGPU())
+    MaxDim = 16;
+  else
+    return nullptr;
+
+  // A maximum of 8 thread blocks in a cluster is supported as a portable
+  // cluster size in CUDA. The number is 16 for AMDGPU.
+  if (FlatDim > MaxDim) {
+    Diag(CI.getLoc(), diag::err_cuda_cluster_dims_too_large) << MaxDim;
+    return nullptr;
+  }
+
+  return ::new (Context) CUDAClusterDimsAttr(Context, CI, X, Y, Z);
+}
+
+void Sema::addClusterDimsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *X,
+                              Expr *Y, Expr *Z) {
+  if (auto *Attr = createClusterDimsAttr(CI, X, Y, Z))
+    D->addAttr(Attr);
+}
+
+void Sema::addNoClusterAttr(Decl *D, const AttributeCommonInfo &CI) {
+  if (CUDANoClusterAttr *Attr = ::new (Context) CUDANoClusterAttr(Context, CI))
+    D->addAttr(Attr);
+}
+
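+// Clusters are only available on sm_90 and newer for NVPTX and gfx1250 and
+// newer for AMDGPU; diagnose the attributes on older offload targets.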
+static void handleClusterDimsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  auto &TTI = S.Context.getTargetInfo();
+  auto Arch = StringToOffloadArch(TTI.getTargetOpts().CPU);
+  if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) ||
+      (TTI.getTriple().isAMDGPU() && Arch < clang::OffloadArch::GFX1250)) {
+    S.Diag(AL.getLoc(), diag::err_cuda_cluster_attr_not_supported) << 0;
+    return;
+  }
+
+  if (!AL.checkAtLeastNumArgs(S, /*Num=*/1) ||
+      !AL.checkAtMostNumArgs(S, /*Num=*/3))
+    return;
+
+  S.addClusterDimsAttr(D, AL, AL.getArgAsExpr(0),
+                       AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
+                       AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
+}
+
+static void handleNoClusterAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  auto &TTI = S.Context.getTargetInfo();
+  auto Arch = StringToOffloadArch(TTI.getTargetOpts().CPU);
+  if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) ||
+      (TTI.getTriple().isAMDGPU() && Arch < clang::OffloadArch::GFX1250)) {
+    S.Diag(AL.getLoc(), diag::err_cuda_cluster_attr_not_supported) << 1;
+    return;
+  }
+
+  S.addNoClusterAttr(D, AL);
+}
+
 static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
                                           const ParsedAttr &AL) {
   if (!AL.isArgIdent(0)) {
@@ -7129,6 +7253,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_CUDALaunchBounds:
     handleLaunchBoundsAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_CUDAClusterDims:
+    handleClusterDimsAttr(S, D, AL);
+    break;
+  case ParsedAttr::AT_CUDANoCluster:
+    handleNoClusterAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_Restrict:
     handleRestrictAttr(S, D, AL);
     break;
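
For illustration, a minimal sketch of how the new attributes would appear on CUDA kernels, assuming CUDA-style spellings __cluster_dims__ and __no_cluster__ analogous to __launch_bounds__ (the Attr.td spellings are not part of these hunks):

    // Request a 2x2x1 thread-block cluster; handleClusterDimsAttr checks the
    // offload arch (sm_90+/gfx1250+) and createClusterDimsAttr checks the flat
    // product against the per-target limit (8 on NVPTX, 16 on AMDGPU).
    __global__ void __cluster_dims__(2, 2, 1) cluster_kernel(float *out);

    // Opt a kernel out of clustering entirely.
    __global__ void __no_cluster__ plain_kernel(float *out);

    // Rejected: 4 * 4 * 2 = 32 exceeds both limits, so Sema would emit
    // err_cuda_cluster_dims_too_large.
    // __global__ void __cluster_dims__(4, 4, 2) too_large_kernel(float *out);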