@@ -5654,6 +5654,130 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
56545654 AL.getNumArgs () > 2 ? AL.getArgAsExpr (2 ) : nullptr );
56555655}
56565656
5657+ static std::pair<Expr *, int >
5658+ makeClusterDimsArgExpr (Sema &S, Expr *E, const CUDAClusterDimsAttr &AL,
5659+ const unsigned Idx) {
5660+ if (S.DiagnoseUnexpandedParameterPack (E))
5661+ return {nullptr , 0 };
5662+
5663+ // Accept template arguments for now as they depend on something else.
5664+ // We'll get to check them when they eventually get instantiated.
5665+ if (E->isValueDependent ())
5666+ return {E, 1 };
5667+
5668+ std::optional<llvm::APSInt> I = llvm::APSInt (64 );
5669+ if (!(I = E->getIntegerConstantExpr (S.Context ))) {
5670+ S.Diag (E->getExprLoc (), diag::err_attribute_argument_n_type)
5671+ << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange ();
5672+ return {nullptr , 0 };
5673+ }
5674+ // Make sure we can fit it in 4 bits.
5675+ if (!I->isIntN (4 )) {
5676+ S.Diag (E->getExprLoc (), diag::err_ice_too_large)
5677+ << toString (*I, 10 , false ) << 4 << /* Unsigned */ 1 ;
5678+ return {nullptr , 0 };
5679+ }
5680+ if (*I < 0 )
5681+ S.Diag (E->getExprLoc (), diag::warn_attribute_argument_n_negative)
5682+ << &AL << Idx << E->getSourceRange ();
5683+
5684+ // We may need to perform implicit conversion of the argument.
5685+ InitializedEntity Entity = InitializedEntity::InitializeParameter (
5686+ S.Context , S.Context .getConstType (S.Context .IntTy ), /* consume*/ false );
5687+ ExprResult ValArg = S.PerformCopyInitialization (Entity, SourceLocation (), E);
5688+ assert (!ValArg.isInvalid () &&
5689+ " Unexpected PerformCopyInitialization() failure." );
5690+
5691+ return {ValArg.getAs <Expr>(), I->getZExtValue ()};
5692+ }
5693+
5694+ CUDAClusterDimsAttr *Sema::createClusterDimsAttr (const AttributeCommonInfo &CI,
5695+ Expr *X, Expr *Y, Expr *Z) {
5696+ CUDAClusterDimsAttr TmpAttr (Context, CI, X, Y, Z);
5697+
5698+ int ValX = 1 ;
5699+ int ValY = 1 ;
5700+ int ValZ = 1 ;
5701+
5702+ std::tie (X, ValX) = makeClusterDimsArgExpr (*this , X, TmpAttr, /* Idx=*/ 0 );
5703+ if (!X)
5704+ return nullptr ;
5705+
5706+ if (Y) {
5707+ std::tie (Y, ValY) = makeClusterDimsArgExpr (*this , Y, TmpAttr, /* Idx=*/ 1 );
5708+ if (!Y)
5709+ return nullptr ;
5710+ }
5711+
5712+ if (Z) {
5713+ std::tie (Z, ValZ) = makeClusterDimsArgExpr (*this , Z, TmpAttr, /* Idx=*/ 2 );
5714+ if (!Z)
5715+ return nullptr ;
5716+ }
5717+
5718+ int FlatDim = ValX * ValY * ValZ;
5719+ auto TT = (!Context.getLangOpts ().CUDAIsDevice && Context.getAuxTargetInfo ())
5720+ ? Context.getAuxTargetInfo ()->getTriple ()
5721+ : Context.getTargetInfo ().getTriple ();
5722+ int MaxDim = 1 ;
5723+ if (TT.isNVPTX ())
5724+ MaxDim = 8 ;
5725+ else if (TT.isAMDGPU ())
5726+ MaxDim = 16 ;
5727+ else
5728+ return nullptr ;
5729+
5730+ // A maximum of 8 thread blocks in a cluster is supported as a portable
5731+ // cluster size in CUDA. The number is 16 for AMDGPU.
5732+ if (FlatDim > MaxDim) {
5733+ Diag (CI.getLoc (), diag::err_cuda_cluster_dims_too_large) << MaxDim;
5734+ return nullptr ;
5735+ }
5736+
5737+ return ::new (Context) CUDAClusterDimsAttr (Context, CI, X, Y, Z);
5738+ }
5739+
5740+ void Sema::addClusterDimsAttr (Decl *D, const AttributeCommonInfo &CI, Expr *X,
5741+ Expr *Y, Expr *Z) {
5742+ if (auto *Attr = createClusterDimsAttr (CI, X, Y, Z))
5743+ D->addAttr (Attr);
5744+ }
5745+
5746+ void Sema::addNoClusterAttr (Decl *D, const AttributeCommonInfo &CI) {
5747+ if (CUDANoClusterAttr *Attr = ::new (Context) CUDANoClusterAttr (Context, CI))
5748+ D->addAttr (Attr);
5749+ }
5750+
5751+ static void handleClusterDimsAttr (Sema &S, Decl *D, const ParsedAttr &AL) {
5752+ auto &TTI = S.Context .getTargetInfo ();
5753+ auto Arch = StringToOffloadArch (TTI.getTargetOpts ().CPU );
5754+ if ((TTI.getTriple ().isNVPTX () && Arch < clang::OffloadArch::SM_90) ||
5755+ (TTI.getTriple ().isAMDGPU () && Arch < clang::OffloadArch::GFX1250)) {
5756+ S.Diag (AL.getLoc (), diag::err_cuda_cluster_attr_not_supported) << 0 ;
5757+ return ;
5758+ }
5759+
5760+ if (!AL.checkAtLeastNumArgs (S, /* Num=*/ 1 ) ||
5761+ !AL.checkAtMostNumArgs (S, /* Num=*/ 3 ))
5762+ return ;
5763+
5764+ S.addClusterDimsAttr (D, AL, AL.getArgAsExpr (0 ),
5765+ AL.getNumArgs () > 1 ? AL.getArgAsExpr (1 ) : nullptr ,
5766+ AL.getNumArgs () > 2 ? AL.getArgAsExpr (2 ) : nullptr );
5767+ }
5768+
5769+ static void handleNoClusterAttr (Sema &S, Decl *D, const ParsedAttr &AL) {
5770+ auto &TTI = S.Context .getTargetInfo ();
5771+ auto Arch = StringToOffloadArch (TTI.getTargetOpts ().CPU );
5772+ if ((TTI.getTriple ().isNVPTX () && Arch < clang::OffloadArch::SM_90) ||
5773+ (TTI.getTriple ().isAMDGPU () && Arch < clang::OffloadArch::GFX1250)) {
5774+ S.Diag (AL.getLoc (), diag::err_cuda_cluster_attr_not_supported) << 1 ;
5775+ return ;
5776+ }
5777+
5778+ S.addNoClusterAttr (D, AL);
5779+ }
5780+
56575781static void handleArgumentWithTypeTagAttr (Sema &S, Decl *D,
56585782 const ParsedAttr &AL) {
56595783 if (!AL.isArgIdent (0 )) {
@@ -7105,6 +7229,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
71057229 case ParsedAttr::AT_CUDALaunchBounds:
71067230 handleLaunchBoundsAttr (S, D, AL);
71077231 break ;
7232+ case ParsedAttr::AT_CUDAClusterDims:
7233+ handleClusterDimsAttr (S, D, AL);
7234+ break ;
7235+ case ParsedAttr::AT_CUDANoCluster:
7236+ handleNoClusterAttr (S, D, AL);
7237+ break ;
71087238 case ParsedAttr::AT_Restrict:
71097239 handleRestrictAttr (S, D, AL);
71107240 break ;
0 commit comments