@@ -5676,6 +5676,114 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
56765676 AL.getNumArgs () > 2 ? AL.getArgAsExpr (2 ) : nullptr );
56775677}
56785678
5679+ static std::pair<Expr *, int >
5680+ makeClusterDimsArgExpr (Sema &S, Expr *E, const CUDAClusterDimsAttr &AL,
5681+ const unsigned Idx) {
5682+ if (!E || S.DiagnoseUnexpandedParameterPack (E))
5683+ return {};
5684+
5685+ // Accept template arguments for now as they depend on something else.
5686+ // We'll get to check them when they eventually get instantiated.
5687+ if (E->isInstantiationDependent ())
5688+ return {E, 1 };
5689+
5690+ std::optional<llvm::APSInt> I = E->getIntegerConstantExpr (S.Context );
5691+ if (!I) {
5692+ S.Diag (E->getExprLoc (), diag::err_attribute_argument_n_type)
5693+ << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange ();
5694+ return {};
5695+ }
5696+ // Make sure we can fit it in 4 bits.
5697+ if (!I->isIntN (4 )) {
5698+ S.Diag (E->getExprLoc (), diag::err_ice_too_large)
5699+ << toString (*I, 10 , false ) << 4 << /* Unsigned=*/ 1 ;
5700+ return {};
5701+ }
5702+ if (*I < 0 ) {
5703+ S.Diag (E->getExprLoc (), diag::warn_attribute_argument_n_negative)
5704+ << &AL << Idx << E->getSourceRange ();
5705+ }
5706+
5707+ return {ConstantExpr::Create (S.getASTContext (), E, APValue (*I)),
5708+ I->getZExtValue ()};
5709+ }
5710+
5711+ CUDAClusterDimsAttr *Sema::createClusterDimsAttr (const AttributeCommonInfo &CI,
5712+ Expr *X, Expr *Y, Expr *Z) {
5713+ CUDAClusterDimsAttr TmpAttr (Context, CI, X, Y, Z);
5714+
5715+ auto [NewX, ValX] = makeClusterDimsArgExpr (*this , X, TmpAttr, /* Idx=*/ 0 );
5716+ auto [NewY, ValY] = makeClusterDimsArgExpr (*this , Y, TmpAttr, /* Idx=*/ 1 );
5717+ auto [NewZ, ValZ] = makeClusterDimsArgExpr (*this , Z, TmpAttr, /* Idx=*/ 2 );
5718+
5719+ if (!NewX || (Y && !NewY) || (Z && !NewZ))
5720+ return nullptr ;
5721+
5722+ int FlatDim = ValX * ValY * ValZ;
5723+ const llvm::Triple TT =
5724+ (!Context.getLangOpts ().CUDAIsDevice && Context.getAuxTargetInfo ())
5725+ ? Context.getAuxTargetInfo ()->getTriple ()
5726+ : Context.getTargetInfo ().getTriple ();
5727+ int MaxDim = 1 ;
5728+ if (TT.isNVPTX ())
5729+ MaxDim = 8 ;
5730+ else if (TT.isAMDGPU ())
5731+ MaxDim = 16 ;
5732+ else
5733+ return nullptr ;
5734+
5735+ // A maximum of 8 thread blocks in a cluster is supported as a portable
5736+ // cluster size in CUDA. The number is 16 for AMDGPU.
5737+ if (FlatDim > MaxDim) {
5738+ Diag (CI.getLoc (), diag::err_cluster_dims_too_large) << MaxDim << FlatDim;
5739+ return nullptr ;
5740+ }
5741+
5742+ return CUDAClusterDimsAttr::Create (Context, NewX, NewY, NewZ, CI);
5743+ }
5744+
5745+ void Sema::addClusterDimsAttr (Decl *D, const AttributeCommonInfo &CI, Expr *X,
5746+ Expr *Y, Expr *Z) {
5747+ if (auto *Attr = createClusterDimsAttr (CI, X, Y, Z))
5748+ D->addAttr (Attr);
5749+ }
5750+
5751+ void Sema::addNoClusterAttr (Decl *D, const AttributeCommonInfo &CI) {
5752+ D->addAttr (CUDANoClusterAttr::Create (Context, CI));
5753+ }
5754+
5755+ static void handleClusterDimsAttr (Sema &S, Decl *D, const ParsedAttr &AL) {
5756+ const TargetInfo &TTI = S.Context .getTargetInfo ();
5757+ OffloadArch Arch = StringToOffloadArch (TTI.getTargetOpts ().CPU );
5758+ if ((TTI.getTriple ().isNVPTX () && Arch < clang::OffloadArch::SM_90) ||
5759+ (TTI.getTriple ().isAMDGPU () &&
5760+ !TTI.hasFeatureEnabled (TTI.getTargetOpts ().FeatureMap , " clusters" ))) {
5761+ S.Diag (AL.getLoc (), diag::err_cluster_attr_not_supported) << AL;
5762+ return ;
5763+ }
5764+
5765+ if (!AL.checkAtLeastNumArgs (S, /* Num=*/ 1 ) ||
5766+ !AL.checkAtMostNumArgs (S, /* Num=*/ 3 ))
5767+ return ;
5768+
5769+ S.addClusterDimsAttr (D, AL, AL.getArgAsExpr (0 ),
5770+ AL.getNumArgs () > 1 ? AL.getArgAsExpr (1 ) : nullptr ,
5771+ AL.getNumArgs () > 2 ? AL.getArgAsExpr (2 ) : nullptr );
5772+ }
5773+
5774+ static void handleNoClusterAttr (Sema &S, Decl *D, const ParsedAttr &AL) {
5775+ const TargetInfo &TTI = S.Context .getTargetInfo ();
5776+ OffloadArch Arch = StringToOffloadArch (TTI.getTargetOpts ().CPU );
5777+ if ((TTI.getTriple ().isNVPTX () && Arch < clang::OffloadArch::SM_90) ||
5778+ (TTI.getTriple ().isAMDGPU () &&
5779+ !TTI.hasFeatureEnabled (TTI.getTargetOpts ().FeatureMap , " clusters" ))) {
5780+ S.Diag (AL.getLoc (), diag::err_cluster_attr_not_supported) << AL;
5781+ return ;
5782+ }
5783+
5784+ S.addNoClusterAttr (D, AL);
5785+ }
5786+
56795787static void handleArgumentWithTypeTagAttr (Sema &S, Decl *D,
56805788 const ParsedAttr &AL) {
56815789 if (!AL.isArgIdent (0 )) {
@@ -7141,6 +7249,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
71417249 case ParsedAttr::AT_CUDALaunchBounds:
71427250 handleLaunchBoundsAttr (S, D, AL);
71437251 break ;
7252+ case ParsedAttr::AT_CUDAClusterDims:
7253+ handleClusterDimsAttr (S, D, AL);
7254+ break ;
7255+ case ParsedAttr::AT_CUDANoCluster:
7256+ handleNoClusterAttr (S, D, AL);
7257+ break ;
71447258 case ParsedAttr::AT_Restrict:
71457259 handleRestrictAttr (S, D, AL);
71467260 break ;
0 commit comments