@@ -20492,8 +20492,8 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
2049220492#undef MMA_VARIANTS_B1_XOR
2049320493}
2049420494
20495- static Value *MakeLdgLdu (unsigned IntrinsicID, CodeGenFunction &CGF,
20496- const CallExpr *E) {
20495+ static Value *MakeLdu (unsigned IntrinsicID, CodeGenFunction &CGF,
20496+ const CallExpr *E) {
2049720497 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
2049820498 QualType ArgType = E->getArg(0)->getType();
2049920499 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
@@ -20503,6 +20503,21 @@ static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
2050320503 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
2050420504}
2050520505
20506+ static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
20507+ Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20508+ QualType ArgType = E->getArg(0)->getType();
20509+ clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
20510+ llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
20511+
20512+ // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
20513+ auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
20514+ auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
20515+ MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
20516+ LD->setMetadata(LLVMContext::MD_invariant_load, MD);
20517+
20518+ return LD;
20519+ }
20520+
2050620521static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
2050720522 const CallExpr *E) {
2050820523 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
@@ -20536,9 +20551,11 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
2053620551 return nullptr;
2053720552 }
2053820553
20539- if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
20540- IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20541- return MakeLdgLdu(IntrinsicID, CGF, E);
20554+ if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
20555+ return MakeLdg(CGF, E);
20556+
20557+ if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20558+ return MakeLdu(IntrinsicID, CGF, E);
2054220559
2054320560 SmallVector<Value *, 16> Args;
2054420561 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
@@ -20675,16 +20692,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
2067520692 case NVPTX::BI__nvvm_ldg_ul2:
2067620693 case NVPTX::BI__nvvm_ldg_ull:
2067720694 case NVPTX::BI__nvvm_ldg_ull2:
20678- // PTX Interoperability section 2.2: "For a vector with an even number of
20679- // elements, its alignment is set to number of elements times the alignment
20680- // of its member: n*alignof(t)."
20681- return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
2068220695 case NVPTX::BI__nvvm_ldg_f:
2068320696 case NVPTX::BI__nvvm_ldg_f2:
2068420697 case NVPTX::BI__nvvm_ldg_f4:
2068520698 case NVPTX::BI__nvvm_ldg_d:
2068620699 case NVPTX::BI__nvvm_ldg_d2:
20687- return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20700+ // PTX Interoperability section 2.2: "For a vector with an even number of
20701+ // elements, its alignment is set to number of elements times the alignment
20702+ // of its member: n*alignof(t)."
20703+ return MakeLdg(*this, E);
2068820704
2068920705 case NVPTX::BI__nvvm_ldu_c:
2069020706 case NVPTX::BI__nvvm_ldu_sc:
@@ -20715,13 +20731,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
2071520731 case NVPTX::BI__nvvm_ldu_ul2:
2071620732 case NVPTX::BI__nvvm_ldu_ull:
2071720733 case NVPTX::BI__nvvm_ldu_ull2:
20718- return MakeLdgLdu (Intrinsic::nvvm_ldu_global_i, *this, E);
20734+ return MakeLdu (Intrinsic::nvvm_ldu_global_i, *this, E);
2071920735 case NVPTX::BI__nvvm_ldu_f:
2072020736 case NVPTX::BI__nvvm_ldu_f2:
2072120737 case NVPTX::BI__nvvm_ldu_f4:
2072220738 case NVPTX::BI__nvvm_ldu_d:
2072320739 case NVPTX::BI__nvvm_ldu_d2:
20724- return MakeLdgLdu (Intrinsic::nvvm_ldu_global_f, *this, E);
20740+ return MakeLdu (Intrinsic::nvvm_ldu_global_f, *this, E);
2072520741
2072620742 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
2072720743 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
@@ -21195,14 +21211,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
2119521211 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
2119621212 *this);
2119721213 case NVPTX::BI__nvvm_ldg_h:
21198- return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
2119921214 case NVPTX::BI__nvvm_ldg_h2:
21200- return MakeHalfType(Intrinsic::nvvm_ldg_global_f , BuiltinID, E, *this);
21215+ return MakeHalfType(Intrinsic::not_intrinsic , BuiltinID, E, *this);
2120121216 case NVPTX::BI__nvvm_ldu_h:
21217+ case NVPTX::BI__nvvm_ldu_h2:
2120221218 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21203- case NVPTX::BI__nvvm_ldu_h2: {
21204- return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21205- }
2120621219 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
2120721220 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
2120821221 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
0 commit comments