@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
117117 return LLT::scalar (32 );
118118}
119119
120- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
121- const RegisterBankInfo &RBI);
122-
123- static void unmergeReadAnyLane (MachineIRBuilder &B,
124- SmallVectorImpl<Register> &SgprDstParts,
125- LLT UnmergeTy, Register VgprSrc,
126- const RegisterBankInfo &RBI) {
120+ template <typename ReadLaneFnTy>
121+ static Register buildReadLane (MachineIRBuilder &, Register,
122+ const RegisterBankInfo &, ReadLaneFnTy);
123+
124+ template <typename ReadLaneFnTy>
125+ static void
126+ unmergeReadAnyLane (MachineIRBuilder &B, SmallVectorImpl<Register> &SgprDstParts,
127+ LLT UnmergeTy, Register VgprSrc, const RegisterBankInfo &RBI,
128+ ReadLaneFnTy BuildRL) {
127129 const RegisterBank *VgprRB = &RBI.getRegBank (AMDGPU::VGPRRegBankID);
128130 auto Unmerge = B.buildUnmerge ({VgprRB, UnmergeTy}, VgprSrc);
129131 for (unsigned i = 0 ; i < Unmerge->getNumOperands () - 1 ; ++i) {
130- SgprDstParts.push_back (buildReadAnyLane (B, Unmerge.getReg (i), RBI));
132+ SgprDstParts.push_back (buildReadLane (B, Unmerge.getReg (i), RBI, BuildRL ));
131133 }
132134}
133135
134- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
135- const RegisterBankInfo &RBI) {
136+ template <typename ReadLaneFnTy>
137+ static Register buildReadLane (MachineIRBuilder &B, Register VgprSrc,
138+ const RegisterBankInfo &RBI,
139+ ReadLaneFnTy BuildRL) {
136140 LLT Ty = B.getMRI ()->getType (VgprSrc);
137141 const RegisterBank *SgprRB = &RBI.getRegBank (AMDGPU::SGPRRegBankID);
138142 if (Ty.getSizeInBits () == 32 ) {
139- return B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {{ SgprRB, Ty}}, {VgprSrc})
140- .getReg (0 );
143+ Register SgprDst = B. getMRI ()-> createVirtualRegister ({ SgprRB, Ty});
144+ return BuildRL (B, SgprDst, VgprSrc) .getReg (0 );
141145 }
142146
143147 SmallVector<Register, 8 > SgprDstParts;
144- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
148+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
149+ BuildRL);
145150
146151 return B.buildMergeLikeInstr ({SgprRB, Ty}, SgprDstParts).getReg (0 );
147152}
148153
149- void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
150- Register VgprSrc, const RegisterBankInfo &RBI) {
154+ template <typename ReadLaneFnTy>
155+ static void buildReadLane (MachineIRBuilder &B, Register SgprDst,
156+ Register VgprSrc, const RegisterBankInfo &RBI,
157+ ReadLaneFnTy BuildReadLane) {
151158 LLT Ty = B.getMRI ()->getType (VgprSrc);
152159 if (Ty.getSizeInBits () == 32 ) {
153- B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, { SgprDst}, { VgprSrc} );
160+ BuildReadLane (B, SgprDst, VgprSrc);
154161 return ;
155162 }
156163
157164 SmallVector<Register, 8 > SgprDstParts;
158- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
165+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
166+ BuildReadLane);
159167
160168 B.buildMergeLikeInstr (SgprDst, SgprDstParts).getReg (0 );
161169}
170+
171+ void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
172+ Register VgprSrc, const RegisterBankInfo &RBI) {
173+ return buildReadLane (
174+ B, SgprDst, VgprSrc, RBI,
175+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
176+ return B.buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
177+ });
178+ }
179+
180+ void AMDGPU::buildReadFirstLane (MachineIRBuilder &B, Register SgprDst,
181+ Register VgprSrc, const RegisterBankInfo &RBI) {
182+ return buildReadLane (
183+ B, SgprDst, VgprSrc, RBI,
184+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
185+ return B.buildIntrinsic (Intrinsic::amdgcn_readfirstlane, SgprDst)
186+ .addReg (VgprSrc);
187+ });
188+ }
0 commit comments