@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
117117 return LLT::scalar (32 );
118118}
119119
// Review note: this span is a rendered diff hunk. A line number followed by
// '-' appears to mark removed text and '+' added text; here the old forward
// declaration of buildReadAnyLane gives way to the alias and the
// buildReadLane declaration that follow.
120- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
121- const RegisterBankInfo &RBI);
// Callback type that emits one read-lane instruction. The code in this hunk
// invokes it as (builder, destination register, source register) and uses the
// MachineInstrBuilder it returns.
120+ using ReadLaneFnTy =
121+ function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
122+
// Forward declaration: unmergeReadAnyLane calls buildReadLane ahead of its
// definition.
123+ static Register buildReadLane (MachineIRBuilder &, Register,
124+ const RegisterBankInfo &, ReadLaneFnTy);
122125
// Splits VgprSrc into pieces of type UnmergeTy on the VGPR register bank and
// appends, for each piece, the register returned by buildReadLane to
// SgprDstParts.
//
// B            - builder used to emit the unmerge and the per-piece reads.
// SgprDstParts - output list; results are appended in piece order.
// UnmergeTy    - type of each piece produced by the unmerge.
// VgprSrc      - register to split.
// RBI          - used to look up the VGPR bank; also forwarded to
//                buildReadLane.
// BuildRL      - callback forwarded unchanged to buildReadLane.
123126static void unmergeReadAnyLane (MachineIRBuilder &B,
124127 SmallVectorImpl<Register> &SgprDstParts,
125128 LLT UnmergeTy, Register VgprSrc,
126- const RegisterBankInfo &RBI) {
129+ const RegisterBankInfo &RBI,
130+ ReadLaneFnTy BuildRL) {
127131 const RegisterBank *VgprRB = &RBI.getRegBank (AMDGPU::VGPRRegBankID);
128132 auto Unmerge = B.buildUnmerge ({VgprRB, UnmergeTy}, VgprSrc);
// The last operand is skipped; presumably it is the unmerge's source operand
// (results first, source last) -- confirm against the opcode definition.
129133 for (unsigned i = 0 ; i < Unmerge->getNumOperands () - 1 ; ++i) {
130- SgprDstParts.push_back (buildReadAnyLane (B, Unmerge.getReg (i), RBI));
134+ SgprDstParts.push_back (buildReadLane (B, Unmerge.getReg (i), RBI, BuildRL ));
131135 }
132136}
133137
// Reads VgprSrc through the BuildRL callback and returns the resulting
// register, which is created on the SGPR register bank with the same type as
// VgprSrc.
//
// A 32-bit source is handled with a single BuildRL call. Any other size is
// split into pieces of getReadAnyLaneSplitTy(Ty), each piece is read
// separately (via unmergeReadAnyLane), and the pieces are merged back
// together.
//
// Review note: the '-' lines below are the previous buildReadAnyLane form,
// which emitted G_AMDGPU_READANYLANE directly instead of calling a callback.
134- static Register buildReadAnyLane (MachineIRBuilder &B, Register VgprSrc,
135- const RegisterBankInfo &RBI) {
138+ static Register buildReadLane (MachineIRBuilder &B, Register VgprSrc,
139+ const RegisterBankInfo &RBI,
140+ ReadLaneFnTy BuildRL) {
136141 LLT Ty = B.getMRI ()->getType (VgprSrc);
137142 const RegisterBank *SgprRB = &RBI.getRegBank (AMDGPU::SGPRRegBankID);
138143 if (Ty.getSizeInBits () == 32 ) {
139- return B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {{ SgprRB, Ty}}, {VgprSrc})
140- .getReg (0 );
// The destination is created up front because the callback takes an existing
// destination register rather than a bank/type pair.
144+ Register SgprDst = B. getMRI ()-> createVirtualRegister ({ SgprRB, Ty});
// NOTE(review): returns operand 0 of whatever BuildRL built; presumably that
// is SgprDst for every callback -- confirm.
145+ return BuildRL (B, SgprDst, VgprSrc) .getReg (0 );
141146 }
142147

143148 SmallVector<Register, 8 > SgprDstParts;
144- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
149+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
150+ BuildRL);
145151

// Reassemble the separately read pieces into one register of the original
// type.
146152 return B.buildMergeLikeInstr ({SgprRB, Ty}, SgprDstParts).getReg (0 );
147153}
148154
// Overload of buildReadLane that writes into a caller-supplied destination:
// reads VgprSrc through the BuildReadLane callback and defines SgprDst with
// the result.
//
// A 32-bit source is handled with a single callback invocation. Any other size
// is split into pieces of getReadAnyLaneSplitTy(Ty), each piece is read
// separately (via unmergeReadAnyLane), and the pieces are merged into SgprDst.
//
// Review note: the '-' lines below are the previous AMDGPU::buildReadAnyLane
// body, which emitted G_AMDGPU_READANYLANE directly.
149- void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
150- Register VgprSrc, const RegisterBankInfo &RBI) {
155+ static void buildReadLane (MachineIRBuilder &B, Register SgprDst,
156+ Register VgprSrc, const RegisterBankInfo &RBI,
157+ ReadLaneFnTy BuildReadLane) {
151158 LLT Ty = B.getMRI ()->getType (VgprSrc);
152159 if (Ty.getSizeInBits () == 32 ) {
153- B. buildInstr (AMDGPU::G_AMDGPU_READANYLANE, { SgprDst}, { VgprSrc} );
// The callback's returned builder is not needed here; SgprDst is already the
// destination.
160+ BuildReadLane (B, SgprDst, VgprSrc);
154161 return ;
155162 }
156163

157164 SmallVector<Register, 8 > SgprDstParts;
158- unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI);
165+ unmergeReadAnyLane (B, SgprDstParts, getReadAnyLaneSplitTy (Ty), VgprSrc, RBI,
166+ BuildReadLane);
159167

// NOTE(review): the value of the trailing .getReg(0) is discarded in this
// void function; it looks like a leftover and could presumably be dropped.
160168 B.buildMergeLikeInstr (SgprDst, SgprDstParts).getReg (0 );
161169}
170+
// Defines SgprDst from VgprSrc using G_AMDGPU_READANYLANE. Delegates to
// buildReadLane, passing a callback that emits that instruction with SgprDst
// as its result and VgprSrc as its input.
171+ void AMDGPU::buildReadAnyLane (MachineIRBuilder &B, Register SgprDst,
172+ Register VgprSrc, const RegisterBankInfo &RBI) {
// Returning the call expression is valid because the callee's return type is
// void as well.
173+ return buildReadLane (
174+ B, SgprDst, VgprSrc, RBI,
// Captureless lambda: everything it needs arrives through its parameters.
175+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
176+ return B.buildInstr (AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
177+ });
178+ }
179+
// Defines SgprDst from VgprSrc using the amdgcn_readfirstlane intrinsic.
// Delegates to buildReadLane, passing a callback that builds the intrinsic
// with SgprDst as its result and appends VgprSrc as a register operand.
180+ void AMDGPU::buildReadFirstLane (MachineIRBuilder &B, Register SgprDst,
181+ Register VgprSrc, const RegisterBankInfo &RBI) {
// Returning the call expression is valid because the callee's return type is
// void as well.
182+ return buildReadLane (
183+ B, SgprDst, VgprSrc, RBI,
// Captureless lambda: everything it needs arrives through its parameters.
184+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
185+ return B.buildIntrinsic (Intrinsic::amdgcn_readfirstlane, SgprDst)
186+ .addReg (VgprSrc);
187+ });
188+ }
0 commit comments