@@ -1484,7 +1484,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14841484 Register DstReg = MI.getOperand (0 ).getReg ();
14851485 LLT Ty = MRI.getType (DstReg);
14861486
1487+ const LLT S64 = LLT::scalar (64 );
14871488 const LLT S32 = LLT::scalar (32 );
1489+ const LLT S16 = LLT::scalar (16 );
14881490
14891491 unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1 ;
14901492 Register SrcReg = MI.getOperand (FirstOpnd).getReg ();
@@ -1494,6 +1496,18 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14941496 const RegisterBank *DstBank =
14951497 OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
14961498 if (DstBank == &AMDGPU::VGPRRegBank) {
1499+ if (Ty == S16) {
1500+ ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::VGPRRegBank);
1501+ B.setInsertPt (B.getMBB (), MI);
1502+ LegalizerHelper Helper (B.getMF (), ApplyBank, B);
1503+
1504+ Helper.widenScalarDst (MI, S32);
1505+ Helper.widenScalarSrc (MI, S32, 1 , AMDGPU::G_ANYEXT);
1506+ Helper.widenScalarSrc (MI, S32, 2 , AMDGPU::G_ZEXT);
1507+ Helper.widenScalarSrc (MI, S32, 3 , AMDGPU::G_ZEXT);
1508+ return true ;
1509+ }
1510+
14971511 if (Ty == S32)
14981512 return true ;
14991513
@@ -1553,6 +1567,11 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15531567
15541568 ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::SGPRRegBank);
15551569
1570+ if (Ty == S16) {
1571+ OffsetReg = B.buildAnyExtOrTrunc (S32, OffsetReg).getReg (0 );
1572+ WidthReg = B.buildAnyExtOrTrunc (S32, WidthReg).getReg (0 );
1573+ }
1574+
15561575 // Ensure the high bits are clear to insert the offset.
15571576 auto OffsetMask = B.buildConstant (S32, maskTrailingOnes<unsigned >(6 ));
15581577 auto ClampOffset = B.buildAnd (S32, OffsetReg, OffsetMask);
@@ -1567,13 +1586,21 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15671586
15681587 // TODO: It might be worth using a pseudo here to avoid scc clobber and
15691588 // register class constraints.
1570- unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1571- (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1589+ unsigned Opc = ( Ty != S64) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
1590+ : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
15721591
1573- auto MIB = B.buildInstr (Opc, {DstReg}, {SrcReg, MergedInputs});
1592+ Register BFEDst = DstReg;
1593+ if (Ty == S16) {
1594+ BFEDst = MRI.createGenericVirtualRegister (S32);
1595+ MRI.setRegBank (BFEDst, AMDGPU::SGPRRegBank);
1596+ }
1597+ auto MIB = B.buildInstr (Opc, {BFEDst}, {SrcReg, MergedInputs});
15741598 if (!constrainSelectedInstRegOperands (*MIB, *TII, *TRI, *this ))
15751599 llvm_unreachable (" failed to constrain BFE" );
15761600
1601+ if (BFEDst != DstReg)
1602+ B.buildZExtOrTrunc (DstReg, BFEDst);
1603+
15771604 MI.eraseFromParent ();
15781605 return true ;
15791606}
0 commit comments