@@ -56,20 +56,8 @@ class GCNRewritePartialRegUsesImpl {
5656 // / size. Return true if the change has been made.
5757 bool rewriteReg (Register Reg) const ;
5858
59- // / Value type for SubRegMap below.
60- struct SubRegInfo {
61- // / Register class required to hold the value stored in the SubReg.
62- const TargetRegisterClass *RC;
63-
64- // / Index for the right-shifted subregister. If 0 this is the "covering"
65- // / subreg i.e. subreg that covers all others. Covering subreg becomes the
66- // / whole register after the replacement.
67- unsigned SubReg = AMDGPU::NoSubRegister;
68- SubRegInfo (const TargetRegisterClass *RC_ = nullptr ) : RC(RC_) {}
69- };
70-
71- // / Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
72- using SubRegMap = SmallDenseMap<unsigned , SubRegInfo>;
59+ // / Map OldSubReg -> NewSubReg. Used as in/out container.
60+ using SubRegMap = SmallDenseMap<unsigned , unsigned >;
7361
7462 // / Given register class RC and the set of used subregs as keys in the SubRegs
7563 // / map return new register class and indexes of right-shifted subregs as
@@ -78,24 +66,22 @@ class GCNRewritePartialRegUsesImpl {
7866 const TargetRegisterClass *getMinSizeReg (const TargetRegisterClass *RC,
7967 SubRegMap &SubRegs) const ;
8068
81- // / Given regclass RC and pairs of [OldSubReg, SubRegRC ] in SubRegs try to
69+ // / Given regclass RC and pairs of [OldSubReg, NewSubReg ] in SubRegs try to
8270 // / find new regclass such that:
8371 // / 1. It has subregs obtained by shifting each OldSubReg by RShift number
8472 // / of bits to the right. Every "shifted" subreg should have the same
8573 // / SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
8674 // / all other subregs in pairs. Basically such subreg becomes a whole
8775 // / register.
88- // / 2. Resulting register class contains registers of minimal size but not
89- // / less than RegNumBits.
76+ // / 2. Resulting register class contains registers of minimal size.
9077 // /
91- // / SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
78+ // / SubRegs is map of OldSubReg -> NewSubReg and is used as in/out
9279 // / parameter:
9380 // / OldSubReg - input parameter,
94- // / SubRegRC - input parameter (cannot be null),
9581 // / NewSubReg - output, contains shifted subregs on return.
9682 const TargetRegisterClass *
9783 getRegClassWithShiftedSubregs (const TargetRegisterClass *RC, unsigned RShift,
98- unsigned RegNumBits, unsigned CoverSubregIdx,
84+ unsigned CoverSubregIdx,
9985 SubRegMap &SubRegs) const ;
10086
10187 // / Update live intervals after rewriting OldReg to NewReg with SubRegs map
@@ -105,9 +91,6 @@ class GCNRewritePartialRegUsesImpl {
10591
10692 // / Helper methods.
10793
108- // / Return reg class expected by a MO's parent instruction for a given MO.
109- const TargetRegisterClass *getOperandRegClass (MachineOperand &MO) const ;
110-
11194 // / Find right-shifted by RShift amount version of the SubReg if it exists,
11295 // / return 0 otherwise.
11396 unsigned shiftSubReg (unsigned SubReg, unsigned RShift) const ;
@@ -221,20 +204,23 @@ GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
221204
222205const TargetRegisterClass *
223206GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs (
224- const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits ,
225- unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
207+ const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx ,
208+ SubRegMap &SubRegs) const {
226209
227210 unsigned RCAlign = TRI->getRegClassAlignmentNumBits (RC);
228211 LLVM_DEBUG (dbgs () << " Shift " << RShift << " , reg align " << RCAlign
229212 << ' \n ' );
230213
231214 BitVector ClassMask (getAllocatableAndAlignedRegClassMask (RCAlign));
232- for (auto &[OldSubReg, SRI] : SubRegs) {
233- auto &[SubRegRC, NewSubReg] = SRI;
234- assert (SubRegRC);
215+ for (auto &[OldSubReg, NewSubReg] : SubRegs) {
216+ LLVM_DEBUG (dbgs () << " " << TRI->getSubRegIndexName (OldSubReg) << ' :' );
235217
236- LLVM_DEBUG (dbgs () << " " << TRI->getSubRegIndexName (OldSubReg) << ' :'
237- << TRI->getRegClassName (SubRegRC)
218+ auto *SubRegRC = TRI->getSubRegisterClass (RC, OldSubReg);
219+ if (!SubRegRC) {
220+ LLVM_DEBUG (dbgs () << " couldn't find target regclass\n " );
221+ return nullptr ;
222+ }
223+ LLVM_DEBUG (dbgs () << TRI->getRegClassName (SubRegRC)
238224 << (SubRegRC->isAllocatable () ? " " : " not alloc" )
239225 << " -> " );
240226
@@ -266,27 +252,23 @@ GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
266252 // ClassMask is the set of all register classes such that each class is
267253 // allocatable, aligned, has all shifted subregs and each subreg has required
268254 // register class (see SubRegRC above). Now select first (that is largest)
269- // register class with registers of minimal but not less than RegNumBits size.
270- // We have to check register size because we may encounter classes of smaller
271- // registers like VReg_1 in some situations.
255+ // register class with registers of minimal size.
272256 const TargetRegisterClass *MinRC = nullptr ;
273257 unsigned MinNumBits = std::numeric_limits<unsigned >::max ();
274258 for (unsigned ClassID : ClassMask.set_bits ()) {
275259 auto *RC = TRI->getRegClass (ClassID);
276260 unsigned NumBits = TRI->getRegSizeInBits (*RC);
277- if (NumBits < MinNumBits && NumBits >= RegNumBits ) {
261+ if (NumBits < MinNumBits) {
278262 MinNumBits = NumBits;
279263 MinRC = RC;
280264 }
281- if (MinNumBits == RegNumBits)
282- break ;
283265 }
284266#ifndef NDEBUG
285267 if (MinRC) {
286268 assert (MinRC->isAllocatable () && TRI->isRegClassAligned (MinRC, RCAlign));
287- for (auto [SubReg, SRI ] : SubRegs)
288- // Check that all registers in MinRC support SRI.SubReg subregister.
289- assert (MinRC == TRI->getSubClassWithSubReg (MinRC, SRI. SubReg ));
269+ for (auto [OldSubReg, NewSubReg ] : SubRegs)
270+ // Check that all registers in MinRC support NewSubReg subregister.
271+ assert (MinRC == TRI->getSubClassWithSubReg (MinRC, NewSubReg ));
290272 }
291273#endif
292274 // There might be zero RShift - in this case we are just trying to find smaller
@@ -317,8 +299,7 @@ GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
317299 // If covering subreg is found shift everything so the covering subreg would
318300 // be in the rightmost position.
319301 if (CoverSubreg != AMDGPU::NoSubRegister)
320- return getRegClassWithShiftedSubregs (RC, Offset, End - Offset, CoverSubreg,
321- SubRegs);
302+ return getRegClassWithShiftedSubregs (RC, Offset, CoverSubreg, SubRegs);
322303
323304 // Otherwise find subreg with maximum required alignment and shift it and all
324305 // other subregs to the rightmost possible position with respect to the
@@ -344,7 +325,7 @@ GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
344325 llvm_unreachable (" misaligned subreg" );
345326
346327 unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
347- return getRegClassWithShiftedSubregs (RC, RShift, End - RShift, 0 , SubRegs);
328+ return getRegClassWithShiftedSubregs (RC, RShift, 0 , SubRegs);
348329}
349330
350331// Only the subrange's lanemasks of the original interval need to be modified.
@@ -390,7 +371,7 @@ void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
390371 return ;
391372 }
392373
393- if (unsigned NewSubReg = I->second . SubReg )
374+ if (unsigned NewSubReg = I->second )
394375 NewLI.createSubRangeFrom (Allocator,
395376 TRI->getSubRegIndexLaneMask (NewSubReg), SR);
396377 else // This is the covering subreg (0 index) - set it as main range.
@@ -404,53 +385,23 @@ void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
404385 LIS->removeInterval (OldReg);
405386}
406387
407- const TargetRegisterClass *
408- GCNRewritePartialRegUsesImpl::getOperandRegClass (MachineOperand &MO) const {
409- MachineInstr *MI = MO.getParent ();
410- return TII->getRegClass (TII->get (MI->getOpcode ()), MI->getOperandNo (&MO), TRI,
411- *MI->getParent ()->getParent ());
412- }
413-
414388bool GCNRewritePartialRegUsesImpl::rewriteReg (Register Reg) const {
415- auto Range = MRI->reg_nodbg_operands (Reg);
416- if (Range.empty () || any_of (Range, [](MachineOperand &MO) {
417- return MO.getSubReg () == AMDGPU::NoSubRegister; // Whole reg used. [1]
418- }))
389+
390+ // Collect used subregs.
391+ SubRegMap SubRegs;
392+ for (MachineOperand &MO : MRI->reg_nodbg_operands (Reg)) {
393+ if (MO.getSubReg () == AMDGPU::NoSubRegister)
394+ return false ; // Whole reg used.
395+ SubRegs.try_emplace (MO.getSubReg ());
396+ }
397+
398+ if (SubRegs.empty ())
419399 return false ;
420400
421401 auto *RC = MRI->getRegClass (Reg);
422402 LLVM_DEBUG (dbgs () << " Try to rewrite partial reg " << printReg (Reg, TRI)
423403 << ' :' << TRI->getRegClassName (RC) << ' \n ' );
424404
425- // Collect used subregs and their reg classes inferred from instruction
426- // operands.
427- SubRegMap SubRegs;
428- for (MachineOperand &MO : Range) {
429- const unsigned SubReg = MO.getSubReg ();
430- assert (SubReg != AMDGPU::NoSubRegister); // Due to [1].
431- LLVM_DEBUG (dbgs () << " " << TRI->getSubRegIndexName (SubReg) << ' :' );
432-
433- const auto [I, Inserted] = SubRegs.try_emplace (SubReg);
434- const TargetRegisterClass *&SubRegRC = I->second .RC ;
435-
436- if (Inserted)
437- SubRegRC = TRI->getSubRegisterClass (RC, SubReg);
438-
439- if (SubRegRC) {
440- if (const TargetRegisterClass *OpDescRC = getOperandRegClass (MO)) {
441- LLVM_DEBUG (dbgs () << TRI->getRegClassName (SubRegRC) << " & "
442- << TRI->getRegClassName (OpDescRC) << " = " );
443- SubRegRC = TRI->getCommonSubClass (SubRegRC, OpDescRC);
444- }
445- }
446-
447- if (!SubRegRC) {
448- LLVM_DEBUG (dbgs () << " couldn't find target regclass\n " );
449- return false ;
450- }
451- LLVM_DEBUG (dbgs () << TRI->getRegClassName (SubRegRC) << ' \n ' );
452- }
453-
454405 auto *NewRC = getMinSizeReg (RC, SubRegs);
455406 if (!NewRC) {
456407 LLVM_DEBUG (dbgs () << " No improvement achieved\n " );
@@ -469,9 +420,9 @@ bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
469420 // TODO: create some DI shift expression?
470421 if (MO.isDebug () && MO.getSubReg () == 0 )
471422 continue ;
472- unsigned SubReg = SubRegs[MO.getSubReg ()]. SubReg ;
473- MO.setSubReg (SubReg );
474- if (SubReg == AMDGPU::NoSubRegister && MO.isDef ())
423+ unsigned NewSubReg = SubRegs[MO.getSubReg ()];
424+ MO.setSubReg (NewSubReg );
425+ if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef ())
475426 MO.setIsUndef (false );
476427 }
477428
0 commit comments