Skip to content

Commit e80e49f

Browse files
arsenm authored and mahesh-attarde committed
Greedy: Make trySplitAroundHintReg try to match hints with subreg copies (llvm#160294)
This is essentially the same patch as commit 116ca95: when trying to match a physreg hint, try to find a compatible physreg if there is a subregister copy. This has the slight difference of using getSubReg on the hint instead of getMatchingSuperReg (the other use should also use getSubReg instead; it's faster). At the moment this turns out to have very little effect. The adjacent code needs better handling of subregisters, so continue adding this piecemeal. The X86 test shows a net reduction in real instructions, plus a few new kills.
1 parent 6b82007 commit e80e49f

File tree

2 files changed

+50
-30
lines changed

2 files changed

+50
-30
lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,21 +1383,37 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
13831383
// Compute the cost of assigning a non Hint physical register to VirtReg.
13841384
// We define it as the total frequency of broken COPY instructions to/from
13851385
// Hint register, and after split, they can be deleted.
1386-
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
1387-
if (!TII->isFullCopyInstr(Instr))
1386+
1387+
// FIXME: This is miscounting the costs with subregisters. In particular, this
1388+
// should support recognizing SplitKit formed copy bundles instead of direct
1389+
// copy instructions, which will appear in the same block.
1390+
for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
1391+
const MachineInstr &Instr = *Opnd.getParent();
1392+
if (!Instr.isCopy() || Opnd.isImplicit())
13881393
continue;
1389-
Register OtherReg = Instr.getOperand(1).getReg();
1390-
if (OtherReg == Reg) {
1391-
OtherReg = Instr.getOperand(0).getReg();
1392-
if (OtherReg == Reg)
1393-
continue;
1394-
// Check if VirtReg interferes with OtherReg after this COPY instruction.
1395-
if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
1396-
continue;
1397-
}
1394+
1395+
// Look for the other end of the copy.
1396+
const bool IsDef = Opnd.isDef();
1397+
const MachineOperand &OtherOpnd = Instr.getOperand(IsDef);
1398+
Register OtherReg = OtherOpnd.getReg();
1399+
assert(Reg == Opnd.getReg());
1400+
if (OtherReg == Reg)
1401+
continue;
1402+
1403+
unsigned SubReg = Opnd.getSubReg();
1404+
unsigned OtherSubReg = OtherOpnd.getSubReg();
1405+
if (SubReg && OtherSubReg && SubReg != OtherSubReg)
1406+
continue;
1407+
1408+
// Check if VirtReg interferes with OtherReg after this COPY instruction.
1409+
if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
1410+
continue;
1411+
13981412
MCRegister OtherPhysReg =
13991413
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
1400-
if (OtherPhysReg == Hint)
1414+
MCRegister ThisHint =
1415+
SubReg ? TRI->getSubReg(Hint, SubReg) : MCRegister(Hint);
1416+
if (OtherPhysReg == ThisHint)
14011417
Cost += MBFI->getBlockFreq(Instr.getParent());
14021418
}
14031419

llvm/test/CodeGen/X86/atomic-bit-test.ll

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -469,52 +469,56 @@ entry:
469469
define i16 @use_in_diff_bb() nounwind {
470470
; X86-LABEL: use_in_diff_bb:
471471
; X86: # %bb.0: # %entry
472-
; X86-NEXT: pushl %esi
473-
; X86-NEXT: movzwl v16, %esi
472+
; X86-NEXT: movzwl v16, %eax
474473
; X86-NEXT: .p2align 4
475474
; X86-NEXT: .LBB17_1: # %atomicrmw.start
476475
; X86-NEXT: # =>This Inner Loop Header: Depth=1
477-
; X86-NEXT: movl %esi, %ecx
476+
; X86-NEXT: movl %eax, %ecx
478477
; X86-NEXT: orl $1, %ecx
479-
; X86-NEXT: movl %esi, %eax
478+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
480479
; X86-NEXT: lock cmpxchgw %cx, v16
481-
; X86-NEXT: movl %eax, %esi
480+
; X86-NEXT: # kill: def $ax killed $ax def $eax
482481
; X86-NEXT: jne .LBB17_1
483482
; X86-NEXT: # %bb.2: # %atomicrmw.end
484-
; X86-NEXT: xorl %eax, %eax
485-
; X86-NEXT: testb %al, %al
483+
; X86-NEXT: xorl %ecx, %ecx
484+
; X86-NEXT: testb %cl, %cl
486485
; X86-NEXT: jne .LBB17_4
487486
; X86-NEXT: # %bb.3:
487+
; X86-NEXT: pushl %esi
488+
; X86-NEXT: movl %eax, %esi
488489
; X86-NEXT: calll foo@PLT
489-
; X86-NEXT: .LBB17_4:
490-
; X86-NEXT: andl $1, %esi
491490
; X86-NEXT: movl %esi, %eax
492491
; X86-NEXT: popl %esi
492+
; X86-NEXT: .LBB17_4:
493+
; X86-NEXT: andl $1, %eax
494+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
493495
; X86-NEXT: retl
494496
;
495497
; X64-LABEL: use_in_diff_bb:
496498
; X64: # %bb.0: # %entry
497-
; X64-NEXT: pushq %rbx
498-
; X64-NEXT: movzwl v16(%rip), %ebx
499+
; X64-NEXT: movzwl v16(%rip), %eax
499500
; X64-NEXT: .p2align 4
500501
; X64-NEXT: .LBB17_1: # %atomicrmw.start
501502
; X64-NEXT: # =>This Inner Loop Header: Depth=1
502-
; X64-NEXT: movl %ebx, %ecx
503+
; X64-NEXT: movl %eax, %ecx
503504
; X64-NEXT: orl $1, %ecx
504-
; X64-NEXT: movl %ebx, %eax
505+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
505506
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
506-
; X64-NEXT: movl %eax, %ebx
507+
; X64-NEXT: # kill: def $ax killed $ax def $eax
507508
; X64-NEXT: jne .LBB17_1
508509
; X64-NEXT: # %bb.2: # %atomicrmw.end
509-
; X64-NEXT: xorl %eax, %eax
510-
; X64-NEXT: testb %al, %al
510+
; X64-NEXT: xorl %ecx, %ecx
511+
; X64-NEXT: testb %cl, %cl
511512
; X64-NEXT: jne .LBB17_4
512513
; X64-NEXT: # %bb.3:
514+
; X64-NEXT: pushq %rbx
515+
; X64-NEXT: movl %eax, %ebx
513516
; X64-NEXT: callq foo@PLT
514-
; X64-NEXT: .LBB17_4:
515-
; X64-NEXT: andl $1, %ebx
516517
; X64-NEXT: movl %ebx, %eax
517518
; X64-NEXT: popq %rbx
519+
; X64-NEXT: .LBB17_4:
520+
; X64-NEXT: andl $1, %eax
521+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
518522
; X64-NEXT: retq
519523
entry:
520524
%0 = atomicrmw or ptr @v16, i16 1 monotonic, align 2

0 commit comments

Comments
 (0)