-
Notifications
You must be signed in to change notification settings - Fork 15k
RegAllocGreedy: Check if copied lanes are live in trySplitAroundHintReg #160424
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-systemz @llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) ChangesFor subregister copies, do a subregister live check instead of checking Full diff: https://github.com/llvm/llvm-project/pull/160424.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 8e6cf3e6b51b3..6e0585b2e9e55 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1406,8 +1406,28 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
continue;
// Check if VirtReg interferes with OtherReg after this COPY instruction.
- if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
- continue;
+ if (Opnd.readsReg()) {
+ SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot();
+
+ if (SubReg) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if (IsDef)
+ Mask = ~Mask;
+
+ if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) {
+ if ((S.LaneMask & Mask).any()) {
+ if (S.liveAt(Index))
+ return true;
+ }
+ return false;
+ })) {
+ continue;
+ }
+ } else {
+ if (VirtReg.liveAt(Index))
+ continue;
+ }
+ }
MCRegister OtherPhysReg =
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index d3d641357ae58..eb7c1b632dba9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -235,7 +235,7 @@ define half @f12_half(half %dummy, half %val, ptr %dest) {
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
@@ -344,7 +344,7 @@ define half @f15_half(half %val, half %dummy, ptr %dest) {
; CHECK-NEXT: blah %f2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB15_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index ec1b8a3c8d6d9..f998128af95f8 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -335,84 +335,83 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: andl $-16, %esp
; X86-SLOW-NEXT: subl $32, %esp
-; X86-SLOW-NEXT: movl 24(%ebp), %esi
+; X86-SLOW-NEXT: movl 24(%ebp), %edi
; X86-SLOW-NEXT: movl 28(%ebp), %eax
; X86-SLOW-NEXT: movl 48(%ebp), %edx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: testb $64, %cl
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %ebx
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, %edx
-; X86-SLOW-NEXT: movl 32(%ebp), %esi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edi, %edx
+; X86-SLOW-NEXT: movl 32(%ebp), %edi
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ebx
; X86-SLOW-NEXT: movl 36(%ebp), %eax
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl 40(%ebp), %ecx
; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl 44(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ebx
-; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: notb %bl
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: orl %esi, %edi
+; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %eax, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: movl %ebx, %eax
+; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %ebx
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 8(%ebp), %eax
-; X86-SLOW-NEXT: movl %ebx, 12(%eax)
-; X86-SLOW-NEXT: movl %esi, 8(%eax)
-; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %edi, (%eax)
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %ebx, 8(%eax)
+; X86-SLOW-NEXT: movl %edi, 4(%eax)
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 544ab7fc77374..c307833e488c9 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -322,79 +322,79 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: subl $16, %esp
; X86-SLOW-NEXT: movl 24(%ebp), %edx
; X86-SLOW-NEXT: movl 28(%ebp), %esi
-; X86-SLOW-NEXT: movl 48(%ebp), %ebx
+; X86-SLOW-NEXT: movl 48(%ebp), %edi
; X86-SLOW-NEXT: movl 56(%ebp), %eax
; X86-SLOW-NEXT: testb $64, %al
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %eax
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl 32(%ebp), %edx
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 36(%ebp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl 40(%ebp), %eax
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl 44(%ebp), %eax
+; X86-SLOW-NEXT: movl 40(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: movl 56(%ebp), %ebx
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ebx
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, %edi
+; X86-SLOW-NEXT: movl (%esp), %ebx # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: notb %al
-; X86-SLOW-NEXT: movl %ebx, %edi
-; X86-SLOW-NEXT: addl %ebx, %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl %edx, %ebx
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: addl %edi, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl %ebx, %edi
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-SLOW-NEXT: leal (%edi,%edi), %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SLOW-NEXT: leal (%eax,%eax), %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: addl %esi, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl 8(%ebp), %ecx
-; X86-SLOW-NEXT: movl %esi, 12(%ecx)
-; X86-SLOW-NEXT: movl %ebx, 8(%ecx)
-; X86-SLOW-NEXT: movl %edx, 4(%ecx)
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: movl %eax, (%ecx)
-; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl 8(%ebp), %eax
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %ebx, 4(%eax)
+; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
|
205e4cd to
5f93f5c
Compare
| ; CHECK-NEXT: #NO_APP | ||
| ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT | ||
| ; CHECK-NEXT: ltebr %f0, %f0 | ||
| ; CHECK-NEXT: ltebr %f1, %f0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like a regression?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems neutral?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just noise perhaps, but it seems odd to use %f1 when %f0 is "available".
From a distance it looks like it could be a side-effect of the code now considering the liveness of defs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one nit that I think helps readability
For subregister copies, do a subregister live check instead of checking the main range. Doesn't do much yet, the split analysis still does not track live ranges.
5f93f5c to
661b22d
Compare
…eg (llvm#160424) For subregister copies, do a subregister live check instead of checking the main range. Doesn't do much yet, the split analysis still does not track live ranges.

For subregister copies, do a subregister live check instead of checking
the main range. Doesn't do much yet, the split analysis still does not
track live ranges.