Skip to content

Commit 2e46de8

Browse files
committed
Address review comments
Add a reproducer to show that the previous regressions are gone. Also remove the code that calculates live ranges for physregs: I don't have any tests that cover this case, and the previous reproducers don't trigger this code. This suggests to me that the code in llvm#121734 may be sufficient to fix this issue.
1 parent f5266f5 commit 2e46de8

File tree

2 files changed

+126
-7
lines changed

2 files changed

+126
-7
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,7 +1496,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14961496
// If lanemasks need to be tracked, compile the lanemask of the NewMI
14971497
// implicit def operands to avoid subranges for the super-regs from
14981498
// being removed by code later on in this function.
1499-
if (MRI->shouldTrackSubRegLiveness(DstReg))
1499+
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
15001500
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
15011501
}
15021502
}
@@ -1979,12 +1979,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19791979
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
19801980
DefinedLanes |= UnusedLanes;
19811981
}
1982-
} else if (DstIsPhys) {
1983-
// Ensure we have a computed liverange for all regunits,
1984-
// as this is required by the scheduler/regpressure tracker,
1985-
// see: https://github.com/llvm/llvm-project/issues/76416
1986-
for (MCRegUnit Unit : TRI->regunits(DstReg))
1987-
LIS->getRegUnit(Unit);
19881982
}
19891983

19901984
MachineInstrBuilder MIB(*MF, UseMI);

llvm/test/CodeGen/X86/pr76416.ll

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-unknown-linux-gnu"
6+
7+
;
8+
; Reproducer from https://github.com/llvm/llvm-project/issues/76416
9+
;
10+
11+
@load_p = external global ptr, align 8
12+
@load_data = external global i8, align 1
13+
14+
define dso_local void @pr76416() {
15+
; CHECK-LABEL: pr76416:
16+
; CHECK: # %bb.0: # %entry
17+
; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
18+
; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
19+
; CHECK-NEXT: jg .LBB0_3
20+
; CHECK-NEXT: .p2align 4
21+
; CHECK-NEXT: .LBB0_2: # %for.body
22+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
23+
; CHECK-NEXT: xorl %eax, %eax
24+
; CHECK-NEXT: #APP
25+
; CHECK-NEXT: #NO_APP
26+
; CHECK-NEXT: incl -{{[0-9]+}}(%rsp)
27+
; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
28+
; CHECK-NEXT: jle .LBB0_2
29+
; CHECK-NEXT: .LBB0_3: # %for.end
30+
; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
31+
; CHECK-NEXT: movq load_p@GOTPCREL(%rip), %rax
32+
; CHECK-NEXT: movq load_data@GOTPCREL(%rip), %rcx
33+
; CHECK-NEXT: .p2align 4
34+
; CHECK-NEXT: .LBB0_4: # %for.cond1
35+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
36+
; CHECK-NEXT: #APP
37+
; CHECK-NEXT: #NO_APP
38+
; CHECK-NEXT: movq (%rax), %rdx
39+
; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rsi
40+
; CHECK-NEXT: movzbl (%rdx,%rsi), %edx
41+
; CHECK-NEXT: movb %dl, (%rcx)
42+
; CHECK-NEXT: leal 1(%rsi), %edx
43+
; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
44+
; CHECK-NEXT: jmp .LBB0_4
45+
entry:
46+
%i = alloca i32, align 4
47+
store i32 0, ptr %i, align 4
48+
br label %for.cond
49+
50+
for.cond: ; preds = %for.body, %entry
51+
%0 = load i32, ptr %i, align 4
52+
%cmp = icmp slt i32 %0, 4
53+
br i1 %cmp, label %for.body, label %for.end
54+
55+
for.body: ; preds = %for.cond
56+
call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind
57+
%1 = load i32, ptr %i, align 4
58+
%inc = add nsw i32 %1, 1
59+
store i32 %inc, ptr %i, align 4
60+
br label %for.cond
61+
62+
for.end: ; preds = %for.cond
63+
store i32 0, ptr %i, align 4
64+
br label %for.cond1
65+
66+
for.cond1: ; preds = %for.cond1, %for.end
67+
call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind
68+
%2 = load ptr, ptr @load_p, align 8
69+
%regs = getelementptr inbounds { [4 x i8] }, ptr %2, i32 0, i32 0
70+
%3 = load i32, ptr %i, align 4
71+
%idxprom = sext i32 %3 to i64
72+
%arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
73+
%4 = load i8, ptr %arrayidx, align 1
74+
store i8 %4, ptr @load_data, align 1
75+
%5 = load i32, ptr %i, align 4
76+
%inc5 = add nsw i32 %5, 1
77+
store i32 %inc5, ptr %i, align 4
78+
br label %for.cond1
79+
}
80+
81+
;
82+
; Related reproducer as reported on https://github.com/llvm/llvm-project/commit/0e46b49de43349f8cbb2a7d4c6badef6d16e31ae#commitcomment-136147998
83+
;
84+
85+
define void @f(i1 %cmp.not.i.i.i) {
86+
; CHECK-LABEL: f:
87+
; CHECK: # %bb.0: # %entry
88+
; CHECK-NEXT: pushq %rax
89+
; CHECK-NEXT: .cfi_def_cfa_offset 16
90+
; CHECK-NEXT: movl 0, %eax
91+
; CHECK-NEXT: xorl %ecx, %ecx
92+
; CHECK-NEXT: sarl %cl, %eax
93+
; CHECK-NEXT: movl $1, %edx
94+
; CHECK-NEXT: xorl %ecx, %ecx
95+
; CHECK-NEXT: shrl %cl, %edx
96+
; CHECK-NEXT: imull %eax, %edx
97+
; CHECK-NEXT: movslq %edx, %rsi
98+
; CHECK-NEXT: xorl %eax, %eax
99+
; CHECK-NEXT: xorl %edi, %edi
100+
; CHECK-NEXT: xorl %edx, %edx
101+
; CHECK-NEXT: callq *%rax
102+
entry:
103+
br label %for.cond10.preheader
104+
105+
trap: ; preds = %for.body13
106+
unreachable
107+
108+
for.cond10.preheader: ; preds = %while.cond.i.i.i, %entry
109+
%indvars.iv = phi i64 [ 0, %entry ], [ 1, %while.cond.i.i.i ]
110+
%0 = trunc i64 %indvars.iv to i32
111+
br label %for.body13
112+
113+
for.body13: ; preds = %for.cond10.preheader
114+
%1 = load i32, ptr null, align 4
115+
%shr = ashr i32 %1, %0
116+
%shr15 = ashr i32 1, %0
117+
%mul16 = mul i32 %shr15, %shr
118+
%conv = sext i32 %mul16 to i64
119+
call void null(ptr null, i64 %conv, ptr null)
120+
br i1 false, label %while.cond.i.i.i, label %trap
121+
122+
while.cond.i.i.i: ; preds = %while.cond.i.i.i, %for.body13
123+
br i1 %cmp.not.i.i.i, label %for.cond10.preheader, label %while.cond.i.i.i
124+
}
125+

0 commit comments

Comments
 (0)