Skip to content

Commit 975fba1

Browse files
authored
[RegisterCoalescer] Prune live range of early-clobber from live-in (llvm#157628)
Fixes llvm#134424. Fixes llvm#71023. For context, see llvm#71024: when RegisterCoalescer tries to merge `early-clobber %1:vr = PseudoVRGATHER_VI_M1 undef %1, ...`, JoinVals reports `CR_Replace` because the conflict with the `undef` operand can be ignored. However, when pruning values, we need to remove any live ranges that overlap a `CR_Replace` resolution, and `LiveIntervals::pruneValue` failed to prune the early-clobber part of those live ranges. This patch implements the missing pruning by removing those ranges from the live-in value. I am not familiar with the RegisterCoalescer component, so any advice is appreciated. llvm#156249 seems to be related but is not resolved by this patch; I am still investigating.
1 parent 3e0bf3d commit 975fba1

File tree

3 files changed

+137
-1
lines changed

3 files changed

+137
-1
lines changed

llvm/lib/CodeGen/LiveIntervals.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
661661
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
662662
SmallVectorImpl<SlotIndex> *EndPoints) {
663663
LiveQueryResult LRQ = LR.Query(Kill);
664-
VNInfo *VNI = LRQ.valueOutOrDead();
664+
// LR may have liveness reachable from early clobber slot, which may be
665+
// only live-in instead of live-out of the instruction.
666+
// For example, if LR = [1r, 3r) and Kill = 3e, we must prune [3e, 3r) of LR.
667+
VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn();
665668
if (!VNI)
666669
return;
667670

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -O1 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
3+
4+
define i32 @pr134424(i64 %input_value, i32 %base_value, i1 %cond_flag1, i1 %cond_flag2, i1 %cond_flag3) {
5+
; CHECK-LABEL: pr134424:
6+
; CHECK: # %bb.0: # %for.body.us.preheader.i
7+
; CHECK-NEXT: andi a3, a3, 1
8+
; CHECK-NEXT: andi a5, a2, 1
9+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
10+
; CHECK-NEXT: vmv.v.x v8, a0
11+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma
12+
; CHECK-NEXT: vmv.s.x v8, zero
13+
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
14+
; CHECK-NEXT: vmv.v.i v0, 14
15+
; CHECK-NEXT: mv a2, a1
16+
; CHECK-NEXT: bnez a5, .LBB0_2
17+
; CHECK-NEXT: # %bb.1: # %for.body.us.preheader.i
18+
; CHECK-NEXT: li a2, 1
19+
; CHECK-NEXT: .LBB0_2: # %for.body.us.preheader.i
20+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
21+
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
22+
; CHECK-NEXT: andi a4, a4, 1
23+
; CHECK-NEXT: mv a0, a1
24+
; CHECK-NEXT: bnez a3, .LBB0_4
25+
; CHECK-NEXT: # %bb.3: # %for.body.us.preheader.i
26+
; CHECK-NEXT: li a0, 1
27+
; CHECK-NEXT: .LBB0_4: # %for.body.us.preheader.i
28+
; CHECK-NEXT: vmsle.vi v0, v8, 0
29+
; CHECK-NEXT: sext.w a2, a2
30+
; CHECK-NEXT: bnez a4, .LBB0_6
31+
; CHECK-NEXT: # %bb.5: # %for.body.us.preheader.i
32+
; CHECK-NEXT: li a1, 1
33+
; CHECK-NEXT: .LBB0_6: # %for.body.us.preheader.i
34+
; CHECK-NEXT: sext.w a0, a0
35+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
36+
; CHECK-NEXT: vmv.v.i v8, 0
37+
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
38+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
39+
; CHECK-NEXT: vredmin.vs v8, v8, v8
40+
; CHECK-NEXT: vmv.x.s a3, v8
41+
; CHECK-NEXT: sext.w a1, a1
42+
; CHECK-NEXT: bge a3, a2, .LBB0_11
43+
; CHECK-NEXT: # %bb.7: # %for.body.us.preheader.i
44+
; CHECK-NEXT: bge a0, a1, .LBB0_12
45+
; CHECK-NEXT: .LBB0_8: # %for.body.us.preheader.i
46+
; CHECK-NEXT: blt a3, a0, .LBB0_10
47+
; CHECK-NEXT: .LBB0_9: # %for.body.us.preheader.i
48+
; CHECK-NEXT: mv a3, a0
49+
; CHECK-NEXT: .LBB0_10: # %for.body.us.preheader.i
50+
; CHECK-NEXT: sw a3, 0(zero)
51+
; CHECK-NEXT: li a0, 0
52+
; CHECK-NEXT: ret
53+
; CHECK-NEXT: .LBB0_11: # %for.body.us.preheader.i
54+
; CHECK-NEXT: mv a3, a2
55+
; CHECK-NEXT: blt a0, a1, .LBB0_8
56+
; CHECK-NEXT: .LBB0_12: # %for.body.us.preheader.i
57+
; CHECK-NEXT: mv a0, a1
58+
; CHECK-NEXT: bge a3, a0, .LBB0_9
59+
; CHECK-NEXT: j .LBB0_10
60+
for.body.us.preheader.i:
61+
%partial_vector = insertelement <4 x i64> zeroinitializer, i64 %input_value, i64 1
62+
%comparison_vector = shufflevector <4 x i64> %partial_vector, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
63+
%comparison_result = icmp sle <4 x i64> %comparison_vector, zeroinitializer
64+
%selected_value1 = select i1 %cond_flag1, i32 %base_value, i32 1
65+
%selected_value2 = select i1 %cond_flag2, i32 %base_value, i32 1
66+
%selected_value3 = select i1 %cond_flag3, i32 %base_value, i32 1
67+
%bool_to_int = zext <4 x i1> %comparison_result to <4 x i32>
68+
%extended_vector = shufflevector <4 x i32> %bool_to_int, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
69+
%vector_min = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %extended_vector)
70+
%min1 = call i32 @llvm.smin.i32(i32 %vector_min, i32 %selected_value1)
71+
%min2 = call i32 @llvm.smin.i32(i32 %selected_value2, i32 %selected_value3)
72+
%final_min = call i32 @llvm.smin.i32(i32 %min1, i32 %min2)
73+
store i32 %final_min, ptr null, align 4
74+
ret i32 0
75+
}
76+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=register-coalescer -o - %s | FileCheck %s
3+
4+
---
5+
name: pr71023
6+
tracksRegLiveness: true
7+
body: |
8+
; CHECK-LABEL: name: pr71023
9+
; CHECK: bb.0:
10+
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
11+
; CHECK-NEXT: liveins: $x10, $v8, $v10
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
14+
; CHECK-NEXT: undef [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_2:vrn8m1 = PseudoVMV_V_I_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
15+
; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_6:vrn8m1 = COPY undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2
16+
; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3
17+
; CHECK-NEXT: PseudoBR %bb.1
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: bb.1:
20+
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3
23+
; CHECK-NEXT: PseudoBR %bb.2
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: bb.2:
26+
; CHECK-NEXT: successors: %bb.3(0x80000000)
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: bb.3:
29+
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
30+
; CHECK-NEXT: early-clobber [[PseudoVMV_V_I_M1_]].sub_vrm1_0:vrn8m1 = PseudoVRGATHER_VI_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_0, [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
31+
; CHECK-NEXT: PseudoVSSEG6E8_V_M1_MASK [[PseudoVMV_V_I_M1_]].sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5, undef [[DEF]], killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1)
32+
; CHECK-NEXT: PseudoRET
33+
bb.0:
34+
successors: %bb.3(0x40000000), %bb.1(0x40000000)
35+
liveins: $x10, $v8, $v10
36+
%0:gpr = IMPLICIT_DEF
37+
%1:vrnov0 = PseudoVMV_V_I_M1 undef %1, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
38+
%2:vrnov0 = IMPLICIT_DEF
39+
undef %3.sub_vrm1_0:vrn6m1nov0 = COPY undef %1
40+
%3.sub_vrm1_3:vrn6m1nov0 = COPY %2
41+
%3.sub_vrm1_4:vrn6m1nov0 = COPY undef %1
42+
BNE undef %0, $x0, %bb.3
43+
PseudoBR %bb.1
44+
bb.1:
45+
successors: %bb.3(0x40000000), %bb.2(0x40000000)
46+
BNE killed undef %0, $x0, %bb.3
47+
PseudoBR %bb.2
48+
bb.2:
49+
successors: %bb.3(0x80000000)
50+
bb.3:
51+
%4:vr = IMPLICIT_DEF
52+
early-clobber %4:vr = PseudoVRGATHER_VI_M1 undef %4, killed %1, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
53+
undef %5.sub_vrm1_0:vrn6m1 = COPY killed %4
54+
%5.sub_vrm1_5:vrn6m1 = COPY killed %2
55+
PseudoVSSEG6E8_V_M1_MASK killed %5, undef %0, killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1)
56+
PseudoRET
57+
...

0 commit comments

Comments
 (0)