
Commit 2175c6c

[RISCV] Set AllocationPriority in line with LMUL (#131176)
This mechanism causes the greedy register allocator to prefer allocating register classes with higher priority first. This helps to ensure that high LMUL registers obtain a register without having to go through the eviction mechanism. In practice, it seems to cause a bunch of code churn, and some minor improvement around widening and narrowing operations.

In a few of the widening tests, we have what look like code size regressions because we end up with two smaller register class copies instead of one larger one after the instruction. However, in any larger code sequence, these are likely to be folded into the producing instructions. (But so were the wider copies after the operation.)

Two observations:

1) We're not setting the greedy-regclass-priority-trumps-globalness flag on the register class, so this doesn't help long mask ranges. I thought about doing that, but the benefit is non-obvious, so I decided it was worth a separate change at minimum.

2) We could arguably set the priority higher for the register classes that exclude v0. I tried that, and it caused a whole bunch of further churn. I may return to it in a separate patch.
1 parent 6ada38b commit 2175c6c

File tree

184 files changed: 8357 additions, 8261 deletions

Large commits have some content hidden by default; only a subset of the changed files is shown below.


llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 2 additions & 0 deletions
@@ -720,6 +720,8 @@ class VReg<list<ValueType> regTypes, dag regList, int Vlmul, int nf = 1>
   let Size = !mul(VLMul, NF, 64);
   let CopyCost = !mul(VLMul, NF);
+  // Prefer to allocate high LMUL registers first.
+  let AllocationPriority = !if(!gt(Vlmul, 1), Vlmul, 0);
 }
 
 defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
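As a worked evaluation of the new field (a sketch for illustration, not part of the patch; it simply plugs the usual Vlmul parameters into the formula above):

// AllocationPriority = !if(!gt(Vlmul, 1), Vlmul, 0)
// Vlmul = 1  ->  !gt(1, 1) = 0  ->  AllocationPriority = 0  (TableGen default, unchanged)
// Vlmul = 2  ->  !gt(2, 1) = 1  ->  AllocationPriority = 2
// Vlmul = 4  ->  !gt(4, 1) = 1  ->  AllocationPriority = 4
// Vlmul = 8  ->  !gt(8, 1) = 1  ->  AllocationPriority = 8

Since the greedy allocator assigns classes with a higher AllocationPriority first, LMUL 8 live ranges get registers before LMUL 4, 2, and 1 ranges, which is what lets them avoid the eviction path described in the commit message. The test diffs below are the resulting churn: mostly renumbered vector registers, with the higher-LMUL values now tending to land in v8 and up.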

llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll

Lines changed: 3 additions & 3 deletions
@@ -20,10 +20,10 @@ define signext i32 @sum(ptr %a, i32 signext %n, i1 %prof.min.iters.check, <vscal
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: .LBB0_4: # %vector.ph
 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v12, v8, v0.t
+; CHECK-NEXT: vredsum.vs v8, v8, v12, v0.t
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 entry:

llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll

Lines changed: 6 additions & 6 deletions
@@ -106,12 +106,12 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v0, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vsaddu.vx v8, v8, a1
+; CHECK-NEXT: vmsltu.vx v0, v8, a2
+; CHECK-NEXT: vsext.vf8 v8, v16
+; CHECK-NEXT: vsaddu.vx v8, v8, a1
 ; CHECK-NEXT: vmsltu.vx v16, v8, a2
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vslideup.vi v0, v16, 2

llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll

Lines changed: 20 additions & 20 deletions
@@ -9,21 +9,21 @@ define void @test(ptr %ref_array, ptr %sad_array) {
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: th.lwd a2, a3, (a0), 0, 3
 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT: vle8.v v8, (a2)
+; RV32-NEXT: vle8.v v12, (a2)
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vzext.vf4 v12, v8
-; RV32-NEXT: vmv.s.x v8, zero
-; RV32-NEXT: vredsum.vs v9, v12, v8
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vzext.vf4 v8, v12
+; RV32-NEXT: vmv.s.x v12, zero
+; RV32-NEXT: vredsum.vs v8, v8, v12
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: th.swia a0, (a1), 4, 0
 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT: vle8.v v9, (a3)
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vle8.v v13, (a3)
+; RV32-NEXT: vmv.v.i v8, 0
 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vi v9, v10, 4
+; RV32-NEXT: vslideup.vi v13, v8, 4
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vzext.vf4 v12, v9
-; RV32-NEXT: vredsum.vs v8, v12, v8
+; RV32-NEXT: vzext.vf4 v8, v13
+; RV32-NEXT: vredsum.vs v8, v8, v12
 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT: vse32.v v8, (a1)
 ; RV32-NEXT: ret
@@ -32,21 +32,21 @@ define void @test(ptr %ref_array, ptr %sad_array) {
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: th.ldd a2, a3, (a0), 0, 4
 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vle8.v v8, (a2)
+; RV64-NEXT: vle8.v v12, (a2)
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vzext.vf4 v12, v8
-; RV64-NEXT: vmv.s.x v8, zero
-; RV64-NEXT: vredsum.vs v9, v12, v8
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vzext.vf4 v8, v12
+; RV64-NEXT: vmv.s.x v12, zero
+; RV64-NEXT: vredsum.vs v8, v8, v12
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: th.swia a0, (a1), 4, 0
 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vle8.v v9, (a3)
-; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vle8.v v13, (a3)
+; RV64-NEXT: vmv.v.i v8, 0
 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vi v9, v10, 4
+; RV64-NEXT: vslideup.vi v13, v8, 4
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vzext.vf4 v12, v9
-; RV64-NEXT: vredsum.vs v8, v12, v8
+; RV64-NEXT: vzext.vf4 v8, v13
+; RV64-NEXT: vredsum.vs v8, v8, v12
 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vse32.v v8, (a1)
 ; RV64-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll

Lines changed: 4 additions & 3 deletions
@@ -8,10 +8,11 @@ define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
 ; CHECK-LABEL: interleave:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
 ; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT: ret
 entry:
 %v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>

llvm/test/CodeGen/RISCV/rvv/compressstore.ll

Lines changed: 25 additions & 25 deletions
@@ -200,12 +200,12 @@ define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data
 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v7, v8
 ; RV64-NEXT: li a2, 128
-; RV64-NEXT: vslidedown.vi v9, v0, 1
+; RV64-NEXT: vslidedown.vi v8, v0, 1
 ; RV64-NEXT: vmv.x.s a3, v0
 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV64-NEXT: vle8.v v24, (a1)
 ; RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v9
+; RV64-NEXT: vmv.x.s a1, v8
 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV64-NEXT: vcompress.vm v8, v16, v0
 ; RV64-NEXT: vcpop.m a4, v0
@@ -227,14 +227,14 @@ define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv1r.v v7, v8
 ; RV32-NEXT: li a2, 128
-; RV32-NEXT: vslidedown.vi v9, v0, 1
+; RV32-NEXT: vslidedown.vi v8, v0, 1
 ; RV32-NEXT: li a3, 32
 ; RV32-NEXT: vmv.x.s a4, v0
 ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV32-NEXT: vle8.v v24, (a1)
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v6, v9, a3
-; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vsrl.vx v6, v8, a3
+; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: vsrl.vx v5, v0, a3
 ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV32-NEXT: vcompress.vm v8, v16, v0
@@ -438,16 +438,16 @@ define void @test_compresstore_v128i16(ptr %p, <128 x i1> %mask, <128 x i16> %da
 ; RV64-NEXT: vcompress.vm v24, v8, v0
 ; RV64-NEXT: vcpop.m a2, v0
 ; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 8
+; RV64-NEXT: vslidedown.vi v7, v0, 8
 ; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vcompress.vm v0, v16, v8
-; RV64-NEXT: vcpop.m a1, v8
+; RV64-NEXT: vcompress.vm v8, v16, v7
+; RV64-NEXT: vcpop.m a1, v7
 ; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
 ; RV64-NEXT: vse16.v v24, (a0)
 ; RV64-NEXT: slli a2, a2, 1
 ; RV64-NEXT: add a0, a0, a2
 ; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vse16.v v0, (a0)
+; RV64-NEXT: vse16.v v8, (a0)
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_compresstore_v128i16:
@@ -635,16 +635,16 @@ define void @test_compresstore_v64i32(ptr %p, <64 x i1> %mask, <64 x i32> %data)
 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV64-NEXT: vse32.v v24, (a0)
 ; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 4
+; RV64-NEXT: vslidedown.vi v24, v0, 4
 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: vcompress.vm v24, v16, v8
-; RV64-NEXT: vcpop.m a2, v8
+; RV64-NEXT: vcompress.vm v8, v16, v24
+; RV64-NEXT: vcpop.m a2, v24
 ; RV64-NEXT: cpopw a1, a1
 ; RV64-NEXT: slli a1, a1, 2
 ; RV64-NEXT: add a0, a0, a1
 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV64-NEXT: vse32.v v24, (a0)
+; RV64-NEXT: vse32.v v8, (a0)
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_compresstore_v64i32:
@@ -654,16 +654,16 @@ define void @test_compresstore_v64i32(ptr %p, <64 x i1> %mask, <64 x i32> %data)
 ; RV32-NEXT: vcompress.vm v24, v8, v0
 ; RV32-NEXT: vcpop.m a2, v0
 ; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v0, 4
+; RV32-NEXT: vslidedown.vi v7, v0, 4
 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vcompress.vm v0, v16, v8
-; RV32-NEXT: vcpop.m a1, v8
+; RV32-NEXT: vcompress.vm v8, v16, v7
+; RV32-NEXT: vcpop.m a1, v7
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT: vse32.v v24, (a0)
 ; RV32-NEXT: slli a2, a2, 2
 ; RV32-NEXT: add a0, a0, a2
 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vse32.v v0, (a0)
+; RV32-NEXT: vse32.v v8, (a0)
 ; RV32-NEXT: ret
 entry:
 tail call void @llvm.masked.compressstore.v64i32(<64 x i32> %data, ptr align 4 %p, <64 x i1> %mask)
@@ -796,18 +796,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data)
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vse64.v v24, (a0)
 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 2
+; RV64-NEXT: vslidedown.vi v24, v0, 2
 ; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64-NEXT: vmv.x.s a1, v0
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vcompress.vm v24, v16, v8
+; RV64-NEXT: vcompress.vm v8, v16, v24
 ; RV64-NEXT: zext.h a1, a1
 ; RV64-NEXT: cpopw a1, a1
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vcpop.m a1, v8
+; RV64-NEXT: vcpop.m a1, v24
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vse64.v v24, (a0)
+; RV64-NEXT: vse64.v v8, (a0)
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_compresstore_v32i64:
@@ -818,18 +818,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data)
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vse64.v v24, (a0)
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v0, 2
+; RV32-NEXT: vslidedown.vi v24, v0, 2
 ; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32-NEXT: vmv.x.s a1, v0
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vcompress.vm v24, v16, v8
+; RV32-NEXT: vcompress.vm v8, v16, v24
 ; RV32-NEXT: zext.h a1, a1
 ; RV32-NEXT: cpop a1, a1
 ; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vcpop.m a1, v8
+; RV32-NEXT: vcpop.m a1, v24
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vse64.v v24, (a0)
+; RV32-NEXT: vse64.v v8, (a0)
 ; RV32-NEXT: ret
 entry:
 tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask)
