Skip to content

Commit 552c0d1

Browse files
authored
[RISCV] Use XORI/SLLI/ADDI when materializing select of constants (#155845)
This case is the inverse of the one introduced in #155644. The complexity with the inversion is that we need to also invert the condition before shifting it. I had originally planned to only do so when the condition was "cheaply" invertible (i.e. didn't require the xori), but when looking more closely at the diffs I noticed that while the XORI prevents this from being an icount improvement, and actually slightly lengthens the critical path, it does still reduce the number of registers needed.
1 parent d254aed commit 552c0d1

File tree

3 files changed

+40
-36
lines changed

3 files changed

+40
-36
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9287,13 +9287,19 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
92879287
}
92889288
}
92899289

9290-
// Use SHL/ADDI to avoid having to materialize a constant in register
9291-
// TODO: Handle the inverse case when the condition can be cheaply flipped
9290+
// Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9291+
// a constant in register
92929292
if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
92939293
SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
92949294
SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
92959295
return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
92969296
}
9297+
if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9298+
SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9299+
CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9300+
SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9301+
return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9302+
}
92979303

92989304
auto getCost = [&](const APInt &Delta, const APInt &Addend) {
92999305
const int DeltaCost = RISCVMatInt::getIntMatCost(

llvm/test/CodeGen/RISCV/select-const.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
8585
;
8686
; RV32ZICOND-LABEL: select_const_int_harder:
8787
; RV32ZICOND: # %bb.0:
88-
; RV32ZICOND-NEXT: li a1, 32
89-
; RV32ZICOND-NEXT: czero.nez a0, a1, a0
88+
; RV32ZICOND-NEXT: xori a0, a0, 1
89+
; RV32ZICOND-NEXT: slli a0, a0, 5
9090
; RV32ZICOND-NEXT: addi a0, a0, 6
9191
; RV32ZICOND-NEXT: ret
9292
;
@@ -112,8 +112,8 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
112112
;
113113
; RV64ZICOND-LABEL: select_const_int_harder:
114114
; RV64ZICOND: # %bb.0:
115-
; RV64ZICOND-NEXT: li a1, 32
116-
; RV64ZICOND-NEXT: czero.nez a0, a1, a0
115+
; RV64ZICOND-NEXT: xori a0, a0, 1
116+
; RV64ZICOND-NEXT: slli a0, a0, 5
117117
; RV64ZICOND-NEXT: addiw a0, a0, 6
118118
; RV64ZICOND-NEXT: ret
119119
%1 = select i1 %a, i32 6, i32 38
@@ -636,8 +636,7 @@ define i32 @diff_shl_addi(i32 signext %x) {
636636
; RV32ZICOND-LABEL: diff_shl_addi:
637637
; RV32ZICOND: # %bb.0:
638638
; RV32ZICOND-NEXT: srli a0, a0, 31
639-
; RV32ZICOND-NEXT: lui a1, 4
640-
; RV32ZICOND-NEXT: czero.eqz a0, a1, a0
639+
; RV32ZICOND-NEXT: slli a0, a0, 14
641640
; RV32ZICOND-NEXT: addi a0, a0, 25
642641
; RV32ZICOND-NEXT: ret
643642
;
@@ -666,8 +665,7 @@ define i32 @diff_shl_addi(i32 signext %x) {
666665
; RV64ZICOND-LABEL: diff_shl_addi:
667666
; RV64ZICOND: # %bb.0:
668667
; RV64ZICOND-NEXT: srli a0, a0, 63
669-
; RV64ZICOND-NEXT: lui a1, 4
670-
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
668+
; RV64ZICOND-NEXT: slli a0, a0, 14
671669
; RV64ZICOND-NEXT: addiw a0, a0, 25
672670
; RV64ZICOND-NEXT: ret
673671
%cmp = icmp sgt i32 %x, -1

llvm/test/CodeGen/RISCV/select.ll

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,15 +1750,15 @@ define i32 @select_cst5(i1 zeroext %cond) {
17501750
;
17511751
; RV64IMXVTCONDOPS-LABEL: select_cst5:
17521752
; RV64IMXVTCONDOPS: # %bb.0:
1753-
; RV64IMXVTCONDOPS-NEXT: li a1, 2
1754-
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1753+
; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
1754+
; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 1
17551755
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
17561756
; RV64IMXVTCONDOPS-NEXT: ret
17571757
;
17581758
; CHECKZICOND-LABEL: select_cst5:
17591759
; CHECKZICOND: # %bb.0:
1760-
; CHECKZICOND-NEXT: li a1, 2
1761-
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
1760+
; CHECKZICOND-NEXT: xori a0, a0, 1
1761+
; CHECKZICOND-NEXT: slli a0, a0, 1
17621762
; CHECKZICOND-NEXT: addi a0, a0, 2047
17631763
; CHECKZICOND-NEXT: ret
17641764
%ret = select i1 %cond, i32 2047, i32 2049
@@ -1826,22 +1826,22 @@ define i32 @select_cst_diff2(i1 zeroext %cond) {
18261826
;
18271827
; RV64IMXVTCONDOPS-LABEL: select_cst_diff2:
18281828
; RV64IMXVTCONDOPS: # %bb.0:
1829-
; RV64IMXVTCONDOPS-NEXT: li a1, 2
1830-
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1829+
; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
1830+
; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 1
18311831
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 120
18321832
; RV64IMXVTCONDOPS-NEXT: ret
18331833
;
18341834
; RV32IMZICOND-LABEL: select_cst_diff2:
18351835
; RV32IMZICOND: # %bb.0:
1836-
; RV32IMZICOND-NEXT: li a1, 2
1837-
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
1836+
; RV32IMZICOND-NEXT: xori a0, a0, 1
1837+
; RV32IMZICOND-NEXT: slli a0, a0, 1
18381838
; RV32IMZICOND-NEXT: addi a0, a0, 120
18391839
; RV32IMZICOND-NEXT: ret
18401840
;
18411841
; RV64IMZICOND-LABEL: select_cst_diff2:
18421842
; RV64IMZICOND: # %bb.0:
1843-
; RV64IMZICOND-NEXT: li a1, 2
1844-
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
1843+
; RV64IMZICOND-NEXT: xori a0, a0, 1
1844+
; RV64IMZICOND-NEXT: slli a0, a0, 1
18451845
; RV64IMZICOND-NEXT: addiw a0, a0, 120
18461846
; RV64IMZICOND-NEXT: ret
18471847
%ret = select i1 %cond, i32 120, i32 122
@@ -1949,15 +1949,15 @@ define i32 @select_cst_diff4_invert(i1 zeroext %cond) {
19491949
;
19501950
; RV64IMXVTCONDOPS-LABEL: select_cst_diff4_invert:
19511951
; RV64IMXVTCONDOPS: # %bb.0:
1952-
; RV64IMXVTCONDOPS-NEXT: li a1, 4
1953-
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1952+
; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
1953+
; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 2
19541954
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
19551955
; RV64IMXVTCONDOPS-NEXT: ret
19561956
;
19571957
; CHECKZICOND-LABEL: select_cst_diff4_invert:
19581958
; CHECKZICOND: # %bb.0:
1959-
; CHECKZICOND-NEXT: li a1, 4
1960-
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
1959+
; CHECKZICOND-NEXT: xori a0, a0, 1
1960+
; CHECKZICOND-NEXT: slli a0, a0, 2
19611961
; CHECKZICOND-NEXT: addi a0, a0, 6
19621962
; CHECKZICOND-NEXT: ret
19631963
%ret = select i1 %cond, i32 6, i32 10
@@ -2029,22 +2029,22 @@ define i32 @select_cst_diff8_invert(i1 zeroext %cond) {
20292029
;
20302030
; RV64IMXVTCONDOPS-LABEL: select_cst_diff8_invert:
20312031
; RV64IMXVTCONDOPS: # %bb.0:
2032-
; RV64IMXVTCONDOPS-NEXT: li a1, 8
2033-
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
2032+
; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
2033+
; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 3
20342034
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
20352035
; RV64IMXVTCONDOPS-NEXT: ret
20362036
;
20372037
; RV32IMZICOND-LABEL: select_cst_diff8_invert:
20382038
; RV32IMZICOND: # %bb.0:
2039-
; RV32IMZICOND-NEXT: li a1, 8
2040-
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
2039+
; RV32IMZICOND-NEXT: xori a0, a0, 1
2040+
; RV32IMZICOND-NEXT: slli a0, a0, 3
20412041
; RV32IMZICOND-NEXT: addi a0, a0, 6
20422042
; RV32IMZICOND-NEXT: ret
20432043
;
20442044
; RV64IMZICOND-LABEL: select_cst_diff8_invert:
20452045
; RV64IMZICOND: # %bb.0:
2046-
; RV64IMZICOND-NEXT: li a1, 8
2047-
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
2046+
; RV64IMZICOND-NEXT: xori a0, a0, 1
2047+
; RV64IMZICOND-NEXT: slli a0, a0, 3
20482048
; RV64IMZICOND-NEXT: addiw a0, a0, 6
20492049
; RV64IMZICOND-NEXT: ret
20502050
%ret = select i1 %cond, i32 6, i32 14
@@ -2117,22 +2117,22 @@ define i32 @select_cst_diff1024_invert(i1 zeroext %cond) {
21172117
;
21182118
; RV64IMXVTCONDOPS-LABEL: select_cst_diff1024_invert:
21192119
; RV64IMXVTCONDOPS: # %bb.0:
2120-
; RV64IMXVTCONDOPS-NEXT: li a1, 1024
2121-
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
2120+
; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
2121+
; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 10
21222122
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
21232123
; RV64IMXVTCONDOPS-NEXT: ret
21242124
;
21252125
; RV32IMZICOND-LABEL: select_cst_diff1024_invert:
21262126
; RV32IMZICOND: # %bb.0:
2127-
; RV32IMZICOND-NEXT: li a1, 1024
2128-
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
2127+
; RV32IMZICOND-NEXT: xori a0, a0, 1
2128+
; RV32IMZICOND-NEXT: slli a0, a0, 10
21292129
; RV32IMZICOND-NEXT: addi a0, a0, 6
21302130
; RV32IMZICOND-NEXT: ret
21312131
;
21322132
; RV64IMZICOND-LABEL: select_cst_diff1024_invert:
21332133
; RV64IMZICOND: # %bb.0:
2134-
; RV64IMZICOND-NEXT: li a1, 1024
2135-
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
2134+
; RV64IMZICOND-NEXT: xori a0, a0, 1
2135+
; RV64IMZICOND-NEXT: slli a0, a0, 10
21362136
; RV64IMZICOND-NEXT: addiw a0, a0, 6
21372137
; RV64IMZICOND-NEXT: ret
21382138
%ret = select i1 %cond, i32 6, i32 1030

0 commit comments

Comments
 (0)