Skip to content

Commit f7b9e3f

Browse files
committed
[GISel][RISCV][AArch64] Support legalizing G_SCMP/G_UCMP to sub(isgt, islt).
Convert the LLT to EVT and call TargetLowering::shouldExpandCmpUsingSelects to determine if we should do this. We don't have a getSetccResultType, so I'm boolean extending the compares to the result type and using that. If the compares legalize to the same type, these extends will get removed. Unfortunately, if the compares legalize to a different type, we end up with truncates or extends that might not be optimally placed. I wonder if we can work around this by adding widening/narrowing rules for the G_SCMP/G_UCMP before lowering?
1 parent e81130a commit f7b9e3f

File tree

8 files changed

+284
-802
lines changed

8 files changed

+284
-802
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7920,6 +7920,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
79207920

79217921
Register Dst = Cmp->getReg(0);
79227922
LLT DstTy = MRI.getType(Dst);
7923+
LLT SrcTy = MRI.getType(Cmp->getReg(1));
79237924
LLT CmpTy = DstTy.changeElementSize(1);
79247925

79257926
CmpInst::Predicate LTPredicate = Cmp->isSigned()
@@ -7929,16 +7930,30 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
79297930
? CmpInst::Predicate::ICMP_SGT
79307931
: CmpInst::Predicate::ICMP_UGT;
79317932

7932-
auto One = MIRBuilder.buildConstant(DstTy, 1);
79337933
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
79347934
auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
79357935
Cmp->getRHSReg());
7936-
auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
7937-
7938-
auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
79397936
auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
79407937
Cmp->getRHSReg());
7941-
MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
7938+
7939+
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
7940+
const DataLayout &DL = MIRBuilder.getDataLayout();
7941+
if (TLI.shouldExpandCmpUsingSelects(
7942+
getApproximateEVTForLLT(SrcTy, DL, Ctx))) {
7943+
auto One = MIRBuilder.buildConstant(DstTy, 1);
7944+
auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
7945+
7946+
auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
7947+
MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
7948+
} else {
7949+
if (TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false))
7950+
std::swap(IsGT, IsLT);
7951+
unsigned BoolExtOp =
7952+
MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
7953+
IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
7954+
IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
7955+
MIRBuilder.buildSub(Dst, IsGT, IsLT);
7956+
}
79427957

79437958
MI.eraseFromParent();
79447959
return Legalized;

llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir

Lines changed: 12 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ body: |
88
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
99
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
1010
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
11+
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
1112
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
1213
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1314
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
14-
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
1515
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
1616
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
1717
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -31,10 +31,10 @@ body: |
3131
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
3232
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
3333
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
34+
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
3435
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
3536
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3637
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
37-
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
3838
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
3939
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
4040
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -61,42 +61,13 @@ body: |
6161
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2
6262
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3
6363
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
64-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
65-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
6664
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
67-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
68-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
69-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
70-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
71-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
72-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
73-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]]
74-
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
75-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
76-
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
77-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
78-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]]
79-
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
80-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
81-
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
82-
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]]
83-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
84-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
8565
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
86-
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
87-
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
88-
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
89-
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
90-
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]]
91-
; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
92-
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>)
93-
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>)
94-
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
95-
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]]
96-
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]]
97-
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]]
98-
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>)
99-
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT5]], 2
66+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
67+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
68+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[TRUNC]], [[TRUNC1]]
69+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[SUB]](<4 x s16>)
70+
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT]], 2
10071
; CHECK-NEXT: $q0 = COPY [[SEXT_INREG]](<4 x s32>)
10172
%0:_(s32) = COPY $w0
10273
%1:_(s32) = COPY $w1
@@ -125,15 +96,15 @@ body: |
12596
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
12697
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
12798
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
128-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
129-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
130-
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
13199
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[DEF]]
132100
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
133101
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
134-
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
102+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
103+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
104+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
105+
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
135106
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
136-
; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT2]](s32), [[C2]], [[SELECT1]]
107+
; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT1]](s32), [[C2]], [[SELECT2]]
137108
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT3]], 2
138109
; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32)
139110
%0:_(s64) = COPY $x0

0 commit comments

Comments
 (0)