Skip to content

Commit 600f2e1

Browse files
committed
[X86] Remove SETB_C8r/SETB_C16r pseudo instructions. Use SETB_C32r and EXTRACT_SUBREG instead.
Only 32 and 64 bit SBB are dependency breaking instructions on some CPUs. The 8 and 16 bit forms have to preserve upper bits of the GPR. This patch removes the smaller forms and selects the wider form instead. I had to do this with custom code as the tblgen generated code glued the eflags copytoreg to the extract_subreg instead of to the SETB pseudo. Longer term I think we can remove X86ISD::SETCC_CARRY and use (X86ISD::SBB zero, zero). We'll want to keep the pseudo and select (X86ISD::SBB zero, zero) to either a MOV32r0+SBB for targets where there is no dependency break and SETB_C32/SETB_C64 for targets that have a dependency break. May want some way to avoid the MOV32r0 if the instruction that produced the carry flag happened to def a register that we can use for the dependency. I think the flag copy lowering should be using NEG instead of SUB to handle SETB. That would avoid the MOV32r0 there. Or maybe it should use an ADC with -1 to recreate the carry flag and keep the SETB? That would avoid a MOVZX on the input of the SUB. Differential Revision: https://reviews.llvm.org/D74024
1 parent 25de3f9 commit 600f2e1

File tree

8 files changed

+67
-71
lines changed

8 files changed

+67
-71
lines changed

llvm/lib/Target/X86/X86FlagsCopyLowering.cpp

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -639,8 +639,6 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
639639
FlagsKilled = true;
640640

641641
switch (MI.getOpcode()) {
642-
case X86::SETB_C8r:
643-
case X86::SETB_C16r:
644642
case X86::SETB_C32r:
645643
case X86::SETB_C64r:
646644
// Use custom lowering for arithmetic that is merely extending the
@@ -1057,24 +1055,9 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
10571055

10581056
unsigned Sub;
10591057
switch (SetBI.getOpcode()) {
1060-
case X86::SETB_C8r:
1061-
Sub = X86::SUB8rr;
1062-
break;
1063-
1064-
case X86::SETB_C16r:
1065-
Sub = X86::SUB16rr;
1066-
break;
1067-
1068-
case X86::SETB_C32r:
1069-
Sub = X86::SUB32rr;
1070-
break;
1071-
1072-
case X86::SETB_C64r:
1073-
Sub = X86::SUB64rr;
1074-
break;
1075-
1076-
default:
1077-
llvm_unreachable("Invalid SETB_C* opcode!");
1058+
default: llvm_unreachable("Invalid SETB_C* opcode!");
1059+
case X86::SETB_C32r: Sub = X86::SUB32rr; break;
1060+
case X86::SETB_C64r: Sub = X86::SUB64rr; break;
10781061
}
10791062
Register ResultReg = MRI->createVirtualRegister(&SetBRC);
10801063
BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5270,6 +5270,35 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
52705270
if (foldLoadStoreIntoMemOperand(Node))
52715271
return;
52725272
break;
5273+
5274+
case X86ISD::SETCC_CARRY: {
5275+
// We have to do this manually because tblgen will put the eflags copy in
5276+
// the wrong place if we use an extract_subreg in the pattern.
5277+
MVT VT = Node->getSimpleValueType(0);
5278+
SDValue Chain = CurDAG->getEntryNode();
5279+
5280+
// Copy flags to the EFLAGS register and glue it to next node.
5281+
SDValue EFLAGS = CurDAG->getCopyToReg(Chain, dl, X86::EFLAGS,
5282+
Node->getOperand(1), SDValue());
5283+
Chain = EFLAGS;
5284+
5285+
// Create a 64-bit instruction if the result is 64-bits otherwise use the
5286+
// 32-bit version.
5287+
unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
5288+
MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
5289+
SDValue Result = SDValue(
5290+
CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
5291+
5292+
// For less than 32-bits we need to extract from the 32-bit node.
5293+
if (VT == MVT::i8 || VT == MVT::i16) {
5294+
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
5295+
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
5296+
}
5297+
5298+
ReplaceUses(SDValue(Node, 0), Result);
5299+
CurDAG->RemoveDeadNode(Node);
5300+
return;
5301+
}
52735302
}
52745303

52755304
SelectCode(Node);

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -308,18 +308,13 @@ def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
308308
def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;
309309

310310
// Use sbb to materialize carry bit.
311-
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
311+
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteADC],
312+
hasSideEffects = 0 in {
312313
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
313314
// However, Pat<> can't replicate the destination reg into the inputs of the
314315
// result.
315-
def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
316-
[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
317-
def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
318-
[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
319-
def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
320-
[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
321-
def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
322-
[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
316+
def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", []>;
317+
def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", []>;
323318
} // isCodeGenOnly
324319

325320

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4085,10 +4085,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
40854085
case X86::MOV32ImmSExti8:
40864086
case X86::MOV64ImmSExti8:
40874087
return ExpandMOVImmSExti8(MIB, *this, Subtarget);
4088-
case X86::SETB_C8r:
4089-
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
4090-
case X86::SETB_C16r:
4091-
return Expand2AddrUndef(MIB, get(X86::SBB16rr));
40924088
case X86::SETB_C32r:
40934089
return Expand2AddrUndef(MIB, get(X86::SBB32rr));
40944090
case X86::SETB_C64r:

llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define i32 @main() nounwind {
1212
; CHECK-NEXT: pushq %rax
1313
; CHECK-NEXT: xorl %eax, %eax
1414
; CHECK-NEXT: cmpq {{.*}}(%rip), %rax
15-
; CHECK-NEXT: sbbb %al, %al
15+
; CHECK-NEXT: sbbl %eax, %eax
1616
; CHECK-NEXT: testb $-106, %al
1717
; CHECK-NEXT: jle .LBB0_1
1818
; CHECK-NEXT: # %bb.2: # %if.then

llvm/test/CodeGen/X86/flags-copy-lowering.mir

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -541,37 +541,17 @@ body: |
541541
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
542542
543543
$eflags = COPY %3
544-
%4:gr8 = SETB_C8r implicit-def $eflags, implicit $eflags
545-
MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
546-
; CHECK-NOT: $eflags =
547-
; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
548-
; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = COPY %[[ZERO]].sub_8bit
549-
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
550-
; CHECK-NEXT: MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
551-
552-
$eflags = COPY %3
553-
%5:gr16 = SETB_C16r implicit-def $eflags, implicit $eflags
554-
MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
555-
; CHECK-NOT: $eflags =
556-
; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
557-
; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = COPY %[[CF_EXT]].sub_16bit
558-
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
559-
; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = COPY %[[ZERO]].sub_16bit
560-
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
561-
; CHECK-NEXT: MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
562-
563-
$eflags = COPY %3
564-
%6:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
565-
MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %6
544+
%4:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
545+
MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %4
566546
; CHECK-NOT: $eflags =
567547
; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
568548
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
569549
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
570550
; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
571551
572552
$eflags = COPY %3
573-
%7:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
574-
MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
553+
%5:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
554+
MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
575555
; CHECK-NOT: $eflags =
576556
; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
577557
; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit

llvm/test/CodeGen/X86/sbb.ll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ define i8 @i8_select_0_or_neg1(i8 %x) {
99
; CHECK-LABEL: i8_select_0_or_neg1:
1010
; CHECK: # %bb.0:
1111
; CHECK-NEXT: negb %dil
12-
; CHECK-NEXT: sbbb %al, %al
12+
; CHECK-NEXT: sbbl %eax, %eax
13+
; CHECK-NEXT: # kill: def $al killed $al killed $eax
1314
; CHECK-NEXT: retq
1415
%cmp = icmp eq i8 %x, 0
1516
%sel = select i1 %cmp, i8 0, i8 -1
@@ -22,7 +23,8 @@ define i16 @i16_select_0_or_neg1_as_math(i16 %x) {
2223
; CHECK-LABEL: i16_select_0_or_neg1_as_math:
2324
; CHECK: # %bb.0:
2425
; CHECK-NEXT: negw %di
25-
; CHECK-NEXT: sbbw %ax, %ax
26+
; CHECK-NEXT: sbbl %eax, %eax
27+
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
2628
; CHECK-NEXT: retq
2729
%cmp = icmp eq i16 %x, 0
2830
%ext = zext i1 %cmp to i16
@@ -90,7 +92,8 @@ define i16 @i16_select_neg1_or_0_commuted(i16 %x) {
9092
; CHECK-LABEL: i16_select_neg1_or_0_commuted:
9193
; CHECK: # %bb.0:
9294
; CHECK-NEXT: cmpw $1, %di
93-
; CHECK-NEXT: sbbw %ax, %ax
95+
; CHECK-NEXT: sbbl %eax, %eax
96+
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
9497
; CHECK-NEXT: retq
9598
%cmp = icmp ne i16 %x, 0
9699
%sel = select i1 %cmp, i16 0, i16 -1
@@ -103,7 +106,8 @@ define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
103106
; CHECK-LABEL: i8_select_neg1_or_0_commuted_as_math:
104107
; CHECK: # %bb.0:
105108
; CHECK-NEXT: cmpb $1, %dil
106-
; CHECK-NEXT: sbbb %al, %al
109+
; CHECK-NEXT: sbbl %eax, %eax
110+
; CHECK-NEXT: # kill: def $al killed $al killed $eax
107111
; CHECK-NEXT: retq
108112
%cmp = icmp ne i8 %x, 0
109113
%ext = zext i1 %cmp to i8
@@ -205,7 +209,8 @@ define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind {
205209
; CHECK-LABEL: ult_select_neg1_or_0_sub:
206210
; CHECK: # %bb.0:
207211
; CHECK-NEXT: cmpw %di, %si
208-
; CHECK-NEXT: sbbw %ax, %ax
212+
; CHECK-NEXT: sbbl %eax, %eax
213+
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
209214
; CHECK-NEXT: retq
210215
%cmp = icmp ult i16 %y, %x
211216
%zext = zext i1 %cmp to i16

llvm/test/CodeGen/X86/vector-compare-any_of.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,8 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
666666
; SSE-NEXT: packsswb %xmm1, %xmm0
667667
; SSE-NEXT: pmovmskb %xmm0, %eax
668668
; SSE-NEXT: negl %eax
669-
; SSE-NEXT: sbbw %ax, %ax
669+
; SSE-NEXT: sbbl %eax, %eax
670+
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
670671
; SSE-NEXT: retq
671672
;
672673
; AVX1-LABEL: test_v16i16_legal_sext:
@@ -678,7 +679,8 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
678679
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
679680
; AVX1-NEXT: vpmovmskb %xmm0, %eax
680681
; AVX1-NEXT: negl %eax
681-
; AVX1-NEXT: sbbw %ax, %ax
682+
; AVX1-NEXT: sbbl %eax, %eax
683+
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
682684
; AVX1-NEXT: vzeroupper
683685
; AVX1-NEXT: retq
684686
;
@@ -689,7 +691,8 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
689691
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
690692
; AVX2-NEXT: vpmovmskb %xmm0, %eax
691693
; AVX2-NEXT: negl %eax
692-
; AVX2-NEXT: sbbw %ax, %ax
694+
; AVX2-NEXT: sbbl %eax, %eax
695+
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
693696
; AVX2-NEXT: vzeroupper
694697
; AVX2-NEXT: retq
695698
;
@@ -731,15 +734,17 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
731734
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
732735
; SSE-NEXT: pmovmskb %xmm0, %eax
733736
; SSE-NEXT: negl %eax
734-
; SSE-NEXT: sbbb %al, %al
737+
; SSE-NEXT: sbbl %eax, %eax
738+
; SSE-NEXT: # kill: def $al killed $al killed $eax
735739
; SSE-NEXT: retq
736740
;
737741
; AVX-LABEL: test_v16i8_sext:
738742
; AVX: # %bb.0:
739743
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
740744
; AVX-NEXT: vpmovmskb %xmm0, %eax
741745
; AVX-NEXT: negl %eax
742-
; AVX-NEXT: sbbb %al, %al
746+
; AVX-NEXT: sbbl %eax, %eax
747+
; AVX-NEXT: # kill: def $al killed $al killed $eax
743748
; AVX-NEXT: retq
744749
;
745750
; AVX512-LABEL: test_v16i8_sext:
@@ -778,7 +783,8 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
778783
; SSE-NEXT: por %xmm1, %xmm0
779784
; SSE-NEXT: pmovmskb %xmm0, %eax
780785
; SSE-NEXT: negl %eax
781-
; SSE-NEXT: sbbb %al, %al
786+
; SSE-NEXT: sbbl %eax, %eax
787+
; SSE-NEXT: # kill: def $al killed $al killed $eax
782788
; SSE-NEXT: retq
783789
;
784790
; AVX1-LABEL: test_v32i8_sext:
@@ -790,7 +796,8 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
790796
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
791797
; AVX1-NEXT: vpmovmskb %xmm0, %eax
792798
; AVX1-NEXT: negl %eax
793-
; AVX1-NEXT: sbbb %al, %al
799+
; AVX1-NEXT: sbbl %eax, %eax
800+
; AVX1-NEXT: # kill: def $al killed $al killed $eax
794801
; AVX1-NEXT: vzeroupper
795802
; AVX1-NEXT: retq
796803
;
@@ -799,7 +806,8 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
799806
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
800807
; AVX2-NEXT: vpmovmskb %ymm0, %eax
801808
; AVX2-NEXT: negl %eax
802-
; AVX2-NEXT: sbbb %al, %al
809+
; AVX2-NEXT: sbbl %eax, %eax
810+
; AVX2-NEXT: # kill: def $al killed $al killed $eax
803811
; AVX2-NEXT: vzeroupper
804812
; AVX2-NEXT: retq
805813
;

0 commit comments

Comments
 (0)