Skip to content

Commit 69d117e

Browse files
committed
[DAG] ExpandIntRes_MINMAX - simplify cases with sufficient number of sign bits
When legalizing a smax/smin/umax/umin op, if we know that the upper half of each operand consists entirely of sign bits, then we can perform the op on the lower halves and then sign-extend the result into the upper half.
Alive2: https://alive2.llvm.org/ce/z/rk8Rfd
Fixes #58630
1 parent 7c44b48 commit 69d117e

File tree

5 files changed

+110
-256
lines changed

5 files changed

+110
-256
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2870,15 +2870,29 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
28702870
ISD::CondCode CondC;
28712871
std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
28722872

2873+
SDValue LHS = N->getOperand(0);
2874+
SDValue RHS = N->getOperand(1);
2875+
28732876
// Expand the subcomponents.
28742877
SDValue LHSL, LHSH, RHSL, RHSH;
2875-
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
2876-
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
2878+
GetExpandedInteger(LHS, LHSL, LHSH);
2879+
GetExpandedInteger(RHS, RHSL, RHSH);
28772880

28782881
// Value types
28792882
EVT NVT = LHSL.getValueType();
28802883
EVT CCT = getSetCCResultType(NVT);
28812884

2885+
// If the upper halves are all sign bits, then we can perform the MINMAX on
2886+
// the lower half and sign-extend the result to the upper half.
2887+
unsigned NumHalfBits = NVT.getScalarSizeInBits();
2888+
if (DAG.ComputeNumSignBits(LHS) > NumHalfBits &&
2889+
DAG.ComputeNumSignBits(RHS) > NumHalfBits) {
2890+
Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL);
2891+
Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo,
2892+
DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL));
2893+
return;
2894+
}
2895+
28822896
// Hi part is always the same op
28832897
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
28842898

llvm/test/CodeGen/X86/smax.ll

Lines changed: 24 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -718,23 +718,12 @@ define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
718718
;
719719
; X86-LABEL: test_signbits_i64:
720720
; X86: # %bb.0:
721-
; X86-NEXT: pushl %edi
722-
; X86-NEXT: pushl %esi
723721
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
724722
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
725-
; X86-NEXT: movl %ecx, %esi
726-
; X86-NEXT: sarl $31, %esi
727-
; X86-NEXT: movl %eax, %edx
728-
; X86-NEXT: sarl $31, %edx
729723
; X86-NEXT: cmpl %eax, %ecx
730-
; X86-NEXT: movl %eax, %edi
731-
; X86-NEXT: cmoval %ecx, %edi
732-
; X86-NEXT: cmpl %edx, %esi
733724
; X86-NEXT: cmovgl %ecx, %eax
734-
; X86-NEXT: cmovel %edi, %eax
735-
; X86-NEXT: cmovgl %esi, %edx
736-
; X86-NEXT: popl %esi
737-
; X86-NEXT: popl %edi
725+
; X86-NEXT: movl %eax, %edx
726+
; X86-NEXT: sarl $31, %edx
738727
; X86-NEXT: retl
739728
%ax = ashr i64 %a, 32
740729
%bx = ashr i64 %b, 32
@@ -745,70 +734,41 @@ define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
745734
define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
746735
; X64-LABEL: test_signbits_i128:
747736
; X64: # %bb.0:
748-
; X64-NEXT: movq %rsi, %rdi
749-
; X64-NEXT: sarq $63, %rdi
750-
; X64-NEXT: movq %rcx, %rdx
751-
; X64-NEXT: sarq $63, %rdx
752-
; X64-NEXT: sarq $28, %rcx
753-
; X64-NEXT: cmpq %rcx, %rsi
754737
; X64-NEXT: movq %rcx, %rax
755-
; X64-NEXT: cmovaq %rsi, %rax
756-
; X64-NEXT: cmpq %rdx, %rdi
757-
; X64-NEXT: cmovgq %rsi, %rcx
758-
; X64-NEXT: cmovneq %rcx, %rax
759-
; X64-NEXT: cmovgq %rdi, %rdx
738+
; X64-NEXT: sarq $28, %rax
739+
; X64-NEXT: cmpq %rax, %rsi
740+
; X64-NEXT: cmovgq %rsi, %rax
741+
; X64-NEXT: movq %rax, %rdx
742+
; X64-NEXT: sarq $63, %rdx
760743
; X64-NEXT: retq
761744
;
762745
; X86-LABEL: test_signbits_i128:
763746
; X86: # %bb.0:
764-
; X86-NEXT: pushl %ebp
765747
; X86-NEXT: pushl %ebx
766748
; X86-NEXT: pushl %edi
767749
; X86-NEXT: pushl %esi
768-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
769750
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
770-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
771-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
772-
; X86-NEXT: sarl $31, %ebx
773-
; X86-NEXT: movl %eax, %edx
774-
; X86-NEXT: shrdl $28, %eax, %ecx
775-
; X86-NEXT: sarl $31, %eax
776-
; X86-NEXT: sarl $28, %edx
777-
; X86-NEXT: cmpl %ecx, %ebp
778-
; X86-NEXT: movl %ecx, %edi
779-
; X86-NEXT: cmoval %ebp, %edi
780-
; X86-NEXT: movl %ebp, %esi
781-
; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
782-
; X86-NEXT: movl %ecx, %ebp
783-
; X86-NEXT: cmoval %esi, %ebp
784-
; X86-NEXT: cmovel %edi, %ebp
785-
; X86-NEXT: movl %edx, %edi
786-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
787-
; X86-NEXT: cmoval %esi, %edi
788-
; X86-NEXT: cmpl %ebx, %eax
789-
; X86-NEXT: movl %eax, %esi
790-
; X86-NEXT: sbbl %ebx, %esi
791-
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
792-
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
793-
; X86-NEXT: movl %ebx, %esi
794-
; X86-NEXT: xorl %eax, %esi
795-
; X86-NEXT: cmovel %ebp, %ecx
796-
; X86-NEXT: cmovel %edi, %edx
797-
; X86-NEXT: cmpl %eax, %ebx
798-
; X86-NEXT: movl %eax, %edi
799-
; X86-NEXT: cmoval %ebx, %edi
800-
; X86-NEXT: cmovgl %ebx, %eax
801751
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
802-
; X86-NEXT: cmovnel %eax, %edi
803-
; X86-NEXT: movl %eax, 12(%esi)
804-
; X86-NEXT: movl %edi, 8(%esi)
805-
; X86-NEXT: movl %edx, 4(%esi)
806-
; X86-NEXT: movl %ecx, (%esi)
807-
; X86-NEXT: movl %esi, %eax
752+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
753+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
754+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
755+
; X86-NEXT: shrdl $28, %edi, %ecx
756+
; X86-NEXT: sarl $28, %edi
757+
; X86-NEXT: cmpl %ecx, %esi
758+
; X86-NEXT: movl %ecx, %ebx
759+
; X86-NEXT: cmoval %esi, %ebx
760+
; X86-NEXT: cmpl %edi, %edx
761+
; X86-NEXT: cmovgl %esi, %ecx
762+
; X86-NEXT: cmovel %ebx, %ecx
763+
; X86-NEXT: cmovgl %edx, %edi
764+
; X86-NEXT: movl %edi, 4(%eax)
765+
; X86-NEXT: sarl $31, %edi
766+
; X86-NEXT: movl %edi, 12(%eax)
767+
; X86-NEXT: movl %edi, 8(%eax)
768+
; X86-NEXT: movl %ecx, (%eax)
808769
; X86-NEXT: popl %esi
809770
; X86-NEXT: popl %edi
810771
; X86-NEXT: popl %ebx
811-
; X86-NEXT: popl %ebp
812772
; X86-NEXT: retl $4
813773
%ax = ashr i128 %a, 64
814774
%bx = ashr i128 %b, 92

llvm/test/CodeGen/X86/smin.ll

Lines changed: 24 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -717,23 +717,12 @@ define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
717717
;
718718
; X86-LABEL: test_signbits_i64:
719719
; X86: # %bb.0:
720-
; X86-NEXT: pushl %edi
721-
; X86-NEXT: pushl %esi
722720
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
723721
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
724-
; X86-NEXT: movl %ecx, %esi
725-
; X86-NEXT: sarl $31, %esi
726-
; X86-NEXT: movl %eax, %edx
727-
; X86-NEXT: sarl $31, %edx
728722
; X86-NEXT: cmpl %eax, %ecx
729-
; X86-NEXT: movl %eax, %edi
730-
; X86-NEXT: cmovbl %ecx, %edi
731-
; X86-NEXT: cmpl %edx, %esi
732723
; X86-NEXT: cmovll %ecx, %eax
733-
; X86-NEXT: cmovel %edi, %eax
734-
; X86-NEXT: cmovll %esi, %edx
735-
; X86-NEXT: popl %esi
736-
; X86-NEXT: popl %edi
724+
; X86-NEXT: movl %eax, %edx
725+
; X86-NEXT: sarl $31, %edx
737726
; X86-NEXT: retl
738727
%ax = ashr i64 %a, 32
739728
%bx = ashr i64 %b, 32
@@ -744,74 +733,41 @@ define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
744733
define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
745734
; X64-LABEL: test_signbits_i128:
746735
; X64: # %bb.0:
747-
; X64-NEXT: movq %rsi, %rdi
748-
; X64-NEXT: sarq $63, %rdi
749-
; X64-NEXT: movq %rcx, %rdx
750-
; X64-NEXT: sarq $63, %rdx
751-
; X64-NEXT: sarq $28, %rcx
752-
; X64-NEXT: cmpq %rcx, %rsi
753736
; X64-NEXT: movq %rcx, %rax
754-
; X64-NEXT: cmovbq %rsi, %rax
755-
; X64-NEXT: cmpq %rdx, %rdi
756-
; X64-NEXT: cmovlq %rsi, %rcx
757-
; X64-NEXT: cmovneq %rcx, %rax
758-
; X64-NEXT: cmovlq %rdi, %rdx
737+
; X64-NEXT: sarq $28, %rax
738+
; X64-NEXT: cmpq %rax, %rsi
739+
; X64-NEXT: cmovlq %rsi, %rax
740+
; X64-NEXT: movq %rax, %rdx
741+
; X64-NEXT: sarq $63, %rdx
759742
; X64-NEXT: retq
760743
;
761744
; X86-LABEL: test_signbits_i128:
762745
; X86: # %bb.0:
763-
; X86-NEXT: pushl %ebp
764746
; X86-NEXT: pushl %ebx
765747
; X86-NEXT: pushl %edi
766748
; X86-NEXT: pushl %esi
767-
; X86-NEXT: subl $8, %esp
749+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
750+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
751+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
768752
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
769-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
770753
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
771-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
772-
; X86-NEXT: movl %edx, %eax
773-
; X86-NEXT: movl %edx, %ebp
774-
; X86-NEXT: sarl $31, %eax
775-
; X86-NEXT: movl %ebx, %edx
776-
; X86-NEXT: shrdl $28, %ebx, %ecx
777-
; X86-NEXT: sarl $31, %ebx
778-
; X86-NEXT: sarl $28, %edx
779-
; X86-NEXT: cmpl %ecx, %edi
780-
; X86-NEXT: movl %ecx, %esi
781-
; X86-NEXT: cmovbl %edi, %esi
782-
; X86-NEXT: cmpl %edx, %ebp
783-
; X86-NEXT: movl %ecx, %ebp
784-
; X86-NEXT: cmovbl %edi, %ebp
785-
; X86-NEXT: cmovel %esi, %ebp
786-
; X86-NEXT: movl %edx, %esi
787-
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
788-
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
789-
; X86-NEXT: cmpl %ebx, %eax
790-
; X86-NEXT: movl %ebx, %edi
791-
; X86-NEXT: cmovbl %eax, %edi
792-
; X86-NEXT: movl %ebx, %esi
793-
; X86-NEXT: cmovll %eax, %esi
794-
; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
795-
; X86-NEXT: cmovnel %esi, %edi
796-
; X86-NEXT: movl %eax, %esi
797-
; X86-NEXT: sbbl %ebx, %esi
798-
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
799-
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
800-
; X86-NEXT: xorl %eax, %ebx
801-
; X86-NEXT: cmovel %ebp, %ecx
802-
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
803-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
804-
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
805-
; X86-NEXT: movl %eax, 12(%esi)
806-
; X86-NEXT: movl %edi, 8(%esi)
807-
; X86-NEXT: movl %edx, 4(%esi)
808-
; X86-NEXT: movl %ecx, (%esi)
809-
; X86-NEXT: movl %esi, %eax
810-
; X86-NEXT: addl $8, %esp
754+
; X86-NEXT: shrdl $28, %edi, %ecx
755+
; X86-NEXT: sarl $28, %edi
756+
; X86-NEXT: cmpl %ecx, %esi
757+
; X86-NEXT: movl %ecx, %ebx
758+
; X86-NEXT: cmovbl %esi, %ebx
759+
; X86-NEXT: cmpl %edi, %edx
760+
; X86-NEXT: cmovll %esi, %ecx
761+
; X86-NEXT: cmovel %ebx, %ecx
762+
; X86-NEXT: cmovll %edx, %edi
763+
; X86-NEXT: movl %edi, 4(%eax)
764+
; X86-NEXT: sarl $31, %edi
765+
; X86-NEXT: movl %edi, 12(%eax)
766+
; X86-NEXT: movl %edi, 8(%eax)
767+
; X86-NEXT: movl %ecx, (%eax)
811768
; X86-NEXT: popl %esi
812769
; X86-NEXT: popl %edi
813770
; X86-NEXT: popl %ebx
814-
; X86-NEXT: popl %ebp
815771
; X86-NEXT: retl $4
816772
%ax = ashr i128 %a, 64
817773
%bx = ashr i128 %b, 92

llvm/test/CodeGen/X86/umax.ll

Lines changed: 23 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -726,23 +726,12 @@ define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
726726
;
727727
; X86-LABEL: test_signbits_i64:
728728
; X86: # %bb.0:
729-
; X86-NEXT: pushl %edi
730-
; X86-NEXT: pushl %esi
731729
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
732730
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
733-
; X86-NEXT: movl %ecx, %esi
734-
; X86-NEXT: sarl $31, %esi
735-
; X86-NEXT: movl %eax, %edx
736-
; X86-NEXT: sarl $31, %edx
737731
; X86-NEXT: cmpl %eax, %ecx
738-
; X86-NEXT: movl %eax, %edi
739-
; X86-NEXT: cmoval %ecx, %edi
740-
; X86-NEXT: cmpl %edx, %esi
741732
; X86-NEXT: cmoval %ecx, %eax
742-
; X86-NEXT: cmovel %edi, %eax
743-
; X86-NEXT: cmoval %esi, %edx
744-
; X86-NEXT: popl %esi
745-
; X86-NEXT: popl %edi
733+
; X86-NEXT: movl %eax, %edx
734+
; X86-NEXT: sarl $31, %edx
746735
; X86-NEXT: retl
747736
%ax = ashr i64 %a, 32
748737
%bx = ashr i64 %b, 32
@@ -753,67 +742,41 @@ define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
753742
define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
754743
; X64-LABEL: test_signbits_i128:
755744
; X64: # %bb.0:
756-
; X64-NEXT: movq %rsi, %rdi
757-
; X64-NEXT: sarq $63, %rdi
758-
; X64-NEXT: movq %rcx, %rdx
759-
; X64-NEXT: sarq $63, %rdx
760-
; X64-NEXT: sarq $28, %rcx
761-
; X64-NEXT: cmpq %rcx, %rsi
762745
; X64-NEXT: movq %rcx, %rax
746+
; X64-NEXT: sarq $28, %rax
747+
; X64-NEXT: cmpq %rax, %rsi
763748
; X64-NEXT: cmovaq %rsi, %rax
764-
; X64-NEXT: cmpq %rdx, %rdi
765-
; X64-NEXT: cmovaq %rsi, %rcx
766-
; X64-NEXT: cmovneq %rcx, %rax
767-
; X64-NEXT: cmovaq %rdi, %rdx
749+
; X64-NEXT: movq %rax, %rdx
750+
; X64-NEXT: sarq $63, %rdx
768751
; X64-NEXT: retq
769752
;
770753
; X86-LABEL: test_signbits_i128:
771754
; X86: # %bb.0:
772-
; X86-NEXT: pushl %ebp
773755
; X86-NEXT: pushl %ebx
774756
; X86-NEXT: pushl %edi
775757
; X86-NEXT: pushl %esi
776-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
777758
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
778-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
779-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
780-
; X86-NEXT: sarl $31, %ebx
781-
; X86-NEXT: movl %eax, %edx
782-
; X86-NEXT: shrdl $28, %eax, %ecx
783-
; X86-NEXT: sarl $31, %eax
784-
; X86-NEXT: sarl $28, %edx
785-
; X86-NEXT: cmpl %ecx, %ebp
786-
; X86-NEXT: movl %ecx, %edi
787-
; X86-NEXT: cmoval %ebp, %edi
788-
; X86-NEXT: movl %ebp, %esi
789-
; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
790-
; X86-NEXT: movl %ecx, %ebp
791-
; X86-NEXT: cmoval %esi, %ebp
792-
; X86-NEXT: cmovel %edi, %ebp
793-
; X86-NEXT: movl %edx, %edi
794759
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
795-
; X86-NEXT: cmoval %esi, %edi
796-
; X86-NEXT: cmpl %ebx, %eax
797-
; X86-NEXT: movl %eax, %esi
798-
; X86-NEXT: sbbl %ebx, %esi
799-
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx
800-
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
801-
; X86-NEXT: movl %ebx, %esi
802-
; X86-NEXT: xorl %eax, %esi
803-
; X86-NEXT: cmovel %ebp, %ecx
804-
; X86-NEXT: cmovel %edi, %edx
805-
; X86-NEXT: cmpl %eax, %ebx
806-
; X86-NEXT: cmoval %ebx, %eax
807-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
808-
; X86-NEXT: movl %eax, 12(%esi)
809-
; X86-NEXT: movl %eax, 8(%esi)
810-
; X86-NEXT: movl %edx, 4(%esi)
811-
; X86-NEXT: movl %ecx, (%esi)
812-
; X86-NEXT: movl %esi, %eax
760+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
761+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
762+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
763+
; X86-NEXT: shrdl $28, %edi, %ecx
764+
; X86-NEXT: sarl $28, %edi
765+
; X86-NEXT: cmpl %ecx, %esi
766+
; X86-NEXT: movl %ecx, %ebx
767+
; X86-NEXT: cmoval %esi, %ebx
768+
; X86-NEXT: cmpl %edi, %edx
769+
; X86-NEXT: cmoval %esi, %ecx
770+
; X86-NEXT: cmovel %ebx, %ecx
771+
; X86-NEXT: cmoval %edx, %edi
772+
; X86-NEXT: movl %edi, 4(%eax)
773+
; X86-NEXT: sarl $31, %edi
774+
; X86-NEXT: movl %edi, 12(%eax)
775+
; X86-NEXT: movl %edi, 8(%eax)
776+
; X86-NEXT: movl %ecx, (%eax)
813777
; X86-NEXT: popl %esi
814778
; X86-NEXT: popl %edi
815779
; X86-NEXT: popl %ebx
816-
; X86-NEXT: popl %ebp
817780
; X86-NEXT: retl $4
818781
%ax = ashr i128 %a, 64
819782
%bx = ashr i128 %b, 92

0 commit comments

Comments
 (0)