Skip to content

Commit 7c44b48

Browse files
committed
[X86] Add basic test coverage for Issue #58630
If we have sufficient sign bits, we should be able to expand the IMINMAX node using only the lower half of the value, and then sign-extend the result into the upper half.
1 parent 5a72df2 commit 7c44b48

File tree

4 files changed

+644
-0
lines changed

4 files changed

+644
-0
lines changed

llvm/test/CodeGen/X86/smax.ll

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,3 +654,164 @@ define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
654654
%r = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
655655
ret <16 x i8> %r
656656
}
657+
658+
define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
659+
; X64-LABEL: test_signbits_i16:
660+
; X64: # %bb.0:
661+
; X64-NEXT: movswl %si, %eax
662+
; X64-NEXT: movswl %di, %ecx
663+
; X64-NEXT: sarl $15, %ecx
664+
; X64-NEXT: sarl $8, %eax
665+
; X64-NEXT: cmpw %ax, %cx
666+
; X64-NEXT: cmovgl %ecx, %eax
667+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
668+
; X64-NEXT: retq
669+
;
670+
; X86-LABEL: test_signbits_i16:
671+
; X86: # %bb.0:
672+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
673+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
674+
; X86-NEXT: sarl $15, %eax
675+
; X86-NEXT: cmpw %cx, %ax
676+
; X86-NEXT: cmovlel %ecx, %eax
677+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
678+
; X86-NEXT: retl
679+
%ax = ashr i16 %a, 15
680+
%bx = ashr i16 %b, 8
681+
%r = call i16 @llvm.smax.i16(i16 %ax, i16 %bx)
682+
ret i16 %r
683+
}
684+
685+
define i32 @test_signbits_i32(i32 %a, i32 %b) nounwind {
686+
; X64-LABEL: test_signbits_i32:
687+
; X64: # %bb.0:
688+
; X64-NEXT: movl %esi, %eax
689+
; X64-NEXT: sarl $16, %edi
690+
; X64-NEXT: sarl $17, %eax
691+
; X64-NEXT: cmpl %eax, %edi
692+
; X64-NEXT: cmovgl %edi, %eax
693+
; X64-NEXT: retq
694+
;
695+
; X86-LABEL: test_signbits_i32:
696+
; X86: # %bb.0:
697+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
698+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
699+
; X86-NEXT: sarl $17, %eax
700+
; X86-NEXT: cmpl %eax, %ecx
701+
; X86-NEXT: cmovgl %ecx, %eax
702+
; X86-NEXT: retl
703+
%ax = ashr i32 %a, 16
704+
%bx = ashr i32 %b, 17
705+
%r = call i32 @llvm.smax.i32(i32 %ax, i32 %bx)
706+
ret i32 %r
707+
}
708+
709+
define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
710+
; X64-LABEL: test_signbits_i64:
711+
; X64: # %bb.0:
712+
; X64-NEXT: movq %rsi, %rax
713+
; X64-NEXT: sarq $32, %rdi
714+
; X64-NEXT: sarq $32, %rax
715+
; X64-NEXT: cmpq %rax, %rdi
716+
; X64-NEXT: cmovgq %rdi, %rax
717+
; X64-NEXT: retq
718+
;
719+
; X86-LABEL: test_signbits_i64:
720+
; X86: # %bb.0:
721+
; X86-NEXT: pushl %edi
722+
; X86-NEXT: pushl %esi
723+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
724+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
725+
; X86-NEXT: movl %ecx, %esi
726+
; X86-NEXT: sarl $31, %esi
727+
; X86-NEXT: movl %eax, %edx
728+
; X86-NEXT: sarl $31, %edx
729+
; X86-NEXT: cmpl %eax, %ecx
730+
; X86-NEXT: movl %eax, %edi
731+
; X86-NEXT: cmoval %ecx, %edi
732+
; X86-NEXT: cmpl %edx, %esi
733+
; X86-NEXT: cmovgl %ecx, %eax
734+
; X86-NEXT: cmovel %edi, %eax
735+
; X86-NEXT: cmovgl %esi, %edx
736+
; X86-NEXT: popl %esi
737+
; X86-NEXT: popl %edi
738+
; X86-NEXT: retl
739+
%ax = ashr i64 %a, 32
740+
%bx = ashr i64 %b, 32
741+
%r = call i64 @llvm.smax.i64(i64 %ax, i64 %bx)
742+
ret i64 %r
743+
}
744+
745+
define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
746+
; X64-LABEL: test_signbits_i128:
747+
; X64: # %bb.0:
748+
; X64-NEXT: movq %rsi, %rdi
749+
; X64-NEXT: sarq $63, %rdi
750+
; X64-NEXT: movq %rcx, %rdx
751+
; X64-NEXT: sarq $63, %rdx
752+
; X64-NEXT: sarq $28, %rcx
753+
; X64-NEXT: cmpq %rcx, %rsi
754+
; X64-NEXT: movq %rcx, %rax
755+
; X64-NEXT: cmovaq %rsi, %rax
756+
; X64-NEXT: cmpq %rdx, %rdi
757+
; X64-NEXT: cmovgq %rsi, %rcx
758+
; X64-NEXT: cmovneq %rcx, %rax
759+
; X64-NEXT: cmovgq %rdi, %rdx
760+
; X64-NEXT: retq
761+
;
762+
; X86-LABEL: test_signbits_i128:
763+
; X86: # %bb.0:
764+
; X86-NEXT: pushl %ebp
765+
; X86-NEXT: pushl %ebx
766+
; X86-NEXT: pushl %edi
767+
; X86-NEXT: pushl %esi
768+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
769+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
770+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
771+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
772+
; X86-NEXT: sarl $31, %ebx
773+
; X86-NEXT: movl %eax, %edx
774+
; X86-NEXT: shrdl $28, %eax, %ecx
775+
; X86-NEXT: sarl $31, %eax
776+
; X86-NEXT: sarl $28, %edx
777+
; X86-NEXT: cmpl %ecx, %ebp
778+
; X86-NEXT: movl %ecx, %edi
779+
; X86-NEXT: cmoval %ebp, %edi
780+
; X86-NEXT: movl %ebp, %esi
781+
; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
782+
; X86-NEXT: movl %ecx, %ebp
783+
; X86-NEXT: cmoval %esi, %ebp
784+
; X86-NEXT: cmovel %edi, %ebp
785+
; X86-NEXT: movl %edx, %edi
786+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
787+
; X86-NEXT: cmoval %esi, %edi
788+
; X86-NEXT: cmpl %ebx, %eax
789+
; X86-NEXT: movl %eax, %esi
790+
; X86-NEXT: sbbl %ebx, %esi
791+
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
792+
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
793+
; X86-NEXT: movl %ebx, %esi
794+
; X86-NEXT: xorl %eax, %esi
795+
; X86-NEXT: cmovel %ebp, %ecx
796+
; X86-NEXT: cmovel %edi, %edx
797+
; X86-NEXT: cmpl %eax, %ebx
798+
; X86-NEXT: movl %eax, %edi
799+
; X86-NEXT: cmoval %ebx, %edi
800+
; X86-NEXT: cmovgl %ebx, %eax
801+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
802+
; X86-NEXT: cmovnel %eax, %edi
803+
; X86-NEXT: movl %eax, 12(%esi)
804+
; X86-NEXT: movl %edi, 8(%esi)
805+
; X86-NEXT: movl %edx, 4(%esi)
806+
; X86-NEXT: movl %ecx, (%esi)
807+
; X86-NEXT: movl %esi, %eax
808+
; X86-NEXT: popl %esi
809+
; X86-NEXT: popl %edi
810+
; X86-NEXT: popl %ebx
811+
; X86-NEXT: popl %ebp
812+
; X86-NEXT: retl $4
813+
%ax = ashr i128 %a, 64
814+
%bx = ashr i128 %b, 92
815+
%r = call i128 @llvm.smax.i128(i128 %ax, i128 %bx)
816+
ret i128 %r
817+
}

llvm/test/CodeGen/X86/smin.ll

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,3 +653,168 @@ define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
653653
%r = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
654654
ret <16 x i8> %r
655655
}
656+
657+
define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
658+
; X64-LABEL: test_signbits_i16:
659+
; X64: # %bb.0:
660+
; X64-NEXT: movswl %si, %eax
661+
; X64-NEXT: movswl %di, %ecx
662+
; X64-NEXT: sarl $15, %ecx
663+
; X64-NEXT: sarl $8, %eax
664+
; X64-NEXT: cmpw %ax, %cx
665+
; X64-NEXT: cmovll %ecx, %eax
666+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
667+
; X64-NEXT: retq
668+
;
669+
; X86-LABEL: test_signbits_i16:
670+
; X86: # %bb.0:
671+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
672+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
673+
; X86-NEXT: sarl $15, %eax
674+
; X86-NEXT: cmpw %cx, %ax
675+
; X86-NEXT: cmovgel %ecx, %eax
676+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
677+
; X86-NEXT: retl
678+
%ax = ashr i16 %a, 15
679+
%bx = ashr i16 %b, 8
680+
%r = call i16 @llvm.smin.i16(i16 %ax, i16 %bx)
681+
ret i16 %r
682+
}
683+
684+
define i32 @test_signbits_i32(i32 %a, i32 %b) nounwind {
685+
; X64-LABEL: test_signbits_i32:
686+
; X64: # %bb.0:
687+
; X64-NEXT: movl %esi, %eax
688+
; X64-NEXT: sarl $16, %edi
689+
; X64-NEXT: sarl $17, %eax
690+
; X64-NEXT: cmpl %eax, %edi
691+
; X64-NEXT: cmovll %edi, %eax
692+
; X64-NEXT: retq
693+
;
694+
; X86-LABEL: test_signbits_i32:
695+
; X86: # %bb.0:
696+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
697+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
698+
; X86-NEXT: sarl $17, %eax
699+
; X86-NEXT: cmpl %eax, %ecx
700+
; X86-NEXT: cmovll %ecx, %eax
701+
; X86-NEXT: retl
702+
%ax = ashr i32 %a, 16
703+
%bx = ashr i32 %b, 17
704+
%r = call i32 @llvm.smin.i32(i32 %ax, i32 %bx)
705+
ret i32 %r
706+
}
707+
708+
define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
709+
; X64-LABEL: test_signbits_i64:
710+
; X64: # %bb.0:
711+
; X64-NEXT: movq %rsi, %rax
712+
; X64-NEXT: sarq $32, %rdi
713+
; X64-NEXT: sarq $32, %rax
714+
; X64-NEXT: cmpq %rax, %rdi
715+
; X64-NEXT: cmovlq %rdi, %rax
716+
; X64-NEXT: retq
717+
;
718+
; X86-LABEL: test_signbits_i64:
719+
; X86: # %bb.0:
720+
; X86-NEXT: pushl %edi
721+
; X86-NEXT: pushl %esi
722+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
723+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
724+
; X86-NEXT: movl %ecx, %esi
725+
; X86-NEXT: sarl $31, %esi
726+
; X86-NEXT: movl %eax, %edx
727+
; X86-NEXT: sarl $31, %edx
728+
; X86-NEXT: cmpl %eax, %ecx
729+
; X86-NEXT: movl %eax, %edi
730+
; X86-NEXT: cmovbl %ecx, %edi
731+
; X86-NEXT: cmpl %edx, %esi
732+
; X86-NEXT: cmovll %ecx, %eax
733+
; X86-NEXT: cmovel %edi, %eax
734+
; X86-NEXT: cmovll %esi, %edx
735+
; X86-NEXT: popl %esi
736+
; X86-NEXT: popl %edi
737+
; X86-NEXT: retl
738+
%ax = ashr i64 %a, 32
739+
%bx = ashr i64 %b, 32
740+
%r = call i64 @llvm.smin.i64(i64 %ax, i64 %bx)
741+
ret i64 %r
742+
}
743+
744+
define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
745+
; X64-LABEL: test_signbits_i128:
746+
; X64: # %bb.0:
747+
; X64-NEXT: movq %rsi, %rdi
748+
; X64-NEXT: sarq $63, %rdi
749+
; X64-NEXT: movq %rcx, %rdx
750+
; X64-NEXT: sarq $63, %rdx
751+
; X64-NEXT: sarq $28, %rcx
752+
; X64-NEXT: cmpq %rcx, %rsi
753+
; X64-NEXT: movq %rcx, %rax
754+
; X64-NEXT: cmovbq %rsi, %rax
755+
; X64-NEXT: cmpq %rdx, %rdi
756+
; X64-NEXT: cmovlq %rsi, %rcx
757+
; X64-NEXT: cmovneq %rcx, %rax
758+
; X64-NEXT: cmovlq %rdi, %rdx
759+
; X64-NEXT: retq
760+
;
761+
; X86-LABEL: test_signbits_i128:
762+
; X86: # %bb.0:
763+
; X86-NEXT: pushl %ebp
764+
; X86-NEXT: pushl %ebx
765+
; X86-NEXT: pushl %edi
766+
; X86-NEXT: pushl %esi
767+
; X86-NEXT: subl $8, %esp
768+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
769+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
770+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
771+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
772+
; X86-NEXT: movl %edx, %eax
773+
; X86-NEXT: movl %edx, %ebp
774+
; X86-NEXT: sarl $31, %eax
775+
; X86-NEXT: movl %ebx, %edx
776+
; X86-NEXT: shrdl $28, %ebx, %ecx
777+
; X86-NEXT: sarl $31, %ebx
778+
; X86-NEXT: sarl $28, %edx
779+
; X86-NEXT: cmpl %ecx, %edi
780+
; X86-NEXT: movl %ecx, %esi
781+
; X86-NEXT: cmovbl %edi, %esi
782+
; X86-NEXT: cmpl %edx, %ebp
783+
; X86-NEXT: movl %ecx, %ebp
784+
; X86-NEXT: cmovbl %edi, %ebp
785+
; X86-NEXT: cmovel %esi, %ebp
786+
; X86-NEXT: movl %edx, %esi
787+
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
788+
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
789+
; X86-NEXT: cmpl %ebx, %eax
790+
; X86-NEXT: movl %ebx, %edi
791+
; X86-NEXT: cmovbl %eax, %edi
792+
; X86-NEXT: movl %ebx, %esi
793+
; X86-NEXT: cmovll %eax, %esi
794+
; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
795+
; X86-NEXT: cmovnel %esi, %edi
796+
; X86-NEXT: movl %eax, %esi
797+
; X86-NEXT: sbbl %ebx, %esi
798+
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
799+
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
800+
; X86-NEXT: xorl %eax, %ebx
801+
; X86-NEXT: cmovel %ebp, %ecx
802+
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
803+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
804+
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
805+
; X86-NEXT: movl %eax, 12(%esi)
806+
; X86-NEXT: movl %edi, 8(%esi)
807+
; X86-NEXT: movl %edx, 4(%esi)
808+
; X86-NEXT: movl %ecx, (%esi)
809+
; X86-NEXT: movl %esi, %eax
810+
; X86-NEXT: addl $8, %esp
811+
; X86-NEXT: popl %esi
812+
; X86-NEXT: popl %edi
813+
; X86-NEXT: popl %ebx
814+
; X86-NEXT: popl %ebp
815+
; X86-NEXT: retl $4
816+
%ax = ashr i128 %a, 64
817+
%bx = ashr i128 %b, 92
818+
%r = call i128 @llvm.smin.i128(i128 %ax, i128 %bx)
819+
ret i128 %r
820+
}

0 commit comments

Comments
 (0)