From 3572fd8f59cfcc18cd57da2fe8ca009078bd8fe9 Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Sun, 30 Mar 2025 07:45:47 +0200 Subject: [PATCH 1/2] [X86] Add Tests for Distributing AND/XOR over SELECT; NFC --- llvm/test/CodeGen/X86/bmi-select-distrib.ll | 1027 +++++++++++++++++++ 1 file changed, 1027 insertions(+) create mode 100644 llvm/test/CodeGen/X86/bmi-select-distrib.ll diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll new file mode 100644 index 0000000000000..b5a8e7c457be3 --- /dev/null +++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll @@ -0,0 +1,1027 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+sse2,+bmi | FileCheck %s --check-prefixes=X86,X86-BMI +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+sse2,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-BMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-BMI2 + +define i32 @and_select_neg_to_blsi1(i1 %a0, i32 inreg %a1) nounwind { +; PR131587 +; X86-LABEL: and_select_neg_to_blsi1: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_to_blsi1: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 0, %a1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +define i32 @and_select_neg_to_blsi2(i1 %a0, i32 inreg %a1) nounwind { +; PR131587 +; X86-LABEL: and_select_neg_to_blsi2: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_to_blsi2: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 0, %a1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %bls, %a1 + ret i32 %ret +} + +define i32 @and_select_neg_to_blsi3(i1 %a0, i32 inreg %a1) nounwind { +; PR131587 +; X86-LABEL: and_select_neg_to_blsi3: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: negb %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_to_blsi3: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: xorl %eax, 
%eax +; X64-NEXT: negb %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 0, %a1 + %bls = select i1 %a0, i32 -1, i32 %sub + %ret = and i32 %a1, %bls + ret i32 %ret +} + +define i64 @and_select_neg_to_blsi_i64(i1 %a0, i64 %a1) nounwind { +; PR131587 +; X86-LABEL: and_select_neg_to_blsi_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: andb $1, %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: cmpb $1, %bl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %edi, %eax +; X86-NEXT: andl %esi, %edx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_to_blsi_i64: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: negq %rcx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbq %rax, %rax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: andq %rsi, %rax +; X64-NEXT: retq + %sub = sub i64 0, %a1 + %bls = select i1 %a0, i64 %sub, i64 -1 + %ret = and i64 %a1, %bls + ret i64 %ret +} + +; The transformation also applies to the following test although we do not emit a BLSI instruction +; here because the negation instruction is used more than once. +define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: and_neg_select_pos_i32: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %esi, %eax +; X86-NEXT: andl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_neg_select_pos_i32: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: retq + %sub = sub i32 0, %a1 + %bls = select i1 %a0, i32 %a1, i32 -1 + %ret = and i32 %sub, %bls + ret i32 %ret +} + +; Negative test +define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind { +; X86-LABEL: and_select_neg_i16: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: negl %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_i16: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %sub = sub i16 0, %a1 + %bls = select i1 %a0, i16 %sub, i16 -1 + %ret = and i16 %a1, %bls + ret i16 %ret +} + +; Negative test +define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind { +; X86-LABEL: 
and_select_neg_v4xi32: +; X86: # %bb.0: +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: jne .LBB6_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-NEXT: pand %xmm1, %xmm0 +; X86-NEXT: retl +; X86-NEXT: .LBB6_1: +; X86-NEXT: pxor %xmm1, %xmm1 +; X86-NEXT: psubd %xmm0, %xmm1 +; X86-NEXT: pand %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_v4xi32: +; X64: # %bb.0: +; X64-NEXT: testb $1, %dil +; X64-NEXT: jne .LBB6_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-NEXT: pand %xmm1, %xmm0 +; X64-NEXT: retq +; X64-NEXT: .LBB6_1: +; X64-NEXT: pxor %xmm1, %xmm1 +; X64-NEXT: psubd %xmm0, %xmm1 +; X64-NEXT: pand %xmm1, %xmm0 +; X64-NEXT: retq + %sub = sub <4 x i32> zeroinitializer, %a1 + %bls = select i1 %a0, <4 x i32> %sub, <4 x i32> + %ret = and <4 x i32> %a1, %bls + ret <4 x i32> %ret +} + +; Negative test +define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: and_select_no_neg: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: and_select_no_neg: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 %a1, 0 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +; Negative test +define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: and_select_neg_wrong_const: +; X86: # %bb.0: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, %edx +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_wrong_const: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: testb $1, %dil +; X64-NEXT: movl $1, %eax +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 0, %a1 + %bls = select i1 %a0, i32 %sub, i32 1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +; Negative test +define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind { +; X86-LABEL: and_select_neg_different_op: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: negl %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_neg_different_op: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: negl %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %edx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 0, %a2 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +define i32 @and_select_sub_1_to_blsr1(i1 %a0, i32 inreg %a1) nounwind { +; PR133848 +; X86-LABEL: and_select_sub_1_to_blsr1: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; 
X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_to_blsr1: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +define i32 @and_select_sub_1_to_blsr2(i1 %a0, i32 inreg %a1) nounwind { +; PR133848 +; X86-LABEL: and_select_sub_1_to_blsr2: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_to_blsr2: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %bls, %a1 + ret i32 %ret +} + +define i32 @and_select_sub_1_to_blsr3(i1 %a0, i32 inreg %a1) nounwind { +; PR133848 +; X86-LABEL: and_select_sub_1_to_blsr3: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: negb %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_to_blsr3: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: negb %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 -1, i32 %sub + %ret = and i32 %a1, %bls + ret i32 %ret +} + +define i32 @and_select_sub_1_to_blsr4(i1 %a0, i32 inreg %a1) nounwind { +; PR133848 +; X86-LABEL: and_select_sub_1_to_blsr4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_to_blsr4: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 %a1, 1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +; The transformation also applies to the following test although we do not emit a BLSR instruction +; here because the subtraction instruction is used more than once. 
+define i32 @and_sub_1_select_orig(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: and_sub_1_select_orig: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %esi, %eax +; X86-NEXT: andl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_sub_1_select_orig: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %a1, i32 -1 + %ret = and i32 %sub, %bls + ret i32 %ret +} + +define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind { +; PR133848 +; X86-LABEL: and_select_sub_1_to_blsr_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: andb $1, %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: addl $-1, %edi +; X86-NEXT: movl %esi, %edx +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpb $1, %bl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %edi, %eax +; X86-NEXT: andl %esi, %edx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_to_blsr_i64: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: leaq -1(%rsi), %rcx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbq %rax, %rax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: andq %rsi, %rax +; X64-NEXT: retq + %sub = add i64 %a1, -1 + %bls = select i1 %a0, i64 %sub, i64 -1 + %ret = and i64 %a1, %bls + ret i64 %ret +} + +; Negative test +define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind { +; X86-LABEL: and_select_sub_1_i16: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: leal -1(%edx), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_i16: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %sub = add i16 %a1, -1 + %bls = select i1 %a0, i16 %sub, i16 -1 + %ret = and i16 %a1, %bls + ret i16 %ret +} + +; Negative test +define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind { +; X86-LABEL: and_select_sub_1_v4xi32: +; X86: # %bb.0: +; X86-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB17_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: paddd %xmm0, %xmm1 +; X86-NEXT: .LBB17_2: +; X86-NEXT: pand %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_v4xi32: +; X64: # %bb.0: +; X64-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-NEXT: testb $1, %dil 
+; X64-NEXT: je .LBB17_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: paddd %xmm0, %xmm1 +; X64-NEXT: .LBB17_2: +; X64-NEXT: pand %xmm1, %xmm0 +; X64-NEXT: retq + %sub = add <4 x i32> %a1, + %bls = select i1 %a0, <4 x i32> %sub, <4 x i32> + %ret = and <4 x i32> %a1, %bls + ret <4 x i32> %ret +} + +; Negative test +define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: and_select_no_sub_1: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -2(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_no_sub_1: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -2(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -2 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +; Negative test +define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: and_select_sub_1_wrong_const: +; X86: # %bb.0: +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, %edx +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_wrong_const: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: testb $1, %dil +; X64-NEXT: movl $1, %eax +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %sub, i32 1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +; Negative test +define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind { +; X86-LABEL: and_select_sub_1_different_op: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: decl %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: and_select_sub_1_different_op: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edx killed $edx def $rdx +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rdx), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a2, -1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = and i32 %a1, %bls + ret i32 %ret +} + +define i32 @xor_select_sub_1_to_blsmsk1(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: xor_select_sub_1_to_blsmsk1: +; X86: # %bb.0: +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_to_blsmsk1: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %sub, i32 0 + %ret = xor i32 %a1, %bls + ret i32 %ret +} + +define i32 
@xor_select_sub_1_to_blsmsk2(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: xor_select_sub_1_to_blsmsk2: +; X86: # %bb.0: +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_to_blsmsk2: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %sub, i32 0 + %ret = xor i32 %bls, %a1 + ret i32 %ret +} + +define i32 @xor_select_sub_1_to_blsmsk3(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: xor_select_sub_1_to_blsmsk3: +; X86: # %bb.0: +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_to_blsmsk3: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 0, i32 %sub + %ret = xor i32 %a1, %bls + ret i32 %ret +} + +define i32 @xor_select_sub_1_to_blsmsk4(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: xor_select_sub_1_to_blsmsk4: +; X86: # %bb.0: +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_to_blsmsk4: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = sub i32 %a1, 1 + %bls = select i1 %a0, i32 %sub, i32 0 + %ret = xor i32 %a1, %bls + ret i32 %ret +} + +; The transformation also applies to the following test although we do not emit a BLSMSK instruction +; here because the subtraction instruction is used more than once. 
+define i32 @xor_sub_1_select_orig(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: xor_sub_1_select_orig: +; X86: # %bb.0: +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_sub_1_select_orig: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %esi, %eax +; X64-NEXT: xorl %ecx, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %a1, i32 0 + %ret = xor i32 %sub, %bls + ret i32 %ret +} + +define i64 @xor_select_sub_1_to_blsmsk_i64(i1 %a0, i64 %a1) nounwind { +; X86-LABEL: xor_select_sub_1_to_blsmsk_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $-1, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: cmovel %edi, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: xorl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_to_blsmsk_i64: +; X64: # %bb.0: +; X64-NEXT: leaq -1(%rsi), %rcx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovneq %rcx, %rax +; X64-NEXT: xorq %rsi, %rax +; X64-NEXT: retq + %sub = add i64 %a1, -1 + %bls = select i1 %a0, i64 %sub, i64 0 + %ret = xor i64 %a1, %bls + ret i64 %ret +} + +; Negative test +define i16 @xor_select_sub_1_i16(i1 %a0, i16 %a1) nounwind { +; X86-LABEL: xor_select_sub_1_i16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal -1(%ecx), %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %edx, %eax +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_i16: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %sub = add i16 %a1, -1 + %bls = select i1 %a0, i16 %sub, i16 0 + %ret = xor i16 %a1, %bls + ret i16 %ret +} + +; Negative test +define <4 x i32> @xor_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind { +; X86-LABEL: xor_select_sub_1_v4xi32: +; X86: # %bb.0: +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: jne .LBB28_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: xorps %xmm1, %xmm1 +; X86-NEXT: xorps %xmm1, %xmm0 +; X86-NEXT: retl +; X86-NEXT: .LBB28_1: +; X86-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-NEXT: paddd %xmm0, %xmm1 +; X86-NEXT: pxor %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_v4xi32: +; X64: # %bb.0: +; X64-NEXT: testb $1, %dil +; X64-NEXT: jne .LBB28_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: xorps %xmm1, %xmm0 +; X64-NEXT: retq +; X64-NEXT: .LBB28_1: +; X64-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-NEXT: paddd %xmm0, %xmm1 +; X64-NEXT: pxor %xmm1, %xmm0 +; X64-NEXT: retq + %sub = add <4 x i32> %a1, + %bls = select i1 %a0, <4 x i32> %sub, <4 x i32> zeroinitializer + %ret = xor <4 x i32> %a1, %bls + ret <4 x i32> %ret +} + +; Negative test +define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg 
%a1) nounwind { +; X86-LABEL: xor_select_no_sub_1: +; X86: # %bb.0: +; X86-NEXT: leal 1(%eax), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_no_sub_1: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: leal 1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, 1 + %bls = select i1 %a0, i32 %sub, i32 0 + %ret = xor i32 %a1, %bls + ret i32 %ret +} + +; Negative test +define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind { +; X86-LABEL: xor_select_sub_1_wrong_const: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $1, %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpb $1, %cl +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: xorl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_wrong_const: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rsi), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpb $1, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a1, -1 + %bls = select i1 %a0, i32 %sub, i32 -1 + %ret = xor i32 %a1, %bls + ret i32 %ret +} + +; Negative test +define i32 @xor_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind { +; X86-LABEL: xor_select_sub_1_different_op: +; X86: # %bb.0: +; X86-NEXT: leal -1(%edx), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: xor_select_sub_1_different_op: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edx killed $edx def $rdx +; X64-NEXT: leal -1(%rdx), %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: retq + %sub = add i32 %a2, -1 + %bls = select i1 %a0, i32 %sub, i32 0 + %ret = xor i32 %a1, %bls + ret i32 %ret +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; X64-BMI: {{.*}} +; X64-BMI2: {{.*}} +; X86-BMI: {{.*}} +; X86-BMI2: {{.*}} From 4ced124565fb19cf76aa39b6f2701b52e37bff9d Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Sun, 30 Mar 2025 08:58:00 +0200 Subject: [PATCH 2/2] [X86] Distribute Certain Bitwise Operations over SELECT InstCombine canonicalizes `(select P (and X (- X)) X)` to `(and (select P (- X) umax) X)`. This is counterproductive for the X86 backend when BMI is available because we can encode `(and X (- X))` using the `BLSI` instruction. A similar situation arises if we have `(select P (and X (sub X 1)) X)` (prevents use of `BLSR` instruction) or `(select P (xor X (sub X 1)) X)` (prevents use of `BLSMSK` instruction). Trigger the inverse transformation in the X86 backend if BMI is available and we can use the mentioned BMI instructions. This is done by adjusting the `shouldFoldSelectWithIdentityConstant()` implementation for the X86 backend. In this way, we get `(select P (and X (- X)) X)` again, which enables the use of `BLSI` (similar for the other cases described above). Alive proofs: https://alive2.llvm.org/ce/z/MT_pKi Fixes #131587, fixes #133848. 
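
For illustration, here is the BLSI case in IR form (a sketch; the
function name is made up here, but the pattern mirrors
and_select_neg_to_blsi1 in the test file):

    define i32 @blsi_example(i1 %p, i32 %x) {
      %neg = sub i32 0, %x                    ; -X
      %sel = select i1 %p, i32 %neg, i32 -1   ; -1 is the identity of AND
      %and = and i32 %x, %sel                 ; X & select(P, -X, -1)
      ret i32 %and
    }

Because the -1 arm is the identity constant of AND, folding the select
past the AND recovers (and X (- X)), which isel can match to BLSI; the
updated checks accordingly expect blsil plus a cmov in place of the
previous neg/sbb/or/and sequence. The BLSR and BLSMSK cases are
analogous, with (add X -1) in place of (- X).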
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 23 +- llvm/test/CodeGen/X86/bmi-select-distrib.ll | 363 ++++++-------------- 2 files changed, 125 insertions(+), 261 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b71f49165c60..e761a91e689e3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35552,8 +35552,29 @@ bool X86TargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT, bool X86TargetLowering::shouldFoldSelectWithIdentityConstant( unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const { - if (SelectOpcode != ISD::VSELECT) + if (SelectOpcode == ISD::SELECT) { + if (VT.isVector()) + return false; + if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64)) + return false; + using namespace llvm::SDPatternMatch; + // BLSI + if (BinOpcode == ISD::AND && (sd_match(Y, m_Neg(m_Specific(X))) || + sd_match(X, m_Neg(m_Specific(Y))))) + return true; + // BLSR + if (BinOpcode == ISD::AND && + (sd_match(Y, m_Add(m_Specific(X), m_AllOnes())) || + sd_match(X, m_Add(m_Specific(Y), m_AllOnes())))) + return true; + // BLSMSK + if (BinOpcode == ISD::XOR && + (sd_match(Y, m_Add(m_Specific(X), m_AllOnes())) || + sd_match(X, m_Add(m_Specific(Y), m_AllOnes())))) + return true; + return false; + } // TODO: This is too general. There are cases where pre-AVX512 codegen would // benefit. The transform may also be profitable for scalar code. if (!Subtarget.hasAVX512()) diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll index b5a8e7c457be3..49beda516d508 100644 --- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll +++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll @@ -8,29 +8,16 @@ define i32 @and_select_neg_to_blsi1(i1 %a0, i32 inreg %a1) nounwind { ; PR131587 ; X86-LABEL: and_select_neg_to_blsi1: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: movl %eax, %edx -; X86-NEXT: negl %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsil %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_neg_to_blsi1: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: negl %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsil %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = sub i32 0, %a1 %bls = select i1 %a0, i32 %sub, i32 -1 @@ -42,29 +29,16 @@ define i32 @and_select_neg_to_blsi2(i1 %a0, i32 inreg %a1) nounwind { ; PR131587 ; X86-LABEL: and_select_neg_to_blsi2: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: movl %eax, %edx -; X86-NEXT: negl %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsil %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_neg_to_blsi2: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: negl %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb 
$1, %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsil %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = sub i32 0, %a1 %bls = select i1 %a0, i32 %sub, i32 -1 @@ -76,29 +50,16 @@ define i32 @and_select_neg_to_blsi3(i1 %a0, i32 inreg %a1) nounwind { ; PR131587 ; X86-LABEL: and_select_neg_to_blsi3: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: movl %eax, %edx -; X86-NEXT: negl %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: negb %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsil %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_neg_to_blsi3: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: negl %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: negb %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsil %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %esi, %eax ; X64-NEXT: retq %sub = sub i32 0, %a1 %bls = select i1 %a0, i32 -1, i32 %sub @@ -110,39 +71,26 @@ define i64 @and_select_neg_to_blsi_i64(i1 %a0, i64 %a1) nounwind { ; PR131587 ; X86-LABEL: and_select_neg_to_blsi_i64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: andb $1, %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: negl %edi -; X86-NEXT: movl $0, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax ; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: cmpb $1, %bl -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl %eax, %edx -; X86-NEXT: orl %edi, %eax ; X86-NEXT: andl %esi, %edx ; X86-NEXT: andl %ecx, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %esi, %edx +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X64-LABEL: and_select_neg_to_blsi_i64: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: movq %rsi, %rcx -; X64-NEXT: negq %rcx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbq %rax, %rax -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: andq %rsi, %rax +; X64-NEXT: blsiq %rsi, %rax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmoveq %rsi, %rax ; X64-NEXT: retq %sub = sub i64 0, %a1 %bls = select i1 %a0, i64 %sub, i64 -1 @@ -155,29 +103,20 @@ define i64 @and_select_neg_to_blsi_i64(i1 %a0, i64 %a1) nounwind { define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: and_neg_select_pos_i32: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: movl %eax, %edx -; X86-NEXT: negl %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %esi, %eax -; X86-NEXT: andl %edx, %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_neg_select_pos_i32: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: negl %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil 
-; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %esi, %eax -; X64-NEXT: andl %ecx, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: andl %eax, %esi +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %esi, %eax ; X64-NEXT: retq %sub = sub i32 0, %a1 %bls = select i1 %a0, i32 %a1, i32 -1 @@ -348,28 +287,16 @@ define i32 @and_select_sub_1_to_blsr1(i1 %a0, i32 inreg %a1) nounwind { ; PR133848 ; X86-LABEL: and_select_sub_1_to_blsr1: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: leal -1(%eax), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsrl %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_sub_1_to_blsr1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: andb $1, %dil -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsrl %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 %sub, i32 -1 @@ -381,28 +308,16 @@ define i32 @and_select_sub_1_to_blsr2(i1 %a0, i32 inreg %a1) nounwind { ; PR133848 ; X86-LABEL: and_select_sub_1_to_blsr2: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: leal -1(%eax), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsrl %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_sub_1_to_blsr2: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: andb $1, %dil -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsrl %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 %sub, i32 -1 @@ -414,28 +329,16 @@ define i32 @and_select_sub_1_to_blsr3(i1 %a0, i32 inreg %a1) nounwind { ; PR133848 ; X86-LABEL: and_select_sub_1_to_blsr3: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: leal -1(%eax), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: negb %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsrl %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_sub_1_to_blsr3: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: andb $1, %dil -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: negb %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsrl %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 -1, i32 %sub @@ -447,28 +350,16 @@ define i32 @and_select_sub_1_to_blsr4(i1 %a0, i32 inreg %a1) nounwind { ; PR133848 ; 
X86-LABEL: and_select_sub_1_to_blsr4: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: leal -1(%eax), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: andl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: blsrl %eax, %ecx +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_select_sub_1_to_blsr4: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: andb $1, %dil -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl %esi, %eax +; X64-NEXT: blsrl %esi, %eax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = sub i32 %a1, 1 %bls = select i1 %a0, i32 %sub, i32 -1 @@ -481,28 +372,19 @@ define i32 @and_select_sub_1_to_blsr4(i1 %a0, i32 inreg %a1) nounwind { define i32 @and_sub_1_select_orig(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: and_sub_1_select_orig: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andb $1, %cl -; X86-NEXT: leal -1(%eax), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpb $1, %cl -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %esi, %eax -; X86-NEXT: andl %edx, %eax -; X86-NEXT: popl %esi +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: and_sub_1_select_orig: ; X64: # %bb.0: ; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: andb $1, %dil -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl %esi, %eax -; X64-NEXT: andl %ecx, %eax +; X64-NEXT: leal -1(%rsi), %eax +; X64-NEXT: andl %eax, %esi +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmovnel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 %a1, i32 -1 @@ -514,38 +396,26 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind { ; PR133848 ; X86-LABEL: and_select_sub_1_to_blsr_i64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: andb $1, %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: addl $-1, %edi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %esi, %edx ; X86-NEXT: adcl $-1, %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpb $1, %bl -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl %eax, %edx -; X86-NEXT: orl %edi, %eax ; X86-NEXT: andl %esi, %edx ; X86-NEXT: andl %ecx, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X64-LABEL: and_select_sub_1_to_blsr_i64: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: leaq -1(%rsi), %rcx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpb $1, %dil -; X64-NEXT: sbbq %rax, %rax -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: andq %rsi, %rax +; X64-NEXT: blsrq %rsi, %rax +; X64-NEXT: testb $1, %dil +; X64-NEXT: cmoveq %rsi, %rax ; X64-NEXT: retq %sub = add i64 %a1, -1 %bls = select i1 %a0, i64 %sub, i64 -1 @@ -713,21 +583,16 @@ define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, 
i32 inreg %a2) define i32 @xor_select_sub_1_to_blsmsk1(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: xor_select_sub_1_to_blsmsk1: ; X86: # %bb.0: -; X86-NEXT: leal -1(%eax), %ecx -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: blsmskl %eax, %ecx ; X86-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %edx, %eax +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: xor_select_sub_1_to_blsmsk1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: blsmskl %esi, %eax ; X64-NEXT: testb $1, %dil -; X64-NEXT: cmovnel %ecx, %eax -; X64-NEXT: xorl %esi, %eax +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 %sub, i32 0 @@ -738,21 +603,16 @@ define i32 @xor_select_sub_1_to_blsmsk1(i1 %a0, i32 inreg %a1) nounwind { define i32 @xor_select_sub_1_to_blsmsk2(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: xor_select_sub_1_to_blsmsk2: ; X86: # %bb.0: -; X86-NEXT: leal -1(%eax), %ecx -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: blsmskl %eax, %ecx ; X86-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %edx, %eax +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: xor_select_sub_1_to_blsmsk2: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: blsmskl %esi, %eax ; X64-NEXT: testb $1, %dil -; X64-NEXT: cmovnel %ecx, %eax -; X64-NEXT: xorl %esi, %eax +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 %sub, i32 0 @@ -763,21 +623,16 @@ define i32 @xor_select_sub_1_to_blsmsk2(i1 %a0, i32 inreg %a1) nounwind { define i32 @xor_select_sub_1_to_blsmsk3(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: xor_select_sub_1_to_blsmsk3: ; X86: # %bb.0: -; X86-NEXT: leal -1(%eax), %ecx -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: blsmskl %eax, %ecx ; X86-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-NEXT: cmovel %ecx, %edx -; X86-NEXT: xorl %edx, %eax +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: xor_select_sub_1_to_blsmsk3: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: blsmskl %esi, %eax ; X64-NEXT: testb $1, %dil -; X64-NEXT: cmovel %ecx, %eax -; X64-NEXT: xorl %esi, %eax +; X64-NEXT: cmovnel %esi, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 0, i32 %sub @@ -788,21 +643,16 @@ define i32 @xor_select_sub_1_to_blsmsk3(i1 %a0, i32 inreg %a1) nounwind { define i32 @xor_select_sub_1_to_blsmsk4(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: xor_select_sub_1_to_blsmsk4: ; X86: # %bb.0: -; X86-NEXT: leal -1(%eax), %ecx -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: blsmskl %eax, %ecx ; X86-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %edx, %eax +; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: xor_select_sub_1_to_blsmsk4: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: blsmskl %esi, %eax ; X64-NEXT: testb $1, %dil -; X64-NEXT: cmovnel %ecx, %eax -; X64-NEXT: xorl %esi, %eax +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %sub = sub i32 %a1, 1 %bls = select i1 %a0, i32 %sub, i32 0 @@ -816,20 +666,18 @@ define i32 @xor_sub_1_select_orig(i1 %a0, i32 inreg %a1) nounwind { ; X86-LABEL: xor_sub_1_select_orig: ; X86: # %bb.0: ; 
X86-NEXT: leal -1(%eax), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-NEXT: cmovel %edx, %eax ; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: xor_sub_1_select_orig: ; X64: # %bb.0: ; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: leal -1(%rsi), %ecx -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: leal -1(%rsi), %eax +; X64-NEXT: xorl %eax, %esi ; X64-NEXT: testb $1, %dil ; X64-NEXT: cmovnel %esi, %eax -; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: retq %sub = add i32 %a1, -1 %bls = select i1 %a0, i32 %a1, i32 0 @@ -840,7 +688,6 @@ define i32 @xor_sub_1_select_orig(i1 %a0, i32 inreg %a1) nounwind { define i64 @xor_select_sub_1_to_blsmsk_i64(i1 %a0, i64 %a1) nounwind { ; X86-LABEL: xor_select_sub_1_to_blsmsk_i64: ; X86: # %bb.0: -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -848,23 +695,19 @@ define i64 @xor_select_sub_1_to_blsmsk_i64(i1 %a0, i64 %a1) nounwind { ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %esi, %edx ; X86-NEXT: adcl $-1, %edx -; X86-NEXT: xorl %edi, %edi -; X86-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-NEXT: cmovel %edi, %eax -; X86-NEXT: cmovel %edi, %edx -; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: xorl %esi, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-LABEL: xor_select_sub_1_to_blsmsk_i64: ; X64: # %bb.0: -; X64-NEXT: leaq -1(%rsi), %rcx -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: blsmskq %rsi, %rax ; X64-NEXT: testb $1, %dil -; X64-NEXT: cmovneq %rcx, %rax -; X64-NEXT: xorq %rsi, %rax +; X64-NEXT: cmoveq %rsi, %rax ; X64-NEXT: retq %sub = add i64 %a1, -1 %bls = select i1 %a0, i64 %sub, i64 0