diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll new file mode 100644 index 0000000000000..82e2daee60f82 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll @@ -0,0 +1,1123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define i32 @xmsk_eq_allzeros_i8(<32 x i8 > %a) { +; CHECK-LABEL: xmsk_eq_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvseqi.b $xr0, $xr0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, 
$a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 17 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 16 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 18 +; CHECK-NEXT: slli.d $a1, $a1, 17 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 19 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 18 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 19 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 21 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 20 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 22 +; CHECK-NEXT: slli.d $a1, $a1, 21 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 23 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 22 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 23 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 25 +; CHECK-NEXT: 
andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 24 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 26 +; CHECK-NEXT: slli.d $a1, $a1, 25 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 27 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 26 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 27 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 29 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 28 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 30 +; CHECK-NEXT: slli.d $a1, $a1, 29 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.b $a1, $sp, 31 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 30 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 31 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp eq <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @xmsk_sgt_allones_i8(<32 x i8 > %a) { +; CHECK-LABEL: xmsk_sgt_allones_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; 
CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 17 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 16 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 18 +; 
CHECK-NEXT: slli.d $a1, $a1, 17 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 19 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 18 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 19 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 21 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 20 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 22 +; CHECK-NEXT: slli.d $a1, $a1, 21 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 23 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 22 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 23 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 25 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 24 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 26 +; CHECK-NEXT: slli.d $a1, $a1, 25 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 27 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 26 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 27 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 29 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 28 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 30 +; CHECK-NEXT: slli.d $a1, $a1, 29 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.b $a1, $sp, 31 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 30 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 31 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; 
CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp sgt <32 x i8> %a, splat (i8 -1) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @xmsk_sge_allzeros_i8(<32 x i8 > %a) { +; CHECK-LABEL: xmsk_sge_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvrepli.b $xr1, 0 +; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: 
vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 17 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 16 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 18 +; CHECK-NEXT: slli.d $a1, $a1, 17 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 19 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 18 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 19 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 21 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 20 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 22 +; CHECK-NEXT: slli.d $a1, $a1, 21 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 23 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 22 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 23 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 25 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 24 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 26 +; 
CHECK-NEXT: slli.d $a1, $a1, 25 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 27 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 26 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 27 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 29 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 28 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 30 +; CHECK-NEXT: slli.d $a1, $a1, 29 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.b $a1, $sp, 31 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 30 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 31 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp sge <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @xmsk_slt_allzeros_i8(<32 x i8 > %a) { +; CHECK-LABEL: xmsk_slt_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvslti.b $xr0, $xr0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d 
$a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 17 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 16 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 18 +; CHECK-NEXT: slli.d $a1, $a1, 17 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 19 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 18 +; CHECK-NEXT: or 
$a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 19 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 21 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 20 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 22 +; CHECK-NEXT: slli.d $a1, $a1, 21 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 23 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 22 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 23 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 25 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 24 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 26 +; CHECK-NEXT: slli.d $a1, $a1, 25 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 27 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 26 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 27 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 29 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 28 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 30 +; CHECK-NEXT: slli.d $a1, $a1, 29 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.b $a1, $sp, 31 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 30 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 31 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 
to i32 + ret i32 %2 +} + +define i16 @xmsk_slt_allzeros_i16(<16 x i16 > %a) { +; CHECK-LABEL: xmsk_slt_allzeros_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvslti.h $xr0, $xr0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.hu $a1, $sp, 18 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 8 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.hu $a1, $sp, 22 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 10 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.hu $a1, $sp, 26 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 
12 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.h $a1, $sp, 30 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 14 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <16 x i16> %a, splat (i16 0) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i8 @xmsk_slt_allzeros_i32(<8 x i32 > %a) { +; CHECK-LABEL: xmsk_slt_allzeros_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: xvslti.w $xr0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <8 x i32> %a, splat (i32 0) + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +define i4 @xmsk_slt_allzeros_i64(<4 x i64 > %a) { +; CHECK-LABEL: xmsk_slt_allzeros_i64: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: xvslti.d $xr0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 1 +; CHECK-NEXT: sub.d $a0, $a1, $a0 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3 +; CHECK-NEXT: slli.d $a1, $a1, 3 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 15 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <4 x i64> %a, splat (i64 0) + %2 = bitcast <4 x i1> %1 to i4 + ret i4 %2 +} + +define i32 @xmsk_sle_allones_i8(<32 x i8 > %a) { +; CHECK-LABEL: xmsk_sle_allones_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvslei.b $xr0, $xr0, -1 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: 
vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 17 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 16 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 18 +; CHECK-NEXT: slli.d $a1, $a1, 17 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 19 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 18 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 19 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 21 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 20 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 22 +; CHECK-NEXT: slli.d $a1, $a1, 21 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 23 +; CHECK-NEXT: andi $a2, 
$a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 22 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 23 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 25 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 24 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 26 +; CHECK-NEXT: slli.d $a1, $a1, 25 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 27 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 26 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 27 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 29 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 28 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 30 +; CHECK-NEXT: slli.d $a1, $a1, 29 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.b $a1, $sp, 31 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 30 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 31 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <32 x i8> %a, splat (i8 -1) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i16 @xmsk_sle_allones_i16(<16 x i16 > %a) { +; CHECK-LABEL: xmsk_sle_allones_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; 
CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvslei.h $xr0, $xr0, -1 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.hu $a1, $sp, 18 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 8 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.hu $a1, $sp, 22 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 10 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.hu $a1, $sp, 26 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 12 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.hu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.h $a1, $sp, 30 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 14 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, 
$sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <16 x i16> %a, splat (i16 -1) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i8 @xmsk_sle_allones_i32(<8 x i32 > %a) { +; CHECK-LABEL: xmsk_sle_allones_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: xvslei.w $xr0, $xr0, -1 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <8 x i32> %a, splat (i32 -1) + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +define i4 @xmsk_sle_allones_i64(<4 x i64 > %a) { +; CHECK-LABEL: xmsk_sle_allones_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: xvslei.d $xr0, $xr0, -1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 1 +; CHECK-NEXT: sub.d $a0, $a1, $a0 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3 +; CHECK-NEXT: slli.d $a1, $a1, 3 +; CHECK-NEXT: or $a0, 
$a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 15 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <4 x i64> %a, splat (i64 -1) + %2 = bitcast <4 x i1> %1 to i4 + ret i4 %2 +} + +define i32 @xmsk_ne_allzeros_i8(<32 x i8 > %a) { +; CHECK-LABEL: xmsk_ne_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: xvseqi.b $xr0, $xr0, 0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, 
$vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 16 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 17 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 16 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 18 +; CHECK-NEXT: slli.d $a1, $a1, 17 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 19 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 18 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 20 +; CHECK-NEXT: slli.d $a1, $a1, 19 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 21 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 20 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 22 +; CHECK-NEXT: slli.d $a1, $a1, 21 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 23 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 22 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 24 +; CHECK-NEXT: slli.d $a1, $a1, 23 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 25 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 24 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 26 +; CHECK-NEXT: slli.d $a1, 
$a1, 25 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 27 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 26 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 28 +; CHECK-NEXT: slli.d $a1, $a1, 27 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.bu $a1, $sp, 29 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 28 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: ld.bu $a2, $sp, 30 +; CHECK-NEXT: slli.d $a1, $a1, 29 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ld.b $a1, $sp, 31 +; CHECK-NEXT: andi $a2, $a2, 1 +; CHECK-NEXT: slli.d $a2, $a2, 30 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: slli.d $a1, $a1, 31 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret +entry: + %1 = icmp ne <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll new file mode 100644 index 0000000000000..eff3302145c95 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll @@ -0,0 +1,555 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define i16 @vmsk_eq_allzeros_i8(<16 x i8 > %a) { +; CHECK-LABEL: vmsk_eq_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vseqi.b $vr0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; 
CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp eq <16 x i8> %a, splat (i8 0) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i16 @vmsk_sgt_allones_i8(<16 x i8 > %a) { +; CHECK-LABEL: vmsk_sgt_allones_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 
+; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 
+; CHECK-NEXT: ret +entry: + %1 = icmp sgt <16 x i8> %a, splat (i8 -1) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i16 @vmsk_sge_allzeros_i8(<16 x i8 > %a) { +; CHECK-LABEL: vmsk_sge_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vrepli.b $vr1, 0 +; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: 
vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sge <16 x i8> %a, splat (i8 0) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i16 @vmsk_slt_allzeros_i8(<16 x i8 > %a) { +; CHECK-LABEL: vmsk_slt_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslti.b $vr0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: 
vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <16 x i8> %a, splat (i8 0) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i8 @vmsk_slt_allzeros_i16(<8 x i16 > %a) { +; CHECK-LABEL: vmsk_slt_allzeros_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslti.h $vr0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <8 x i16> %a, splat (i16 0) + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +define i4 @vmsk_slt_allzeros_i32(<4 x i32 > %a) { +; CHECK-LABEL: vmsk_slt_allzeros_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslti.w $vr0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 3 +; CHECK-NEXT: slli.d $a1, $a1, 3 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 15 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <4 x i32> %a, splat (i32 0) + %2 = bitcast <4 x i1> %1 to i4 + ret i4 %2 +} + +define i2 @vmsk_slt_allzeros_i64(<2 x i64 > %a) { +; CHECK-LABEL: vmsk_slt_allzeros_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslti.d $vr0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 1 +; CHECK-NEXT: slli.d $a1, $a1, 1 +; CHECK-NEXT: sub.d $a0, $a1, $a0 +; CHECK-NEXT: andi $a0, $a0, 3 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <2 x i64> %a, splat (i64 0) + %2 = bitcast <2 x i1> %1 to i2 + ret i2 %2 +} + +define i16 @vmsk_sle_allones_i8(<16 x i8 > %a) { +; CHECK-LABEL: vmsk_sle_allones_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslei.b $vr0, $vr0, -1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d 
$a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <16 x i8> %a, splat (i8 -1) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +define i8 @vmsk_sle_allones_i16(<8 x i16 > %a) { +; CHECK-LABEL: vmsk_sle_allones_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslei.h $vr0, $vr0, -1 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3 +; CHECK-NEXT: 
bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <8 x i16> %a, splat (i16 -1) + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +define i4 @vmsk_sle_allones_i32(<4 x i32 > %a) { +; CHECK-LABEL: vmsk_sle_allones_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslei.w $vr0, $vr0, -1 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 3 +; CHECK-NEXT: slli.d $a1, $a1, 3 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: andi $a0, $a0, 15 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <4 x i32> %a, splat (i32 -1) + %2 = bitcast <4 x i1> %1 to i4 + ret i4 %2 +} + +define i2 @vmsk_sle_allones_i64(<2 x i64 > %a) { +; CHECK-LABEL: vmsk_sle_allones_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vslei.d $vr0, $vr0, -1 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 1 +; CHECK-NEXT: slli.d $a1, $a1, 1 +; CHECK-NEXT: sub.d $a0, $a1, $a0 +; CHECK-NEXT: andi $a0, $a0, 3 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <2 x i64> %a, splat (i64 -1) + %2 = bitcast <2 x i1> %1 to i2 + ret i2 %2 +} + +define i16 @vmsk_ne_allzeros_i8(<16 x i8 > 
%a) { +; CHECK-LABEL: vmsk_ne_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vseqi.b $vr0, $vr0, 0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 +; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 +; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 6 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 7 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 8 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 9 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 10 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 11 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 12 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 13 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 +; CHECK-NEXT: andi $a1, $a1, 1 +; CHECK-NEXT: slli.d $a1, $a1, 14 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 
15 +; CHECK-NEXT: slli.d $a1, $a1, 15 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %1 = icmp ne <16 x i8> %a, splat (i8 0) + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +}