Skip to content

Commit 920aac4

Browse files
lu-zero authored and Amanieu committed
Add vec_all_gt and vec_any_gt
1 parent e5f2bc3 commit 920aac4

File tree

1 file changed

+307
-0
lines changed

1 file changed

+307
-0
lines changed

crates/core_arch/src/powerpc/altivec.rs

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ extern "C" {
270270

271271
// Binding for the LLVM intrinsic named `llvm.ppc.altivec.vcmpgefp.p`: takes an
// `i32` selector `cr` plus two `vector_float` operands and returns an `i32`.
#[link_name = "llvm.ppc.altivec.vcmpgefp.p"]
272272
fn vcmpgefp_p(cr: i32, a: vector_float, b: vector_float) -> i32;
273+
// Binding for the LLVM intrinsic named `llvm.ppc.altivec.vcmpgtfp.p`: takes an
// `i32` selector `cr` plus two `vector_float` operands and returns an `i32`.
// The wrappers in this file pass `2` (for `vcmpgtfp_all`) or `1` (for
// `vcmpgtfp_any`) as `cr` and treat a non-zero return value as `true`.
#[link_name = "llvm.ppc.altivec.vcmpgtfp.p"]
274+
fn vcmpgtfp_p(cr: i32, a: vector_float, b: vector_float) -> i32;
273275
}
274276

275277
macro_rules! s_t_l {
@@ -557,6 +559,8 @@ mod sealed {
557559
}
558560
}
559561

562+
// All/Any Greater Than or Equal
563+
560564
#[inline]
561565
#[target_feature(enable = "altivec")]
562566
#[cfg_attr(test, assert_instr(vcmpgtsb.))]
@@ -694,6 +698,145 @@ mod sealed {
694698
}
695699
}
696700

701+
// All/Any Greater Than
702+
703+
/// "All elements greater than" predicate for two `vector_signed_char` operands.
///
/// Calls `vcmpgtsb_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtsb.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
704+
#[target_feature(enable = "altivec")]
705+
#[cfg_attr(test, assert_instr(vcmpgtsb.))]
706+
unsafe fn vcmpgtsb_all(a: vector_signed_char, b: vector_signed_char) -> bool {
707+
vcmpgtsb_p(2, a, b) != 0
708+
}
709+
710+
/// "Any element greater than" predicate for two `vector_signed_char` operands.
///
/// Calls `vcmpgtsb_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtsb.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
711+
#[target_feature(enable = "altivec")]
712+
#[cfg_attr(test, assert_instr(vcmpgtsb.))]
713+
unsafe fn vcmpgtsb_any(a: vector_signed_char, b: vector_signed_char) -> bool {
714+
vcmpgtsb_p(1, a, b) != 0
715+
}
716+
717+
/// "All elements greater than" predicate for two `vector_signed_short` operands.
///
/// Calls `vcmpgtsh_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtsh.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
718+
#[target_feature(enable = "altivec")]
719+
#[cfg_attr(test, assert_instr(vcmpgtsh.))]
720+
unsafe fn vcmpgtsh_all(a: vector_signed_short, b: vector_signed_short) -> bool {
721+
vcmpgtsh_p(2, a, b) != 0
722+
}
723+
724+
/// "Any element greater than" predicate for two `vector_signed_short` operands.
///
/// Calls `vcmpgtsh_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtsh.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
725+
#[target_feature(enable = "altivec")]
726+
#[cfg_attr(test, assert_instr(vcmpgtsh.))]
727+
unsafe fn vcmpgtsh_any(a: vector_signed_short, b: vector_signed_short) -> bool {
728+
vcmpgtsh_p(1, a, b) != 0
729+
}
730+
731+
/// "All elements greater than" predicate for two `vector_signed_int` operands.
///
/// Calls `vcmpgtsw_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtsw.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
732+
#[target_feature(enable = "altivec")]
733+
#[cfg_attr(test, assert_instr(vcmpgtsw.))]
734+
unsafe fn vcmpgtsw_all(a: vector_signed_int, b: vector_signed_int) -> bool {
735+
vcmpgtsw_p(2, a, b) != 0
736+
}
737+
738+
/// "Any element greater than" predicate for two `vector_signed_int` operands.
///
/// Calls `vcmpgtsw_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtsw.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
739+
#[target_feature(enable = "altivec")]
740+
#[cfg_attr(test, assert_instr(vcmpgtsw.))]
741+
unsafe fn vcmpgtsw_any(a: vector_signed_int, b: vector_signed_int) -> bool {
742+
vcmpgtsw_p(1, a, b) != 0
743+
}
744+
745+
/// "All elements greater than" predicate for two `vector_unsigned_char` operands.
///
/// Calls `vcmpgtub_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtub.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
746+
#[target_feature(enable = "altivec")]
747+
#[cfg_attr(test, assert_instr(vcmpgtub.))]
748+
unsafe fn vcmpgtub_all(a: vector_unsigned_char, b: vector_unsigned_char) -> bool {
749+
vcmpgtub_p(2, a, b) != 0
750+
}
751+
752+
/// "Any element greater than" predicate for two `vector_unsigned_char` operands.
///
/// Calls `vcmpgtub_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtub.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
753+
#[target_feature(enable = "altivec")]
754+
#[cfg_attr(test, assert_instr(vcmpgtub.))]
755+
unsafe fn vcmpgtub_any(a: vector_unsigned_char, b: vector_unsigned_char) -> bool {
756+
vcmpgtub_p(1, a, b) != 0
757+
}
758+
759+
/// "All elements greater than" predicate for two `vector_unsigned_short` operands.
///
/// Calls `vcmpgtuh_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtuh.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
760+
#[target_feature(enable = "altivec")]
761+
#[cfg_attr(test, assert_instr(vcmpgtuh.))]
762+
unsafe fn vcmpgtuh_all(a: vector_unsigned_short, b: vector_unsigned_short) -> bool {
763+
vcmpgtuh_p(2, a, b) != 0
764+
}
765+
766+
/// "Any element greater than" predicate for two `vector_unsigned_short` operands.
///
/// Calls `vcmpgtuh_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtuh.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
767+
#[target_feature(enable = "altivec")]
768+
#[cfg_attr(test, assert_instr(vcmpgtuh.))]
769+
unsafe fn vcmpgtuh_any(a: vector_unsigned_short, b: vector_unsigned_short) -> bool {
770+
vcmpgtuh_p(1, a, b) != 0
771+
}
772+
773+
/// "All elements greater than" predicate for two `vector_unsigned_int` operands.
///
/// Calls `vcmpgtuw_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtuw.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
774+
#[target_feature(enable = "altivec")]
775+
#[cfg_attr(test, assert_instr(vcmpgtuw.))]
776+
unsafe fn vcmpgtuw_all(a: vector_unsigned_int, b: vector_unsigned_int) -> bool {
777+
vcmpgtuw_p(2, a, b) != 0
778+
}
779+
780+
/// "Any element greater than" predicate for two `vector_unsigned_int` operands.
///
/// Calls `vcmpgtuw_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtuw.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
781+
#[target_feature(enable = "altivec")]
782+
#[cfg_attr(test, assert_instr(vcmpgtuw.))]
783+
unsafe fn vcmpgtuw_any(a: vector_unsigned_int, b: vector_unsigned_int) -> bool {
784+
vcmpgtuw_p(1, a, b) != 0
785+
}
786+
787+
/// Trait backing the public `vec_all_gt` function.
///
/// `Self` is the left operand and `Other` the right operand; each
/// implementation names its own `Result` type, which `vec_all_gt` returns.
pub trait VectorAllGt<Other> {
788+
type Result;
789+
unsafe fn vec_all_gt(self, b: Other) -> Self::Result;
790+
}
791+
792+
// Generates the integer `VectorAllGt` / `vec_all_gt` implementations from the
// six `*_all` wrappers, listed as (unsigned, signed) pairs for char, short and
// int. NOTE(review): `impl_vec_any_all!` is defined outside this view — confirm
// which operand-type combinations it emits for each pair.
impl_vec_any_all! { [VectorAllGt vec_all_gt] (
793+
vcmpgtub_all, vcmpgtsb_all,
794+
vcmpgtuh_all, vcmpgtsh_all,
795+
vcmpgtuw_all, vcmpgtsw_all
796+
) }
797+
798+
// TODO: vsx encoding
799+
/// "All elements greater than" predicate for two `vector_float` operands.
///
/// Calls `vcmpgtfp_p` with selector `2` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtfp.` is emitted.
// NOTE(review): selector `2` presumably picks the "true in every lane" CR6
// test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
800+
#[target_feature(enable = "altivec")]
801+
#[cfg_attr(test, assert_instr(vcmpgtfp.))]
802+
unsafe fn vcmpgtfp_all(a: vector_float, b: vector_float) -> bool {
803+
vcmpgtfp_p(2, a, b) != 0
804+
}
805+
806+
/// `VectorAllGt` for a pair of `vector_float` operands: the result type is
/// `bool` and the comparison is delegated to `vcmpgtfp_all`.
impl VectorAllGt<vector_float> for vector_float {
807+
type Result = bool;
808+
#[inline]
809+
unsafe fn vec_all_gt(self, b: vector_float) -> Self::Result {
810+
vcmpgtfp_all(self, b)
811+
}
812+
}
813+
814+
/// Trait backing the public `vec_any_gt` function.
///
/// `Self` is the left operand and `Other` the right operand; each
/// implementation names its own `Result` type, which `vec_any_gt` returns.
pub trait VectorAnyGt<Other> {
815+
type Result;
816+
unsafe fn vec_any_gt(self, b: Other) -> Self::Result;
817+
}
818+
819+
// Generates the integer `VectorAnyGt` / `vec_any_gt` implementations from the
// six `*_any` wrappers, listed as (unsigned, signed) pairs for char, short and
// int. NOTE(review): `impl_vec_any_all!` is defined outside this view — confirm
// which operand-type combinations it emits for each pair.
impl_vec_any_all! { [VectorAnyGt vec_any_gt] (
820+
vcmpgtub_any, vcmpgtsb_any,
821+
vcmpgtuh_any, vcmpgtsh_any,
822+
vcmpgtuw_any, vcmpgtsw_any
823+
) }
824+
825+
/// "Any element greater than" predicate for two `vector_float` operands.
///
/// Calls `vcmpgtfp_p` with selector `1` and returns `true` when the result is
/// non-zero. Under test, `assert_instr` checks that `vcmpgtfp.` is emitted.
// NOTE(review): selector `1` presumably picks the "true in at least one lane"
// CR6 test — confirm against the LLVM AltiVec predicate intrinsics.
#[inline]
826+
#[target_feature(enable = "altivec")]
827+
#[cfg_attr(test, assert_instr(vcmpgtfp.))]
828+
unsafe fn vcmpgtfp_any(a: vector_float, b: vector_float) -> bool {
829+
vcmpgtfp_p(1, a, b) != 0
830+
}
831+
832+
/// `VectorAnyGt` for a pair of `vector_float` operands: the result type is
/// `bool` and the comparison is delegated to `vcmpgtfp_any`.
impl VectorAnyGt<vector_float> for vector_float {
833+
type Result = bool;
834+
#[inline]
835+
unsafe fn vec_any_gt(self, b: vector_float) -> Self::Result {
836+
vcmpgtfp_any(self, b)
837+
}
838+
}
839+
697840
test_impl! { vec_vceil(a: vector_float) -> vector_float [vceil, vrfip / xvrspip ] }
698841

699842
test_impl! { vec_vavgsb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vavgsb, vavgsb ] }
@@ -2019,6 +2162,26 @@ where
20192162
a.vec_any_ge(b)
20202163
}
20212164

2165+
/// Vector All Elements Greater Than
///
/// Generic entry point that forwards to the `sealed::VectorAllGt<U>`
/// implementation for `T`, with `a` as the left operand and `b` as the right.
/// The return type is that implementation's `Result` (`bool` for the
/// `vector_float` implementation).
///
/// # Safety
///
/// The function is compiled with the `altivec` target feature enabled, so it
/// must only be called on a CPU that supports AltiVec.
#[inline]
2167+
#[target_feature(enable = "altivec")]
2168+
pub unsafe fn vec_all_gt<T, U>(a: T, b: U) -> <T as sealed::VectorAllGt<U>>::Result
2169+
where
2170+
T: sealed::VectorAllGt<U>,
2171+
{
2172+
a.vec_all_gt(b)
2173+
}
2174+
2175+
/// Vector Any Element Greater Than
///
/// Generic entry point that forwards to the `sealed::VectorAnyGt<U>`
/// implementation for `T`, with `a` as the left operand and `b` as the right.
/// The return type is that implementation's `Result` (`bool` for the
/// `vector_float` implementation).
///
/// # Safety
///
/// The function is compiled with the `altivec` target feature enabled, so it
/// must only be called on a CPU that supports AltiVec.
#[inline]
2177+
#[target_feature(enable = "altivec")]
2178+
pub unsafe fn vec_any_gt<T, U>(a: T, b: U) -> <T as sealed::VectorAnyGt<U>>::Result
2179+
where
2180+
T: sealed::VectorAnyGt<U>,
2181+
{
2182+
a.vec_any_gt(b)
2183+
}
2184+
20222185
#[cfg(target_endian = "big")]
20232186
mod endian {
20242187
use super::*;
@@ -2529,6 +2692,150 @@ mod tests {
25292692
true
25302693
}
25312694

2695+
// Lane 1 is 0 in both arrays (equal, not greater), so not every lane of the
// first array exceeds the second: expected `false`.
test_vec_2! { test_vec_all_gt_i8_false, vec_all_gt, i8x16 -> bool,
2696+
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2697+
[0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2698+
false
2699+
}
2700+
2701+
// Lane 1 is 0 in both arrays and lane 2 is 0 against 255, so not every lane of
// the first array exceeds the second: expected `false`.
test_vec_2! { test_vec_all_gt_u8_false, vec_all_gt, u8x16 -> bool,
2702+
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2703+
[0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2704+
false
2705+
}
2706+
2707+
// Lane 1 is 0 in both arrays (equal, not greater), so not every lane of the
// first array exceeds the second: expected `false`.
test_vec_2! { test_vec_all_gt_i16_false, vec_all_gt, i16x8 -> bool,
2708+
[1, 0, 0, 0, 0, 0, 0, 0],
2709+
[0, 0, -1, 1, 0, 0, 0, 0],
2710+
false
2711+
}
2712+
2713+
// Lane 1 is 0 in both arrays and lane 2 is 0 against 255, so not every lane of
// the first array exceeds the second: expected `false`.
test_vec_2! { test_vec_all_gt_u16_false, vec_all_gt, u16x8 -> bool,
2714+
[1, 0, 0, 0, 0, 0, 0, 0],
2715+
[0, 0, 255, 1, 0, 0, 0, 0],
2716+
false
2717+
}
2718+
2719+
// Lane 1 is -1 in both arrays and lane 3 is 0 against 1, so not every lane of
// the first array exceeds the second: expected `false`.
test_vec_2! { test_vec_all_gt_i32_false, vec_all_gt, i32x4 -> bool,
2720+
[1, -1, 0, 0],
2721+
[0, -1, 0, 1],
2722+
false
2723+
}
2724+
2725+
// Lane 1 is 255 in both arrays and lanes 2-3 are 0 against 1, so not every
// lane of the first array exceeds the second: expected `false`.
test_vec_2! { test_vec_all_gt_u32_false, vec_all_gt, u32x4 -> bool,
2726+
[1, 255, 0, 0],
2727+
[0, 255, 1, 1],
2728+
false
2729+
}
2730+
2731+
// Every lane of the first array is strictly greater than the matching lane of
// the second, including negative values (-1 > -2, 0 > -1): expected `true`.
test_vec_2! { test_vec_all_gt_i8_true, vec_all_gt, i8x16 -> bool,
2732+
[2, 1, -1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
2733+
[0, 0, -2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
2734+
true
2735+
}
2736+
2737+
// Every lane of the first array is strictly greater than the matching lane of
// the second (1 > 0 throughout, 255 > 254 in lane 1): expected `true`.
test_vec_2! { test_vec_all_gt_u8_true, vec_all_gt, u8x16 -> bool,
2738+
[1, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
2739+
[0, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2740+
true
2741+
}
2742+
2743+
// Every lane of the first array is strictly greater than the matching lane of
// the second (1 > 0, -1 > -5, 42 > 2, then 1 > 0): expected `true`.
test_vec_2! { test_vec_all_gt_i16_true, vec_all_gt, i16x8 -> bool,
2744+
[1, -1, 42, 1, 1, 1, 1, 1],
2745+
[0, -5, 2, 0, 0, 0, 0, 0],
2746+
true
2747+
}
2748+
2749+
// Every lane of the first array is strictly greater than the matching lane of
// the second (42 > 2, 255 > 254, then 1 > 0): expected `true`.
test_vec_2! { test_vec_all_gt_u16_true, vec_all_gt, u16x8 -> bool,
2750+
[42, 255, 1, 1, 1, 1, 1, 1],
2751+
[2, 254, 0, 0, 0, 0, 0, 0],
2752+
true
2753+
}
2754+
2755+
// Every lane of the first array is strictly greater than the matching lane of
// the second (1 > 0, -1 > -2, 1 > 0, 1 > 0): expected `true`.
test_vec_2! { test_vec_all_gt_i32_true, vec_all_gt, i32x4 -> bool,
2756+
[1, -1, 1, 1],
2757+
[0, -2, 0, 0],
2758+
true
2759+
}
2760+
2761+
// Every lane of the first array is strictly greater than the matching lane of
// the second (1 > 0, 255 > 254, 1 > 0, 1 > 0): expected `true`.
test_vec_2! { test_vec_all_gt_u32_true, vec_all_gt, u32x4 -> bool,
2762+
[1, 255, 1, 1],
2763+
[0, 254, 0, 0],
2764+
true
2765+
}
2766+
2767+
// No lane of the first array exceeds the second: lane 0 is 1 against 1 and
// every other lane is 0 against 1, so the expected result is `false`.
test_vec_2! { test_vec_any_gt_i8_false, vec_any_gt, i8x16 -> bool,
2768+
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2769+
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
2770+
false
2771+
}
2772+
2773+
// No lane of the first array exceeds the second (1 < 42, 254 < 255, 0 < 255,
// then 0 < 1 in the remaining lanes), so the expected result is `false`.
test_vec_2! { test_vec_any_gt_u8_false, vec_any_gt, u8x16 -> bool,
2774+
[1, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2775+
[42, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
2776+
false
2777+
}
2778+
2779+
// No lane of the first array exceeds the second (1 < 2, -1 < 0, -2 < -1, then
// 0 < 1 in the remaining lanes), so the expected result is `false`.
test_vec_2! { test_vec_any_gt_i16_false, vec_any_gt, i16x8 -> bool,
2780+
[1, -1, -2, 0, 0, 0, 0, 0],
2781+
[2, 0, -1, 1, 1, 1, 1, 1],
2782+
false
2783+
}
2784+
2785+
// No lane of the first array exceeds the second (1 < 2, 2 < 42, 0 < 255, then
// 0 < 1 in the remaining lanes), so the expected result is `false`.
test_vec_2! { test_vec_any_gt_u16_false, vec_any_gt, u16x8 -> bool,
2786+
[1, 2, 0, 0, 0, 0, 0, 0],
2787+
[2, 42, 255, 1, 1, 1, 1, 1],
2788+
false
2789+
}
2790+
2791+
// No lane of the first array exceeds the second (1 < 2, -1 < 0, 0 < 1, 0 < 1),
// so the expected result is `false`.
test_vec_2! { test_vec_any_gt_i32_false, vec_any_gt, i32x4 -> bool,
2792+
[1, -1, 0, 0],
2793+
[2, 0, 1, 1],
2794+
false
2795+
}
2796+
2797+
// No lane of the first array exceeds the second (1 < 4, 2 < 255, 1 < 4, 0 < 1),
// so the expected result is `false`.
test_vec_2! { test_vec_any_gt_u32_false, vec_any_gt, u32x4 -> bool,
2798+
[1, 2, 1, 0],
2799+
[4, 255, 4, 1],
2800+
false
2801+
}
2802+
2803+
// Only lane 0 (1 against 0) is strictly greater; every other lane is equal.
// One greater lane is enough for "any", so the expected result is `true`.
test_vec_2! { test_vec_any_gt_i8_true, vec_any_gt, i8x16 -> bool,
2804+
[1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2805+
[0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2806+
true
2807+
}
2808+
2809+
// Only lane 0 (1 against 0) is strictly greater; every other lane is equal.
// One greater lane is enough for "any", so the expected result is `true`.
test_vec_2! { test_vec_any_gt_u8_true, vec_any_gt, u8x16 -> bool,
2810+
[1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2811+
[0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2812+
true
2813+
}
2814+
2815+
// Only lane 0 (1 against 0) is strictly greater; every other lane is equal.
// One greater lane is enough for "any", so the expected result is `true`.
test_vec_2! { test_vec_any_gt_i16_true, vec_any_gt, i16x8 -> bool,
2816+
[1, -1, 1, 0, 0, 0, 0, 0],
2817+
[0, -1, 1, 0, 0, 0, 0, 0],
2818+
true
2819+
}
2820+
2821+
// Only lane 0 (1 against 0) is strictly greater; every other lane is equal.
// One greater lane is enough for "any", so the expected result is `true`.
test_vec_2! { test_vec_any_gt_u16_true, vec_any_gt, u16x8 -> bool,
2822+
[1, 255, 1, 0, 0, 0, 0, 0],
2823+
[0, 255, 1, 0, 0, 0, 0, 0],
2824+
true
2825+
}
2826+
2827+
// Only lane 0 (1 against 0) is strictly greater; every other lane is equal.
// One greater lane is enough for "any", so the expected result is `true`.
test_vec_2! { test_vec_any_gt_i32_true, vec_any_gt, i32x4 -> bool,
2828+
[1, -1, 0, 1],
2829+
[0, -1, 0, 1],
2830+
true
2831+
}
2832+
2833+
// Only lane 0 (1 against 0) is strictly greater; every other lane is equal.
// One greater lane is enough for "any", so the expected result is `true`.
test_vec_2! { test_vec_any_gt_u32_true, vec_any_gt, u32x4 -> bool,
2834+
[1, 255, 0, 1],
2835+
[0, 255, 0, 1],
2836+
true
2837+
}
2838+
25322839
#[simd_test(enable = "altivec")]
25332840
unsafe fn test_vec_cmpb() {
25342841
let a: vector_float = transmute(f32x4::new(0.1, 0.5, 0.6, 0.9));

0 commit comments

Comments
 (0)