@@ -731,12 +731,15 @@ pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
731
731
732
732
macro_rules! arm_simd_2 {
733
733
( $name: ident, $type: ty, $simd_fn: ident, $intrarm: ident, $intraarch: ident) => {
734
+ arm_simd_2!( $name, $type, $type, $simd_fn, $intrarm, $intraarch) ;
735
+ } ;
736
+ ( $name: ident, $type: ty, $res: ty, $simd_fn: ident, $intrarm: ident, $intraarch: ident) => {
734
737
#[ inline]
735
738
#[ target_feature( enable = "neon" ) ]
736
739
#[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
737
740
#[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( $intrarm) ) ]
738
741
#[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( $intraarch) ) ]
739
- pub unsafe fn $name( a: $type, b: $type) -> $type {
742
+ pub unsafe fn $name( a: $type, b: $type) -> $res {
740
743
$simd_fn( a, b)
741
744
}
742
745
} ;
@@ -814,52 +817,33 @@ arm_simd_eor!(veor_u64, uint64x1_t);
814
817
arm_simd_eor ! ( veorq_u64, uint64x2_t) ;
815
818
816
819
// Generates one lane-wise equality intrinsic per invocation.
//   $name - name of the generated function
//   $type - type of both operands
//   $res  - return type of the generated function
// Forwards to `arm_simd_2!` with `simd_eq`, passing `cmeq` as the
// instruction name for both the arm and the aarch64 slot.
macro_rules! arm_simd_ceq {
817
- ( $name: ident, $type: ty) => {
820
+ ( $name: ident, $type: ty, $res : ty ) => {
818
821
/// Compare bitwise Equal (vector)
819
- arm_simd_2!( $name, $type, simd_eq, cmeq, cmeq) ;
822
+ arm_simd_2!( $name, $type, $res , simd_eq, cmeq, cmeq) ;
820
823
} ;
821
824
}
822
825
823
- arm_simd_ceq ! ( vceq_s8, int8x8_t) ;
824
- arm_simd_ceq ! ( vceqq_s8, int8x16_t) ;
825
- arm_simd_ceq ! ( vceq_s16, int16x4_t) ;
826
- arm_simd_ceq ! ( vceqq_s16, int16x8_t) ;
827
- arm_simd_ceq ! ( vceq_s32, int32x2_t) ;
828
- arm_simd_ceq ! ( vceqq_s32, int32x4_t) ;
829
- arm_simd_ceq ! ( vceq_u8, uint8x8_t) ;
830
- arm_simd_ceq ! ( vceqq_u8, uint8x16_t) ;
831
- arm_simd_ceq ! ( vceq_u16, uint16x4_t) ;
832
- arm_simd_ceq ! ( vceqq_u16, uint16x8_t) ;
833
- arm_simd_ceq ! ( vceq_u32, uint32x2_t) ;
834
- arm_simd_ceq ! ( vceqq_u32, uint32x4_t) ;
835
-
836
- // arm_simd_ceq!(vceq_f32, float32x2_t); // we have a different return type
837
- #[ inline]
838
- #[ target_feature( enable = "neon" ) ]
839
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
840
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmeq) ) ]
841
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmeq) ) ]
842
- pub unsafe fn vceq_f32 ( a : float32x2_t , b : float32x2_t ) -> uint32x2_t {
843
- simd_eq ( a, b)
844
- }
845
-
846
- // arm_simd_ceq!(vceqq_f32, float32x4_t); we have a different return type
847
- #[ inline]
848
- #[ target_feature( enable = "neon" ) ]
849
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
850
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmeq) ) ]
851
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmeq) ) ]
852
- pub unsafe fn vceqq_f32 ( a : float32x4_t , b : float32x4_t ) -> uint32x4_t {
853
- simd_eq ( a, b)
854
- }
855
-
856
- arm_simd_ceq ! ( vceq_p8, poly8x8_t) ;
857
- arm_simd_ceq ! ( vceqq_p8, poly8x16_t) ;
826
+ arm_simd_ceq ! ( vceq_s8, int8x8_t, uint8x8_t) ;
827
+ arm_simd_ceq ! ( vceqq_s8, int8x16_t, uint8x16_t) ;
828
+ arm_simd_ceq ! ( vceq_s16, int16x4_t, uint16x4_t) ;
829
+ arm_simd_ceq ! ( vceqq_s16, int16x8_t, uint16x8_t) ;
830
+ arm_simd_ceq ! ( vceq_s32, int32x2_t, uint32x2_t) ;
831
+ arm_simd_ceq ! ( vceqq_s32, int32x4_t, uint32x4_t) ;
832
+ arm_simd_ceq ! ( vceq_u8, uint8x8_t, uint8x8_t) ;
833
+ arm_simd_ceq ! ( vceqq_u8, uint8x16_t, uint8x16_t) ;
834
+ arm_simd_ceq ! ( vceq_u16, uint16x4_t, uint16x4_t) ;
835
+ arm_simd_ceq ! ( vceqq_u16, uint16x8_t, uint16x8_t) ;
836
+ arm_simd_ceq ! ( vceq_u32, uint32x2_t, uint32x2_t) ;
837
+ arm_simd_ceq ! ( vceqq_u32, uint32x4_t, uint32x4_t) ;
838
+ arm_simd_2 ! ( vceq_f32, float32x2_t, uint32x2_t, simd_eq, fcmeq, fcmeq) ;
839
+ arm_simd_2 ! ( vceqq_f32, float32x4_t, uint32x4_t, simd_eq, fcmeq, fcmeq) ;
840
+ arm_simd_ceq ! ( vceq_p8, poly8x8_t, uint8x8_t) ; // ACLE declares vceq_p8 / vceqq_p8 with unsigned result vectors, not poly
840
+ arm_simd_ceq ! ( vceqq_p8, poly8x16_t, uint8x16_t) ;
858
842
859
843
// Generates one lane-wise greater-than intrinsic per invocation.
//   $name - name of the generated function
//   $type - type of both operands
//   $res  - return type of the generated function
// Forwards to `arm_simd_2!` with `simd_gt`, passing `cmgt` as the
// instruction name for both the arm and the aarch64 slot.
macro_rules! arm_simd_cgt {
860
- ( $name: ident, $type: ty) => {
844
+ ( $name: ident, $type: ty, $res : ty ) => {
861
845
/// Compare signed Greater than (vector)
862
- arm_simd_2!( $name, $type, simd_gt, cmgt, cmgt) ;
846
+ arm_simd_2!( $name, $type, $res , simd_gt, cmgt, cmgt) ;
863
847
} ;
864
848
}
865
849
@@ -869,41 +853,25 @@ macro_rules! arm_simd_cgtu {
869
853
arm_simd_2!( $name, $type, simd_gt, cmhi, cmhi) ;
870
854
} ;
871
855
}
872
- arm_simd_cgt ! ( vcgt_s8, int8x8_t) ;
873
- arm_simd_cgt ! ( vcgtq_s8, int8x16_t) ;
874
- arm_simd_cgt ! ( vcgt_s16, int16x4_t) ;
875
- arm_simd_cgt ! ( vcgtq_s16, int16x8_t) ;
876
- arm_simd_cgt ! ( vcgt_s32, int32x2_t) ;
877
- arm_simd_cgt ! ( vcgtq_s32, int32x4_t) ;
856
+ arm_simd_cgt ! ( vcgt_s8, int8x8_t, uint8x8_t ) ;
857
+ arm_simd_cgt ! ( vcgtq_s8, int8x16_t, uint8x16_t ) ;
858
+ arm_simd_cgt ! ( vcgt_s16, int16x4_t, uint16x4_t ) ;
859
+ arm_simd_cgt ! ( vcgtq_s16, int16x8_t, uint16x8_t ) ;
860
+ arm_simd_cgt ! ( vcgt_s32, int32x2_t, uint32x2_t ) ;
861
+ arm_simd_cgt ! ( vcgtq_s32, int32x4_t, uint32x4_t ) ;
878
862
arm_simd_cgtu ! ( vcgt_u8, uint8x8_t) ;
879
863
arm_simd_cgtu ! ( vcgtq_u8, uint8x16_t) ;
880
864
arm_simd_cgtu ! ( vcgt_u16, uint16x4_t) ;
881
865
arm_simd_cgtu ! ( vcgtq_u16, uint16x8_t) ;
882
866
arm_simd_cgtu ! ( vcgt_u32, uint32x2_t) ;
883
867
arm_simd_cgtu ! ( vcgtq_u32, uint32x4_t) ;
884
-
885
- #[ inline]
886
- #[ target_feature( enable = "neon" ) ]
887
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
888
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmgt) ) ]
889
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmgt) ) ]
890
- pub unsafe fn vcgt_f32 ( a : float32x2_t , b : float32x2_t ) -> uint32x2_t {
891
- simd_gt ( a, b)
892
- }
893
-
894
- #[ inline]
895
- #[ target_feature( enable = "neon" ) ]
896
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
897
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmgt) ) ]
898
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmgt) ) ]
899
- pub unsafe fn vcgtq_f32 ( a : float32x4_t , b : float32x4_t ) -> uint32x4_t {
900
- simd_gt ( a, b)
901
- }
868
+ arm_simd_2 ! ( vcgt_f32, float32x2_t, uint32x2_t, simd_gt, fcmgt, fcmgt) ;
869
+ arm_simd_2 ! ( vcgtq_f32, float32x4_t, uint32x4_t, simd_gt, fcmgt, fcmgt) ;
902
870
903
871
// Generates one lane-wise less-than intrinsic per invocation.
//   $name - name of the generated function
//   $type - type of both operands
//   $res  - return type of the generated function
// Forwards to `arm_simd_2!` with `simd_lt`.
// NOTE(review): the instruction name passed is `cmgt`, not a "less than"
// mnemonic — presumably the comparison is emitted with swapped operands; confirm.
macro_rules! arm_simd_clt {
904
- ( $name: ident, $type: ty) => {
872
+ ( $name: ident, $type: ty, $res : ty ) => {
905
873
/// Compare signed Less than (vector)
906
- arm_simd_2!( $name, $type, simd_lt, cmgt, cmgt) ;
874
+ arm_simd_2!( $name, $type, $res , simd_lt, cmgt, cmgt) ;
907
875
} ;
908
876
}
909
877
@@ -913,41 +881,25 @@ macro_rules! arm_simd_cltu {
913
881
arm_simd_2!( $name, $type, simd_lt, cmhi, cmhi) ;
914
882
} ;
915
883
}
916
- arm_simd_clt ! ( vclt_s8, int8x8_t) ;
917
- arm_simd_clt ! ( vcltq_s8, int8x16_t) ;
918
- arm_simd_clt ! ( vclt_s16, int16x4_t) ;
919
- arm_simd_clt ! ( vcltq_s16, int16x8_t) ;
920
- arm_simd_clt ! ( vclt_s32, int32x2_t) ;
921
- arm_simd_clt ! ( vcltq_s32, int32x4_t) ;
884
+ arm_simd_clt ! ( vclt_s8, int8x8_t, uint8x8_t ) ;
885
+ arm_simd_clt ! ( vcltq_s8, int8x16_t, uint8x16_t ) ;
886
+ arm_simd_clt ! ( vclt_s16, int16x4_t, uint16x4_t ) ;
887
+ arm_simd_clt ! ( vcltq_s16, int16x8_t, uint16x8_t ) ;
888
+ arm_simd_clt ! ( vclt_s32, int32x2_t, uint32x2_t ) ;
889
+ arm_simd_clt ! ( vcltq_s32, int32x4_t, uint32x4_t ) ;
922
890
arm_simd_cltu ! ( vclt_u8, uint8x8_t) ;
923
891
arm_simd_cltu ! ( vcltq_u8, uint8x16_t) ;
924
892
arm_simd_cltu ! ( vclt_u16, uint16x4_t) ;
925
893
arm_simd_cltu ! ( vcltq_u16, uint16x8_t) ;
926
894
arm_simd_cltu ! ( vclt_u32, uint32x2_t) ;
927
895
arm_simd_cltu ! ( vcltq_u32, uint32x4_t) ;
928
-
929
- #[ inline]
930
- #[ target_feature( enable = "neon" ) ]
931
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
932
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmgt) ) ]
933
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmgt) ) ]
934
- pub unsafe fn vclt_f32 ( a : float32x2_t , b : float32x2_t ) -> uint32x2_t {
935
- simd_lt ( a, b)
936
- }
937
-
938
- #[ inline]
939
- #[ target_feature( enable = "neon" ) ]
940
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
941
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmgt) ) ]
942
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmgt) ) ]
943
- pub unsafe fn vcltq_f32 ( a : float32x4_t , b : float32x4_t ) -> uint32x4_t {
944
- simd_lt ( a, b)
945
- }
896
+ arm_simd_2 ! ( vclt_f32, float32x2_t, uint32x2_t, simd_lt, fcmgt, fcmgt) ;
897
+ arm_simd_2 ! ( vcltq_f32, float32x4_t, uint32x4_t, simd_lt, fcmgt, fcmgt) ;
946
898
947
899
// Generates one lane-wise greater-than-or-equal intrinsic per invocation.
//   $name - name of the generated function
//   $type - type of both operands
//   $res  - return type of the generated function
// Forwards to `arm_simd_2!` with `simd_ge`, passing `cmge` as the
// instruction name for both the arm and the aarch64 slot.
macro_rules! arm_simd_cge {
948
- ( $name: ident, $type: ty) => {
900
+ ( $name: ident, $type: ty, $res : ty ) => {
949
901
/// Compare signed Greater than or Equal (vector)
950
- arm_simd_2!( $name, $type, simd_ge, cmge, cmge) ;
902
+ arm_simd_2!( $name, $type, $res , simd_ge, cmge, cmge) ;
951
903
} ;
952
904
}
953
905
@@ -957,41 +909,25 @@ macro_rules! arm_simd_cgeu {
957
909
arm_simd_2!( $name, $type, simd_ge, cmhs, cmhs) ;
958
910
} ;
959
911
}
960
- arm_simd_cge ! ( vcge_s8, int8x8_t) ;
961
- arm_simd_cge ! ( vcgeq_s8, int8x16_t) ;
962
- arm_simd_cge ! ( vcge_s16, int16x4_t) ;
963
- arm_simd_cge ! ( vcgeq_s16, int16x8_t) ;
964
- arm_simd_cge ! ( vcge_s32, int32x2_t) ;
965
- arm_simd_cge ! ( vcgeq_s32, int32x4_t) ;
912
+ arm_simd_cge ! ( vcge_s8, int8x8_t, uint8x8_t ) ;
913
+ arm_simd_cge ! ( vcgeq_s8, int8x16_t, uint8x16_t ) ;
914
+ arm_simd_cge ! ( vcge_s16, int16x4_t, uint16x4_t ) ;
915
+ arm_simd_cge ! ( vcgeq_s16, int16x8_t, uint16x8_t ) ;
916
+ arm_simd_cge ! ( vcge_s32, int32x2_t, uint32x2_t ) ;
917
+ arm_simd_cge ! ( vcgeq_s32, int32x4_t, uint32x4_t ) ;
966
918
arm_simd_cgeu ! ( vcge_u8, uint8x8_t) ;
967
919
arm_simd_cgeu ! ( vcgeq_u8, uint8x16_t) ;
968
920
arm_simd_cgeu ! ( vcge_u16, uint16x4_t) ;
969
921
arm_simd_cgeu ! ( vcgeq_u16, uint16x8_t) ;
970
922
arm_simd_cgeu ! ( vcge_u32, uint32x2_t) ;
971
923
arm_simd_cgeu ! ( vcgeq_u32, uint32x4_t) ;
972
-
973
- #[ inline]
974
- #[ target_feature( enable = "neon" ) ]
975
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
976
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmge) ) ]
977
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmge) ) ]
978
- pub unsafe fn vcge_f32 ( a : float32x2_t , b : float32x2_t ) -> uint32x2_t {
979
- simd_ge ( a, b)
980
- }
981
-
982
- #[ inline]
983
- #[ target_feature( enable = "neon" ) ]
984
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
985
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmge) ) ]
986
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmge) ) ]
987
- pub unsafe fn vcgeq_f32 ( a : float32x4_t , b : float32x4_t ) -> uint32x4_t {
988
- simd_ge ( a, b)
989
- }
924
+ arm_simd_2 ! ( vcge_f32, float32x2_t, uint32x2_t, simd_ge, fcmge, fcmge) ;
925
+ arm_simd_2 ! ( vcgeq_f32, float32x4_t, uint32x4_t, simd_ge, fcmge, fcmge) ;
990
926
991
927
// Generates one lane-wise less-than-or-equal intrinsic per invocation.
//   $name - name of the generated function
//   $type - type of both operands
//   $res  - return type of the generated function
// Forwards to `arm_simd_2!` with `simd_le`.
// NOTE(review): the instruction name passed is `cmge`, not a "less than or
// equal" mnemonic — presumably the comparison is emitted with swapped operands; confirm.
macro_rules! arm_simd_cle {
992
- ( $name: ident, $type: ty) => {
928
+ ( $name: ident, $type: ty, $res : ty ) => {
993
929
/// Compare signed Less than or Equal (vector)
994
- arm_simd_2!( $name, $type, simd_le, cmge, cmge) ;
930
+ arm_simd_2!( $name, $type, $res , simd_le, cmge, cmge) ;
995
931
} ;
996
932
}
997
933
@@ -1001,36 +937,20 @@ macro_rules! arm_simd_cleu {
1001
937
arm_simd_2!( $name, $type, simd_le, cmhs, cmhs) ;
1002
938
} ;
1003
939
}
1004
- arm_simd_cle ! ( vcle_s8, int8x8_t) ;
1005
- arm_simd_cle ! ( vcleq_s8, int8x16_t) ;
1006
- arm_simd_cle ! ( vcle_s16, int16x4_t) ;
1007
- arm_simd_cle ! ( vcleq_s16, int16x8_t) ;
1008
- arm_simd_cle ! ( vcle_s32, int32x2_t) ;
1009
- arm_simd_cle ! ( vcleq_s32, int32x4_t) ;
940
+ arm_simd_cle ! ( vcle_s8, int8x8_t, uint8x8_t ) ;
941
+ arm_simd_cle ! ( vcleq_s8, int8x16_t, uint8x16_t ) ;
942
+ arm_simd_cle ! ( vcle_s16, int16x4_t, uint16x4_t ) ;
943
+ arm_simd_cle ! ( vcleq_s16, int16x8_t, uint16x8_t ) ;
944
+ arm_simd_cle ! ( vcle_s32, int32x2_t, uint32x2_t ) ;
945
+ arm_simd_cle ! ( vcleq_s32, int32x4_t, uint32x4_t ) ;
1010
946
arm_simd_cleu ! ( vcle_u8, uint8x8_t) ;
1011
947
arm_simd_cleu ! ( vcleq_u8, uint8x16_t) ;
1012
948
arm_simd_cleu ! ( vcle_u16, uint16x4_t) ;
1013
949
arm_simd_cleu ! ( vcleq_u16, uint16x8_t) ;
1014
950
arm_simd_cleu ! ( vcle_u32, uint32x2_t) ;
1015
951
arm_simd_cleu ! ( vcleq_u32, uint32x4_t) ;
1016
-
1017
- #[ inline]
1018
- #[ target_feature( enable = "neon" ) ]
1019
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
1020
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmge) ) ]
1021
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmge) ) ]
1022
- pub unsafe fn vcle_f32 ( a : float32x2_t , b : float32x2_t ) -> uint32x2_t {
1023
- simd_le ( a, b)
1024
- }
1025
-
1026
- #[ inline]
1027
- #[ target_feature( enable = "neon" ) ]
1028
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
1029
- #[ cfg_attr( all( test, target_arch = "arm" ) , assert_instr( fcmge) ) ]
1030
- #[ cfg_attr( all( test, target_arch = "aarch64" ) , assert_instr( fcmge) ) ]
1031
- pub unsafe fn vcleq_f32 ( a : float32x4_t , b : float32x4_t ) -> uint32x4_t {
1032
- simd_le ( a, b)
1033
- }
952
+ arm_simd_2 ! ( vcle_f32, float32x2_t, uint32x2_t, simd_le, fcmge, fcmge) ;
953
+ arm_simd_2 ! ( vcleq_f32, float32x4_t, uint32x4_t, simd_le, fcmge, fcmge) ;
1034
954
1035
955
/// Folding minimum of adjacent pairs
1036
956
#[ inline]
0 commit comments