Skip to content

Commit cb4c312

Browse files
committed
Do not use vpmin/max to reduce vector masks smaller than 128-bit on ARM+NEON
1 parent 62a32e7 commit cb4c312

File tree

1 file changed

+0
-120
lines changed
  • src/codegen/reductions/mask

1 file changed

+0
-120
lines changed

src/codegen/reductions/mask/arm.rs

Lines changed: 0 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,121 +1,5 @@
11
//! Mask reductions implementation for `arm` targets
22
3-
/// ARM m32x2 v7+neon implementation
///
/// Implements `All` and `Any` for the two-lane mask type `$id` using a single
/// pairwise-minimum (`$vpmin`) or pairwise-maximum (`$vpmax`) step and then
/// reading lane 0 of the result.
///
/// The mask is passed as *both* operands of the pairwise intrinsic. Lane 0 of
/// the result only depends on the first operand, so the reduction is
/// unchanged, but no uninitialized vector is ever materialized.
///
/// # Safety
///
/// The generated methods are compiled with
/// `#[target_feature(enable = "v7,neon")]`; callers must only invoke them on
/// CPUs that support those features.
macro_rules! arm_m32x2_v7_neon_impl {
    ($id:ident, $vpmin:ident, $vpmax:ident) => {
        impl All for $id {
            #[inline]
            #[target_feature(enable = "v7,neon")]
            unsafe fn all(self) -> bool {
                use crate::arch::arm::$vpmin;
                use crate::mem::transmute;
                // Reinterpret the mask as the vector type `$vpmin` expects.
                let lanes = transmute(self);
                // pmin((a, b), (a, b)) => (min(a, b), min(a, b)).0 => min(a, b)
                let tmp: $id = transmute($vpmin(lanes, lanes));
                tmp.extract(0)
            }
        }
        impl Any for $id {
            #[inline]
            #[target_feature(enable = "v7,neon")]
            unsafe fn any(self) -> bool {
                use crate::arch::arm::$vpmax;
                use crate::mem::transmute;
                // Reinterpret the mask as the vector type `$vpmax` expects.
                let lanes = transmute(self);
                // pmax((a, b), (a, b)) => (max(a, b), max(a, b)).0 => max(a, b)
                let tmp: $id = transmute($vpmax(lanes, lanes));
                tmp.extract(0)
            }
        }
    };
}
32-
33-
/// ARM m16x4 v7+neon implementation
///
/// Implements `All` and `Any` for the four-lane mask type `$id` using two
/// pairwise-minimum (`$vpmin`) or pairwise-maximum (`$vpmax`) steps and then
/// reading lane 0 of the result.
///
/// Each step passes the current vector as *both* operands of the pairwise
/// intrinsic. Lane 0 of the final result only depends on the first operand of
/// each step, so the reduction is unchanged, but no uninitialized vector is
/// ever materialized.
///
/// # Safety
///
/// The generated methods are compiled with
/// `#[target_feature(enable = "v7,neon")]`; callers must only invoke them on
/// CPUs that support those features.
macro_rules! arm_m16x4_v7_neon_impl {
    ($id:ident, $vpmin:ident, $vpmax:ident) => {
        impl All for $id {
            #[inline]
            #[target_feature(enable = "v7,neon")]
            unsafe fn all(self) -> bool {
                use crate::arch::arm::$vpmin;
                use crate::mem::transmute;
                // Reinterpret the mask as the vector type `$vpmin` expects.
                let lanes = transmute(self);
                // tmp = pmin((a, b, c, d), (a, b, c, d))
                //     => (min(a, b), min(c, d), min(a, b), min(c, d))
                let tmp = $vpmin(lanes, lanes);
                // pmin(tmp, tmp).0 => min(a, b, c, d)
                let tmp: $id = transmute($vpmin(tmp, tmp));
                tmp.extract(0)
            }
        }
        impl Any for $id {
            #[inline]
            #[target_feature(enable = "v7,neon")]
            unsafe fn any(self) -> bool {
                use crate::arch::arm::$vpmax;
                use crate::mem::transmute;
                // Reinterpret the mask as the vector type `$vpmax` expects.
                let lanes = transmute(self);
                // tmp = pmax((a, b, c, d), (a, b, c, d))
                //     => (max(a, b), max(c, d), max(a, b), max(c, d))
                let tmp = $vpmax(lanes, lanes);
                // pmax(tmp, tmp).0 => max(a, b, c, d)
                let tmp: $id = transmute($vpmax(tmp, tmp));
                tmp.extract(0)
            }
        }
    };
}
64-
65-
/// ARM m8x8 v7+neon implementation
///
/// Implements `All` and `Any` for the eight-lane mask type `$id` using three
/// pairwise-minimum (`$vpmin`) or pairwise-maximum (`$vpmax`) steps and then
/// reading lane 0 of the result.
///
/// Each step passes the current vector as *both* operands of the pairwise
/// intrinsic. Lane 0 of the final result only depends on the first operand of
/// each step, so the reduction is unchanged, but no uninitialized vector is
/// ever materialized.
///
/// # Safety
///
/// The generated methods are compiled with
/// `#[target_feature(enable = "v7,neon")]`; callers must only invoke them on
/// CPUs that support those features.
macro_rules! arm_m8x8_v7_neon_impl {
    ($id:ident, $vpmin:ident, $vpmax:ident) => {
        impl All for $id {
            #[inline]
            #[target_feature(enable = "v7,neon")]
            unsafe fn all(self) -> bool {
                use crate::arch::arm::$vpmin;
                use crate::mem::transmute;
                // Reinterpret the mask as the vector type `$vpmin` expects.
                let lanes = transmute(self);
                // tmp = pmin(
                //     (a, b, c, d, e, f, g, h),
                //     (a, b, c, d, e, f, g, h)
                // ) => (ab, cd, ef, gh, ab, cd, ef, gh)
                // where `xy` denotes min(x, y).
                let tmp = $vpmin(lanes, lanes);
                // tmp = pmin(tmp, tmp)
                //     => (abcd, efgh, abcd, efgh, abcd, efgh, abcd, efgh)
                let tmp = $vpmin(tmp, tmp);
                // pmin(tmp, tmp).0 => min(a, b, c, d, e, f, g, h)
                let tmp: $id = transmute($vpmin(tmp, tmp));
                tmp.extract(0)
            }
        }
        impl Any for $id {
            #[inline]
            #[target_feature(enable = "v7,neon")]
            unsafe fn any(self) -> bool {
                use crate::arch::arm::$vpmax;
                use crate::mem::transmute;
                // Reinterpret the mask as the vector type `$vpmax` expects.
                let lanes = transmute(self);
                // tmp = pmax(
                //     (a, b, c, d, e, f, g, h),
                //     (a, b, c, d, e, f, g, h)
                // ) => (ab, cd, ef, gh, ab, cd, ef, gh)
                // where `xy` denotes max(x, y).
                let tmp = $vpmax(lanes, lanes);
                // tmp = pmax(tmp, tmp)
                //     => (abcd, efgh, abcd, efgh, abcd, efgh, abcd, efgh)
                let tmp = $vpmax(tmp, tmp);
                // pmax(tmp, tmp).0 => max(a, b, c, d, e, f, g, h)
                let tmp: $id = transmute($vpmax(tmp, tmp));
                tmp.extract(0)
            }
        }
    };
}
118-
1193
/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with
1204
/// more than two elements.
1215
macro_rules! arm_128_v7_neon_impl {
@@ -161,10 +45,6 @@ macro_rules! arm_128_v7_neon_impl {
16145

16246
/// Mask reduction implementation for `arm` targets
16347
macro_rules! impl_mask_reductions {
164-
// 64-bit wide masks
165-
(m8x8) => { arm_m8x8_v7_neon_impl!(m8x8, vpmin_u8, vpmax_u8); };
166-
(m16x4) => { arm_m16x4_v7_neon_impl!(m16x4, vpmin_u16, vpmax_u16); };
167-
(m32x2) => { arm_m32x2_v7_neon_impl!(m32x2, vpmin_u32, vpmax_u32); };
16848
// 128-bit wide masks
16949
(m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); };
17050
(m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); };

0 commit comments

Comments
 (0)