Skip to content

Commit 09d7dec

Browse files
authored
Added <0, 0, 0, 0> <1, 1, 1, 1> and <3, 3, 3, 3> to NeonShuffleFloat32x4 (#1989)
1 parent d2366a7 commit 09d7dec

1 file changed

Lines changed: 18 additions & 0 deletions

File tree

Jolt/Core/ARMNeon.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@
3737
}
3838

3939
// Specializations
40+
template <>
41+
JPH_INLINE float32x4_t NeonShuffleFloat32x4<0, 0, 0, 0>(float32x4_t inV1, float32x4_t inV2)
42+
{
43+
return vdupq_laneq_f32(inV1, 0);
44+
}
45+
4046
template <>
4147
JPH_INLINE float32x4_t NeonShuffleFloat32x4<0, 1, 0, 0>(float32x4_t inV1, float32x4_t inV2)
4248
{
@@ -139,6 +145,12 @@
139145
return vrev64q_f32(inV1);
140146
}
141147

148+
template <>
149+
JPH_INLINE float32x4_t NeonShuffleFloat32x4<1, 1, 1, 1>(float32x4_t inV1, float32x4_t inV2)
150+
{
151+
return vdupq_laneq_f32(inV1, 1);
152+
}
153+
142154
template <>
143155
JPH_INLINE float32x4_t NeonShuffleFloat32x4<1, 1, 2, 2>(float32x4_t inV1, float32x4_t inV2)
144156
{
@@ -308,6 +320,12 @@
308320
return vcombine_f32(zy, zy);
309321
}
310322

323+
template <>
324+
JPH_INLINE float32x4_t NeonShuffleFloat32x4<3, 3, 3, 3>(float32x4_t inV1, float32x4_t inV2)
325+
{
326+
return vdupq_laneq_f32(inV1, 3);
327+
}
328+
311329
// Shuffle a vector
312330
// Float shuffle: forwards vec1 and vec2 to NeonShuffleFloat32x4, passing the four indices as template
// arguments (so they must be compile-time constants).
#define JPH_NEON_SHUFFLE_F32x4(vec1, vec2, index1, index2, index3, index4) NeonShuffleFloat32x4<index1, index2, index3, index4>(vec1, vec2)
313331
// Unsigned integer shuffle: reinterprets both inputs as float32x4_t, runs the same NeonShuffleFloat32x4
// shuffle and reinterprets the result back to uint32x4_t.
// NOTE(review): the extra parentheses around the NeonShuffleFloat32x4 call presumably protect the commas in
// its template argument list in case vreinterpretq_u32_f32 is itself a macro - confirm before removing them.
#define JPH_NEON_SHUFFLE_U32x4(vec1, vec2, index1, index2, index3, index4) vreinterpretq_u32_f32((NeonShuffleFloat32x4<index1, index2, index3, index4>(vreinterpretq_f32_u32(vec1), vreinterpretq_f32_u32(vec2))))

0 commit comments

Comments
 (0)