Skip to content

Commit 3b478a3

Browse files
committed
Merge remote-tracking branch 'xsimd/master' into improving-swizzle
2 parents b0a84e7 + 9f587fc commit 3b478a3

File tree

6 files changed

+115
-52
lines changed

6 files changed

+115
-52
lines changed

include/xsimd/arch/common/xsimd_common_math.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2117,7 +2117,7 @@ namespace xsimd
21172117
template <class Op, class A, class T>
21182118
XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>) noexcept // overload selected when the level tag is std::integral_constant<unsigned, 1>; Op is unused here
21192119
{
2120-
return self.get(0);
2120+
return ::xsimd::kernel::first(self, A {}); // added line: delegates to the arch-tagged first() kernel in place of self.get(0)
21212121
}
21222122

21232123
template <class Op, class A, class T, unsigned Lvl>

include/xsimd/arch/xsimd_emulated.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,13 @@ namespace xsimd
230230
return r;
231231
}
232232

233+
// first: returns element 0 of an emulated batch, read directly from self.data
234+
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size> // N defaults to 8 * sizeof(T) * batch<T, A>::size (presumably the batch width in bits)
235+
T XSIMD_INLINE first(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
236+
{
237+
return self.data[0];
238+
}
239+
233240
#if 0
234241
// count
235242
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>

include/xsimd/arch/xsimd_rvv.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,6 +1335,19 @@ namespace xsimd
13351335
return result;
13361336
}
13371337

1338+
// first: returns whatever detail::rvvmv_lane0 extracts from the batch (by its name, lane 0)
1339+
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
1340+
XSIMD_INLINE T first(batch<T, A> const& arg, requires_arch<rvv>) noexcept
1341+
{
1342+
return detail::rvvmv_lane0(arg);
1343+
}
1344+
1345+
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
1346+
XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& arg, requires_arch<rvv>) noexcept // std::complex overload of first
1347+
{
1348+
return std::complex<T> { detail::rvvmv_lane0(arg.real()), detail::rvvmv_lane0(arg.imag()) }; // real and imaginary parts are extracted separately, then recombined
1349+
}
1350+
13381351
// insert
13391352
template <class A, class T, size_t I, detail::rvv_enable_all_t<T> = 0>
13401353
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<rvv>) noexcept

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,53 @@ namespace xsimd
651651
return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other)));
652652
}
653653

654+
// first: returns the lowest lane of the batch as a scalar (float overload)
655+
template <class A>
656+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
657+
{
658+
return _mm_cvtss_f32(self); // _mm_cvtss_f32 copies the low single-precision element to a scalar
659+
}
660+
661+
template <class A>
662+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept // double overload of first
663+
{
664+
return _mm_cvtsd_f64(self); // _mm_cvtsd_f64 copies the low double-precision element to a scalar
665+
}
666+
667+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type> // participates only when T is an integral type
668+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept // integral overload of first; dispatches on sizeof(T)
669+
{
670+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
671+
{
672+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF); // low 32 bits moved to an int, then masked down to the low byte
673+
}
674+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
675+
{
676+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF); // same move, masked down to the low 16 bits
677+
}
678+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
679+
{
680+
return static_cast<T>(_mm_cvtsi128_si32(self)); // the low 32 bits are the whole element
681+
}
682+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
683+
{
684+
#if defined(__x86_64__)
685+
return static_cast<T>(_mm_cvtsi128_si64(self)); // 64-bit move, used only when __x86_64__ is defined
686+
#else
687+
__m128i m;
688+
_mm_storel_epi64(&m, self); // fallback: store the low 64 bits of self into m
689+
int64_t i;
690+
std::memcpy(&i, &m, sizeof(i)); // copy the first sizeof(i) bytes of m into i
691+
return i;
692+
#endif
693+
}
694+
else
695+
{
696+
assert(false && "unsupported arch/op combination"); // reached only when sizeof(T) is not 1, 2, 4 or 8
697+
return {};
698+
}
699+
}
700+
654701
// from_mask
655702
template <class A>
656703
XSIMD_INLINE batch_bool<float, A> from_mask(batch_bool<float, A> const&, uint64_t mask, requires_arch<sse2>) noexcept
@@ -1247,10 +1294,10 @@ namespace xsimd
12471294
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
12481295
batch<T, A> acc2 = max(acc1, step2);
12491296
if (sizeof(T) == 2)
1250-
return acc2.get(0);
1297+
return first(acc2, A {});
12511298
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
12521299
batch<T, A> acc3 = max(acc2, step3);
1253-
return acc3.get(0);
1300+
return first(acc3, A {});
12541301
}
12551302

12561303
// reduce_min
@@ -1269,10 +1316,10 @@ namespace xsimd
12691316
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
12701317
batch<T, A> acc2 = min(acc1, step2);
12711318
if (sizeof(T) == 2)
1272-
return acc2.get(0);
1319+
return first(acc2, A {});
12731320
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
12741321
batch<T, A> acc3 = min(acc2, step3);
1275-
return acc3.get(0);
1322+
return first(acc3, A {});
12761323
}
12771324

12781325
// rsqrt
@@ -1811,53 +1858,6 @@ namespace xsimd
18111858
{
18121859
return _mm_unpacklo_pd(self, other);
18131860
}
1814-
1815-
// first
1816-
template <class A>
1817-
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
1818-
{
1819-
return _mm_cvtss_f32(self);
1820-
}
1821-
1822-
template <class A>
1823-
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
1824-
{
1825-
return _mm_cvtsd_f64(self);
1826-
}
1827-
1828-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1829-
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
1830-
{
1831-
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1832-
{
1833-
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
1834-
}
1835-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1836-
{
1837-
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
1838-
}
1839-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1840-
{
1841-
return static_cast<T>(_mm_cvtsi128_si32(self));
1842-
}
1843-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1844-
{
1845-
#if defined(__x86_64__)
1846-
return static_cast<T>(_mm_cvtsi128_si64(self));
1847-
#else
1848-
__m128i m;
1849-
_mm_storel_epi64(&m, self);
1850-
int64_t i;
1851-
std::memcpy(&i, &m, sizeof(i));
1852-
return i;
1853-
#endif
1854-
}
1855-
else
1856-
{
1857-
assert(false && "unsupported arch/op combination");
1858-
return {};
1859-
}
1860-
}
18611861
}
18621862
}
18631863

include/xsimd/arch/xsimd_sve.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,13 @@ namespace xsimd
949949
return svsel(index_predicate, broadcast<A, T>(val, sve {}), arg);
950950
}
951951

952+
// first: returns element 0 of the batch by indexing self.data
953+
template <class A, class T, detail::sve_enable_all_t<T> = 0>
954+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sve>) noexcept
955+
{
956+
return self.data[0];
957+
}
958+
952959
// all
953960
template <class A, class T, detail::sve_enable_all_t<T> = 0>
954961
XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept

include/xsimd/arch/xsimd_wasm.hpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,42 @@ namespace xsimd
531531
static_cast<int32_t>(wasm_f32x4_extract_lane(self, 3)));
532532
}
533533
}
534+
// first: returns lane 0 of the batch as a scalar (float overload)
535+
template <class A>
536+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<wasm>) noexcept
537+
{
538+
return wasm_f32x4_extract_lane(self, 0); // lane index 0 of the f32x4 view
539+
}
540+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type> // participates only when T is an integral type
541+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<wasm>) noexcept // integral overload of first; picks the extract intrinsic by sizeof(T)
542+
{
543+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
544+
{
545+
return wasm_i8x16_extract_lane(self, 0); // lane index 0 of the i8x16 view
546+
}
547+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
548+
{
549+
return wasm_i16x8_extract_lane(self, 0); // lane index 0 of the i16x8 view
550+
}
551+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
552+
{
553+
return wasm_i32x4_extract_lane(self, 0); // lane index 0 of the i32x4 view
554+
}
555+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
556+
{
557+
return wasm_i64x2_extract_lane(self, 0); // lane index 0 of the i64x2 view
558+
}
559+
else
560+
{
561+
assert(false && "unsupported arch/op combination"); // reached only when sizeof(T) is not 1, 2, 4 or 8
562+
return {};
563+
}
564+
}
565+
template <class A>
566+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<wasm>) noexcept // double overload of first
567+
{
568+
return wasm_f64x2_extract_lane(self, 0); // lane index 0 of the f64x2 view
569+
}
534570

535571
// floor
536572
template <class A>

0 commit comments

Comments
 (0)