Skip to content

Commit b1d1eb2

Browse files
Use xsimd::first in reduction implementation
Provide specializations of `first` for all remaining architectures.
1 parent 5bb5320 commit b1d1eb2

File tree

6 files changed: 115 additions and 52 deletions.

include/xsimd/arch/common/xsimd_common_math.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2117,7 +2117,7 @@ namespace xsimd
21172117
template <class Op, class A, class T>
21182118
XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>) noexcept
21192119
{
2120-
return self.get(0);
2120+
return ::xsimd::kernel::first(self, A {});
21212121
}
21222122

21232123
template <class Op, class A, class T, unsigned Lvl>

include/xsimd/arch/xsimd_emulated.hpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -230,6 +230,13 @@ namespace xsimd
230230
return r;
231231
}
232232

233+
// first
234+
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
235+
T XSIMD_INLINE first(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
236+
{
237+
return self.data[0];
238+
}
239+
233240
#if 0
234241
// count
235242
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>

include/xsimd/arch/xsimd_rvv.hpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1335,6 +1335,19 @@ namespace xsimd
13351335
return result;
13361336
}
13371337

1338+
// first
1339+
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
1340+
XSIMD_INLINE T first(batch<T, A> const& arg, requires_arch<rvv>) noexcept
1341+
{
1342+
return detail::rvvmv_lane0(arg);
1343+
}
1344+
1345+
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
1346+
XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& arg, requires_arch<rvv>) noexcept
1347+
{
1348+
return std::complex<T> { detail::rvvmv_lane0(arg.real()), detail::rvvmv_lane0(arg.imag()) };
1349+
}
1350+
13381351
// insert
13391352
template <class A, class T, size_t I, detail::rvv_enable_all_t<T> = 0>
13401353
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<rvv>) noexcept

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 51 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,53 @@ namespace xsimd
673673
return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other)));
674674
}
675675

676+
// first
677+
template <class A>
678+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
679+
{
680+
return _mm_cvtss_f32(self);
681+
}
682+
683+
template <class A>
684+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
685+
{
686+
return _mm_cvtsd_f64(self);
687+
}
688+
689+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
690+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
691+
{
692+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
693+
{
694+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
695+
}
696+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
697+
{
698+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
699+
}
700+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
701+
{
702+
return static_cast<T>(_mm_cvtsi128_si32(self));
703+
}
704+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
705+
{
706+
#if defined(__x86_64__)
707+
return static_cast<T>(_mm_cvtsi128_si64(self));
708+
#else
709+
__m128i m;
710+
_mm_storel_epi64(&m, self);
711+
int64_t i;
712+
std::memcpy(&i, &m, sizeof(i));
713+
return i;
714+
#endif
715+
}
716+
else
717+
{
718+
assert(false && "unsupported arch/op combination");
719+
return {};
720+
}
721+
}
722+
676723
// from_mask
677724
template <class A>
678725
XSIMD_INLINE batch_bool<float, A> from_mask(batch_bool<float, A> const&, uint64_t mask, requires_arch<sse2>) noexcept
@@ -1269,10 +1316,10 @@ namespace xsimd
12691316
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
12701317
batch<T, A> acc2 = max(acc1, step2);
12711318
if (sizeof(T) == 2)
1272-
return acc2.get(0);
1319+
return first(acc2, A {});
12731320
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
12741321
batch<T, A> acc3 = max(acc2, step3);
1275-
return acc3.get(0);
1322+
return first(acc3, A {});
12761323
}
12771324

12781325
// reduce_min
@@ -1291,10 +1338,10 @@ namespace xsimd
12911338
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
12921339
batch<T, A> acc2 = min(acc1, step2);
12931340
if (sizeof(T) == 2)
1294-
return acc2.get(0);
1341+
return first(acc2, A {});
12951342
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
12961343
batch<T, A> acc3 = min(acc2, step3);
1297-
return acc3.get(0);
1344+
return first(acc3, A {});
12981345
}
12991346

13001347
// rsqrt
@@ -1783,53 +1830,6 @@ namespace xsimd
17831830
return _mm_unpacklo_pd(self, other);
17841831
}
17851832

1786-
// first
1787-
template <class A>
1788-
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
1789-
{
1790-
return _mm_cvtss_f32(self);
1791-
}
1792-
1793-
template <class A>
1794-
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
1795-
{
1796-
return _mm_cvtsd_f64(self);
1797-
}
1798-
1799-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1800-
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
1801-
{
1802-
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1803-
{
1804-
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
1805-
}
1806-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1807-
{
1808-
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
1809-
}
1810-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1811-
{
1812-
return static_cast<T>(_mm_cvtsi128_si32(self));
1813-
}
1814-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1815-
{
1816-
#if defined(__x86_64__)
1817-
return static_cast<T>(_mm_cvtsi128_si64(self));
1818-
#else
1819-
__m128i m;
1820-
_mm_storel_epi64(&m, self);
1821-
int64_t i;
1822-
std::memcpy(&i, &m, sizeof(i));
1823-
return i;
1824-
#endif
1825-
}
1826-
else
1827-
{
1828-
assert(false && "unsupported arch/op combination");
1829-
return {};
1830-
}
1831-
}
1832-
18331833
}
18341834
}
18351835

include/xsimd/arch/xsimd_sve.hpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -949,6 +949,13 @@ namespace xsimd
949949
return svsel(index_predicate, broadcast<A, T>(val, sve {}), arg);
950950
}
951951

952+
// first
953+
template <class A, class T, detail::sve_enable_all_t<T> = 0>
954+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sve>) noexcept
955+
{
956+
return self.data[0];
957+
}
958+
952959
// all
953960
template <class A, class T, detail::sve_enable_all_t<T> = 0>
954961
XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept

include/xsimd/arch/xsimd_wasm.hpp

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,42 @@ namespace xsimd
531531
static_cast<int32_t>(wasm_f32x4_extract_lane(self, 3)));
532532
}
533533
}
534+
// first
535+
template <class A>
536+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<wasm>) noexcept
537+
{
538+
return wasm_f32x4_extract_lane(self, 0);
539+
}
540+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
541+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<wasm>) noexcept
542+
{
543+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
544+
{
545+
return wasm_i8x16_extract_lane(self, 0);
546+
}
547+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
548+
{
549+
return wasm_i16x8_extract_lane(self, 0);
550+
}
551+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
552+
{
553+
return wasm_i32x4_extract_lane(self, 0);
554+
}
555+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
556+
{
557+
return wasm_i64x2_extract_lane(self, 0);
558+
}
559+
else
560+
{
561+
assert(false && "unsupported arch/op combination");
562+
return {};
563+
}
564+
}
565+
template <class A>
566+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<wasm>) noexcept
567+
{
568+
return wasm_f64x2_extract_lane(self, 0);
569+
}
534570

535571
// floor
536572
template <class A>

0 commit comments

Comments
 (0)