diff --git a/include/xsimd/arch/common/xsimd_common_memory.hpp b/include/xsimd/arch/common/xsimd_common_memory.hpp
index 773177233..4ad148a6f 100644
--- a/include/xsimd/arch/common/xsimd_common_memory.hpp
+++ b/include/xsimd/arch/common/xsimd_common_memory.hpp
@@ -260,6 +260,25 @@ namespace xsimd
             return buffer[i];
         }
 
+        // first
+        template <class A, class T>
+        XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            return get(self, 0, common {});
+        }
+
+        template <class A, class T>
+        XSIMD_INLINE T first(batch_bool<T, A> const& self, requires_arch<common>) noexcept
+        {
+            return first(batch<T, A>(self), A {});
+        }
+
+        template <class A, class T>
+        XSIMD_INLINE auto first(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept -> typename batch<std::complex<T>, A>::value_type
+        {
+            return { first(self.real(), A {}), first(self.imag(), A {}) };
+        }
+
         // load
         template <class A, class T>
         XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<common>) noexcept
diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp
index 883f055be..76eed49c2 100644
--- a/include/xsimd/arch/xsimd_avx.hpp
+++ b/include/xsimd/arch/xsimd_avx.hpp
@@ -20,7 +20,6 @@
 namespace xsimd
 {
-
     namespace kernel
     {
         using namespace types;
 
@@ -1861,6 +1860,46 @@ namespace xsimd
             auto hi = _mm256_unpackhi_pd(self, other);
             return _mm256_insertf128_pd(lo, _mm256_castpd256_pd128(hi), 1);
         }
+
+        // first
+        template <class A>
+        XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx>) noexcept
+        {
+            return _mm256_cvtss_f32(self);
+        }
+
+        template <class A>
+        XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx>) noexcept
+        {
+            return _mm256_cvtsd_f64(self);
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFFFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return static_cast<T>(_mm256_cvtsi256_si32(self));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                batch<T, sse4_2> low = _mm256_castsi256_si128(self);
+                return first(low, sse4_2 {});
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
     }
 }
 
diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp
index 26947dffc..a2fc88616 100644
--- a/include/xsimd/arch/xsimd_avx512f.hpp
+++ b/include/xsimd/arch/xsimd_avx512f.hpp
@@ -2339,6 +2339,46 @@
                                          2));
         }
 
+        // first
+        template <class A>
+        XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_cvtss_f32(self);
+        }
+
+        template <class A>
+        XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_cvtsd_f64(self);
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx512f>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFFFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return static_cast<T>(_mm512_cvtsi512_si32(self));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                batch<T, sse4_2> low = _mm512_castsi512_si128(self);
+                return first(low, sse4_2 {});
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
+
     }
 }
diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp
index 94d7af5d4..59a39363b 100644
--- a/include/xsimd/arch/xsimd_sse2.hpp
+++ b/include/xsimd/arch/xsimd_sse2.hpp
@@ -1782,6 +1782,54 @@
         {
             return _mm_unpacklo_pd(self, other);
         }
+
+        // first
+        template <class A>
+        XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
+        {
+            return _mm_cvtss_f32(self);
+        }
+
+        template <class A>
+        XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
+        {
+            return _mm_cvtsd_f64(self);
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return static_cast<T>(_mm_cvtsi128_si32(self));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+#if defined(__x86_64__)
+                return static_cast<T>(_mm_cvtsi128_si64(self));
+#else
+                __m128i m;
+                _mm_storel_epi64(&m, self);
+                int64_t i;
+                std::memcpy(&i, &m, sizeof(i));
+                return i;
+#endif
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
+
     }
 }
diff --git a/include/xsimd/types/xsimd_batch.hpp b/include/xsimd/types/xsimd_batch.hpp
index b54d84aae..0a6a07153 100644
--- a/include/xsimd/types/xsimd_batch.hpp
+++ b/include/xsimd/types/xsimd_batch.hpp
@@ -159,6 +159,8 @@
 
         XSIMD_INLINE T get(std::size_t i) const noexcept;
 
+        XSIMD_INLINE T first() const noexcept;
+
         // comparison operators. Defined as friend to enable automatic
         // conversion of parameters from scalar to batch, at the cost of using a
         // proxy implementation from details::.
@@ -314,6 +316,8 @@
 
         XSIMD_INLINE bool get(std::size_t i) const noexcept;
 
+        XSIMD_INLINE bool first() const noexcept;
+
         // mask operations
         XSIMD_INLINE uint64_t mask() const noexcept;
         XSIMD_INLINE static batch_bool from_mask(uint64_t mask) noexcept;
@@ -405,6 +409,8 @@
 
         XSIMD_INLINE value_type get(std::size_t i) const noexcept;
 
+        XSIMD_INLINE value_type first() const noexcept;
+
 #ifdef XSIMD_ENABLE_XTL_COMPLEX
         // xtl-related methods
         template <bool i3ec>
@@ -693,6 +699,16 @@
         return kernel::get(*this, i, A {});
     }
 
+    /**
+     * Retrieve the first scalar element in this batch.
+     */
+    template <class T, class A>
+    XSIMD_INLINE T batch<T, A>::first() const noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::first(*this, A {});
+    }
+
     /******************************
      * batch comparison operators *
      ******************************/
@@ -1005,6 +1021,13 @@
         return kernel::get(*this, i, A {});
     }
 
+    template <class T, class A>
+    XSIMD_INLINE bool batch_bool<T, A>::first() const noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::first(*this, A {});
+    }
+
     /***********************************
      * batch_bool comparison operators *
      ***********************************/
@@ -1248,6 +1271,13 @@
         return kernel::get(*this, i, A {});
     }
 
+    template <class T, class A>
+    XSIMD_INLINE auto batch<std::complex<T>, A>::first() const noexcept -> value_type
+    {
+        detail::static_check_supported_config<std::complex<T>, A>();
+        return kernel::first(*this, A {});
+    }
+
     /******************************************
      * batch<complex> xtl-related methods     *
     ******************************************/
diff --git a/test/test_batch.cpp b/test/test_batch.cpp
index 05c13b4b8..394779072 100644
--- a/test/test_batch.cpp
+++ b/test/test_batch.cpp
@@ -152,6 +152,12 @@ struct batch_test
         }
     }
 
+    void test_first_element() const
+    {
+        batch_type res = batch_lhs();
+        CHECK_EQ(res.first(), lhs[0]);
+    }
+
     void test_arithmetic() const
     {
         // +batch
@@ -934,6 +940,11 @@
         Test.test_access_operator();
     }
 
+    SUBCASE("first element")
+    {
+        Test.test_first_element();
+    }
+
     SUBCASE("arithmetic")
     {
         Test.test_arithmetic();
diff --git a/test/test_batch_complex.cpp b/test/test_batch_complex.cpp
index e06b31807..47ed9ca5b 100644
--- a/test/test_batch_complex.cpp
+++ b/test/test_batch_complex.cpp
@@ -176,6 +176,12 @@
         }
     }
 
+    void test_first_element() const
+    {
+        batch_type res = batch_lhs();
+        CHECK_EQ(res.first(), lhs[0]);
+    }
+
     void test_arithmetic() const
     {
         // +batch
@@ -675,6 +681,8 @@
 
     SUBCASE("access_operator") { Test.test_access_operator(); }
 
+    SUBCASE("first element") { Test.test_first_element(); }
+
     SUBCASE("arithmetic") { Test.test_arithmetic(); }
 
     SUBCASE("computed_assignment") { Test.test_computed_assignment(); }