Skip to content

Commit ff1dc67

Browse files
authored
Added first() function for SSE/AVX/AVX512F (#1134)
* Add first() function to extract the first lane from a batch. Dedicated kernels are implemented only for x86 (SSE2/AVX/AVX512F); other architectures use the generic fallback.
1 parent a64668d commit ff1dc67

File tree

7 files changed

+196
-1
lines changed

7 files changed

+196
-1
lines changed

include/xsimd/arch/common/xsimd_common_memory.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,25 @@ namespace xsimd
260260
return buffer[i];
261261
}
262262

263+
// first
264+
// Generic fallback kernel: returns the element at index 0 of `self` by
// delegating to the common `get` kernel.
template <class A, class T>
265+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<common>) noexcept
266+
{
267+
return get(self, 0, common {});
268+
}
269+
270+
// Generic fallback for boolean batches: converts `self` to a batch<T, A> and
// dispatches `first` on architecture A. The result is returned as a T.
template <class A, class T>
271+
XSIMD_INLINE T first(batch_bool<T, A> const& self, requires_arch<common>) noexcept
272+
{
273+
return first(batch<T, A>(self), A {});
274+
}
275+
276+
// Generic fallback for complex batches: builds the returned value_type from
// the first element of the real part and the first element of the imaginary
// part, each obtained by dispatching `first` on architecture A.
template <class A, class T>
277+
XSIMD_INLINE auto first(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept -> typename batch<std::complex<T>, A>::value_type
278+
{
279+
return { first(self.real(), A {}), first(self.imag(), A {}) };
280+
}
281+
263282
// load
264283
template <class A, class T>
265284
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<common>) noexcept

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
namespace xsimd
2222
{
23-
2423
namespace kernel
2524
{
2625
using namespace types;
@@ -1861,6 +1860,46 @@ namespace xsimd
18611860
auto hi = _mm256_unpackhi_pd(self, other);
18621861
return _mm256_insertf128_pd(lo, _mm256_castpd256_pd128(hi), 1);
18631862
}
1863+
1864+
// first
1865+
// AVX kernel for float batches: returns the lowest single-precision lane of
// `self` using _mm256_cvtss_f32.
template <class A>
1866+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx>) noexcept
1867+
{
1868+
return _mm256_cvtss_f32(self);
1869+
}
1870+
1871+
// AVX kernel for double batches: returns the lowest double-precision lane of
// `self` using _mm256_cvtsd_f64.
template <class A>
1872+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx>) noexcept
1873+
{
1874+
return _mm256_cvtsd_f64(self);
1875+
}
1876+
1877+
// AVX kernel for integral batches (enabled only when T is an integral type).
// The extraction strategy is selected at compile time from sizeof(T).
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1878+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx>) noexcept
1879+
{
1880+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1881+
{
1882+
// Read the low 32 bits of the register and keep only the low byte.
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFF);
1883+
}
1884+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1885+
{
1886+
// Read the low 32 bits of the register and keep only the low 16 bits.
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFFFF);
1887+
}
1888+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1889+
{
1890+
// The low 32 bits are exactly the first lane.
return static_cast<T>(_mm256_cvtsi256_si32(self));
1891+
}
1892+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1893+
{
1894+
// Reinterpret the lower 128 bits as an sse4_2 batch and reuse that
// architecture's `first` kernel for the 64-bit extraction.
batch<T, sse4_2> low = _mm256_castsi256_si128(self);
1895+
return first(low, sse4_2 {});
1896+
}
1897+
else
1898+
{
1899+
// No other element size is handled: assert (when assertions are
// enabled) and return a value-initialized T.
assert(false && "unsupported arch/op combination");
1900+
return {};
1901+
}
1902+
}
18641903
}
18651904
}
18661905

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2351,6 +2351,46 @@ namespace xsimd
23512351
2));
23522352
}
23532353

2354+
// first
2355+
// AVX512F kernel for float batches: returns the lowest single-precision lane
// of `self` using _mm512_cvtss_f32.
template <class A>
2356+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx512f>) noexcept
2357+
{
2358+
return _mm512_cvtss_f32(self);
2359+
}
2360+
2361+
// AVX512F kernel for double batches: returns the lowest double-precision lane
// of `self` using _mm512_cvtsd_f64.
template <class A>
2362+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx512f>) noexcept
2363+
{
2364+
return _mm512_cvtsd_f64(self);
2365+
}
2366+
2367+
// AVX512F kernel for integral batches (enabled only when T is an integral
// type). The extraction strategy is selected at compile time from sizeof(T).
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2368+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx512f>) noexcept
2369+
{
2370+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
2371+
{
2372+
// Read the low 32 bits of the register and keep only the low byte.
return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFF);
2373+
}
2374+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
2375+
{
2376+
// Read the low 32 bits of the register and keep only the low 16 bits.
return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFFFF);
2377+
}
2378+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
2379+
{
2380+
// The low 32 bits are exactly the first lane.
return static_cast<T>(_mm512_cvtsi512_si32(self));
2381+
}
2382+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
2383+
{
2384+
// Reinterpret the lower 128 bits as an sse4_2 batch and reuse that
// architecture's `first` kernel for the 64-bit extraction.
batch<T, sse4_2> low = _mm512_castsi512_si128(self);
2385+
return first(low, sse4_2 {});
2386+
}
2387+
else
2388+
{
2389+
// No other element size is handled: assert (when assertions are
// enabled) and return a value-initialized T.
assert(false && "unsupported arch/op combination");
2390+
return {};
2391+
}
2392+
}
2393+
23542394
}
23552395

23562396
}

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,54 @@ namespace xsimd
17821782
{
17831783
return _mm_unpacklo_pd(self, other);
17841784
}
1785+
1786+
// first
1787+
// SSE2 kernel for float batches: returns the lowest single-precision lane of
// `self` using _mm_cvtss_f32.
template <class A>
1788+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
1789+
{
1790+
return _mm_cvtss_f32(self);
1791+
}
1792+
1793+
// SSE2 kernel for double batches: returns the lowest double-precision lane of
// `self` using _mm_cvtsd_f64.
template <class A>
1794+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
1795+
{
1796+
return _mm_cvtsd_f64(self);
1797+
}
1798+
1799+
// SSE2 kernel for integral batches (enabled only when T is an integral type).
// The extraction strategy is selected at compile time from sizeof(T).
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1800+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
1801+
{
1802+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1803+
{
1804+
// Read the low 32 bits of the register and keep only the low byte.
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
1805+
}
1806+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1807+
{
1808+
// Read the low 32 bits of the register and keep only the low 16 bits.
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
1809+
}
1810+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1811+
{
1812+
// The low 32 bits are exactly the first lane.
return static_cast<T>(_mm_cvtsi128_si32(self));
1813+
}
1814+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1815+
{
1816+
// NOTE(review): this guard only tests __x86_64__; compilers that signal
// 64-bit x86 through a different macro would take the store/memcpy path
// below -- confirm whether that is intended.
#if defined(__x86_64__)
1817+
// 64-bit target: move the low 64 bits straight to a scalar.
return static_cast<T>(_mm_cvtsi128_si64(self));
1818+
#else
1819+
// Otherwise: store the low 64 bits of `self` into a temporary and copy
// them out as an int64_t, which is then converted to T on return.
__m128i m;
1820+
_mm_storel_epi64(&m, self);
1821+
int64_t i;
1822+
std::memcpy(&i, &m, sizeof(i));
1823+
return i;
1824+
#endif
1825+
}
1826+
else
1827+
{
1828+
// No other element size is handled: assert (when assertions are
// enabled) and return a value-initialized T.
assert(false && "unsupported arch/op combination");
1829+
return {};
1830+
}
1831+
}
1832+
17851833
}
17861834
}
17871835

include/xsimd/types/xsimd_batch.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ namespace xsimd
159159

160160
XSIMD_INLINE T get(std::size_t i) const noexcept;
161161

162+
XSIMD_INLINE T first() const noexcept;
163+
162164
// comparison operators. Defined as friend to enable automatic
163165
// conversion of parameters from scalar to batch, at the cost of using a
164166
// proxy implementation from details::.
@@ -314,6 +316,8 @@ namespace xsimd
314316

315317
XSIMD_INLINE bool get(std::size_t i) const noexcept;
316318

319+
XSIMD_INLINE bool first() const noexcept;
320+
317321
// mask operations
318322
XSIMD_INLINE uint64_t mask() const noexcept;
319323
XSIMD_INLINE static batch_bool from_mask(uint64_t mask) noexcept;
@@ -405,6 +409,8 @@ namespace xsimd
405409

406410
XSIMD_INLINE value_type get(std::size_t i) const noexcept;
407411

412+
XSIMD_INLINE value_type first() const noexcept;
413+
408414
#ifdef XSIMD_ENABLE_XTL_COMPLEX
409415
// xtl-related methods
410416
template <bool i3ec>
@@ -693,6 +699,16 @@ namespace xsimd
693699
return kernel::get(*this, i, A {});
694700
}
695701

702+
/**
703+
* Retrieve the first scalar element in this batch.
*
* Checks the <T, A> configuration at compile time, then forwards to
* kernel::first dispatched on architecture A.
704+
*/
705+
template <class T, class A>
706+
XSIMD_INLINE T batch<T, A>::first() const noexcept
707+
{
708+
detail::static_check_supported_config<T, A>();
709+
return kernel::first(*this, A {});
710+
}
711+
696712
/******************************
697713
* batch comparison operators *
698714
******************************/
@@ -1005,6 +1021,13 @@ namespace xsimd
10051021
return kernel::get(*this, i, A {});
10061022
}
10071023

1024+
// Retrieve the first element of this boolean batch as a bool.
// Checks the <T, A> configuration at compile time, then forwards to
// kernel::first dispatched on architecture A; the kernel result is converted
// to bool on return.
template <class T, class A>
1025+
XSIMD_INLINE bool batch_bool<T, A>::first() const noexcept
1026+
{
1027+
detail::static_check_supported_config<T, A>();
1028+
return kernel::first(*this, A {});
1029+
}
1030+
10081031
/***********************************
10091032
* batch_bool comparison operators *
10101033
***********************************/
@@ -1248,6 +1271,13 @@ namespace xsimd
12481271
return kernel::get(*this, i, A {});
12491272
}
12501273

1274+
// Retrieve the first complex element of this batch as a value_type.
// Checks the <std::complex<T>, A> configuration at compile time, then
// forwards to kernel::first dispatched on architecture A.
template <class T, class A>
1275+
XSIMD_INLINE auto batch<std::complex<T>, A>::first() const noexcept -> value_type
1276+
{
1277+
detail::static_check_supported_config<std::complex<T>, A>();
1278+
return kernel::first(*this, A {});
1279+
}
1280+
12511281
/**************************************
12521282
* batch<complex> xtl-related methods *
12531283
**************************************/

test/test_batch.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ struct batch_test
152152
}
153153
}
154154

155+
// Checks that first() on the batch returned by batch_lhs() equals lhs[0].
// (presumably batch_lhs() loads its lanes from lhs -- defined outside this view)
void test_first_element() const
156+
{
157+
batch_type res = batch_lhs();
158+
CHECK_EQ(res.first(), lhs[0]);
159+
}
160+
155161
void test_arithmetic() const
156162
{
157163
// +batch
@@ -948,6 +954,11 @@ TEST_CASE_TEMPLATE("[batch]", B, BATCH_TYPES)
948954
Test.test_access_operator();
949955
}
950956

957+
SUBCASE("first element")
958+
{
959+
Test.test_first_element();
960+
}
961+
951962
SUBCASE("arithmetic")
952963
{
953964
Test.test_arithmetic();

test/test_batch_complex.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ struct batch_complex_test
176176
}
177177
}
178178

179+
// Checks that first() on the complex batch returned by batch_lhs() equals lhs[0].
// (presumably batch_lhs() loads its lanes from lhs -- defined outside this view)
void test_first_element() const
180+
{
181+
batch_type res = batch_lhs();
182+
CHECK_EQ(res.first(), lhs[0]);
183+
}
184+
179185
void test_arithmetic() const
180186
{
181187
// +batch
@@ -675,6 +681,8 @@ TEST_CASE_TEMPLATE("[xsimd complex batches]", B, BATCH_COMPLEX_TYPES)
675681

676682
SUBCASE("access_operator") { Test.test_access_operator(); }
677683

684+
SUBCASE("first element") { Test.test_first_element(); }
685+
678686
SUBCASE("arithmetic") { Test.test_arithmetic(); }
679687

680688
SUBCASE("computed_assignment") { Test.test_computed_assignment(); }

0 commit comments

Comments
 (0)