@@ -673,6 +673,54 @@ namespace xsimd
673673 return _mm_castsi128_pd (_mm_cmpeq_epi32 (_mm_castpd_si128 (self), _mm_castpd_si128 (other)));
674674 }
675675
676+ // first
677+ template <class A >
678+ XSIMD_INLINE float first (batch<float , A> const & self, requires_arch<sse2>) noexcept
679+ {
680+ return _mm_cvtss_f32 (self);
681+ }
682+
683+ template <class A >
684+ XSIMD_INLINE double first (batch<double , A> const & self, requires_arch<sse2>) noexcept
685+ {
686+ return _mm_cvtsd_f64 (self);
687+ }
688+
689+ template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
690+ XSIMD_INLINE T first (batch<T, A> const & self, requires_arch<sse2>) noexcept
691+ {
692+ XSIMD_IF_CONSTEXPR (sizeof (T) == 1 )
693+ {
694+ return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFF );
695+ }
696+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 2 )
697+ {
698+ return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFFFF );
699+ }
700+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
701+ {
702+ return static_cast <T>(_mm_cvtsi128_si32 (self));
703+ }
704+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
705+ {
706+ #if defined(__x86_64__)
707+ return static_cast <T>(_mm_cvtsi128_si64 (self));
708+ #else
709+ __m128i m;
710+ _mm_storel_epi64 (&m, self);
711+ int64_t i;
712+ std::memcpy (&i, &m, sizeof (i));
713+ return i;
714+ #endif
715+ }
716+ else
717+ {
718+ assert (false && " unsupported arch/op combination" );
719+ return {};
720+ }
721+ }
722+
723+
676724 // from_mask
677725 template <class A >
678726 XSIMD_INLINE batch_bool<float , A> from_mask (batch_bool<float , A> const &, uint64_t mask, requires_arch<sse2>) noexcept
@@ -1269,10 +1317,10 @@ namespace xsimd
12691317 batch<T, A> step2 = _mm_shufflelo_epi16 (acc1, mask2);
12701318 batch<T, A> acc2 = max (acc1, step2);
12711319 if (sizeof (T) == 2 )
1272- return acc2. get ( 0 );
1320+ return first (acc2, A{} );
12731321 batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t >(acc2) >> 8 );
12741322 batch<T, A> acc3 = max (acc2, step3);
1275- return acc3. get ( 0 );
1323+ return first (acc3, A{} );
12761324 }
12771325
12781326 // reduce_min
@@ -1291,10 +1339,10 @@ namespace xsimd
12911339 batch<T, A> step2 = _mm_shufflelo_epi16 (acc1, mask2);
12921340 batch<T, A> acc2 = min (acc1, step2);
12931341 if (sizeof (T) == 2 )
1294- return acc2. get ( 0 );
1342+ return first (acc2, A{} );
12951343 batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t >(acc2) >> 8 );
12961344 batch<T, A> acc3 = min (acc2, step3);
1297- return acc3. get ( 0 );
1345+ return first (acc3, A{} );
12981346 }
12991347
13001348 // rsqrt
@@ -1783,53 +1831,6 @@ namespace xsimd
17831831 return _mm_unpacklo_pd (self, other);
17841832 }
17851833
1786- // first
1787- template <class A >
1788- XSIMD_INLINE float first (batch<float , A> const & self, requires_arch<sse2>) noexcept
1789- {
1790- return _mm_cvtss_f32 (self);
1791- }
1792-
1793- template <class A >
1794- XSIMD_INLINE double first (batch<double , A> const & self, requires_arch<sse2>) noexcept
1795- {
1796- return _mm_cvtsd_f64 (self);
1797- }
1798-
1799- template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
1800- XSIMD_INLINE T first (batch<T, A> const & self, requires_arch<sse2>) noexcept
1801- {
1802- XSIMD_IF_CONSTEXPR (sizeof (T) == 1 )
1803- {
1804- return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFF );
1805- }
1806- else XSIMD_IF_CONSTEXPR (sizeof (T) == 2 )
1807- {
1808- return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFFFF );
1809- }
1810- else XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
1811- {
1812- return static_cast <T>(_mm_cvtsi128_si32 (self));
1813- }
1814- else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
1815- {
1816- #if defined(__x86_64__)
1817- return static_cast <T>(_mm_cvtsi128_si64 (self));
1818- #else
1819- __m128i m;
1820- _mm_storel_epi64 (&m, self);
1821- int64_t i;
1822- std::memcpy (&i, &m, sizeof (i));
1823- return i;
1824- #endif
1825- }
1826- else
1827- {
1828- assert (false && " unsupported arch/op combination" );
1829- return {};
1830- }
1831- }
1832-
18331834 }
18341835}
18351836
0 commit comments