@@ -651,6 +651,53 @@ namespace xsimd
651651 return _mm_castsi128_pd (_mm_cmpeq_epi32 (_mm_castpd_si128 (self), _mm_castpd_si128 (other)));
652652 }
653653
654+ // first: overloads that return the element held in lane 0 of a batch
655+ template <class A >
656+ XSIMD_INLINE float first (batch<float , A> const & self, requires_arch<sse2>) noexcept // float overload; `self` is the batch to read from
657+ {
658+ return _mm_cvtss_f32 (self); // scalar extraction of the lowest single-precision element of the register
659+ }
660+
661+ template <class A >
662+ XSIMD_INLINE double first (batch<double , A> const & self, requires_arch<sse2>) noexcept // double overload: returns the lowest element of `self`
663+ {
664+ return _mm_cvtsd_f64 (self); // scalar extraction of the lowest double-precision element of the register
665+ }
666+
667+ template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type> // overload enabled only when T is an integral type
668+ XSIMD_INLINE T first (batch<T, A> const & self, requires_arch<sse2>) noexcept // returns the lowest element of an integer batch, dispatching on sizeof(T)
669+ {
670+ XSIMD_IF_CONSTEXPR (sizeof (T) == 1 )
671+ {
672+ return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFF ); // take the low 32 bits, keep only the lowest byte
673+ }
674+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 2 )
675+ {
676+ return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFFFF ); // take the low 32 bits, keep only the lowest 16 bits
677+ }
678+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
679+ {
680+ return static_cast <T>(_mm_cvtsi128_si32 (self)); // the low 32 bits are exactly one element, no masking needed
681+ }
682+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
683+ {
684+ #if defined(__x86_64__)
685+ return static_cast <T>(_mm_cvtsi128_si64 (self)); // path taken when __x86_64__ is defined: read the low 64 bits directly
686+ #else
687+ __m128i m; // fallback path: scratch vector; NOTE(review): MSVC x64 presumably lands here too since it defines _M_X64 rather than __x86_64__ - confirm
688+ _mm_storel_epi64 (&m, self); // store the low 64 bits of self into m
689+ int64_t i;
690+ std::memcpy (&i, &m, sizeof (i)); // copy the first 8 bytes of m into i via memcpy rather than a pointer cast
691+ return i; // int64_t is implicitly converted to T on return
692+ #endif
693+ }
694+ else
695+ {
696+ assert (false && " unsupported arch/op combination" ); // reached only when sizeof(T) matched none of 1, 2, 4, 8
697+ return {}; // value-initialized T so that every path returns a value
698+ }
699+ }
700+
654701 // from_mask
655702 template <class A >
656703 XSIMD_INLINE batch_bool<float , A> from_mask (batch_bool<float , A> const &, uint64_t mask, requires_arch<sse2>) noexcept
@@ -1247,10 +1294,10 @@ namespace xsimd
12471294 batch<T, A> step2 = _mm_shufflelo_epi16 (acc1, mask2);
12481295 batch<T, A> acc2 = max (acc1, step2);
12491296 if (sizeof (T) == 2 )
1250- return acc2. get ( 0 );
1297+ return first (acc2, A {} );
12511298 batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t >(acc2) >> 8 );
12521299 batch<T, A> acc3 = max (acc2, step3);
1253- return acc3. get ( 0 );
1300+ return first (acc3, A {} );
12541301 }
12551302
12561303 // reduce_min
@@ -1269,10 +1316,10 @@ namespace xsimd
12691316 batch<T, A> step2 = _mm_shufflelo_epi16 (acc1, mask2);
12701317 batch<T, A> acc2 = min (acc1, step2);
12711318 if (sizeof (T) == 2 )
1272- return acc2. get ( 0 );
1319+ return first (acc2, A {} );
12731320 batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t >(acc2) >> 8 );
12741321 batch<T, A> acc3 = min (acc2, step3);
1275- return acc3. get ( 0 );
1322+ return first (acc3, A {} );
12761323 }
12771324
12781325 // rsqrt
@@ -1811,53 +1858,6 @@ namespace xsimd
18111858 {
18121859 return _mm_unpacklo_pd (self, other);
18131860 }
1814-
1815- // first
1816- template <class A >
1817- XSIMD_INLINE float first (batch<float , A> const & self, requires_arch<sse2>) noexcept
1818- {
1819- return _mm_cvtss_f32 (self);
1820- }
1821-
1822- template <class A >
1823- XSIMD_INLINE double first (batch<double , A> const & self, requires_arch<sse2>) noexcept
1824- {
1825- return _mm_cvtsd_f64 (self);
1826- }
1827-
1828- template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
1829- XSIMD_INLINE T first (batch<T, A> const & self, requires_arch<sse2>) noexcept
1830- {
1831- XSIMD_IF_CONSTEXPR (sizeof (T) == 1 )
1832- {
1833- return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFF );
1834- }
1835- else XSIMD_IF_CONSTEXPR (sizeof (T) == 2 )
1836- {
1837- return static_cast <T>(_mm_cvtsi128_si32 (self) & 0xFFFF );
1838- }
1839- else XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
1840- {
1841- return static_cast <T>(_mm_cvtsi128_si32 (self));
1842- }
1843- else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
1844- {
1845- #if defined(__x86_64__)
1846- return static_cast <T>(_mm_cvtsi128_si64 (self));
1847- #else
1848- __m128i m;
1849- _mm_storel_epi64 (&m, self);
1850- int64_t i;
1851- std::memcpy (&i, &m, sizeof (i));
1852- return i;
1853- #endif
1854- }
1855- else
1856- {
1857- assert (false && " unsupported arch/op combination" );
1858- return {};
1859- }
1860- }
18611861 }
18621862}
18631863
0 commit comments