Skip to content

Commit d54c9e3

Browse files
Use xsimd::first in reduction implementation
1 parent 0576683 commit d54c9e3

File tree

2 files changed

+53
-52
lines changed

2 files changed

+53
-52
lines changed

include/xsimd/arch/common/xsimd_common_math.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2117,7 +2117,7 @@ namespace xsimd
21172117
template <class Op, class A, class T>
21182118
XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>) noexcept
21192119
{
2120-
return self.get(0);
2120+
return first(self);
21212121
}
21222122

21232123
template <class Op, class A, class T, unsigned Lvl>

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 52 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,54 @@ namespace xsimd
673673
return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other)));
674674
}
675675

676+
// first
677+
template <class A>
678+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
679+
{
680+
return _mm_cvtss_f32(self);
681+
}
682+
683+
template <class A>
684+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
685+
{
686+
return _mm_cvtsd_f64(self);
687+
}
688+
689+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
690+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
691+
{
692+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
693+
{
694+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
695+
}
696+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
697+
{
698+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
699+
}
700+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
701+
{
702+
return static_cast<T>(_mm_cvtsi128_si32(self));
703+
}
704+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
705+
{
706+
#if defined(__x86_64__)
707+
return static_cast<T>(_mm_cvtsi128_si64(self));
708+
#else
709+
__m128i m;
710+
_mm_storel_epi64(&m, self);
711+
int64_t i;
712+
std::memcpy(&i, &m, sizeof(i));
713+
return i;
714+
#endif
715+
}
716+
else
717+
{
718+
assert(false && "unsupported arch/op combination");
719+
return {};
720+
}
721+
}
722+
723+
676724
// from_mask
677725
template <class A>
678726
XSIMD_INLINE batch_bool<float, A> from_mask(batch_bool<float, A> const&, uint64_t mask, requires_arch<sse2>) noexcept
@@ -1269,10 +1317,10 @@ namespace xsimd
12691317
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
12701318
batch<T, A> acc2 = max(acc1, step2);
12711319
if (sizeof(T) == 2)
1272-
return acc2.get(0);
1320+
return first(acc2, A{});
12731321
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
12741322
batch<T, A> acc3 = max(acc2, step3);
1275-
return acc3.get(0);
1323+
return first(acc3, A{});
12761324
}
12771325

12781326
// reduce_min
@@ -1291,10 +1339,10 @@ namespace xsimd
12911339
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
12921340
batch<T, A> acc2 = min(acc1, step2);
12931341
if (sizeof(T) == 2)
1294-
return acc2.get(0);
1342+
return first(acc2, A{});
12951343
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
12961344
batch<T, A> acc3 = min(acc2, step3);
1297-
return acc3.get(0);
1345+
return first(acc3, A{});
12981346
}
12991347

13001348
// rsqrt
@@ -1783,53 +1831,6 @@ namespace xsimd
17831831
return _mm_unpacklo_pd(self, other);
17841832
}
17851833

1786-
// first
1787-
template <class A>
1788-
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
1789-
{
1790-
return _mm_cvtss_f32(self);
1791-
}
1792-
1793-
template <class A>
1794-
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
1795-
{
1796-
return _mm_cvtsd_f64(self);
1797-
}
1798-
1799-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1800-
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
1801-
{
1802-
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1803-
{
1804-
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
1805-
}
1806-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1807-
{
1808-
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
1809-
}
1810-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1811-
{
1812-
return static_cast<T>(_mm_cvtsi128_si32(self));
1813-
}
1814-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1815-
{
1816-
#if defined(__x86_64__)
1817-
return static_cast<T>(_mm_cvtsi128_si64(self));
1818-
#else
1819-
__m128i m;
1820-
_mm_storel_epi64(&m, self);
1821-
int64_t i;
1822-
std::memcpy(&i, &m, sizeof(i));
1823-
return i;
1824-
#endif
1825-
}
1826-
else
1827-
{
1828-
assert(false && "unsupported arch/op combination");
1829-
return {};
1830-
}
1831-
}
1832-
18331834
}
18341835
}
18351836

0 commit comments

Comments
 (0)