Skip to content

Commit 532340f

Browse files
committed
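Fix build on 32-bit x86: guard the 64-bit-only intrinsics (_mm_cvtsi64_si128, _mm_cvtsi128_si64) behind __x86_64__ and fall back to memcpy otherwise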
1 parent 82e677c commit 532340f

File tree

3 files changed

+26
-5
lines changed

3 files changed

+26
-5
lines changed

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -590,9 +590,15 @@ namespace xsimd
590590
// GCC/Clang/MSVC will turn it into the correct load.
591591
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
592592
{
593+
#if defined(__x86_64__)
593594
uint64_t tmp;
594595
memcpy(&tmp, mem, sizeof(tmp));
595-
return _mm256_sub_epi32(_mm256_set1_epi8(0), _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(tmp)));
596+
auto val = _mm_cvtsi64_si128(tmp);
597+
#else
598+
__m128i val;
599+
memcpy(&val, mem, sizeof(uint64_t));
600+
#endif
601+
return _mm256_sub_epi32(_mm256_set1_epi8(0), _mm256_cvtepu8_epi32(val));
596602
}
597603
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
598604
{
@@ -1005,8 +1011,13 @@ namespace xsimd
10051011
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
10061012
{
10071013
auto pack_16 = _mm_packs_epi32(b_lo, b_hi);
1008-
auto val = _mm_cvtsi128_si64(_mm_sub_epi8(_mm_set1_epi8(0), _mm_packs_epi16(pack_16, pack_16)));
1009-
memcpy(mem, &val, sizeof(val));
1014+
auto val = _mm_sub_epi8(_mm_set1_epi8(0), _mm_packs_epi16(pack_16, pack_16));
1015+
#if defined(__x86_64__)
1016+
auto val_lo = _mm_cvtsi128_si64(val);
1017+
memcpy(mem, &val_lo, sizeof(val_lo));
1018+
#else
1019+
memcpy(mem, &val, sizeof(uint64_t));
1020+
#endif
10101021
}
10111022
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
10121023
{

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1714,8 +1714,12 @@ namespace xsimd
17141714
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
17151715
{
17161716
auto val = _mm_sub_epi8(_mm_set1_epi8(0), _mm_packs_epi16(b, b));
1717-
// store only lower 64 bits
1717+
#if defined(__x86_64__)
1718+
auto val_lo = _mm_cvtsi128_si64(val);
1719+
memcpy(mem, &val_lo, sizeof(val_lo));
1720+
#else
17181721
memcpy(mem, &val, sizeof(uint64_t));
1722+
#endif
17191723
}
17201724
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
17211725
{

include/xsimd/arch/xsimd_sse4_1.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,15 @@ namespace xsimd
136136
// GCC/Clang/MSVC will turn it into the correct load.
137137
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
138138
{
139+
#if defined(__x86_64__)
139140
uint64_t tmp;
140141
memcpy(&tmp, mem, sizeof(tmp));
141-
return _mm_sub_epi16(_mm_set1_epi8(0), _mm_cvtepu8_epi16(_mm_cvtsi64_si128(tmp)));
142+
auto val = _mm_cvtsi64_si128(tmp);
143+
#else
144+
__m128i val;
145+
memcpy(&val, mem, sizeof(uint64_t));
146+
#endif
147+
return _mm_sub_epi16(_mm_set1_epi8(0), _mm_cvtepu8_epi16(val));
142148
}
143149
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
144150
{

0 commit comments

Comments
 (0)