Skip to content

Commit cd50008

Browse files
author
Guillaume Piolat
committed
A few bonus intrinsics:
- _MM_TRANSPOSE4_EPI32 (same as _MM_TRANSPOSE4_PS but with epi32)
- _mm_movelh_epi32 (same as _mm_movelh_ps but for epi32)
- _mm_movehl_epi32 (same as _mm_movehl_ps but for epi32)
1 parent ef93399 commit cd50008

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed

source/inteli/xmmintrin.d

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,23 @@ unittest
17511751
assert(R.array == correct);
17521752
}
17531753

1754+
/// Move the upper 2 32-bit integer elements from `b` to the lower 2 elements of result, and
1755+
/// copy the upper 2 elements from `a` to the upper 2 elements of result.
1756+
__m128i _mm_movehl_epi32 (__m128i a, __m128i b) pure @trusted // #BONUS
1757+
{
1758+
a.ptr[0] = b.array[2]; // result[0] = b[2]
1759+
a.ptr[1] = b.array[3]; // result[1] = b[3]
1760+
return a; // elements 2 and 3 are still the original a[2], a[3]
1761+
}
1762+
unittest
1763+
{
1764+
__m128i A = _mm_setr_epi32(1, 2, 3, 4);
1765+
__m128i B = _mm_setr_epi32(5, 6, 7, 8);
1766+
__m128i R = _mm_movehl_epi32(A, B);
1767+
int[4] correct = [7, 8, 3, 4]; // [B[2], B[3], A[2], A[3]]
1768+
assert(R.array == correct);
1769+
}
1770+
17541771
/// Move the lower 2 single-precision (32-bit) floating-point elements from `b` to the upper 2 elements of result, and
17551772
/// copy the lower 2 elements from `a` to the lower 2 elements of result
17561773
__m128 _mm_movelh_ps (__m128 a, __m128 b) pure @trusted
@@ -1776,6 +1793,23 @@ unittest
17761793
assert(R.array == correct);
17771794
}
17781795

1796+
/// Move the lower 2 32-bit integer elements from `b` to the upper 2 elements of result, and
1797+
/// copy the lower 2 elements from `a` to the lower 2 elements of result.
1798+
__m128i _mm_movelh_epi32 (__m128i a, __m128i b) pure @trusted // #BONUS
1799+
{
1800+
a.ptr[2] = b.array[0]; // result[2] = b[0]
1801+
a.ptr[3] = b.array[1]; // result[3] = b[1]
1802+
return a; // elements 0 and 1 are still the original a[0], a[1]
1803+
}
1804+
unittest
1805+
{
1806+
__m128i A = _mm_setr_epi32(1, 2, 3, 4);
1807+
__m128i B = _mm_setr_epi32(5, 6, 7, 8);
1808+
__m128i R = _mm_movelh_epi32(A, B);
1809+
int[4] correct = [1, 2, 5, 6]; // [A[0], A[1], B[0], B[1]]
1810+
assert(R.array == correct);
1811+
}
1812+
17791813
/// Create mask from the most significant bit of each 8-bit element in `a`.
17801814
int _mm_movemask_pi8 (__m64 a) pure @safe
17811815
{
@@ -2972,6 +3006,37 @@ unittest
29723006
assert(l3.array == r3);
29733007
}
29743008

3009+
/// Transpose the 4x4 matrix formed by the 4 rows of 32-bit integer elements in row0, row1,
3010+
/// row2, and row3, and store the transposed matrix in these vectors (row0 now contains column 0, etc.).
3011+
void _MM_TRANSPOSE4_EPI32 (ref __m128i row0, ref __m128i row1, ref __m128i row2, ref __m128i row3) pure @safe // #BONUS
3012+
{
3013+
__m128i tmp3, tmp2, tmp1, tmp0;
3014+
tmp0 = _mm_unpacklo_epi32(row0, row1); // [row0[0], row1[0], row0[1], row1[1]]
3015+
tmp2 = _mm_unpacklo_epi32(row2, row3); // [row2[0], row3[0], row2[1], row3[1]]
3016+
tmp1 = _mm_unpackhi_epi32(row0, row1); // [row0[2], row1[2], row0[3], row1[3]]
3017+
tmp3 = _mm_unpackhi_epi32(row2, row3); // [row2[2], row3[2], row2[3], row3[3]]
3018+
row0 = _mm_movelh_epi32(tmp0, tmp2); // column 0: low half of tmp0, then low half of tmp2
3019+
row1 = _mm_movehl_epi32(tmp2, tmp0); // column 1: high half of tmp0, then high half of tmp2
3020+
row2 = _mm_movelh_epi32(tmp1, tmp3); // column 2: low half of tmp1, then low half of tmp3
3021+
row3 = _mm_movehl_epi32(tmp3, tmp1); // column 3: high half of tmp1, then high half of tmp3
3022+
}
3023+
unittest
3024+
{
3025+
__m128i l0 = _mm_setr_epi32(0, 1, 2, 3);
3026+
__m128i l1 = _mm_set_epi32(7, 6, 5, 4); // _mm_set_* takes elements in reverse order: same as _mm_setr_epi32(4, 5, 6, 7)
3027+
__m128i l2 = _mm_setr_epi32(8, 9, 10, 11);
3028+
__m128i l3 = _mm_setr_epi32(12, 13, 14, 15);
3029+
_MM_TRANSPOSE4_EPI32(l0, l1, l2, l3); // transposes in place through the ref parameters
3030+
int[4] r0 = [0, 4, 8, 12]; // column 0 of the input matrix
3031+
int[4] r1 = [1, 5, 9, 13]; // column 1
3032+
int[4] r2 = [2, 6, 10, 14]; // column 2
3033+
int[4] r3 = [3, 7, 11, 15]; // column 3
3034+
assert(l0.array == r0);
3035+
assert(l1.array == r1);
3036+
assert(l2.array == r2);
3037+
assert(l3.array == r3);
3038+
}
3039+
29753040
// Note: the only difference between these intrinsics is the signalling
29763041
// behaviour of quiet NaNs. This is incorrect but the case where
29773042
// you would want to differentiate between qNaN and sNaN and then

0 commit comments

Comments
 (0)