@@ -405,6 +405,62 @@ unittest
405405 }
406406}
407407
/// Reverse endianness of 16-bit integers in `a`.
///
/// Params:
///     a = input vector, processed as consecutive 2-byte groups.
/// Returns: the result of `_mm_shuffle_epi8` applied to `a` with a selector
///          that lists the byte indices of every 2-byte group in reverse.
__m128i _mm_bswap_epi16 (__m128i a) pure @safe // #BONUS
{
    // One row per 16-bit lane: the two byte indices of the lane, swapped.
    __m128i swapMask = _mm_setr_epi8( 1,  0,
                                      3,  2,
                                      5,  4,
                                      7,  6,
                                      9,  8,
                                     11, 10,
                                     13, 12,
                                     15, 14);
    return _mm_shuffle_epi8(a, swapMask);
}
unittest
{
    // 0 and -1 read the same with their bytes reversed, so those lanes must
    // come back unchanged; the other lanes must have their two bytes swapped.
    __m128i input = _mm_setr_epi16(0x1122, 0x3344, 0, -1,
                                   0x1122, 0x3344, 0, -1);
    short[8] expected = [0x2211, 0x4433, 0, -1,
                         0x2211, 0x4433, 0, -1];
    short8 swapped = cast(short8) _mm_bswap_epi16(input);
    assert(swapped.array == expected);
}
421+
/// Reverse endianness of 32-bit integers in `a`.
///
/// Params:
///     a = input vector, processed as consecutive 4-byte groups.
/// Returns: the result of `_mm_shuffle_epi8` applied to `a` with a selector
///          that lists the byte indices of every 4-byte group in reverse.
__m128i _mm_bswap_epi32 (__m128i a) pure @safe // #BONUS
{
    // One row per 32-bit lane: the four byte indices of the lane, reversed.
    __m128i swapMask = _mm_setr_epi8( 3,  2,  1,  0,
                                      7,  6,  5,  4,
                                     11, 10,  9,  8,
                                     15, 14, 13, 12);
    return _mm_shuffle_epi8(a, swapMask);
}
unittest
{
    // Two lanes with distinct bytes plus the byte-symmetric values 0 and -1,
    // which must come back unchanged.
    __m128i input = _mm_setr_epi32(0x11223344, 0x33445566, 0, -1);
    int[4] expected = [0x44332211, 0x66554433, 0, -1];
    int4 swapped = cast(int4) _mm_bswap_epi32(input);
    assert(swapped.array == expected);
}
435+
/// Reverse endianness of 64-bit integers in `a`.
///
/// Params:
///     a = input vector, processed as two consecutive 8-byte groups.
/// Returns: the result of `_mm_shuffle_epi8` applied to `a` with a selector
///          that lists the byte indices of each 8-byte group in reverse.
__m128i _mm_bswap_epi64 (__m128i a) pure @safe // #BONUS
{
    // One row per 64-bit lane: the eight byte indices of the lane, reversed.
    __m128i swapMask = _mm_setr_epi8( 7,  6,  5,  4,  3,  2,  1,  0,
                                     15, 14, 13, 12, 11, 10,  9,  8);
    return _mm_shuffle_epi8(a, swapMask);
}
unittest
{
    // First lane has eight distinct bytes; second lane is -1, which must
    // come back unchanged.
    __m128i input = _mm_setr_epi64(0x11223344_55667788, -1);
    long[2] expected = [0x88776655_44332211, -1];
    long2 swapped = cast(long2) _mm_bswap_epi64(input);
    assert(swapped.array == expected);
}
449+
/// Reverse endianness of 128-bit register `a`.
///
/// Params:
///     a = input vector, processed as a single 16-byte group.
/// Returns: the result of `_mm_shuffle_epi8` applied to `a` with a selector
///          that lists all sixteen byte indices in descending order.
__m128i _mm_bswap_si128 (__m128i a) pure @safe // #BONUS
{
    // Selector runs from the last byte index down to the first.
    __m128i swapMask = _mm_setr_epi8(15, 14, 13, 12, 11, 10,  9,  8,
                                      7,  6,  5,  4,  3,  2,  1,  0);
    return _mm_shuffle_epi8(a, swapMask);
}
unittest
{
    // A full 128-bit reversal exchanges the two 64-bit halves and reverses
    // the bytes inside each: the -1 half moves to the front unchanged, and
    // the patterned half lands second with its bytes reversed.
    __m128i input = _mm_setr_epi64(0x11223344_55667788, -1);
    long[2] expected = [-1, 0x88776655_44332211];
    long2 swapped = cast(long2) _mm_bswap_si128(input);
    assert(swapped.array == expected);
}
463+
408464// / Horizontally add adjacent pairs of 16-bit integers in `a` and `b`, and pack the signed 16-bit results.
409465__m128i _mm_hadd_epi16 (__m128i a, __m128i b) pure @trusted
410466{
0 commit comments