@@ -86,31 +86,22 @@ _pg_has_avx2()
8686
8787/* Setup for RUN_16BIT_SHUFFLE_OUT: declares the temporaries that macro uses and widens the bytes of mm256_color (which must already be in scope) to 16 bits, split into mm256_colorA / mm256_colorB. */
8888#define SETUP_SHUFFLE \
89- __m256i shuff_out_A = \
90- _mm256_set_epi8(0x80, 23, 0x80, 22, 0x80, 21, 0x80, 20, 0x80, 19, \
91- 0x80, 18, 0x80, 17, 0x80, 16, 0x80, 7, 0x80, 6, 0x80, \
92- 5, 0x80, 4, 0x80, 3, 0x80, 2, 0x80, 1, 0x80, 0); \
93- \
94- __m256i shuff_out_B = _mm256_set_epi8( \
95- 0x80, 31, 0x80, 30, 0x80, 29, 0x80, 28, 0x80, 27, 0x80, 26, 0x80, 25, \
96- 0x80, 24, 0x80, 15, 0x80, 14, 0x80, 13, 0x80, 12, 0x80, 11, 0x80, 10, \
97- 0x80, 9, 0x80, 8); \
98- \
99- __m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB; \
100- mm256_colorA = _mm256_shuffle_epi8(mm256_color, shuff_out_A); \
101- mm256_colorB = _mm256_shuffle_epi8(mm256_color, shuff_out_B);
89+ __m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB, mm256_zero; \
90+ mm256_zero = _mm256_setzero_si256(); /* all-zero vector used as the interleave partner below */ \
91+ mm256_colorA = _mm256_unpacklo_epi8(mm256_color, mm256_zero); /* low 8 bytes of each 128-bit lane, each followed by a zero byte */ \
92+ mm256_colorB = _mm256_unpackhi_epi8(mm256_color, mm256_zero); /* high 8 bytes of each 128-bit lane, same byte/zero layout */
10293
10394#define RUN_16BIT_SHUFFLE_OUT (FILL_CODE ) \
10495 /* ==== shuffle pixels out into two registers each, src */ \
10596 /* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
106- shuff_dst = _mm256_shuffle_epi8 (mm256_dst, shuff_out_A ); \
97+ shuff_dst = _mm256_unpacklo_epi8 (mm256_dst, mm256_zero ); \
10798 mm256_color = mm256_colorA; \
10899 \
109100 {FILL_CODE} \
110101 \
111102 _shuff16_temp = shuff_dst; \
112103 \
113- shuff_dst = _mm256_shuffle_epi8 (mm256_dst, shuff_out_B ); \
104+ shuff_dst = _mm256_unpackhi_epi8 (mm256_dst, mm256_zero ); \
114105 mm256_color = mm256_colorB; \
115106 \
116107 {FILL_CODE} \
0 commit comments