@@ -184,6 +184,12 @@ pg_avx2_at_runtime_but_uncompiled()
184184 8 + _a_off, 0x80, 8 + _a_off, 0x80, 0 + _a_off, 0x80, 0 + _a_off, \
185185 0x80, 0 + _a_off, 0x80, 0 + _a_off);
186186
/* Divides each unsigned 16-bit lane of the input __m256i by 255 (truncating).
 *
 * No integer-divide instruction is used: _mm256_mulhi_epu16 keeps the high
 * 16 bits of x * 0x8081 and the following logical right shift by 7 completes
 * an overall ">> 23", which equals x / 255 for every x in [0, 65535].
 * See: https://stackoverflow.com/a/35286833/13816541
 *
 * The macro expands to one fully parenthesized expression and deliberately
 * carries NO trailing semicolon, so the caller supplies it:
 *     temp = DO_AVX2_DIV255_U16(temp);
 * This keeps it usable inside larger expressions and as an unbraced
 * if/else body. The argument is evaluated exactly once. */
#define DO_AVX2_DIV255_U16(MM256I) \
    (_mm256_srli_epi16(            \
        _mm256_mulhi_epu16((MM256I), _mm256_set1_epi16((short)0x8081)), 7))
192+
187193#if defined(__AVX2__ ) && defined(HAVE_IMMINTRIN_H ) && \
188194 !defined(SDL_DISABLE_IMMINTRIN_H )
189195void
@@ -281,8 +287,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
281287
282288 // figure out alpha
283289 temp = _mm256_mullo_epi16 (src_alpha , dst_alpha );
284- temp = _mm256_srli_epi16 (
285- _mm256_mulhi_epu16 (temp , _mm256_set1_epi16 ((short )0x8081 )), 7 );
290+ temp = DO_AVX2_DIV255_U16 (temp );
286291 new_dst_alpha = _mm256_sub_epi16 (dst_alpha , temp );
287292 new_dst_alpha = _mm256_add_epi16 (src_alpha , new_dst_alpha );
288293
@@ -364,8 +369,7 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
364369
365370 // figure out alpha
366371 temp = _mm256_mullo_epi16 (src_alpha , dst_alpha );
367- temp = _mm256_srli_epi16 (
368- _mm256_mulhi_epu16 (temp , _mm256_set1_epi16 ((short )0x8081 )), 7 );
372+ temp = DO_AVX2_DIV255_U16 (temp );
369373 new_dst_alpha = _mm256_sub_epi16 (dst_alpha , temp );
370374 new_dst_alpha = _mm256_add_epi16 (src_alpha , new_dst_alpha );
371375
0 commit comments