@@ -346,48 +346,51 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
346346 dstA = srcA + dstA - ((srcA * dstA) / 255);
347347 */
348348
349- RUN_AVX2_BLITTER (RUN_16BIT_SHUFFLE_OUT (
350- src_alpha = _mm256_shuffle_epi8 (shuff_src , shuff_out_alpha );
351-
352- // src_alpha = src_alpha * module_alpha / 255
353- src_alpha = _mm256_mullo_epi16 (src_alpha , modulate_alpha );
354- src_alpha = _mm256_srli_epi16 (
355- _mm256_mulhi_epu16 (src_alpha , _mm256_set1_epi16 ((short )0x8081 )),
356- 7 );
357-
358- dst_alpha = _mm256_shuffle_epi8 (shuff_dst , shuff_out_alpha );
359- // if the destination is opaque, it takes the max of each alpha with 255
360- // otherwise it takes with the max with 0. This is equivalent to
361- // if opaque: alpha = 255
362- dst_alpha = _mm256_max_epi16 (dst_alpha , _mm256_set1_epi16 (dst_alpha_offset ));
363-
364- // figure out alpha
365- temp = _mm256_mullo_epi16 (src_alpha , dst_alpha );
366- temp = _mm256_srli_epi16 (
367- _mm256_mulhi_epu16 (temp , _mm256_set1_epi16 ((short )0x8081 )), 7 );
368- new_dst_alpha = _mm256_sub_epi16 (dst_alpha , temp );
369- new_dst_alpha = _mm256_add_epi16 (src_alpha , new_dst_alpha );
370-
371- // if preexisting dst alpha is 0, src alpha should be set to 255
372- // enforces that dest alpha 0 means "copy source RGB"
373- // happens after real src alpha values used to calculate dst alpha
374- // compares each 16 bit block to zeroes, yielding 0xFFFF or 0x0000--
375- // shifts out bottom 8 bits to get to 0x00FF or 0x0000.
376- dst_alpha = _mm256_cmpeq_epi16 (dst_alpha , _mm256_setzero_si256 ());
377- dst_alpha = _mm256_srli_epi16 (dst_alpha , 8 );
378- src_alpha = _mm256_max_epu8 (dst_alpha , src_alpha );
379-
380- // figure out RGB
381- temp = _mm256_sub_epi16 (shuff_src , shuff_dst );
382- temp = _mm256_mullo_epi16 (temp , src_alpha );
383- temp = _mm256_add_epi16 (temp , shuff_src );
384- shuff_dst = _mm256_slli_epi16 (shuff_dst , 8 );
385- shuff_dst = _mm256_add_epi16 (shuff_dst , temp );
386- shuff_dst = _mm256_srli_epi16 (shuff_dst , 8 );
387-
388- // blend together dstRGB and dstA
389- shuff_dst =
390- _mm256_blendv_epi8 (shuff_dst , new_dst_alpha , combine_rgba_mask );))
349+ RUN_AVX2_BLITTER (
350+ RUN_16BIT_SHUFFLE_OUT (
351+ src_alpha = _mm256_shuffle_epi8 (shuff_src , shuff_out_alpha );
352+
353+ // src_alpha = src_alpha * module_alpha / 255
354+ src_alpha = _mm256_mullo_epi16 (src_alpha , modulate_alpha );
355+ src_alpha = _mm256_srli_epi16 (
356+ _mm256_mulhi_epu16 (src_alpha ,
357+ _mm256_set1_epi16 ((short )0x8081 )),
358+ 7 );
359+
360+ dst_alpha = _mm256_shuffle_epi8 (shuff_dst , shuff_out_alpha );
361+ // if the destination is opaque, it takes the max of each alpha
362+ // with 255 otherwise it takes with the max with 0. This is
363+ // equivalent to if opaque: alpha = 255
364+ dst_alpha = _mm256_max_epi16 (dst_alpha ,
365+ _mm256_set1_epi16 (dst_alpha_offset ));
366+
367+ // figure out alpha
368+ temp = _mm256_mullo_epi16 (src_alpha , dst_alpha );
369+ temp = _mm256_srli_epi16 (
370+ _mm256_mulhi_epu16 (temp , _mm256_set1_epi16 ((short )0x8081 )), 7 );
371+ new_dst_alpha = _mm256_sub_epi16 (dst_alpha , temp );
372+ new_dst_alpha = _mm256_add_epi16 (src_alpha , new_dst_alpha );
373+
374+ // if preexisting dst alpha is 0, src alpha should be set to 255
375+ // enforces that dest alpha 0 means "copy source RGB"
376+ // happens after real src alpha values used to calculate dst alpha
377+ // compares each 16 bit block to zeroes, yielding 0xFFFF or
378+ // 0x0000-- shifts out bottom 8 bits to get to 0x00FF or 0x0000.
379+ dst_alpha = _mm256_cmpeq_epi16 (dst_alpha , _mm256_setzero_si256 ());
380+ dst_alpha = _mm256_srli_epi16 (dst_alpha , 8 );
381+ src_alpha = _mm256_max_epu8 (dst_alpha , src_alpha );
382+
383+ // figure out RGB
384+ temp = _mm256_sub_epi16 (shuff_src , shuff_dst );
385+ temp = _mm256_mullo_epi16 (temp , src_alpha );
386+ temp = _mm256_add_epi16 (temp , shuff_src );
387+ shuff_dst = _mm256_slli_epi16 (shuff_dst , 8 );
388+ shuff_dst = _mm256_add_epi16 (shuff_dst , temp );
389+ shuff_dst = _mm256_srli_epi16 (shuff_dst , 8 );
390+
391+ // blend together dstRGB and dstA
392+ shuff_dst = _mm256_blendv_epi8 (shuff_dst , new_dst_alpha ,
393+ combine_rgba_mask );))
391394}
392395#else
393396void
0 commit comments