@@ -346,51 +346,49 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
346346 dstA = srcA + dstA - ((srcA * dstA) / 255);
347347 */
348348
349- RUN_AVX2_BLITTER (
350- RUN_16BIT_SHUFFLE_OUT (
351- src_alpha = _mm256_shuffle_epi8 (shuff_src , shuff_out_alpha );
352-
353- // src_alpha = src_alpha * module_alpha / 255
354- src_alpha = _mm256_mullo_epi16 (src_alpha , modulate_alpha );
355- src_alpha = _mm256_srli_epi16 (
356- _mm256_mulhi_epu16 (src_alpha ,
357- _mm256_set1_epi16 ((short )0x8081 )),
358- 7 );
359-
360- dst_alpha = _mm256_shuffle_epi8 (shuff_dst , shuff_out_alpha );
361- // if the destination is opaque, it takes the max of each alpha
362- // with 255 otherwise it takes with the max with 0. This is
363- // equivalent to if opaque: alpha = 255
364- dst_alpha = _mm256_max_epi16 (dst_alpha ,
365- _mm256_set1_epi16 (dst_alpha_offset ));
366-
367- // figure out alpha
368- temp = _mm256_mullo_epi16 (src_alpha , dst_alpha );
369- temp = _mm256_srli_epi16 (
370- _mm256_mulhi_epu16 (temp , _mm256_set1_epi16 ((short )0x8081 )), 7 );
371- new_dst_alpha = _mm256_sub_epi16 (dst_alpha , temp );
372- new_dst_alpha = _mm256_add_epi16 (src_alpha , new_dst_alpha );
373-
374- // if preexisting dst alpha is 0, src alpha should be set to 255
375- // enforces that dest alpha 0 means "copy source RGB"
376- // happens after real src alpha values used to calculate dst alpha
377- // compares each 16 bit block to zeroes, yielding 0xFFFF or
378- // 0x0000-- shifts out bottom 8 bits to get to 0x00FF or 0x0000.
379- dst_alpha = _mm256_cmpeq_epi16 (dst_alpha , _mm256_setzero_si256 ());
380- dst_alpha = _mm256_srli_epi16 (dst_alpha , 8 );
381- src_alpha = _mm256_max_epu8 (dst_alpha , src_alpha );
382-
383- // figure out RGB
384- temp = _mm256_sub_epi16 (shuff_src , shuff_dst );
385- temp = _mm256_mullo_epi16 (temp , src_alpha );
386- temp = _mm256_add_epi16 (temp , shuff_src );
387- shuff_dst = _mm256_slli_epi16 (shuff_dst , 8 );
388- shuff_dst = _mm256_add_epi16 (shuff_dst , temp );
389- shuff_dst = _mm256_srli_epi16 (shuff_dst , 8 );
390-
391- // blend together dstRGB and dstA
392- shuff_dst = _mm256_blendv_epi8 (shuff_dst , new_dst_alpha ,
393- combine_rgba_mask );))
349+ RUN_AVX2_BLITTER (RUN_16BIT_SHUFFLE_OUT (
350+ src_alpha = _mm256_shuffle_epi8 (shuff_src , shuff_out_alpha );
351+
352+ // src_alpha = src_alpha * modulate_alpha / 255
353+ src_alpha = _mm256_mullo_epi16 (src_alpha , modulate_alpha );
354+ src_alpha = _mm256_srli_epi16 (
355+ _mm256_mulhi_epu16 (src_alpha , _mm256_set1_epi16 ((short )0x8081 )),
356+ 7 );
357+
358+ dst_alpha = _mm256_shuffle_epi8 (shuff_dst , shuff_out_alpha );
359+ // if the destination is opaque, this takes the max of each alpha
360+ // with 255; otherwise it takes the max with 0. This is
361+ // equivalent to: if opaque, alpha = 255
362+ dst_alpha =
363+ _mm256_max_epi16 (dst_alpha , _mm256_set1_epi16 (dst_alpha_offset ));
364+
365+ // figure out alpha
366+ temp = _mm256_mullo_epi16 (src_alpha , dst_alpha );
367+ temp = _mm256_srli_epi16 (
368+ _mm256_mulhi_epu16 (temp , _mm256_set1_epi16 ((short )0x8081 )), 7 );
369+ new_dst_alpha = _mm256_sub_epi16 (dst_alpha , temp );
370+ new_dst_alpha = _mm256_add_epi16 (src_alpha , new_dst_alpha );
371+
372+ // if preexisting dst alpha is 0, src alpha should be set to 255
373+ // enforces that dest alpha 0 means "copy source RGB"
374+ // happens after real src alpha values used to calculate dst alpha
375+ // compares each 16 bit block to zeroes, yielding 0xFFFF or
376+ // 0x0000-- shifts out bottom 8 bits to get to 0x00FF or 0x0000.
377+ dst_alpha = _mm256_cmpeq_epi16 (dst_alpha , _mm256_setzero_si256 ());
378+ dst_alpha = _mm256_srli_epi16 (dst_alpha , 8 );
379+ src_alpha = _mm256_max_epu8 (dst_alpha , src_alpha );
380+
381+ // figure out RGB
382+ temp = _mm256_sub_epi16 (shuff_src , shuff_dst );
383+ temp = _mm256_mullo_epi16 (temp , src_alpha );
384+ temp = _mm256_add_epi16 (temp , shuff_src );
385+ shuff_dst = _mm256_slli_epi16 (shuff_dst , 8 );
386+ shuff_dst = _mm256_add_epi16 (shuff_dst , temp );
387+ shuff_dst = _mm256_srli_epi16 (shuff_dst , 8 );
388+
389+ // blend together dstRGB and dstA
390+ shuff_dst =
391+ _mm256_blendv_epi8 (shuff_dst , new_dst_alpha , combine_rgba_mask );))
394392}
395393#else
396394void
0 commit comments