Skip to content

Commit 6135200

Browse files
committed
format harder better faster stronger
1 parent 6e7ad5c commit 6135200

File tree

1 file changed

+43
-45
lines changed

1 file changed

+43
-45
lines changed

src_c/simd_blitters_avx2.c

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -346,51 +346,49 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
346346
dstA = srcA + dstA - ((srcA * dstA) / 255);
347347
*/
348348

349-
RUN_AVX2_BLITTER(
350-
RUN_16BIT_SHUFFLE_OUT(
351-
src_alpha = _mm256_shuffle_epi8(shuff_src, shuff_out_alpha);
352-
353-
// src_alpha = src_alpha * modulate_alpha / 255
354-
src_alpha = _mm256_mullo_epi16(src_alpha, modulate_alpha);
355-
src_alpha = _mm256_srli_epi16(
356-
_mm256_mulhi_epu16(src_alpha,
357-
_mm256_set1_epi16((short)0x8081)),
358-
7);
359-
360-
dst_alpha = _mm256_shuffle_epi8(shuff_dst, shuff_out_alpha);
361-
// if the destination is opaque, it takes the max of each alpha
362-
// with 255; otherwise it takes the max with 0. This is
363-
// equivalent to if opaque: alpha = 255
364-
dst_alpha = _mm256_max_epi16(dst_alpha,
365-
_mm256_set1_epi16(dst_alpha_offset));
366-
367-
// figure out alpha
368-
temp = _mm256_mullo_epi16(src_alpha, dst_alpha);
369-
temp = _mm256_srli_epi16(
370-
_mm256_mulhi_epu16(temp, _mm256_set1_epi16((short)0x8081)), 7);
371-
new_dst_alpha = _mm256_sub_epi16(dst_alpha, temp);
372-
new_dst_alpha = _mm256_add_epi16(src_alpha, new_dst_alpha);
373-
374-
// if preexisting dst alpha is 0, src alpha should be set to 255
375-
// enforces that dest alpha 0 means "copy source RGB"
376-
// happens after real src alpha values used to calculate dst alpha
377-
// compares each 16 bit block to zeroes, yielding 0xFFFF or
378-
// 0x0000-- shifts out bottom 8 bits to get to 0x00FF or 0x0000.
379-
dst_alpha = _mm256_cmpeq_epi16(dst_alpha, _mm256_setzero_si256());
380-
dst_alpha = _mm256_srli_epi16(dst_alpha, 8);
381-
src_alpha = _mm256_max_epu8(dst_alpha, src_alpha);
382-
383-
// figure out RGB
384-
temp = _mm256_sub_epi16(shuff_src, shuff_dst);
385-
temp = _mm256_mullo_epi16(temp, src_alpha);
386-
temp = _mm256_add_epi16(temp, shuff_src);
387-
shuff_dst = _mm256_slli_epi16(shuff_dst, 8);
388-
shuff_dst = _mm256_add_epi16(shuff_dst, temp);
389-
shuff_dst = _mm256_srli_epi16(shuff_dst, 8);
390-
391-
// blend together dstRGB and dstA
392-
shuff_dst = _mm256_blendv_epi8(shuff_dst, new_dst_alpha,
393-
combine_rgba_mask);))
349+
RUN_AVX2_BLITTER(RUN_16BIT_SHUFFLE_OUT(
350+
src_alpha = _mm256_shuffle_epi8(shuff_src, shuff_out_alpha);
351+
352+
// src_alpha = src_alpha * modulate_alpha / 255
353+
src_alpha = _mm256_mullo_epi16(src_alpha, modulate_alpha);
354+
src_alpha = _mm256_srli_epi16(
355+
_mm256_mulhi_epu16(src_alpha, _mm256_set1_epi16((short)0x8081)),
356+
7);
357+
358+
dst_alpha = _mm256_shuffle_epi8(shuff_dst, shuff_out_alpha);
359+
// if the destination is opaque, it takes the max of each alpha
360+
// with 255; otherwise it takes the max with 0. This is
361+
// equivalent to if opaque: alpha = 255
362+
dst_alpha =
363+
_mm256_max_epi16(dst_alpha, _mm256_set1_epi16(dst_alpha_offset));
364+
365+
// figure out alpha
366+
temp = _mm256_mullo_epi16(src_alpha, dst_alpha);
367+
temp = _mm256_srli_epi16(
368+
_mm256_mulhi_epu16(temp, _mm256_set1_epi16((short)0x8081)), 7);
369+
new_dst_alpha = _mm256_sub_epi16(dst_alpha, temp);
370+
new_dst_alpha = _mm256_add_epi16(src_alpha, new_dst_alpha);
371+
372+
// if preexisting dst alpha is 0, src alpha should be set to 255
373+
// enforces that dest alpha 0 means "copy source RGB"
374+
// happens after real src alpha values used to calculate dst alpha
375+
// compares each 16 bit block to zeroes, yielding 0xFFFF or
376+
// 0x0000-- shifts out bottom 8 bits to get to 0x00FF or 0x0000.
377+
dst_alpha = _mm256_cmpeq_epi16(dst_alpha, _mm256_setzero_si256());
378+
dst_alpha = _mm256_srli_epi16(dst_alpha, 8);
379+
src_alpha = _mm256_max_epu8(dst_alpha, src_alpha);
380+
381+
// figure out RGB
382+
temp = _mm256_sub_epi16(shuff_src, shuff_dst);
383+
temp = _mm256_mullo_epi16(temp, src_alpha);
384+
temp = _mm256_add_epi16(temp, shuff_src);
385+
shuff_dst = _mm256_slli_epi16(shuff_dst, 8);
386+
shuff_dst = _mm256_add_epi16(shuff_dst, temp);
387+
shuff_dst = _mm256_srli_epi16(shuff_dst, 8);
388+
389+
// blend together dstRGB and dstA
390+
shuff_dst =
391+
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
394392
}
395393
#else
396394
void

0 commit comments

Comments
 (0)