Skip to content

Commit f5b674f

Browse files
authored
Merge pull request #2210 from Starbuck5/Use-builtin-broadcasting-intrinsics-in-blitters
Use builtin broadcasting intrinsics in blitters
2 parents e5a27e5 + 6ce4bf7 commit f5b674f

File tree

2 files changed: +17 additions, -32 deletions

2 files changed: +17 additions, -32 deletions

src_c/simd_blitters_avx2.c

Lines changed: 11 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -453,12 +453,8 @@ blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
453453
0x80, 12, 0x80, 11, 0x80, 10, 0x80, 9, 0x80, 8);
454454

455455
mm_zero = _mm_setzero_si128();
456-
mm_two_five_fives = _mm_set_epi64x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF);
457-
458-
mm256_two_five_fives = _mm256_set_epi8(
459-
0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF,
460-
0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF,
461-
0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF);
456+
mm_two_five_fives = _mm_set1_epi64x(0x00FF00FF00FF00FF);
457+
mm256_two_five_fives = _mm256_set1_epi16(0x00FF);
462458

463459
while (height--) {
464460
if (pre_8_width > 0) {
@@ -578,16 +574,12 @@ blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
578574
0x80, 12, 0x80, 11, 0x80, 10, 0x80, 9, 0x80, 8);
579575

580576
mm_zero = _mm_setzero_si128();
581-
mm_two_five_fives = _mm_set_epi64x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF);
582-
mm_alpha_mask = _mm_cvtsi32_si128(amask);
583577

584-
mm256_two_five_fives = _mm256_set_epi8(
585-
0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF,
586-
0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF,
587-
0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF);
578+
mm_two_five_fives = _mm_set1_epi64x(0x00FF00FF00FF00FF);
579+
mm256_two_five_fives = _mm256_set1_epi16(0x00FF);
588580

589-
mm256_alpha_mask = _mm256_set_epi32(amask, amask, amask, amask, amask,
590-
amask, amask, amask);
581+
mm_alpha_mask = _mm_cvtsi32_si128(amask);
582+
mm256_alpha_mask = _mm256_set1_epi32(amask);
591583

592584
while (height--) {
593585
if (pre_8_width > 0) {
@@ -772,8 +764,7 @@ blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
772764
__m256i mm256_src, mm256_dst, mm256_alpha_mask;
773765

774766
mm_alpha_mask = _mm_cvtsi32_si128(amask);
775-
mm256_alpha_mask = _mm256_set_epi32(amask, amask, amask, amask, amask,
776-
amask, amask, amask);
767+
mm256_alpha_mask = _mm256_set1_epi32(amask);
777768

778769
while (height--) {
779770
if (pre_8_width > 0) {
@@ -925,8 +916,7 @@ blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
925916
__m256i mm256_src, mm256_dst, mm256_alpha_mask;
926917

927918
mm_alpha_mask = _mm_cvtsi32_si128(amask);
928-
mm256_alpha_mask = _mm256_set_epi32(amask, amask, amask, amask, amask,
929-
amask, amask, amask);
919+
mm256_alpha_mask = _mm256_set1_epi32(amask);
930920

931921
while (height--) {
932922
if (pre_8_width > 0) {
@@ -1078,8 +1068,7 @@ blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
10781068
__m256i mm256_src, mm256_dst, mm256_alpha_mask;
10791069

10801070
mm_alpha_mask = _mm_cvtsi32_si128(amask);
1081-
mm256_alpha_mask = _mm256_set_epi32(amask, amask, amask, amask, amask,
1082-
amask, amask, amask);
1071+
mm256_alpha_mask = _mm256_set1_epi32(amask);
10831072

10841073
while (height--) {
10851074
if (pre_8_width > 0) {
@@ -1231,8 +1220,7 @@ blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
12311220
__m256i mm256_src, mm256_dst, mm256_alpha_mask;
12321221

12331222
mm_alpha_mask = _mm_cvtsi32_si128(amask);
1234-
mm256_alpha_mask = _mm256_set_epi32(amask, amask, amask, amask, amask,
1235-
amask, amask, amask);
1223+
mm256_alpha_mask = _mm256_set1_epi32(amask);
12361224

12371225
while (height--) {
12381226
if (pre_8_width > 0) {
@@ -1351,10 +1339,7 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
13511339
12 + a_index, 0x80, 12 + a_index, 0x80, 12 + a_index, 0x80,
13521340
8 + a_index, 0x80, 8 + a_index, 0x80, 8 + a_index, 0x80, 8 + a_index);
13531341

1354-
mm256_ones = _mm256_set_epi8(
1355-
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
1356-
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
1357-
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01);
1342+
mm256_ones = _mm256_set1_epi16(0x0001);
13581343

13591344
while (height--) {
13601345
if (pre_8_width > 0) {

src_c/simd_blitters_sse2.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -663,7 +663,7 @@ blit_blend_rgba_mul_sse2(SDL_BlitInfo *info)
663663
__m128i mm_src, mm_dst, mm_zero, mm_two_five_fives;
664664

665665
mm_zero = _mm_setzero_si128();
666-
mm_two_five_fives = _mm_set_epi64x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF);
666+
mm_two_five_fives = _mm_set1_epi64x(0x00FF00FF00FF00FF);
667667

668668
while (height--) {
669669
if (pre_2_width > 0) {
@@ -754,7 +754,7 @@ blit_blend_rgb_mul_sse2(SDL_BlitInfo *info)
754754
__m128i mm_src, mm_dst, mm_zero, mm_two_five_fives, mm_alpha_mask;
755755

756756
mm_zero = _mm_setzero_si128();
757-
mm_two_five_fives = _mm_set_epi64x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF);
757+
mm_two_five_fives = _mm_set1_epi64x(0x00FF00FF00FF00FF);
758758
mm_alpha_mask = _mm_set_epi64x(0x0000000000000000, amask64);
759759

760760
while (height--) {
@@ -910,7 +910,7 @@ blit_blend_rgb_add_sse2(SDL_BlitInfo *info)
910910

911911
__m128i mm_src, mm_dst, mm_alpha_mask;
912912

913-
mm_alpha_mask = _mm_set_epi32(amask, amask, amask, amask);
913+
mm_alpha_mask = _mm_set1_epi32(amask);
914914

915915
while (height--) {
916916
if (pre_4_width > 0) {
@@ -1038,7 +1038,7 @@ blit_blend_rgb_sub_sse2(SDL_BlitInfo *info)
10381038

10391039
__m128i mm_src, mm_dst, mm_alpha_mask;
10401040

1041-
mm_alpha_mask = _mm_set_epi32(amask, amask, amask, amask);
1041+
mm_alpha_mask = _mm_set1_epi32(amask);
10421042

10431043
while (height--) {
10441044
if (pre_4_width > 0) {
@@ -1166,7 +1166,7 @@ blit_blend_rgb_max_sse2(SDL_BlitInfo *info)
11661166

11671167
__m128i mm_src, mm_dst, mm_alpha_mask;
11681168

1169-
mm_alpha_mask = _mm_set_epi32(amask, amask, amask, amask);
1169+
mm_alpha_mask = _mm_set1_epi32(amask);
11701170

11711171
while (height--) {
11721172
if (pre_4_width > 0) {
@@ -1294,7 +1294,7 @@ blit_blend_rgb_min_sse2(SDL_BlitInfo *info)
12941294

12951295
__m128i mm_src, mm_dst, mm_alpha_mask;
12961296

1297-
mm_alpha_mask = _mm_set_epi32(amask, amask, amask, amask);
1297+
mm_alpha_mask = _mm_set1_epi32(amask);
12981298

12991299
while (height--) {
13001300
if (pre_4_width > 0) {

0 commit comments

Comments (0)