@@ -109,10 +109,6 @@ blit_blend_rgba_max(SDL_BlitInfo *info);
109109
110110static void
111111blit_blend_premultiplied (SDL_BlitInfo * info );
112- #ifdef __MMX__
113- static void
114- blit_blend_premultiplied_mmx (SDL_BlitInfo * info );
115- #endif /* __MMX__ */
116112
117113static int
118114SoftBlitPyGame (SDL_Surface * src , SDL_Rect * srcrect , SDL_Surface * dst ,
@@ -567,27 +563,33 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst,
567563 break ;
568564 }
569565 case PYGAME_BLEND_PREMULTIPLIED : {
566+ #if !defined(__EMSCRIPTEN__ )
567+ #if SDL_BYTEORDER == SDL_LIL_ENDIAN
570568 if (src -> format -> BytesPerPixel == 4 &&
571569 dst -> format -> BytesPerPixel == 4 &&
572570 src -> format -> Rmask == dst -> format -> Rmask &&
573571 src -> format -> Gmask == dst -> format -> Gmask &&
574572 src -> format -> Bmask == dst -> format -> Bmask &&
575- info .src_blend != SDL_BLENDMODE_NONE ) {
576- #if defined(__MMX__ ) || defined(__SSE2__ ) || defined(PG_ENABLE_ARM_NEON )
573+ info .src_blend != SDL_BLENDMODE_NONE &&
574+ pg_has_avx2 () && (src != dst )) {
575+ blit_blend_premultiplied_avx2 (& info );
576+ break ;
577+ }
577578#if PG_ENABLE_SSE_NEON
578- if (pg_HasSSE_NEON ()) {
579- blit_blend_premultiplied_sse2 (& info );
580- break ;
581- }
582- #endif /* PG_ENABLE_SSE_NEON */
583- #ifdef __MMX__
584- if (SDL_HasMMX () == SDL_TRUE ) {
585- blit_blend_premultiplied_mmx (& info );
586- break ;
587- }
588- #endif /*__MMX__*/
589- #endif /*__MMX__ || __SSE2__ || PG_ENABLE_ARM_NEON*/
579+ if (src -> format -> BytesPerPixel == 4 &&
580+ dst -> format -> BytesPerPixel == 4 &&
581+ src -> format -> Rmask == dst -> format -> Rmask &&
582+ src -> format -> Gmask == dst -> format -> Gmask &&
583+ src -> format -> Bmask == dst -> format -> Bmask &&
584+ src -> format -> Amask == 0xFF000000 &&
585+ info .src_blend != SDL_BLENDMODE_NONE &&
586+ pg_HasSSE_NEON () && (src != dst )) {
587+ blit_blend_premultiplied_sse2 (& info );
588+ break ;
590589 }
590+ #endif /* PG_ENABLE_SSE_NEON */
591+ #endif /* SDL_BYTEORDER == SDL_LIL_ENDIAN */
592+ #endif /* !defined(__EMSCRIPTEN__) */
591593
592594 blit_blend_premultiplied (& info );
593595 break ;
@@ -1262,83 +1264,6 @@ blit_blend_rgba_max(SDL_BlitInfo *info)
12621264 }
12631265}
12641266
1265- #ifdef __MMX__
1266- /* MMX blit of 32-bit pixels with premultiplied source alpha: source alpha 0 leaves dst untouched, source alpha == Amask copies the source pixel, otherwise every 8-bit channel becomes src + ((dst * (255 - alpha)) >> 8), saturated to 255. The lane-layout notes below (0A0R0G0B etc.) are written for an ARGB pixel; the code itself takes the alpha position from the source format. */
1267- static void
1268- blit_blend_premultiplied_mmx (SDL_BlitInfo * info )
1269- {
1270- int n ;
1271- int width = info -> width ;
1272- int height = info -> height ;
1273- Uint32 * srcp = (Uint32 * )info -> s_pixels ;
1274- int srcskip = info -> s_skip >> 2 ; /* NOTE(review): presumably s_skip is in bytes and >> 2 converts it to Uint32 units - confirm against SDL_BlitInfo */
1275- Uint32 * dstp = (Uint32 * )info -> d_pixels ;
1276- int dstskip = info -> d_skip >> 2 ; /* same conversion as srcskip, for the destination row padding */
1277- SDL_PixelFormat * srcfmt = info -> src ;
1278- Uint32 amask = srcfmt -> Amask ;
1279- Uint32 ashift = srcfmt -> Ashift ;
1280- Uint64 multmask2 ;
1281-
1282- __m64 src1 , dst1 , mm_alpha , mm_zero , mm_alpha2 ;
1283-
1284- mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */
1285- multmask2 = 0x00FF00FF00FF00FFULL ; /* 0x00FF in each 16-bit lane; XOR with it maps an 8-bit alpha to 255 - alpha */
1286-
1287- while (height -- ) {
1288- /* *INDENT-OFF* */
1289- LOOP_UNROLLED4 (
1290- {
1291- Uint32 alpha = * srcp & amask ;
1292- if (alpha == 0 ) {
1293- /* source alpha is zero: leave the destination pixel unchanged */
1294- }
1295- else if (alpha == amask ) {
1296- * dstp = * srcp ; /* every alpha bit set: plain copy of the source pixel */
1297- }
1298- else {
1299- src1 = _mm_cvtsi32_si64 (
1300- * srcp ); /* src(ARGB) -> src1 (0000ARGB) */
1301- src1 =
1302- _mm_unpacklo_pi8 (src1 , mm_zero ); /* 0A0R0G0B -> src1 */
1303-
1304- dst1 = _mm_cvtsi32_si64 (
1305- * dstp ); /* dst(ARGB) -> dst1 (0000ARGB) */
1306- dst1 =
1307- _mm_unpacklo_pi8 (dst1 , mm_zero ); /* 0A0R0G0B -> dst1 */
1308-
1309- mm_alpha = _mm_cvtsi32_si64 (
1310- alpha ); /* masked alpha, still at its in-pixel bit position -> mm_alpha */
1311- mm_alpha = _mm_srli_si64 (
1312- mm_alpha ,
1313- ashift ); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
1314- mm_alpha = _mm_unpacklo_pi16 (
1315- mm_alpha , mm_alpha ); /* 00000A0A -> mm_alpha */
1316- mm_alpha2 = _mm_unpacklo_pi32 (
1317- mm_alpha , mm_alpha ); /* 0A0A0A0A -> mm_alpha2 */
1318- mm_alpha2 = _mm_xor_si64 (
1319- mm_alpha2 ,
1320- * (__m64 * )& multmask2 ); /* 255 - alpha in each 16-bit lane -> mm_alpha2 */
1321-
1322- /* pre-multiplied alpha blend: dst = src + ((dst * (255 - alpha)) >> 8); the >> 8 divides by 256 as a cheap stand-in for / 255 */
1323- dst1 = _mm_mullo_pi16 (dst1 , mm_alpha2 );
1324- dst1 = _mm_srli_pi16 (dst1 , 8 );
1325- dst1 = _mm_add_pi16 (src1 , dst1 );
1326- dst1 = _mm_packs_pu16 (dst1 , mm_zero ); /* 16-bit lanes -> bytes with unsigned saturation, so a sum above 255 clamps to 255 */
1327-
1328- * dstp = _mm_cvtsi64_si32 (dst1 ); /* dst1 -> pixel */
1329- }
1330- ++ srcp ;
1331- ++ dstp ;
1332- },
1333- n , width );
1334- /* *INDENT-ON* */
1335- srcp += srcskip ;
1336- dstp += dstskip ;
1337- }
1338- _mm_empty (); /* clear the MMX state so later x87 floating-point code sees a usable FPU */
1339- }
1340- #endif /*__MMX__*/
1341-
13421267static void
13431268blit_blend_premultiplied (SDL_BlitInfo * info )
13441269{
0 commit comments