diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 9f1716c0db810..d135a83839f6e 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -412,6 +412,66 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
 #endif /* __MMX__ */
+#if defined(__loongarch_sx)
+/* LoongArch LSX per-pixel alpha blit for 32-bit pixels: blends info->src onto info->dst one pixel at a time, dst_w pixels per row for dst_h rows, using the source pixel's own alpha. */
+static void
+BlitRGBtoRGBPixelAlphaLSX(SDL_BlitInfo * info)
+{
+    int width = info->dst_w;
+    int height = info->dst_h;
+    Uint32 *srcp = (Uint32 *) info->src;
+    int srcskip = info->src_skip >> 2; /* >> 2: presumably converts a byte gap into 32-bit pixels - confirm units of src_skip */
+    Uint32 *dstp = (Uint32 *) info->dst;
+    int dstskip = info->dst_skip >> 2; /* same conversion for the destination row gap */
+    SDL_PixelFormat *sf = info->src_fmt;
+    Uint32 amask = sf->Amask;
+    Uint32 ashift = sf->Ashift;
+    Uint64 multmask, multmask2;
+    /* Channels are widened from bytes to 16-bit lanes below, so (for ashift a multiple of 8, which the dispatcher checks) the alpha channel ends up at bit ashift * 2. */
+    __m128i src1, src2, src3, dst1, alpha, alpha2;
+    multmask = 0x00FF;
+    multmask <<= (ashift * 2); /* 0xFF in the 16-bit lane that holds the alpha channel */
+    multmask2 = 0x00FF00FF00FF00FFULL; /* 0xFF in every 16-bit lane; XOR with it maps a to 255 - a */
+    /* NOTE(review): ashift, multmask and multmask2 never change below, yet __lsx_vreplgr2vr_d() of each is rebuilt for every blended pixel; they could be built once here. */
+    while (height--) {
+    /* *INDENT-OFF* */
+    DUFFS_LOOP4({ /* DUFFS_LOOP4 is defined elsewhere; presumably repeats this body `width` times */
+    Uint32 alpha1 = *srcp & amask; /* source alpha bits, still at their position inside the pixel */
+    if (alpha1 == 0) {
+        /* do nothing */
+    } else if (alpha1 == amask) { /* all alpha bits set: plain copy, no blending needed */
+        *dstp = *srcp;
+    } else {
+        src1 = __lsx_vreplgr2vr_w(*srcp); /* source pixel in every 32-bit lane */
+        src1 = __lsx_vinsgr2vr_w(src1, *dstp, 1); /* 32-bit lane 1 <- destination pixel, so the low 64 bits hold {src, dst} */
+        src2 = __lsx_vsllwil_hu_bu(src1, 0); /* widen the low 8 bytes to 16-bit lanes: lanes 0-3 = src channels, 4-7 = dst channels */
+        /* Build the per-channel multipliers from the source alpha value a. */
+        alpha = __lsx_vreplgr2vr_w(alpha1);
+        alpha = __lsx_vsrl_d(alpha, __lsx_vreplgr2vr_d(ashift)); /* move a down to the bottom of each 64-bit lane */
+        alpha = __lsx_vilvl_h(alpha, alpha); /* duplicate 16-bit lanes ... */
+        alpha2 = __lsx_vilvl_w(alpha, alpha); /* ... then 32-bit lanes, so the low four 16-bit lanes all hold a */
+        alpha = __lsx_vor_v(alpha2, __lsx_vreplgr2vr_d(multmask)); /* source multipliers: a per colour lane, 0xFF in the alpha lane */
+        alpha2 = __lsx_vxor_v(alpha2, __lsx_vreplgr2vr_d(multmask2)); /* destination multipliers: 255 - a in the low four lanes */
+        /* Blend: (src * a >> 8) + (dst * (255 - a) >> 8) per channel. */
+        src3 = __lsx_vilvl_d(alpha2, alpha); /* low 64 bits = source multipliers, high 64 bits = destination multipliers */
+        src1 = __lsx_vmul_h(src2, src3); /* 16-bit multiply, lane by lane */
+        src1 = __lsx_vsrli_h(src1, 8); /* scale the products back to 8-bit range */
+        src2 = __lsx_vilvh_d(src1, src1); /* copy the weighted destination lanes into the low 64 bits */
+        src1 = __lsx_vadd_h(src1, src2); /* low four lanes now hold weighted src + weighted dst */
+        dst1 = __lsx_vssrlni_bu_h(src1, src1, 0); /* narrow 16-bit lanes to bytes with unsigned saturation */
+        __lsx_vstelm_w(dst1, dstp, 0, 0); /* store 32-bit lane 0 as the blended pixel */
+    }
+    ++srcp;
+    ++dstp;
+    }, width);
+    /* *INDENT-ON* */
+    srcp += srcskip; /* step over the gap between the end of this row and the start of the next */
+    dstp += dstskip;
+    }
+}
+
+#endif /* __loongarch_sx */
+
 #ifdef SDL_ARM_SIMD_BLITTERS
 void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
@@ -1456,7 +1516,7 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
     case 4:
         if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
-#if defined(__MMX__) || defined(__3dNOW__)
+#if defined(__MMX__) || defined(__3dNOW__) || defined(__loongarch_sx)
             if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
 #ifdef __3dNOW__
                 if (SDL_Has3DNow()) {
@@ -1467,9 +1527,14 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
                 if (SDL_HasMMX()) {
                     return BlitRGBtoRGBPixelAlphaMMX;
                 }
+#endif /* NOTE(review): presumably ends the conditional guarding the MMX branch above; its opening line is outside this hunk */
+#ifdef __loongarch_sx
+                if (SDL_HasLSX()) { /* runtime check on top of the compile-time __loongarch_sx guard */
+                    return BlitRGBtoRGBPixelAlphaLSX;
+                }
 #endif
             }
-#endif /* __MMX__ || __3dNOW__ */
+#endif /* __MMX__ || __3dNOW__ || __loongarch_sx */
             if (sf->Amask == 0xff000000) {
 #ifdef SDL_ARM_NEON_BLITTERS
                 if (SDL_HasNEON()) {
diff --git a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c
index 8d87d9e0accac..8d34f2ed5b2ff 100644
--- a/src/video/SDL_fillrect.c
+++ b/src/video/SDL_fillrect.c
@@ -135,6 +135,61 @@ DEFINE_SSE_FILLRECT(4, Uint32)
 /* *INDENT-ON* */ /* clang-format on */
 #endif /* __SSE__ */
+#if defined(__loongarch_sx)
+/* LoongArch LSX rectangle fill. LSX_BEGIN declares c128 with `color` in every 32-bit lane; it expects `color` to be in scope. */
+#define LSX_BEGIN __m128i c128 = __lsx_vreplgr2vr_w(color);
+/* LSX_WORK stores c128 over n / 64 chunks of 64 bytes starting at p and advances p; it expects i, n, p and c128 in scope and leaves n unchanged. */
+#define LSX_WORK \
+    for (i = n / 64; i--;) { \
+        __lsx_vst(c128, p, 0); \
+        __lsx_vst(c128, p, 16); \
+        __lsx_vst(c128, p, 32); \
+        __lsx_vst(c128, p, 48); \
+        p += 64; \
+    }
+/* DEFINE_LSX_FILLRECT(bpp, type) defines SDL_FillRect<bpp>LSX(): fills h rows of w pixels (bpp bytes each, written as `type`) with `color`, starting at `pixels` and stepping `pitch` bytes per row. */
+#define DEFINE_LSX_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##LSX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    int i, n; \
+    Uint8 *p = NULL; \
+    /* c128 is built once and reused for every row */ \
+    LSX_BEGIN; \
+    /* per row: scalar head up to a 16-byte boundary, 64-byte vector chunks, scalar tail */ \
+    while (h--) { \
+        n = w * bpp; /* bytes still to fill in this row */ \
+        p = pixels; \
+        /* rows shorter than 64 bytes skip straight to the scalar tail */ \
+        if (n > 63) { \
+            int adjust = 16 - ((uintptr_t)p & 15); /* bytes to the next 16-byte boundary; 16 when p is already aligned */ \
+            if (adjust < 16) { \
+                n -= adjust; /* NOTE(review): removes all `adjust` bytes, but the loop below writes only adjust / bpp whole pixels; if p is not bpp-aligned the row appears to end one pixel short - confirm callers guarantee alignment */ \
+                adjust /= bpp; /* bytes -> pixels */ \
+                while (adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += bpp; \
+                } \
+            } \
+            LSX_WORK; \
+        } \
+        if (n & 63) { /* bytes left over after the whole 64-byte chunks */ \
+            int remainder = (n & 63); \
+            remainder /= bpp; /* bytes -> pixels */ \
+            while (remainder--) { \
+                *((type *)p) = (type)color; \
+                p += bpp; \
+            } \
+        } \
+        pixels += pitch; /* next row */ \
+    } \
+    \
+}
+
+DEFINE_LSX_FILLRECT(4, Uint32) /* defines SDL_FillRect4LSX, the only instantiation in this patch */
+
+#endif /* __loongarch_sx */
+
 static void SDL_FillRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
 {
     int n;
@@ -423,6 +478,13 @@ int SDL_FillRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
                 break;
             }
 #endif
+            /* Use the LSX fill when SDL_HasLSX() reports true; otherwise fall through to SDL_FillRect4. */
+#ifdef __loongarch_sx
+            if (SDL_HasLSX()) {
+                fill_function = SDL_FillRect4LSX;
+                break;
+            }
+#endif
             fill_function = SDL_FillRect4;
             break;
         }