Commit 569b0ae
[loong64] Fix the remaining implicit vector casts
CL 909436 fixed a few implicit vector casts, but those fixes were not tested against a
proper Skia build (they mainly addressed the errors that happened to show up in Debian
packaging, e.g. wpewebkit), so many of the implicit casts remain. The changes here are
tested with GCC 14 and confirmed not to affect Clang builds. However, due to an issue
with the Clang headers [1], -fno-lax-vector-conversions cannot be enabled for LoongArch
Clang builds yet, at least for Clang < 19.1.4 [2].

[1]: llvm/llvm-project#110834
[2]: llvm/llvm-project#114958

Change-Id: I3b4e9479cb6f9628b4cf796a0ac25098bc1836a2
[Kaiyang Wu: backport to webkit2gtk]
Link: https://skia-review.googlesource.com/c/skia/+/908137
Signed-off-by: Kaiyang Wu <[email protected]>
1 parent 554ea2c commit 569b0ae
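The class of error being fixed can be illustrated with a minimal standalone sketch (not taken from the commit; the typedefs below are illustrative stand-ins assuming GCC/Clang vector extensions, not the definitions used by Skia or lsxintrin.h). LSX/LASX intrinsics such as __lsx_vld return the generic __m128i/__m256i types, while the Skia code holds values in element-typed vectors such as v4u32. GCC rejects the implicit conversion between such distinct vector types, and Clang does too once -fno-lax-vector-conversions is in effect, so the patch inserts explicit casts:

    // Illustrative stand-ins only, not the real Skia or lsxintrin.h typedefs.
    typedef unsigned int v4u32   __attribute__((vector_size(16)));
    typedef long long    m128i_t __attribute__((vector_size(16)));  // same shape as __m128i

    v4u32 widen(m128i_t raw) {
        // v4u32 bad = raw;     // implicit vector conversion: error with GCC,
        //                      // and with Clang under -fno-lax-vector-conversions
        v4u32 ok = (v4u32)raw;  // explicit cast, the fix applied throughout this commit
        return ok;
    }

The cast merely reinterprets the 128-bit value, so it should not change the generated code; the same pattern appears in the hunks below as (v4u32), (v16u8), (__m128i), (__m256i), (F), (U16), and so on.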

2 files changed: +103, -105 lines

Source/ThirdParty/skia/src/core/SkBlurEngine.cpp

Lines changed: 18 additions & 18 deletions
@@ -330,61 +330,61 @@ class GaussPass final : public Pass {
         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
         skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
-        v4u32 sum0 = __lsx_vld(fSum0, 0); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
-        v4u32 sum1 = __lsx_vld(fSum1, 0);
-        v4u32 sum2 = __lsx_vld(fSum2, 0);
+        v4u32 sum0 = (v4u32)__lsx_vld(fSum0, 0); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
+        v4u32 sum1 = (v4u32)__lsx_vld(fSum1, 0);
+        v4u32 sum2 = (v4u32)__lsx_vld(fSum2, 0);
 
         auto processValue = [&](v4u32& vLeadingEdge){
             sum0 += vLeadingEdge;
             sum1 += sum0;
             sum2 += sum1;
 
-            v4u32 divisorFactor = __lsx_vreplgr2vr_w(fDivider.divisorFactor());
-            v4u32 blurred = __lsx_vmuh_w(divisorFactor, sum2);
+            v4u32 divisorFactor = (v4u32)__lsx_vreplgr2vr_w(fDivider.divisorFactor());
+            v4u32 blurred = (v4u32)__lsx_vmuh_w((__m128i)divisorFactor, (__m128i)sum2);
 
-            v4u32 buffer2Value = __lsx_vld(buffer2Cursor, 0); //Not fBuffer0Cursor, out of bounds.
+            v4u32 buffer2Value = (v4u32)__lsx_vld(buffer2Cursor, 0); // Not fBuffer0Cursor, out of bounds.
             sum2 -= buffer2Value;
             __lsx_vst(sum1, (void *)buffer2Cursor, 0);
             buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
-            v4u32 buffer1Value = __lsx_vld(buffer1Cursor, 0);
+            v4u32 buffer1Value = (v4u32)__lsx_vld(buffer1Cursor, 0);
             sum1 -= buffer1Value;
             __lsx_vst(sum0, (void *)buffer1Cursor, 0);
             buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
-            v4u32 buffer0Value = __lsx_vld(buffer0Cursor, 0);
+            v4u32 buffer0Value = (v4u32)__lsx_vld(buffer0Cursor, 0);
             sum0 -= buffer0Value;
             __lsx_vst(vLeadingEdge, (void *)buffer0Cursor, 0);
             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
 
             v16u8 shuf = {0x0,0x4,0x8,0xc,0x0};
-            v16u8 ret = __lsx_vshuf_b(blurred, blurred, shuf);
+            v16u8 ret = (v16u8)__lsx_vshuf_b((__m128i)blurred, (__m128i)blurred, (__m128i)shuf);
             return ret;
         };
 
-        v4u32 zero = __lsx_vldi(0x0);
+        v4u32 zero = (v4u32)__lsx_vldi(0x0);
         if (!src && !dst) {
             while (n --> 0) {
                 (void)processValue(zero);
             }
         } else if (src && !dst) {
             while (n --> 0) {
-                v4u32 edge = __lsx_vinsgr2vr_w(zero, *src, 0);
-                edge = __lsx_vilvl_b(zero, edge);
-                edge = __lsx_vilvl_h(zero, edge);
+                v4u32 edge = (v4u32)__lsx_vinsgr2vr_w((__m128i)zero, *src, 0);
+                edge = (v4u32)__lsx_vilvl_b((__m128i)zero, (__m128i)edge);
+                edge = (v4u32)__lsx_vilvl_h((__m128i)zero, (__m128i)edge);
                 (void)processValue(edge);
                 src += srcStride;
             }
         } else if (!src && dst) {
             while (n --> 0) {
-                v4u32 ret = processValue(zero);
+                v4u32 ret = (v4u32)processValue(zero);
                 __lsx_vstelm_w(ret, dst, 0, 0); // 3rd is offset, 4th is idx.
                 dst += dstStride;
             }
         } else if (src && dst) {
             while (n --> 0) {
-                v4u32 edge = __lsx_vinsgr2vr_w(zero, *src, 0);
-                edge = __lsx_vilvl_b(zero, edge);
-                edge = __lsx_vilvl_h(zero, edge);
-                v4u32 ret = processValue(edge);
+                v4u32 edge = (v4u32)__lsx_vinsgr2vr_w(zero, *src, 0);
+                edge = (v4u32)__lsx_vilvl_b((__m128i)zero, (__m128i)edge);
+                edge = (v4u32)__lsx_vilvl_h((__m128i)zero, (__m128i)edge);
+                v4u32 ret = (v4u32)processValue(edge);
                 __lsx_vstelm_w(ret, dst, 0, 0);
                 src += srcStride;
                 dst += dstStride;

Source/ThirdParty/skia/src/opts/SkRasterPipeline_opts.h

Lines changed: 85 additions & 87 deletions
@@ -922,35 +922,33 @@ namespace SK_OPTS_NS {
                                            sk_bit_cast<__m256i>(c)));
 }
 
-SI F min(F a, F b) { return __lasx_xvfmin_s(a,b); }
-SI F max(F a, F b) { return __lasx_xvfmax_s(a,b); }
-SI I32 min(I32 a, I32 b) { return __lasx_xvmin_w(a,b); }
-SI U32 min(U32 a, U32 b) { return __lasx_xvmin_wu(a,b); }
-SI I32 max(I32 a, I32 b) { return __lasx_xvmax_w(a,b); }
-SI U32 max(U32 a, U32 b) { return __lasx_xvmax_wu(a,b); }
+SI F min(F a, F b) { return (F)__lasx_xvfmin_s((__m256)a, (__m256)b); }
+SI F max(F a, F b) { return (F)__lasx_xvfmax_s((__m256)a, (__m256)b); }
+SI I32 min(I32 a, I32 b) { return (I32)__lasx_xvmin_w((__m256i)a, (__m256i)b); }
+SI U32 min(U32 a, U32 b) { return (U32)__lasx_xvmin_wu((__m256i)a, (__m256i)b); }
+SI I32 max(I32 a, I32 b) { return (I32)__lasx_xvmax_w((__m256i)a, (__m256i)b); }
+SI U32 max(U32 a, U32 b) { return (U32)__lasx_xvmax_wu((__m256i)a, (__m256i)b); }
 
 SI F mad(F f, F m, F a) { return __lasx_xvfmadd_s(f, m, a); }
 SI F nmad(F f, F m, F a) { return __lasx_xvfmadd_s(-f, m, a); }
-SI F abs_ (F v) { return (F)__lasx_xvand_v((I32)v, (I32)(0-v)); }
+SI F abs_(F v) { return (F)__lasx_xvand_v((__m256i)v, (__m256i)(I32)(0-v)); }
 SI I32 abs_(I32 v) { return max(v, -v); }
 SI F rcp_approx(F v) { return __lasx_xvfrecip_s(v); }
 SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
 SI F rsqrt_approx (F v) { return __lasx_xvfrsqrt_s(v); }
 SI F sqrt_(F v) { return __lasx_xvfsqrt_s(v); }
 
 SI U32 iround(F v) {
-    F t = F() + 0.5f;
-    return __lasx_xvftintrz_w_s(v + t);
+    return (U32)__lasx_xvftintrz_w_s(v + 0.5f);
 }
 
 SI U32 round(F v) {
-    F t = F() + 0.5f;
-    return __lasx_xvftintrz_w_s(v + t);
+    return (U32)__lasx_xvftintrz_w_s(v + 0.5f);
 }
 
 SI U16 pack(U32 v) {
-    return __lsx_vpickev_h(__lsx_vsat_wu(emulate_lasx_d_xr2vr_h(v), 15),
-                           __lsx_vsat_wu(emulate_lasx_d_xr2vr_l(v), 15));
+    return (U16)__lsx_vpickev_h(__lsx_vsat_wu(emulate_lasx_d_xr2vr_h((__m256i)v), 15),
+                                __lsx_vsat_wu(emulate_lasx_d_xr2vr_l((__m256i)v), 15));
 }
 
 SI U8 pack(U16 v) {
@@ -960,12 +958,12 @@ namespace SK_OPTS_NS {
 }
 
 SI bool any(I32 c){
-    v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), c));
+    v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), (__m256i)c));
     return (retv[0] | retv[4]) != 0b0000;
 }
 
 SI bool all(I32 c){
-    v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), c));
+    v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), (__m256i)c));
     return (retv[0] & retv[4]) == 0b1111;
 }

@@ -998,16 +996,16 @@ namespace SK_OPTS_NS {
 }
 
 SI void load2(const uint16_t* ptr, U16* r, U16* g) {
-    U16 _0123 = __lsx_vld(ptr, 0),
-        _4567 = __lsx_vld(ptr, 16);
-    *r = __lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_4567, 16), 16), 15),
-                         __lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_0123, 16), 16), 15));
-    *g = __lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(_4567, 16), 15),
-                         __lsx_vsat_w(__lsx_vsrai_w(_0123, 16), 15));
+    U16 _0123 = (U16)__lsx_vld(ptr, 0),
+        _4567 = (U16)__lsx_vld(ptr, 16);
+    *r = (U16)__lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_4567, 16), 16), 15),
+                              __lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_0123, 16), 16), 15));
+    *g = (U16)__lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(_4567, 16), 15),
+                              __lsx_vsat_w(__lsx_vsrai_w(_0123, 16), 15));
 }
 SI void store2(uint16_t* ptr, U16 r, U16 g) {
-    auto _0123 = __lsx_vilvl_h(g, r),
-         _4567 = __lsx_vilvh_h(g, r);
+    auto _0123 = __lsx_vilvl_h((__m128i)g, (__m128i)r),
+         _4567 = __lsx_vilvh_h((__m128i)g, (__m128i)r);
     __lsx_vst(_0123, ptr, 0);
     __lsx_vst(_4567, ptr, 16);
 }
@@ -1028,17 +1026,17 @@ namespace SK_OPTS_NS {
          rg4567 = __lsx_vilvl_h(_57, _46),
          ba4567 = __lsx_vilvh_h(_57, _46);
 
-    *r = __lsx_vilvl_d(rg4567, rg0123);
-    *g = __lsx_vilvh_d(rg4567, rg0123);
-    *b = __lsx_vilvl_d(ba4567, ba0123);
-    *a = __lsx_vilvh_d(ba4567, ba0123);
+    *r = (U16)__lsx_vilvl_d(rg4567, rg0123);
+    *g = (U16)__lsx_vilvh_d(rg4567, rg0123);
+    *b = (U16)__lsx_vilvl_d(ba4567, ba0123);
+    *a = (U16)__lsx_vilvh_d(ba4567, ba0123);
 }
 
 SI void store4(uint16_t* ptr, U16 r, U16 g, U16 b, U16 a) {
-    auto rg0123 = __lsx_vilvl_h(g, r), // r0 g0 r1 g1 r2 g2 r3 g3
-         rg4567 = __lsx_vilvh_h(g, r), // r4 g4 r5 g5 r6 g6 r7 g7
-         ba0123 = __lsx_vilvl_h(a, b),
-         ba4567 = __lsx_vilvh_h(a, b);
+    auto rg0123 = __lsx_vilvl_h((__m128i)g, (__m128i)r), // r0 g0 r1 g1 r2 g2 r3 g3
+         rg4567 = __lsx_vilvh_h((__m128i)g, (__m128i)r), // r4 g4 r5 g5 r6 g6 r7 g7
+         ba0123 = __lsx_vilvl_h((__m128i)a, (__m128i)b),
+         ba4567 = __lsx_vilvh_h((__m128i)a, (__m128i)b);
 
     auto _01 =__lsx_vilvl_w(ba0123, rg0123),
          _23 =__lsx_vilvh_w(ba0123, rg0123),
@@ -1121,29 +1119,29 @@ namespace SK_OPTS_NS {
                                            sk_bit_cast<__m128i>(c)));
 }
 
-SI F min(F a, F b) { return __lsx_vfmin_s(a,b); }
-SI F max(F a, F b) { return __lsx_vfmax_s(a,b); }
-SI I32 min(I32 a, I32 b) { return __lsx_vmin_w(a,b); }
-SI U32 min(U32 a, U32 b) { return __lsx_vmin_wu(a,b); }
-SI I32 max(I32 a, I32 b) { return __lsx_vmax_w(a,b); }
-SI U32 max(U32 a, U32 b) { return __lsx_vmax_wu(a,b); }
+SI F min(F a, F b) { return (F)__lsx_vfmin_s((__m128)a, (__m128)b); }
+SI F max(F a, F b) { return (F)__lsx_vfmax_s((__m128)a, (__m128)b); }
+SI I32 min(I32 a, I32 b) { return (I32)__lsx_vmin_w((__m128i)a, (__m128i)b); }
+SI U32 min(U32 a, U32 b) { return (U32)__lsx_vmin_wu((__m128i)a, (__m128i)b); }
+SI I32 max(I32 a, I32 b) { return (I32)__lsx_vmax_w((__m128i)a, (__m128i)b); }
+SI U32 max(U32 a, U32 b) { return (U32)__lsx_vmax_wu((__m128i)a, (__m128i)b); }
 
-SI F mad(F f, F m, F a) { return __lsx_vfmadd_s(f, m, a); }
-SI F nmad(F f, F m, F a) { return __lsx_vfmadd_s(-f, m, a); }
-SI F abs_(F v) { return (F)__lsx_vand_v((I32)v, (I32)(0-v)); }
+SI F mad(F f, F m, F a) { return (F)__lsx_vfmadd_s((__m128)f, (__m128)m, (__m128)a); }
+SI F nmad(F f, F m, F a) { return (F)__lsx_vfmadd_s((__m128)(-f), (__m128)m, (__m128)a); }
+SI F abs_(F v) { return (F)__lsx_vand_v((__m128i)(I32)v, (__m128i)(I32)(0-v)); }
 SI I32 abs_(I32 v) { return max(v, -v); }
-SI F rcp_approx (F v) { return __lsx_vfrecip_s(v); }
+SI F rcp_approx (F v) { return (F)__lsx_vfrecip_s((__m128)v); }
 SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
-SI F rsqrt_approx (F v) { return __lsx_vfrsqrt_s(v); }
-SI F sqrt_(F v) { return __lsx_vfsqrt_s (v); }
+SI F rsqrt_approx (F v) { return (F)__lsx_vfrsqrt_s((__m128)v); }
+SI F sqrt_(F v) { return (F)__lsx_vfsqrt_s ((__m128)v); }
 
 SI U32 iround(F v) {
-    F t = F() + 0.5f;
-    return __lsx_vftintrz_w_s(v + t); }
+    return (U32)__lsx_vftintrz_w_s(v + 0.5f);
+}
 
 SI U32 round(F v) {
-    F t = F() + 0.5f;
-    return __lsx_vftintrz_w_s(v + t); }
+    return (U32)__lsx_vftintrz_w_s(v + 0.5f);
+}
 
 SI U16 pack(U32 v) {
     __m128i tmp = __lsx_vsat_wu(v, 15);
@@ -1159,12 +1157,12 @@ namespace SK_OPTS_NS {
 }
 
 SI bool any(I32 c){
-    v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), c));
+    v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), (__m128i)c));
     return retv[0] != 0b0000;
 }
 
 SI bool all(I32 c){
-    v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), c));
+    v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), (__m128i)c));
     return retv[0] == 0b1111;
 }

@@ -1211,7 +1209,7 @@ namespace SK_OPTS_NS {
 }
 
 SI void store2(uint16_t* ptr, U16 r, U16 g) {
-    U32 rg = __lsx_vilvl_h(widen_cast<__m128i>(g), widen_cast<__m128i>(r));
+    U32 rg = (U32)__lsx_vilvl_h(widen_cast<__m128i>(g), widen_cast<__m128i>(r));
     __lsx_vst(rg, ptr, 0);
 }

@@ -3391,35 +3389,35 @@ SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
     } else
 #elif defined(SKRP_CPU_LASX)
     if (c->stopCount <= 8) {
-        fr = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[0], 0), idx);
-        br = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[0], 0), idx);
-        fg = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[1], 0), idx);
-        bg = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[1], 0), idx);
-        fb = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[2], 0), idx);
-        bb = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[2], 0), idx);
-        fa = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[3], 0), idx);
-        ba = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[3], 0), idx);
+        fr = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[0], 0), (__m256i)idx);
+        br = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[0], 0), (__m256i)idx);
+        fg = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[1], 0), (__m256i)idx);
+        bg = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[1], 0), (__m256i)idx);
+        fb = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[2], 0), (__m256i)idx);
+        bb = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[2], 0), (__m256i)idx);
+        fa = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[3], 0), (__m256i)idx);
+        ba = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[3], 0), (__m256i)idx);
     } else
 #elif defined(SKRP_CPU_LSX)
     if (c->stopCount <= 4) {
         __m128i zero = __lsx_vldi(0);
-        fr = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[0], 0));
-        br = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[0], 0));
-        fg = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[1], 0));
-        bg = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[1], 0));
-        fb = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[2], 0));
-        bb = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[2], 0));
-        fa = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[3], 0));
-        ba = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[3], 0));
+        fr = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[0], 0));
+        br = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[0], 0));
+        fg = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[1], 0));
+        bg = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[1], 0));
+        fb = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[2], 0));
+        bb = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[2], 0));
+        fa = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[3], 0));
+        ba = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[3], 0));
     } else
 #endif
     {
 #if defined(SKRP_CPU_LSX)
         // This can reduce some vpickve2gr instructions.
-        int i0 = __lsx_vpickve2gr_w(idx, 0);
-        int i1 = __lsx_vpickve2gr_w(idx, 1);
-        int i2 = __lsx_vpickve2gr_w(idx, 2);
-        int i3 = __lsx_vpickve2gr_w(idx, 3);
+        int i0 = __lsx_vpickve2gr_w((__m128i)idx, 0);
+        int i1 = __lsx_vpickve2gr_w((__m128i)idx, 1);
+        int i2 = __lsx_vpickve2gr_w((__m128i)idx, 2);
+        int i3 = __lsx_vpickve2gr_w((__m128i)idx, 3);
         fr = gather((int *)c->fs[0], i0, i1, i2, i3);
         br = gather((int *)c->bs[0], i0, i1, i2, i3);
         fg = gather((int *)c->fs[1], i0, i1, i2, i3);
@@ -5931,7 +5929,7 @@ SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
         split(v, &_02,&_13);
         __m256i tmp0 = __lasx_xvsat_wu(_02, 15);
         __m256i tmp1 = __lasx_xvsat_wu(_13, 15);
-        return __lasx_xvpickev_h(tmp1, tmp0);
+        return (U16)__lasx_xvpickev_h(tmp1, tmp0);
     };
 #elif defined(SKRP_CPU_LSX)
     __m128i _01, _23, rg, ba;
@@ -5941,10 +5939,10 @@ SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
 
     __m128i mask_00ff = __lsx_vreplgr2vr_h(0xff);
 
-    *r = __lsx_vand_v(rg, mask_00ff);
-    *g = __lsx_vsrli_h(rg, 8);
-    *b = __lsx_vand_v(ba, mask_00ff);
-    *a = __lsx_vsrli_h(ba, 8);
+    *r = (U16)__lsx_vand_v(rg, mask_00ff);
+    *g = (U16)__lsx_vsrli_h(rg, 8);
+    *b = (U16)__lsx_vand_v(ba, mask_00ff);
+    *a = (U16)__lsx_vsrli_h(ba, 8);
 #else
     auto cast_U16 = [](U32 v) -> U16 {
         return cast<U16>(v);
@@ -5972,26 +5970,26 @@ SI void load_8888_(const uint32_t* ptr, U16* r, U16* g, U16* b, U16* a) {
 SI void store_8888_(uint32_t* ptr, U16 r, U16 g, U16 b, U16 a) {
 #if defined(SKRP_CPU_LSX)
     __m128i mask = __lsx_vreplgr2vr_h(255);
-    r = __lsx_vmin_hu(r, mask);
-    g = __lsx_vmin_hu(g, mask);
-    b = __lsx_vmin_hu(b, mask);
-    a = __lsx_vmin_hu(a, mask);
+    r = (U16)__lsx_vmin_hu((__m128i)r, mask);
+    g = (U16)__lsx_vmin_hu((__m128i)g, mask);
+    b = (U16)__lsx_vmin_hu((__m128i)b, mask);
+    a = (U16)__lsx_vmin_hu((__m128i)a, mask);
 
-    g = __lsx_vslli_h(g, 8);
+    g = (U16)__lsx_vslli_h(g, 8);
     r = r | g;
-    a = __lsx_vslli_h(a, 8);
+    a = (U16)__lsx_vslli_h(a, 8);
     a = a | b;
 
     __m128i r_lo = __lsx_vsllwil_wu_hu(r, 0);
-    __m128i r_hi = __lsx_vexth_wu_hu(r);
+    __m128i r_hi = __lsx_vexth_wu_hu((__m128i)r);
     __m128i a_lo = __lsx_vsllwil_wu_hu(a, 0);
-    __m128i a_hi = __lsx_vexth_wu_hu(a);
+    __m128i a_hi = __lsx_vexth_wu_hu((__m128i)a);
 
     a_lo = __lsx_vslli_w(a_lo, 16);
     a_hi = __lsx_vslli_w(a_hi, 16);
 
-    r = r_lo | a_lo;
-    a = r_hi | a_hi;
+    r = (U16)(r_lo | a_lo);
+    a = (U16)(r_hi | a_hi);
     store(ptr, join<U32>(r, a));
 #else
     r = min(r, 255);
@@ -6557,8 +6555,8 @@ STAGE_GP(bilerp_clamp_8888, const SkRasterPipeline_GatherCtx* ctx) {
     qy_lo = __lsx_vxor_v(qy_lo, temp);
     qy_hi = __lsx_vxor_v(qy_hi, temp);
 
-    I16 tx = __lsx_vpickev_h(qx_hi, qx_lo);
-    I16 ty = __lsx_vpickev_h(qy_hi, qy_lo);
+    I16 tx = (I16)__lsx_vpickev_h(qx_hi, qx_lo);
+    I16 ty = (I16)__lsx_vpickev_h(qy_hi, qy_lo);
 #else
     I16 tx = cast<I16>(qx ^ 0x8000),
         ty = cast<I16>(qy ^ 0x8000);
