@@ -922,35 +922,33 @@ namespace SK_OPTS_NS {
                                        sk_bit_cast<__m256i>(c)));
 }
 
-    SI F min(F a, F b)       { return __lasx_xvfmin_s(a, b); }
-    SI F max(F a, F b)       { return __lasx_xvfmax_s(a, b); }
-    SI I32 min(I32 a, I32 b) { return __lasx_xvmin_w(a, b); }
-    SI U32 min(U32 a, U32 b) { return __lasx_xvmin_wu(a, b); }
-    SI I32 max(I32 a, I32 b) { return __lasx_xvmax_w(a, b); }
-    SI U32 max(U32 a, U32 b) { return __lasx_xvmax_wu(a, b); }
+    SI F min(F a, F b)       { return (F)__lasx_xvfmin_s((__m256)a, (__m256)b); }
+    SI F max(F a, F b)       { return (F)__lasx_xvfmax_s((__m256)a, (__m256)b); }
+    SI I32 min(I32 a, I32 b) { return (I32)__lasx_xvmin_w((__m256i)a, (__m256i)b); }
+    SI U32 min(U32 a, U32 b) { return (U32)__lasx_xvmin_wu((__m256i)a, (__m256i)b); }
+    SI I32 max(I32 a, I32 b) { return (I32)__lasx_xvmax_w((__m256i)a, (__m256i)b); }
+    SI U32 max(U32 a, U32 b) { return (U32)__lasx_xvmax_wu((__m256i)a, (__m256i)b); }
 
     SI F mad(F f, F m, F a)  { return __lasx_xvfmadd_s(f, m, a); }
     SI F nmad(F f, F m, F a) { return __lasx_xvfmadd_s(-f, m, a); }
-    SI F abs_(F v)           { return (F)__lasx_xvand_v((I32)v, (I32)(0-v)); }
+    SI F abs_(F v)           { return (F)__lasx_xvand_v((__m256i)v, (__m256i)(I32)(0-v)); }
     SI I32 abs_(I32 v)       { return max(v, -v); }
     SI F rcp_approx(F v)     { return __lasx_xvfrecip_s(v); }
     SI F rcp_precise(F v)    { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
     SI F rsqrt_approx(F v)   { return __lasx_xvfrsqrt_s(v); }
     SI F sqrt_(F v)          { return __lasx_xvfsqrt_s(v); }
 
     SI U32 iround(F v) {
-        F t = F() + 0.5f;
-        return __lasx_xvftintrz_w_s(v + t);
+        return (U32)__lasx_xvftintrz_w_s(v + 0.5f);
     }
 
     SI U32 round(F v) {
-        F t = F() + 0.5f;
-        return __lasx_xvftintrz_w_s(v + t);
+        return (U32)__lasx_xvftintrz_w_s(v + 0.5f);
     }
 
     SI U16 pack(U32 v) {
-        return __lsx_vpickev_h(__lsx_vsat_wu(emulate_lasx_d_xr2vr_h(v), 15),
-                               __lsx_vsat_wu(emulate_lasx_d_xr2vr_l(v), 15));
+        return (U16)__lsx_vpickev_h(__lsx_vsat_wu(emulate_lasx_d_xr2vr_h((__m256i)v), 15),
+                                    __lsx_vsat_wu(emulate_lasx_d_xr2vr_l((__m256i)v), 15));
     }
 
     SI U8 pack(U16 v) {
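
Note: the change in this hunk is purely type-level. F, I32, and U32 are GCC/Clang lane-typed vectors, while the LASX intrinsics are declared on __m256/__m256i, so each call now casts its operands in and its result back out. A minimal sketch of the same pattern, assuming GCC-style vector typedefs and the lasxintrin.h header (the typedef names here are illustrative, not Skia's exact definitions):

    #include <lasxintrin.h>
    #include <stdint.h>

    typedef float   F   __attribute__((vector_size(32)));  // 8 x float
    typedef int32_t I32 __attribute__((vector_size(32)));  // 8 x int32_t

    // Newer compilers reject passing a lane-typed vector where the intrinsic
    // expects __m256, so the conversions must be spelled out on both sides.
    static inline F vec_min(F a, F b) {
        return (F)__lasx_xvfmin_s((__m256)a, (__m256)b);
    }
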
@@ -960,12 +958,12 @@ namespace SK_OPTS_NS {
     }
 
     SI bool any(I32 c) {
-        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), c));
+        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), (__m256i)c));
         return (retv[0] | retv[4]) != 0b0000;
     }
 
     SI bool all(I32 c) {
-        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), c));
+        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), (__m256i)c));
         return (retv[0] & retv[4]) == 0b1111;
     }
 
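These any()/all() helpers lean on two facts: __lasx_xvslt_wu(0, c) produces an all-ones lane exactly where c is nonzero, and __lasx_xvmskltz_w packs each lane's sign bit into a 4-bit mask per 128-bit half, read back through retv[0] and retv[4]. A scalar model of that logic, as a sketch rather than the shipped code:

    #include <stdint.h>

    static bool any_scalar(const uint32_t c[8]) {
        int lo = 0, hi = 0;                          // one 4-bit mask per half
        for (int i = 0; i < 4; i++) lo |= (c[i]     != 0) << i;
        for (int i = 0; i < 4; i++) hi |= (c[i + 4] != 0) << i;
        return (lo | hi) != 0b0000;                  // mirrors retv[0] | retv[4]
    }

    static bool all_scalar(const uint32_t c[8]) {
        int lo = 0, hi = 0;
        for (int i = 0; i < 4; i++) lo |= (c[i]     != 0) << i;
        for (int i = 0; i < 4; i++) hi |= (c[i + 4] != 0) << i;
        return (lo & hi) == 0b1111;                  // mirrors retv[0] & retv[4]
    }
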
@@ -998,16 +996,16 @@ namespace SK_OPTS_NS {
     }
 
     SI void load2(const uint16_t* ptr, U16* r, U16* g) {
-        U16 _0123 = __lsx_vld(ptr, 0),
-            _4567 = __lsx_vld(ptr, 16);
-        *r = __lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_4567, 16), 16), 15),
-                             __lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_0123, 16), 16), 15));
-        *g = __lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(_4567, 16), 15),
-                             __lsx_vsat_w(__lsx_vsrai_w(_0123, 16), 15));
+        U16 _0123 = (U16)__lsx_vld(ptr, 0),
+            _4567 = (U16)__lsx_vld(ptr, 16);
+        *r = (U16)__lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_4567, 16), 16), 15),
+                                  __lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_0123, 16), 16), 15));
+        *g = (U16)__lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(_4567, 16), 15),
+                                  __lsx_vsat_w(__lsx_vsrai_w(_0123, 16), 15));
     }
     SI void store2(uint16_t* ptr, U16 r, U16 g) {
-        auto _0123 = __lsx_vilvl_h(g, r),
-             _4567 = __lsx_vilvh_h(g, r);
+        auto _0123 = __lsx_vilvl_h((__m128i)g, (__m128i)r),
+             _4567 = __lsx_vilvh_h((__m128i)g, (__m128i)r);
         __lsx_vst(_0123, ptr, 0);
         __lsx_vst(_4567, ptr, 16);
     }
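
For reference, store2() writes interleaved r/g pairs; vilvl_h/vilvh_h weave the low and high half-word lanes of g and r together. A scalar model of the effect (a sketch, not the shipped code):

    #include <stdint.h>
    #include <string.h>

    // Writes r0 g0 r1 g1 ... r7 g7 to ptr, the same layout the two
    // interleaves plus two stores produce above.
    static void store2_scalar(uint16_t* ptr, const uint16_t r[8], const uint16_t g[8]) {
        uint16_t out[16];
        for (int i = 0; i < 8; i++) {
            out[2 * i + 0] = r[i];
            out[2 * i + 1] = g[i];
        }
        memcpy(ptr, out, sizeof(out));
    }
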
@@ -1028,17 +1026,17 @@ namespace SK_OPTS_NS {
              rg4567 = __lsx_vilvl_h(_57, _46),
              ba4567 = __lsx_vilvh_h(_57, _46);
 
-        *r = __lsx_vilvl_d(rg4567, rg0123);
-        *g = __lsx_vilvh_d(rg4567, rg0123);
-        *b = __lsx_vilvl_d(ba4567, ba0123);
-        *a = __lsx_vilvh_d(ba4567, ba0123);
+        *r = (U16)__lsx_vilvl_d(rg4567, rg0123);
+        *g = (U16)__lsx_vilvh_d(rg4567, rg0123);
+        *b = (U16)__lsx_vilvl_d(ba4567, ba0123);
+        *a = (U16)__lsx_vilvh_d(ba4567, ba0123);
     }
 
     SI void store4(uint16_t* ptr, U16 r, U16 g, U16 b, U16 a) {
-        auto rg0123 = __lsx_vilvl_h(g, r),      // r0 g0 r1 g1 r2 g2 r3 g3
-             rg4567 = __lsx_vilvh_h(g, r),      // r4 g4 r5 g5 r6 g6 r7 g7
-             ba0123 = __lsx_vilvl_h(a, b),
-             ba4567 = __lsx_vilvh_h(a, b);
+        auto rg0123 = __lsx_vilvl_h((__m128i)g, (__m128i)r),   // r0 g0 r1 g1 r2 g2 r3 g3
+             rg4567 = __lsx_vilvh_h((__m128i)g, (__m128i)r),   // r4 g4 r5 g5 r6 g6 r7 g7
+             ba0123 = __lsx_vilvl_h((__m128i)a, (__m128i)b),
+             ba4567 = __lsx_vilvh_h((__m128i)a, (__m128i)b);
 
         auto _01 = __lsx_vilvl_w(ba0123, rg0123),
              _23 = __lsx_vilvh_w(ba0123, rg0123),
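
load4()/store4() are the same idea at stride four: two rounds of interleaving turn planar r/g/b/a into memory-order quads and back. A scalar model of store4() (sketch only):

    #include <stdint.h>
    #include <string.h>

    // Writes r0 g0 b0 a0 r1 g1 b1 a1 ... to ptr, matching the
    // vilvl/vilvh cascade above.
    static void store4_scalar(uint16_t* ptr,
                              const uint16_t r[8], const uint16_t g[8],
                              const uint16_t b[8], const uint16_t a[8]) {
        uint16_t out[32];
        for (int i = 0; i < 8; i++) {
            out[4 * i + 0] = r[i];
            out[4 * i + 1] = g[i];
            out[4 * i + 2] = b[i];
            out[4 * i + 3] = a[i];
        }
        memcpy(ptr, out, sizeof(out));
    }
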
@@ -1121,29 +1119,29 @@ namespace SK_OPTS_NS {
                                        sk_bit_cast<__m128i>(c)));
 }
 
-    SI F min(F a, F b)       { return __lsx_vfmin_s(a, b); }
-    SI F max(F a, F b)       { return __lsx_vfmax_s(a, b); }
-    SI I32 min(I32 a, I32 b) { return __lsx_vmin_w(a, b); }
-    SI U32 min(U32 a, U32 b) { return __lsx_vmin_wu(a, b); }
-    SI I32 max(I32 a, I32 b) { return __lsx_vmax_w(a, b); }
-    SI U32 max(U32 a, U32 b) { return __lsx_vmax_wu(a, b); }
+    SI F min(F a, F b)       { return (F)__lsx_vfmin_s((__m128)a, (__m128)b); }
+    SI F max(F a, F b)       { return (F)__lsx_vfmax_s((__m128)a, (__m128)b); }
+    SI I32 min(I32 a, I32 b) { return (I32)__lsx_vmin_w((__m128i)a, (__m128i)b); }
+    SI U32 min(U32 a, U32 b) { return (U32)__lsx_vmin_wu((__m128i)a, (__m128i)b); }
+    SI I32 max(I32 a, I32 b) { return (I32)__lsx_vmax_w((__m128i)a, (__m128i)b); }
+    SI U32 max(U32 a, U32 b) { return (U32)__lsx_vmax_wu((__m128i)a, (__m128i)b); }
 
-    SI F mad(F f, F m, F a)  { return __lsx_vfmadd_s(f, m, a); }
-    SI F nmad(F f, F m, F a) { return __lsx_vfmadd_s(-f, m, a); }
-    SI F abs_(F v)           { return (F)__lsx_vand_v((I32)v, (I32)(0-v)); }
+    SI F mad(F f, F m, F a)  { return (F)__lsx_vfmadd_s((__m128)f, (__m128)m, (__m128)a); }
+    SI F nmad(F f, F m, F a) { return (F)__lsx_vfmadd_s((__m128)(-f), (__m128)m, (__m128)a); }
+    SI F abs_(F v)           { return (F)__lsx_vand_v((__m128i)(I32)v, (__m128i)(I32)(0-v)); }
     SI I32 abs_(I32 v)       { return max(v, -v); }
-    SI F rcp_approx(F v)     { return __lsx_vfrecip_s(v); }
+    SI F rcp_approx(F v)     { return (F)__lsx_vfrecip_s((__m128)v); }
     SI F rcp_precise(F v)    { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
-    SI F rsqrt_approx(F v)   { return __lsx_vfrsqrt_s(v); }
-    SI F sqrt_(F v)          { return __lsx_vfsqrt_s(v); }
+    SI F rsqrt_approx(F v)   { return (F)__lsx_vfrsqrt_s((__m128)v); }
+    SI F sqrt_(F v)          { return (F)__lsx_vfsqrt_s((__m128)v); }
 
     SI U32 iround(F v) {
-        F t = F() + 0.5f;
-        return __lsx_vftintrz_w_s(v + t); }
+        return (U32)__lsx_vftintrz_w_s(v + 0.5f);
+    }
 
     SI U32 round(F v) {
-        F t = F() + 0.5f;
-        return __lsx_vftintrz_w_s(v + t); }
+        return (U32)__lsx_vftintrz_w_s(v + 0.5f);
+    }
 
     SI U16 pack(U32 v) {
         __m128i tmp = __lsx_vsat_wu(v, 15);
@@ -1159,12 +1157,12 @@ namespace SK_OPTS_NS {
     }
 
     SI bool any(I32 c) {
-        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), c));
+        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), (__m128i)c));
         return retv[0] != 0b0000;
     }
 
     SI bool all(I32 c) {
-        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), c));
+        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), (__m128i)c));
         return retv[0] == 0b1111;
     }
 
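Both iround() and round() here use the same bias-and-truncate idiom: add 0.5 and convert with the truncating vftintrz. That matches round-half-up for the non-negative inputs the pipeline feeds these helpers; it is not a general round-to-nearest-even. A per-lane scalar model (sketch):

    #include <stdint.h>

    static uint32_t iround_scalar(float v) {
        return (uint32_t)(v + 0.5f);   // truncation toward zero after the bias
    }
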
@@ -1211,7 +1209,7 @@ namespace SK_OPTS_NS {
     }
 
     SI void store2(uint16_t* ptr, U16 r, U16 g) {
-        U32 rg = __lsx_vilvl_h(widen_cast<__m128i>(g), widen_cast<__m128i>(r));
+        U32 rg = (U32)__lsx_vilvl_h(widen_cast<__m128i>(g), widen_cast<__m128i>(r));
         __lsx_vst(rg, ptr, 0);
     }
 
@@ -3391,35 +3389,35 @@ SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
     } else
 #elif defined(SKRP_CPU_LASX)
     if (c->stopCount <= 8) {
-        fr = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[0], 0), idx);
-        br = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[0], 0), idx);
-        fg = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[1], 0), idx);
-        bg = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[1], 0), idx);
-        fb = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[2], 0), idx);
-        bb = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[2], 0), idx);
-        fa = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[3], 0), idx);
-        ba = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[3], 0), idx);
+        fr = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[0], 0), (__m256i)idx);
+        br = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[0], 0), (__m256i)idx);
+        fg = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[1], 0), (__m256i)idx);
+        bg = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[1], 0), (__m256i)idx);
+        fb = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[2], 0), (__m256i)idx);
+        bb = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[2], 0), (__m256i)idx);
+        fa = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[3], 0), (__m256i)idx);
+        ba = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[3], 0), (__m256i)idx);
     } else
 #elif defined(SKRP_CPU_LSX)
     if (c->stopCount <= 4) {
         __m128i zero = __lsx_vldi(0);
-        fr = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[0], 0));
-        br = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[0], 0));
-        fg = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[1], 0));
-        bg = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[1], 0));
-        fb = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[2], 0));
-        bb = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[2], 0));
-        fa = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[3], 0));
-        ba = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[3], 0));
+        fr = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[0], 0));
+        br = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[0], 0));
+        fg = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[1], 0));
+        bg = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[1], 0));
+        fb = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[2], 0));
+        bb = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[2], 0));
+        fa = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[3], 0));
+        ba = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[3], 0));
     } else
 #endif
     {
 #if defined(SKRP_CPU_LSX)
         // This can reduce some vpickve2gr instructions.
-        int i0 = __lsx_vpickve2gr_w(idx, 0);
-        int i1 = __lsx_vpickve2gr_w(idx, 1);
-        int i2 = __lsx_vpickve2gr_w(idx, 2);
-        int i3 = __lsx_vpickve2gr_w(idx, 3);
+        int i0 = __lsx_vpickve2gr_w((__m128i)idx, 0);
+        int i1 = __lsx_vpickve2gr_w((__m128i)idx, 1);
+        int i2 = __lsx_vpickve2gr_w((__m128i)idx, 2);
+        int i3 = __lsx_vpickve2gr_w((__m128i)idx, 3);
         fr = gather((int*)c->fs[0], i0, i1, i2, i3);
         br = gather((int*)c->bs[0], i0, i1, i2, i3);
         fg = gather((int*)c->fs[1], i0, i1, i2, i3);
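
The small-stop-count fast path uses a register permute as a per-lane table lookup: xvperm_w/vshuf_w pick each lane's scale f[idx] and bias b[idx] in one instruction, and the caller then evaluates f*t + b. A scalar model of the lookup (a sketch; the real code keeps everything in vector registers):

    // For each lane, fetch the per-stop scale and bias, then evaluate f*t + b.
    static void gradient_lanes(const float* fs, const float* bs,
                               const int idx[8], const float t[8], float out[8]) {
        for (int i = 0; i < 8; i++) {
            out[i] = fs[idx[i]] * t[i] + bs[idx[i]];
        }
    }
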
@@ -5931,7 +5929,7 @@ SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
         split(v, &_02, &_13);
         __m256i tmp0 = __lasx_xvsat_wu(_02, 15);
         __m256i tmp1 = __lasx_xvsat_wu(_13, 15);
-        return __lasx_xvpickev_h(tmp1, tmp0);
+        return (U16)__lasx_xvpickev_h(tmp1, tmp0);
     };
 #elif defined(SKRP_CPU_LSX)
     __m128i _01, _23, rg, ba;
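
The LASX narrowing above is a saturating pack: xvsat_wu with bit index 15 clamps each 32-bit lane to 0xffff, then xvpickev_h keeps the even (low) half-words. Per lane it behaves like this sketch:

    #include <stdint.h>

    static uint16_t pack_lane(uint32_t v) {
        return (uint16_t)(v > 0xffff ? 0xffff : v);   // clamp, then drop high bits
    }
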
@@ -5941,10 +5939,10 @@ SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
 
     __m128i mask_00ff = __lsx_vreplgr2vr_h(0xff);
 
-    *r = __lsx_vand_v(rg, mask_00ff);
-    *g = __lsx_vsrli_h(rg, 8);
-    *b = __lsx_vand_v(ba, mask_00ff);
-    *a = __lsx_vsrli_h(ba, 8);
+    *r = (U16)__lsx_vand_v(rg, mask_00ff);
+    *g = (U16)__lsx_vsrli_h(rg, 8);
+    *b = (U16)__lsx_vand_v(ba, mask_00ff);
+    *a = (U16)__lsx_vsrli_h(ba, 8);
 #else
     auto cast_U16 = [](U32 v) -> U16 {
         return cast<U16>(v);
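
In the LSX branch each 16-bit lane of rg holds g<<8 | r (and ba holds a<<8 | b), so one mask and one shift recover the channels. Per lane (sketch):

    #include <stdint.h>

    static void split_rg(uint16_t rg, uint16_t* r, uint16_t* g) {
        *r = rg & 0x00ff;   // __lsx_vand_v(rg, mask_00ff)
        *g = rg >> 8;       // __lsx_vsrli_h(rg, 8)
    }
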
@@ -5972,26 +5970,26 @@ SI void load_8888_(const uint32_t* ptr, U16* r, U16* g, U16* b, U16* a) {
 SI void store_8888_(uint32_t* ptr, U16 r, U16 g, U16 b, U16 a) {
 #if defined(SKRP_CPU_LSX)
     __m128i mask = __lsx_vreplgr2vr_h(255);
-    r = __lsx_vmin_hu(r, mask);
-    g = __lsx_vmin_hu(g, mask);
-    b = __lsx_vmin_hu(b, mask);
-    a = __lsx_vmin_hu(a, mask);
+    r = (U16)__lsx_vmin_hu((__m128i)r, mask);
+    g = (U16)__lsx_vmin_hu((__m128i)g, mask);
+    b = (U16)__lsx_vmin_hu((__m128i)b, mask);
+    a = (U16)__lsx_vmin_hu((__m128i)a, mask);
 
-    g = __lsx_vslli_h(g, 8);
+    g = (U16)__lsx_vslli_h(g, 8);
     r = r | g;
-    a = __lsx_vslli_h(a, 8);
+    a = (U16)__lsx_vslli_h(a, 8);
     a = a | b;
 
     __m128i r_lo = __lsx_vsllwil_wu_hu(r, 0);
-    __m128i r_hi = __lsx_vexth_wu_hu(r);
+    __m128i r_hi = __lsx_vexth_wu_hu((__m128i)r);
     __m128i a_lo = __lsx_vsllwil_wu_hu(a, 0);
-    __m128i a_hi = __lsx_vexth_wu_hu(a);
+    __m128i a_hi = __lsx_vexth_wu_hu((__m128i)a);
 
     a_lo = __lsx_vslli_w(a_lo, 16);
     a_hi = __lsx_vslli_w(a_hi, 16);
 
-    r = r_lo | a_lo;
-    a = r_hi | a_hi;
+    r = (U16)(r_lo | a_lo);
+    a = (U16)(r_hi | a_hi);
     store(ptr, join<U32>(r, a));
 #else
     r = min(r, 255);
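
The LSX store path clamps each channel to 255, re-packs g<<8|r and a<<8|b, then widens and merges the halves into 32-bit pixels; per pixel it produces the same word as the portable #else branch. A scalar sketch:

    #include <stdint.h>

    static uint32_t pack_8888(uint16_t r, uint16_t g, uint16_t b, uint16_t a) {
        uint32_t R = r > 255 ? 255 : r, G = g > 255 ? 255 : g;
        uint32_t B = b > 255 ? 255 : b, A = a > 255 ? 255 : a;
        return R | (G << 8) | (B << 16) | (A << 24);   // little-endian RGBA8888
    }
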
@@ -6557,8 +6555,8 @@ STAGE_GP(bilerp_clamp_8888, const SkRasterPipeline_GatherCtx* ctx) {
     qy_lo = __lsx_vxor_v(qy_lo, temp);
     qy_hi = __lsx_vxor_v(qy_hi, temp);
 
-    I16 tx = __lsx_vpickev_h(qx_hi, qx_lo);
-    I16 ty = __lsx_vpickev_h(qy_hi, qy_lo);
+    I16 tx = (I16)__lsx_vpickev_h(qx_hi, qx_lo);
+    I16 ty = (I16)__lsx_vpickev_h(qy_hi, qy_lo);
 #else
     I16 tx = cast<I16>(qx ^ 0x8000),
         ty = cast<I16>(qy ^ 0x8000);
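
The vectorized branch builds tx/ty by XOR-ing with 0x8000 (via the vxor against temp) and then narrowing, which matches the portable cast<I16>(qx ^ 0x8000): flipping the top bit re-centers the unsigned 16-bit fraction around zero as a signed value. Per lane (sketch):

    #include <stdint.h>

    static int16_t bias_to_signed(uint16_t q) {
        return (int16_t)(q ^ 0x8000);   // same lane math as cast<I16>(qx ^ 0x8000)
    }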