Skip to content

Commit 554ea2c

Browse files
committed
[loong64] Fix missing rounding in loong64 scaled_mult implementation
The reference semantics of scaled_mult include rounding, but the original loong64 implementation did not round. This triggers an SkASSERT, raised by constrained_add's debug checks, in the unit test case FilterResult_raster_RescaleWithTransform.

The fixed implementation raises the cost of each scaled_mult from 2 to 5 instructions (from 5 to 8 clock cycles on the LA464 and LA664 micro-architectures), because the current LoongArch spec provides no rounding multiply operations. However, the computation now matches the reference scalar semantics, and proper testing of debug builds is now possible.

Change-Id: I45e43a7a7e6d50b4c32c5e69a6d1d7de341eccf1
[Kaiyang Wu: backport to webkit2gtk]
Link: https://skia-review.googlesource.com/c/skia/+/908136
Signed-off-by: Kaiyang Wu <[email protected]>
1 parent c550e97 commit 554ea2c

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

Source/ThirdParty/skia/src/opts/SkRasterPipeline_opts.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5532,11 +5532,19 @@ SI I16 scaled_mult(I16 a, I16 b) {
55325532
#elif defined(SKRP_CPU_NEON)
55335533
return vqrdmulhq_s16(a, b);
55345534
#elif defined(SKRP_CPU_LASX)
5535-
I16 res = __lasx_xvmuh_h(a, b);
5536-
return __lasx_xvslli_h(res, 1);
5535+
Vec<8, int32_t> even = (Vec<8, int32_t>)__lasx_xvmulwev_w_h((__m256i)a, (__m256i)b);
5536+
Vec<8, int32_t> odd = (Vec<8, int32_t>)__lasx_xvmulwod_w_h((__m256i)a, (__m256i)b);
5537+
Vec<8, int32_t> roundingTerm = (Vec<8, int32_t>)__lasx_xvldi(-0xec0); // v8i32(0x40 << 8)
5538+
even = (even + roundingTerm) >> 15;
5539+
odd = (odd + roundingTerm) >> 15;
5540+
return (I16)__lasx_xvpackev_h((__m256i)odd, (__m256i)even);
55375541
#elif defined(SKRP_CPU_LSX)
5538-
I16 res = __lsx_vmuh_h(a, b);
5539-
return __lsx_vslli_h(res, 1);
5542+
Vec<4, int32_t> even = (Vec<4, int32_t>)__lsx_vmulwev_w_h((__m128i)a, (__m128i)b);
5543+
Vec<4, int32_t> odd = (Vec<4, int32_t>)__lsx_vmulwod_w_h((__m128i)a, (__m128i)b);
5544+
Vec<4, int32_t> roundingTerm = (Vec<4, int32_t>)__lsx_vldi(-0xec0); // v4i32(0x40 << 8)
5545+
even = (even + roundingTerm) >> 15;
5546+
odd = (odd + roundingTerm) >> 15;
5547+
return (I16)__lsx_vpackev_h((__m128i)odd, (__m128i)even);
55405548
#else
55415549
const I32 roundingTerm = I32_(1 << 14);
55425550
return cast<I16>((cast<I32>(a) * cast<I32>(b) + roundingTerm) >> 15);

0 commit comments

Comments
 (0)