diff --git a/test/comp_avg_pred_test.cc b/test/comp_avg_pred_test.cc
index d8fabd5bef..83d6de7449 100644
--- a/test/comp_avg_pred_test.cc
+++ b/test/comp_avg_pred_test.cc
@@ -227,6 +227,11 @@ INSTANTIATE_TEST_SUITE_P(NEON, AvgPredTestLBD,
                          ::testing::Values(&vpx_comp_avg_pred_neon));
 #endif  // HAVE_NEON
 
+#if HAVE_RVV
+INSTANTIATE_TEST_SUITE_P(RVV, AvgPredTestLBD,
+                         ::testing::Values(&vpx_comp_avg_pred_rvv));
+#endif  // HAVE_RVV
+
 #if HAVE_VSX
 INSTANTIATE_TEST_SUITE_P(VSX, AvgPredTestLBD,
                          ::testing::Values(&vpx_comp_avg_pred_vsx));
diff --git a/vpx_dsp/riscv/avg_pred_rvv.c b/vpx_dsp/riscv/avg_pred_rvv.c
new file mode 100644
index 0000000000..83293430dc
--- /dev/null
+++ b/vpx_dsp/riscv/avg_pred_rvv.c
@@ -0,0 +1,71 @@
+#include <assert.h>
+#include "./vpx_dsp_rtcd.h"
+#include <riscv_vector.h>
+
+void vpx_comp_avg_pred_rvv(uint8_t *comp_pred, const uint8_t *pred, int width,
+                           int height, const uint8_t *ref, int ref_stride) {
+  const size_t avl = 16;
+  size_t vl;
+  if (width > 8) {
+    // Wide blocks: rows are contiguous in pred/comp_pred, so average
+    // whole rows with LMUL=8 vectors.
+    int x, y = height;
+    vuint8m8_t vr, vp;
+    do {
+      for (x = 0; x < width; x += vl) {
+        vl = __riscv_vsetvl_e8m8(width - x);
+        vp = __riscv_vle8_v_u8m8(pred + x, vl);
+        vr = __riscv_vle8_v_u8m8(ref + x, vl);
+        // vaaddu with RNU rounding computes (vp + vr + 1) >> 1 per lane.
+        vr = __riscv_vaaddu_vv_u8m8(vp, vr, __RISCV_VXRM_RNU, vl);
+        __riscv_vse8_v_u8m8(comp_pred + x, vr, vl);
+      }
+      comp_pred += width;
+      pred += width;
+      ref += ref_stride;
+    } while (--y);
+  } else if (width == 8) {
+    // Gather two 8-byte rows of ref into one 16-byte vector with an
+    // ordered indexed load. The offsets are uint8_t, so this assumes
+    // ref_stride + 7 fits in 8 bits, and vl == 16 (VLEN >= 128).
+    int i = width * height;
+    size_t k;
+    uint8_t index[16];
+    vuint8m1_t vr, vp, vindex;
+    vl = __riscv_vsetvl_e8m1(avl);
+    for (k = 0; k < vl; k++) {
+      index[k] = (k < vl / 2) ? (uint8_t)k : (uint8_t)(ref_stride + k - 8);
+    }
+    vindex = __riscv_vle8_v_u8m1(index, vl);
+    do {
+      vp = __riscv_vle8_v_u8m1(pred, vl);
+      vr = __riscv_vloxei8_v_u8m1(ref, vindex, vl);
+      vr = __riscv_vaaddu_vv_u8m1(vp, vr, __RISCV_VXRM_RNU, vl);
+      __riscv_vse8_v_u8m1(comp_pred, vr, vl);
+      ref += 2 * ref_stride;
+      pred += vl;
+      comp_pred += vl;
+      i -= vl;
+    } while (i);
+  } else {
+    // Gather four 4-byte rows of ref per iteration with a strided
+    // 32-bit load; the stride argument is in bytes.
+    int i = width * height;
+    vuint8m1_t vr, vp;
+    vuint32m1_t a_u32;
+    assert(width == 4);
+    vl = __riscv_vsetvl_e8m1(avl);
+    do {
+      vp = __riscv_vle8_v_u8m1(pred, vl);
+      a_u32 =
+          __riscv_vlse32_v_u32m1((const uint32_t *)ref, ref_stride, vl / 4);
+      vr = __riscv_vreinterpret_v_u32m1_u8m1(a_u32);
+      vr = __riscv_vaaddu_vv_u8m1(vp, vr, __RISCV_VXRM_RNU, vl);
+      __riscv_vse8_v_u8m1(comp_pred, vr, vl);
+      ref += 4 * ref_stride;
+      pred += vl;
+      comp_pred += vl;
+      i -= vl;
+    } while (i);
+  }
+}
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 04969f37e1..346ff9a1ea 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -413,6 +413,8 @@ DSP_SRCS-$(HAVE_NEON) += arm/avg_pred_neon.c
 DSP_SRCS-$(HAVE_NEON) += arm/subpel_variance_neon.c
 DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c
 
+DSP_SRCS-$(HAVE_RVV) += riscv/avg_pred_rvv.c
+
 DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c
 DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c
 
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index f20f4e0454..cde056c84b 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1321,7 +1321,7 @@ ()
 specialize qw/vpx_get4x4sse_cs neon msa vsx/;
 
 add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
-specialize qw/vpx_comp_avg_pred neon sse2 avx2 vsx lsx/;
+specialize qw/vpx_comp_avg_pred neon sse2 avx2 vsx lsx rvv/;
 
 #
 # Subpixel Variance
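Note on the rounding behaviour (not part of the patch): __riscv_vaaddu with
__RISCV_VXRM_RNU is the unsigned averaging add, so each lane computes
(pred + ref + 1) >> 1, which matches the ROUND_POWER_OF_TWO(pred[j] + ref[j], 1)
arithmetic of the generic C path. A minimal scalar sketch of the per-lane
maths, using a hypothetical avg_rnu helper that is not from the patch:

#include <stdint.h>
#include <stdio.h>

/* Scalar model of one vector lane: unsigned averaging add with
 * round-to-nearest-up (RNU). The sum is formed in int, so adding the
 * +1 rounding increment cannot overflow 8-bit arithmetic. */
static uint8_t avg_rnu(uint8_t a, uint8_t b) {
  return (uint8_t)((a + b + 1) >> 1);
}

int main(void) {
  printf("%u\n", avg_rnu(3, 4));     /* 4: ties round up */
  printf("%u\n", avg_rnu(255, 254)); /* 255: widened sum avoids overflow */
  return 0;
}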