@@ -77,8 +77,13 @@ typedef uint32x4_t flb_vector32;
 typedef vuint8m1_t flb_vector8;
 typedef vuint32m1_t flb_vector32;
 
-/* Currently, VLEN is assumed to 128. */
-#define RVV_VEC_INST_LEN (128 / 8) /* 16 */
+#ifdef FLB_RVV_VLEN
+#define RVV_VEC8_INST_LEN (FLB_RVV_VLEN / 8)
+#define RVV_VEC32_INST_LEN (FLB_RVV_VLEN / 8 / 4)
+#else
+#define RVV_VEC8_INST_LEN (128 / 8) /* 16 */
+#define RVV_VEC32_INST_LEN (128 / 8 / 4) /* 4 */
+#endif
 
 #else
 /*
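Note: the new macros take the element count from `FLB_RVV_VLEN` when it is defined (presumably supplied by the build system) and fall back to the previous 128-bit assumption otherwise. A minimal standalone sketch, not part of the patch, showing how the macros resolve when `FLB_RVV_VLEN` is passed as a compiler flag:

```c
/* Standalone sketch: compile with -DFLB_RVV_VLEN=256 to model a
 * 256-bit vector unit, or with no flag for the 128-bit defaults. */
#include <stdio.h>

#ifdef FLB_RVV_VLEN
#define RVV_VEC8_INST_LEN (FLB_RVV_VLEN / 8)
#define RVV_VEC32_INST_LEN (FLB_RVV_VLEN / 8 / 4)
#else
#define RVV_VEC8_INST_LEN (128 / 8) /* 16 */
#define RVV_VEC32_INST_LEN (128 / 8 / 4) /* 4 */
#endif

int main(void)
{
    /* Prints "u8 lanes: 32, u32 lanes: 8" for VLEN=256;
     * "u8 lanes: 16, u32 lanes: 4" by default. */
    printf("u8 lanes: %d, u32 lanes: %d\n",
           RVV_VEC8_INST_LEN, RVV_VEC32_INST_LEN);
    return 0;
}
```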
@@ -116,7 +121,7 @@ static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s)
 #elif defined(FLB_SIMD_NEON)
     *v = vld1q_u8(s);
 #elif defined(FLB_SIMD_RVV)
-    *v = __riscv_vle8_v_u8m1(s, 16);
+    *v = __riscv_vle8_v_u8m1(s, RVV_VEC8_INST_LEN);
 #else
     memset(v, 0, sizeof(flb_vector8));
 #endif
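The RVV load now requests `RVV_VEC8_INST_LEN` elements instead of a hard-coded 16. A hypothetical caller, for illustration only, filling one register-wide chunk:

```c
/* Illustrative fragment, assuming flb_simd.h and string.h are
 * included on an RVV build where RVV_VEC8_INST_LEN is defined,
 * and the buffer holds at least RVV_VEC8_INST_LEN bytes. */
uint8_t chunk[RVV_VEC8_INST_LEN];
flb_vector8 v;

memset(chunk, 'A', sizeof(chunk)); /* fill with a known byte  */
flb_vector8_load(&v, chunk);       /* one register-wide load  */
```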
@@ -153,7 +158,7 @@ static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vector8 v2)
 #elif defined(FLB_SIMD_NEON)
     return vqsubq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vssubu_vv_u8m1(v1, v2, 16);
+    return __riscv_vssubu_vv_u8m1(v1, v2, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
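`vssubu` is a saturating unsigned subtract, so lanes clamp at zero rather than wrapping around. A scalar model of the per-lane behavior, included only as illustration:

```c
#include <stdint.h>

/* Scalar model of one lane of flb_vector8_ssub: unsigned
 * saturating subtract clamps to 0 instead of wrapping. */
static inline uint8_t ssub_u8_lane(uint8_t a, uint8_t b)
{
    return (a > b) ? (uint8_t)(a - b) : 0;
}
```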
@@ -170,8 +175,10 @@ static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8 v2)
 #elif defined(FLB_SIMD_NEON)
     return vceqq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, 16);
-    return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, 16), __riscv_vmv_v_x_u8m1(UINT8_MAX, 16), ret, 16);
+    vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, RVV_VEC8_INST_LEN);
+    return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+                                   __riscv_vmv_v_x_u8m1(UINT8_MAX, RVV_VEC8_INST_LEN),
+                                   ret, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
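Unlike SSE2/NEON, the RVV compare yields a mask register (`vbool8_t`) rather than a byte vector, so the `vmerge` step expands the mask into 0x00/0xFF lanes to match the other back ends. Per lane, the result is equivalent to this scalar model (illustration only):

```c
#include <stdint.h>

/* What flb_vector8_eq produces in each lane, regardless of the
 * back end: all-ones on equality, all-zeros otherwise. */
static inline uint8_t eq_u8_lane(uint8_t a, uint8_t b)
{
    return (a == b) ? UINT8_MAX : 0;
}
```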
@@ -184,8 +191,10 @@ static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vector32 v2)
 #elif defined(FLB_SIMD_NEON)
     return vceqq_u32(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, 4);
-    return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, 4), __riscv_vmv_v_x_u32m1(UINT32_MAX, 4), ret, 4);
+    vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, RVV_VEC32_INST_LEN);
+    return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, RVV_VEC32_INST_LEN),
+                                    __riscv_vmv_v_x_u32m1(UINT32_MAX, RVV_VEC32_INST_LEN),
+                                    ret, RVV_VEC32_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
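The 32-bit variant uses `RVV_VEC32_INST_LEN`, a quarter of the byte lane count, since each `u32` lane spans four bytes. A hypothetical compile-time sanity check, not part of the patch, that one could add where both macros are visible:

```c
/* Hypothetical C11 check: a u32m1 register holds a quarter as
 * many lanes as a u8m1 register of the same VLEN. */
_Static_assert(RVV_VEC32_INST_LEN == RVV_VEC8_INST_LEN / 4,
               "u32 lane count must be u8 lane count / 4");
```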
@@ -200,7 +209,7 @@ static inline flb_vector8 flb_vector8_broadcast(const uint8_t c)
 #elif defined(FLB_SIMD_NEON)
     return vdupq_n_u8(c);
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vmv_v_x_u8m1(c, 16);
+    return __riscv_vmv_v_x_u8m1(c, RVV_VEC8_INST_LEN);
 #else
     return ~UINT64CONST(0) / 0xFF * c;
 #endif
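The non-SIMD fallback on the `#else` branch replicates the byte arithmetically: `~0 / 0xFF` yields `0x0101010101010101`, and multiplying by `c` copies it into every byte. A standalone check of that identity (using the standard `UINT64_C` in place of the header's `UINT64CONST` macro):

```c
/* Standalone illustration of the broadcast fallback arithmetic. */
#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint64_t ones = ~UINT64_C(0) / 0xFF; /* 0x0101010101010101 */
    assert(ones * 0x7F == UINT64_C(0x7F7F7F7F7F7F7F7F));
    return 0;
}
```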
@@ -216,7 +225,9 @@ static inline bool flb_vector8_is_highbit_set(const flb_vector8 v)
 #elif defined(FLB_SIMD_NEON)
     return vmaxvq_u8(v) > 0x7F;
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v, __riscv_vmv_v_x_u8m1(0, 16), 16));
+    return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v,
+                                   __riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+                                   RVV_VEC8_INST_LEN));
 #else
     return v & flb_vector8_broadcast(0x80);
 #endif
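Taken together, the helpers support the usual vectorized byte-search pattern: broadcast the needle, compare, and test the high bits of the result. A hypothetical caller (the function name is invented for this sketch) on an RVV build:

```c
/* Hypothetical usage sketch combining the header's helpers;
 * chunk must hold at least RVV_VEC8_INST_LEN bytes. */
static inline bool chunk_has_byte(const uint8_t *chunk, uint8_t c)
{
    flb_vector8 v;

    flb_vector8_load(&v, chunk);
    /* flb_vector8_eq leaves 0xFF in matching lanes; any high bit
     * set therefore means at least one byte equals c. */
    return flb_vector8_is_highbit_set(flb_vector8_eq(v, flb_vector8_broadcast(c)));
}
```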