@@ -1052,33 +1052,39 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
10521052#endif
#if defined(ZSTD_ARCH_RISCV_RVV) && (__riscv_xlen == 64)
/* ZSTD_row_getRVVMask():
 * Compares each of the `rowEntries` tag bytes at `src` against `tag` and
 * returns a bitmask with bit i set iff src[i] == tag, rotated right by
 * `head` so that bit 0 corresponds to the row's current head position.
 * `rowEntries` must be 16, 32, or 64 (one branch per supported row size).
 */
FORCE_INLINE_TEMPLATE ZSTD_VecMask
ZSTD_row_getRVVMask(int rowEntries, const BYTE* const src, const BYTE tag, const U32 head)
{
    ZSTD_VecMask matches;
    size_t vl;

    if (rowEntries == 16) {
        vl = __riscv_vsetvl_e8m1(16);
        /* Inner scope keeps declarations before statements (pre-C99 rule),
         * while the vsetvl above still precedes the vector ops. */
        {
            vuint8m1_t chunk = __riscv_vle8_v_u8m1(src, vl);
            vbool8_t mask = __riscv_vmseq_vx_u8m1_b8(chunk, tag, vl);
            /* Reinterpret the 16-lane mask register as a scalar-extractable u16. */
            vuint16m1_t mask_u16 = __riscv_vreinterpret_v_b8_u16m1(mask);
            matches = __riscv_vmv_x_s_u16m1_u16(mask_u16);
            return ZSTD_rotateRight_U16((U16)matches, head);
        }
    } else if (rowEntries == 32) {
        vl = __riscv_vsetvl_e8m2(32);
        {
            vuint8m2_t chunk = __riscv_vle8_v_u8m2(src, vl);
            vbool4_t mask = __riscv_vmseq_vx_u8m2_b4(chunk, tag, vl);
            /* 32-lane mask -> u32 scalar extraction. */
            vuint32m1_t mask_u32 = __riscv_vreinterpret_v_b4_u32m1(mask);
            matches = __riscv_vmv_x_s_u32m1_u32(mask_u32);
            return ZSTD_rotateRight_U32((U32)matches, head);
        }
    } else { /* rowEntries == 64 */
        vl = __riscv_vsetvl_e8m4(64);
        {
            vuint8m4_t chunk = __riscv_vle8_v_u8m4(src, vl);
            vbool2_t mask = __riscv_vmseq_vx_u8m4_b2(chunk, tag, vl);
            /* 64-lane mask -> u64 scalar extraction. */
            vuint64m1_t mask_u64 = __riscv_vreinterpret_v_b2_u64m1(mask);
            matches = __riscv_vmv_x_s_u64m1_u64(mask_u64);
            return ZSTD_rotateRight_U64(matches, head);
        }
    }
}
#endif
0 commit comments