@@ -1517,13 +1517,32 @@ namespace xsimd
15171517 {
15181518 XSIMD_IF_CONSTEXPR ((8 * sizeof (T)) >= batch_bool<T, A>::size)
15191519 {
1520+ // (A) Easy case: the number of slots fits in T.
15201521 const auto zero = detail::broadcast<as_unsigned_integer_t <T>, types::detail::rvv_width_m1>(T (0 ));
15211522 auto ones = detail::broadcast<as_unsigned_integer_t <T>, A::width>(1 );
15221523 auto iota = detail::vindex<A, as_unsigned_integer_t <T>>();
15231524 auto upowers = detail::rvvsll (ones, iota);
15241525 auto r = __riscv_vredor (self.data .as_mask (), upowers, (typename decltype (zero)::register_type)zero, batch_bool<T, A>::size);
15251526 return detail::reduce_scalar<A, as_unsigned_integer_t <T>>(r);
15261527 }
1528+ else XSIMD_IF_CONSTEXPR ((2 * 8 * sizeof (T)) == batch_bool<T, A>::size) {
1529+ // (B) We need two rounds, one for the low part, one for the high part.
1530+
1531+ // The low part is similar to the approach in (A).
1532+ const auto zero = detail::broadcast<as_unsigned_integer_t <T>, types::detail::rvv_width_m1>(T (0 ));
1533+ auto ones = detail::broadcast<as_unsigned_integer_t <T>, A::width>(1 );
1534+ auto iota_low = detail::vindex<A, as_unsigned_integer_t <T>>();
1535+ auto upowers_low = detail::rvvsll (ones, iota_low);
1536+ auto r_low = __riscv_vredor (self.data .as_mask (), upowers_low, (typename decltype (zero)::register_type)zero, batch_bool<T, A>::size);
1537+
1538+ // The high part requires a subtraction before the shift. The lower part
1539+ // then gets a negative number, which is interpreted as a very large
1540+ // positive number because we work on unsigned numbers.
1541+ auto iota_high = __riscv_vsub (iota_low, 8 * sizeof (T), batch_bool<T, A>::size);
1542+ auto upowers_high = detail::rvvsll (ones, iota_high);
1543+ auto r_high = __riscv_vredor (self.data .as_mask (), upowers_high, (typename decltype (zero)::register_type)zero, batch_bool<T, A>::size);
1544+ return detail::reduce_scalar<A, as_unsigned_integer_t <T>>(r_low) | (detail::reduce_scalar<A, as_unsigned_integer_t <T>>(r_high) << 8 * sizeof (T));
1545+ }
15271546 else
15281547 {
15291548 return mask (self, common {});
0 commit comments