@@ -1193,16 +1193,36 @@ namespace xsimd
11931193 return detail::compare_int_avx512f<A, T, _MM_CMPINT_LE>(self, other);
11941194 }
11951195
namespace detail
{
    // Adapted from https://github.com/serge-sans-paille/fast-bitset-from-bool-array
    //
    // Pack eight booleans, stored one per byte, into a single byte:
    // bit i of the result is the value of unpacked[i].
    //
    // Preconditions (not checked):
    //  - unpacked points to at least 8 readable bytes;
    //  - every byte is exactly 0 or 1, otherwise the partial products of
    //    the multiplication below overlap and the result is meaningless;
    //  - the 64-bit word is stored least-significant byte first, so that
    //    unpacked[i] occupies bits [8*i, 8*i + 7] after the copy.
    //
    // The input is only read, hence the pointer-to-const parameter.
    XSIMD_INLINE unsigned char tobitset(const unsigned char unpacked[8]) noexcept
    {
        // memcpy instead of a pointer cast: no alignment or aliasing
        // requirement is placed on the caller's buffer.
        uint64_t data;
        memcpy(&data, unpacked, sizeof(data));

        // One set bit at each of the positions 7, 14, 21, ..., 56.
        // Multiplying by it moves the low bit of byte i (bit 8*i) to
        // bit 8*i + (56 - 7*i) = 56 + i. No two partial products land on
        // the same bit, so no carry can disturb the top byte.
        constexpr uint64_t magic = 0x0102040810204080ULL;

        // After the shift only the top byte remains: the packed bits.
        return static_cast<unsigned char>((data * magic) >> 56);
    }
}
1211+
11961212 // load
11971213 template <class A , class T >
11981214 XSIMD_INLINE batch_bool<T, A> load (bool const * mem, batch_bool<T, A>, requires_arch<avx512f>) noexcept
11991215 {
12001216 using register_type = typename batch_bool<T, A>::register_type;
12011217 constexpr auto size = batch_bool<T, A>::size;
1218+ constexpr auto iter = size / 8 ;
1219+ static_assert (size % 8 == 0 , " incorrect size of bool batch" );
12021220 register_type mask = 0 ;
1203- for (std::size_t i = 0 ; i < size; ++i)
1204- mask |= (register_type (mem[i] ? 1 : 0 ) << i);
1205-
1221+ for (std::size_t i = 0 ; i < iter; ++i)
1222+ {
1223+ unsigned char block = detail::tobitset ((unsigned char *)mem + i * 8 );
1224+ mask |= (register_type (block) << (i * 8 ));
1225+ }
12061226 return mask;
12071227 }
12081228
0 commit comments