Skip to content

Commit dd7fc7f

Browse files
Make the logical loops build for SVE (replace the vec_u8 array with individual vector variables)
1 parent 3809853 commit dd7fc7f

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

numpy/_core/src/umath/loops_logical.dispatch.cpp

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -179,15 +179,19 @@ static void simd_reduce_logical_BOOL(npy_bool* op, npy_bool* ip, npy_intp len) {
179 179
#if defined(NPY_HAVE_SSE2)
180 180
NPY_PREFETCH(reinterpret_cast<const char *>(ip + wstep), 0, 3);
181 181
#endif
182-
vec_u8 v[UNROLL] = {};
183-
for(int i = 0; i < UNROLL; i++) {
184-
v[i] = hn::LoadU(u8, ip + vstep * i);
185-
}
186-
187-
vec_u8 m01 = traits.reduce(v[0], v[1]);
188-
vec_u8 m23 = traits.reduce(v[2], v[3]);
189-
vec_u8 m45 = traits.reduce(v[4], v[5]);
190-
vec_u8 m67 = traits.reduce(v[6], v[7]);
182+
vec_u8 v0 = hn::LoadU(u8, ip);
183+
vec_u8 v1 = hn::LoadU(u8, ip + vstep);
184+
vec_u8 v2 = hn::LoadU(u8, ip + vstep * 2);
185+
vec_u8 v3 = hn::LoadU(u8, ip + vstep * 3);
186+
vec_u8 v4 = hn::LoadU(u8, ip + vstep * 4);
187+
vec_u8 v5 = hn::LoadU(u8, ip + vstep * 5);
188+
vec_u8 v6 = hn::LoadU(u8, ip + vstep * 6);
189+
vec_u8 v7 = hn::LoadU(u8, ip + vstep * 7);
190+
191+
vec_u8 m01 = traits.reduce(v0, v1);
192+
vec_u8 m23 = traits.reduce(v2, v3);
193+
vec_u8 m45 = traits.reduce(v4, v5);
194+
vec_u8 m67 = traits.reduce(v6, v7);
191 195

192 196
vec_u8 m0123 = traits.reduce(m01, m23);
193 197
vec_u8 m4567 = traits.reduce(m45, m67);

0 commit comments

Comments (0)