Skip to content

Commit 9d68e6e

Browse files
committed
BUG: Fix off-by-one error in nomemoverlap check.
The original check (present ever since it was introduced in 085cdbe) reported an overlap for arrays that are merely adjacent in memory. This fixes one of the factors causing the test_repeated_square_consistency failure, so the test should now always pass.
1 parent 623ecfa commit 9d68e6e

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

numpy/_core/src/umath/fast_loop_macros.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,15 +341,15 @@ abs_ptrdiff(char *a, char *b)
341341
((labs(steps[0]) < MAX_STEP_SIZE) && \
342342
(labs(steps[1]) < MAX_STEP_SIZE) && \
343343
(labs(steps[2]) < MAX_STEP_SIZE) && \
344-
(nomemoverlap(args[0], steps[0] * dimensions[0], args[2], steps[2] * dimensions[0])) && \
345-
(nomemoverlap(args[1], steps[1] * dimensions[0], args[2], steps[2] * dimensions[0])))
344+
(nomemoverlap(args[0], steps[0], args[2], steps[2], dimensions[0])) && \
345+
(nomemoverlap(args[1], steps[1], args[2], steps[2], dimensions[0])))
346346

347347
#define IS_UNARY_TWO_OUT_SMALL_STEPS_AND_NOMEMOVERLAP \
348348
((labs(steps[0]) < MAX_STEP_SIZE) && \
349349
(labs(steps[1]) < MAX_STEP_SIZE) && \
350350
(labs(steps[2]) < MAX_STEP_SIZE) && \
351-
(nomemoverlap(args[0], steps[0] * dimensions[0], args[2], steps[2] * dimensions[0])) && \
352-
(nomemoverlap(args[0], steps[0] * dimensions[0], args[1], steps[1] * dimensions[0])))
351+
(nomemoverlap(args[0], steps[0], args[2], steps[2], dimensions[0])) && \
352+
(nomemoverlap(args[0], steps[0], args[1], steps[1], dimensions[0])))
353353

354354
/*
355355
* 1) Output should be contiguous, can handle strided input data
@@ -359,7 +359,7 @@ abs_ptrdiff(char *a, char *b)
359359
#define IS_OUTPUT_BLOCKABLE_UNARY(esizein, esizeout, vsize) \
360360
((steps[0] & (esizein-1)) == 0 && \
361361
steps[1] == (esizeout) && llabs(steps[0]) < MAX_STEP_SIZE && \
362-
(nomemoverlap(args[1], steps[1] * dimensions[0], args[0], steps[0] * dimensions[0])))
362+
(nomemoverlap(args[1], steps[1], args[0], steps[0], dimensions[0])))
363363

364364
#define IS_BLOCKABLE_REDUCE(esize, vsize) \
365365
(steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \

numpy/_core/src/umath/loops_utils.h.src

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,31 @@
1616
#endif
1717
/*
1818
* nomemoverlap - returns false if two strided arrays have an overlapping
19-
* region in memory. ip_size/op_size = size of the arrays which can be negative
20-
* indicating negative steps.
19+
* region in memory.
2120
*/
2221
NPY_FINLINE npy_bool
23-
nomemoverlap(char *ip, npy_intp ip_size, char *op, npy_intp op_size)
22+
nomemoverlap(char *ip, npy_intp ip_step, char *op, npy_intp op_step, npy_intp len)
2423
{
24+
// Calculate inclusive ranges for offsets of items in arrays.
25+
// The end pointer points to the address of the last item.
26+
const npy_intp ip_offset = ip_step * (len - 1);
27+
const npy_intp op_offset = op_step * (len - 1);
2528
char *ip_start, *ip_end, *op_start, *op_end;
26-
if (ip_size < 0) {
27-
ip_start = ip + ip_size;
29+
if (ip_step < 0) {
30+
ip_start = ip + ip_offset;
2831
ip_end = ip;
2932
}
3033
else {
3134
ip_start = ip;
32-
ip_end = ip + ip_size;
35+
ip_end = ip + ip_offset;
3336
}
34-
if (op_size < 0) {
35-
op_start = op + op_size;
37+
if (op_step < 0) {
38+
op_start = op + op_offset;
3639
op_end = op;
3740
}
3841
else {
3942
op_start = op;
40-
op_end = op + op_size;
43+
op_end = op + op_offset;
4144
}
4245
return (ip_start == op_start && op_end == ip_end) ||
4346
(ip_start > op_end) || (op_start > ip_end);
@@ -48,7 +51,7 @@ nomemoverlap(char *ip, npy_intp ip_size, char *op, npy_intp op_size)
4851
NPY_FINLINE npy_bool
4952
is_mem_overlap(const void *src, npy_intp src_step, const void *dst, npy_intp dst_step, npy_intp len)
5053
{
51-
return !(nomemoverlap((char*)src, src_step*len, (char*)dst, dst_step*len));
54+
return !(nomemoverlap((char*)src, src_step, (char*)dst, dst_step, len));
5255
}
5356

5457
/*

0 commit comments

Comments
 (0)