@@ -323,34 +323,6 @@ abs_ptrdiff(char *a, char *b)
323
323
((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \
324
324
((abs_ptrdiff(args[1], args[0]) == 0))))
325
325
326
- /*
327
- * Avoid using SIMD for very large step sizes for several reasons:
328
- * 1) Supporting large step sizes requires use of i64gather/scatter_ps instructions,
329
- * in which case we need two i64gather instructions and an additional vinsertf32x8
330
- * instruction to load a single zmm register (since one i64gather instruction
331
- * loads into a ymm register). This is not ideal for performance.
332
- * 2) Gather and scatter instructions can be slow when the loads/stores
333
- * cross page boundaries.
334
- *
335
- * We instead rely on i32gather/scatter_ps instructions which use a 32-bit index
336
- * element. The index needs to be < INT_MAX to avoid overflow. MAX_STEP_SIZE
337
- * ensures this. The condition also requires that the input and output arrays
338
- * should have no overlap in memory.
339
- */
340
- #define IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP \
341
- ((labs(steps[0]) < MAX_STEP_SIZE) && \
342
- (labs(steps[1]) < MAX_STEP_SIZE) && \
343
- (labs(steps[2]) < MAX_STEP_SIZE) && \
344
- (nomemoverlap(args[0], steps[0], args[2], steps[2], dimensions[0])) && \
345
- (nomemoverlap(args[1], steps[1], args[2], steps[2], dimensions[0])))
346
-
347
- #define IS_UNARY_TWO_OUT_SMALL_STEPS_AND_NOMEMOVERLAP \
348
- ((labs(steps[0]) < MAX_STEP_SIZE) && \
349
- (labs(steps[1]) < MAX_STEP_SIZE) && \
350
- (labs(steps[2]) < MAX_STEP_SIZE) && \
351
- (nomemoverlap(args[0], steps[0], args[2], steps[2], dimensions[0])) && \
352
- (nomemoverlap(args[0], steps[0], args[1], steps[1], dimensions[0])))
353
-
354
326
/*
355
327
* 1) Output should be contiguous, can handle strided input data
356
328
* 2) Input step should be smaller than MAX_STEP_SIZE for performance
0 commit comments