|
129 | 129 | * when these conditions are not met VML functions may produce incorrect output |
130 | 130 | */ |
/*
 * Aliasing predicates for an input buffer p1 and an output buffer p2 that
 * each hold n elements.
 *
 * DISJOINT_OR_SAME_TWO_DTYPES(p1, p2, n, s1, s2) is non-zero when
 *   - p1 and p2 are the same address, or
 *   - p2 + n*s2 < p1, or
 *   - p1 + n*s1 < p2,
 * where s1 and s2 are the element sizes of p1 and p2 respectively.
 *
 * DISJOINT_OR_SAME(p1, p2, n, s) is the special case in which both buffers
 * share one element size s.
 *
 * Every argument is expanded more than once, so arguments must be free of
 * side effects.
 */
#define DISJOINT_OR_SAME_TWO_DTYPES(p1, p2, n, s1, s2) \
    (((p1) == (p2)) || ((p2) + (n)*(s2) < (p1)) || ((p1) + (n)*(s1) < (p2)))
#define DISJOINT_OR_SAME(p1, p2, n, s) DISJOINT_OR_SAME_TWO_DTYPES(p1, p2, n, s, s)
132 | 133 |
|
133 | 134 | /* |
134 | 135 | * include vectorized functions and dispatchers |
@@ -316,8 +317,7 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st |
316 | 317 | can_vectorize |
317 | 318 | , |
318 | 319 | const @type@ in1 = *(@type@ *)ip1; |
319 | | - const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
320 | | - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 320 | + ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
321 | 321 | *(@type@ *)op1 = @scalarf@(in1); |
322 | 322 | ) |
323 | 323 | } |
@@ -355,8 +355,7 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s |
355 | 355 | can_vectorize |
356 | 356 | , |
357 | 357 | const @type@ in1 = *(@type@ *)ip1; |
358 | | - const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
359 | | - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 358 | + ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
360 | 359 | *(@type@ *)op1 = @scalarf@(in1); |
361 | 360 | ) |
362 | 361 | } |
@@ -493,8 +492,7 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s |
493 | 492 | can_vectorize |
494 | 493 | , |
495 | 494 | const @type@ in1 = *(@type@ *)ip1; |
496 | | - const int invalid_cases = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; |
497 | | - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 495 | + ignore_fpstatus = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; |
498 | 496 | *(@type@ *)op1 = @scalarf@(in1); |
499 | 497 | ) |
500 | 498 | } |
@@ -2124,10 +2122,9 @@ mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_in |
2124 | 2122 | BINARY_LOOP { |
2125 | 2123 | const @type@ in1 = *(@type@ *)ip1; |
2126 | 2124 | const @type@ in2 = *(@type@ *)ip2; |
2127 | | - int invalid_cases = !npy_isnan(in1) && in2 == 0; |
2128 | | - invalid_cases |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); |
2129 | | - invalid_cases |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); |
2130 | | - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 2125 | + ignore_fpstatus = !npy_isnan(in1) && in2 == 0; |
| 2126 | + ignore_fpstatus |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); |
| 2127 | + ignore_fpstatus |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); |
2131 | 2128 | divmod@c@(in1, in2, (@type@ *)op1); |
2132 | 2129 | } |
2133 | 2130 | } |
@@ -2376,10 +2373,10 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i |
2376 | 2373 | * complex types |
2377 | 2374 | * #TYPE = CFLOAT, CDOUBLE# |
2378 | 2375 | * #ftype = npy_float, npy_double# |
| 2376 | + * #type = npy_cfloat, npy_cdouble# |
2379 | 2377 | * #c = f, # |
2380 | | - * #C = F, # |
2381 | | - * #s = s, d# |
2382 | | - * #SUPPORTED_BY_VML = 1, 1# |
| 2378 | + * #C = F, # |
| 2379 | + * #s = c, z# |
2383 | 2380 | */ |
2384 | 2381 |
|
2385 | 2382 | /* similar to pairwise sum of real floats */ |
@@ -2659,44 +2656,47 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i |
2659 | 2656 | } |
2660 | 2657 | } |
2661 | 2658 |
|
2662 | | -/* TODO: USE MKL */ |
2663 | 2659 | void |
2664 | 2660 | mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { |
2665 | | - UNARY_LOOP { |
2666 | | - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
2667 | | - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
2668 | | - ((@ftype@ *)op1)[0] = in1r; |
2669 | | - ((@ftype@ *)op1)[1] = -in1i; |
2670 | | - } |
| 2661 | + const int contig = IS_UNARY_CONT(@type@, @type@); |
| 2662 | + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); |
| 2663 | + const int can_vectorize = contig && disjoint_or_same; |
| 2664 | + |
| 2665 | + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
| 2666 | + CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]); |
| 2667 | + /* v@s@Conj(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ |
| 2668 | + } else { |
| 2669 | + UNARY_LOOP { |
| 2670 | + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
| 2671 | + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
| 2672 | + ((@ftype@ *)op1)[0] = in1r; |
| 2673 | + ((@ftype@ *)op1)[1] = -in1i; |
| 2674 | + } |
| 2675 | + } |
2671 | 2676 | } |
2672 | 2677 |
|
2673 | | -/* TODO: USE MKL */ |
2674 | 2678 | void |
2675 | 2679 | mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) |
2676 | 2680 | { |
| 2681 | + const int contig = IS_UNARY_CONT(@type@, @ftype@); |
| 2682 | + const int disjoint_or_same = DISJOINT_OR_SAME_TWO_DTYPES(args[0], args[1], dimensions[0], sizeof(@type@), sizeof(@ftype@)); |
| 2683 | + const int can_vectorize = contig && disjoint_or_same; |
2677 | 2684 | int ignore_fpstatus = 0; |
2678 | | - |
2679 | | - // FIXME: abs function VML for complex numbers breaks FFT test_basic.py |
2680 | | - //if(steps[0]/2 == sizeof(@ftype@) && steps[1] == sizeof(@ftype@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
2681 | | -#if @SUPPORTED_BY_VML@ |
2682 | | - if(0 == 1) { |
| 2685 | + |
| 2686 | + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
2683 | 2687 | ignore_fpstatus = 1; |
2684 | | - CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @ftype@, args[0], args[1]); |
2685 | | - /* v@s@Abs(dimensions[0], (@ftype@ *) args[0], (@ftype@ *) args[1]); */ |
2686 | | - } else |
2687 | | -#endif |
2688 | | - { |
| 2688 | + CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]); |
| 2689 | + /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ |
| 2690 | + } else { |
2689 | 2691 | UNARY_LOOP { |
2690 | 2692 | const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
2691 | 2693 | const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
2692 | | - if(in1r == 0.0 && in1i == 0.0){ |
2693 | | - ignore_fpstatus = 1; |
2694 | | - } |
| 2694 | + ignore_fpstatus = npy_isnan(in1r) && npy_isnan(in1i); |
2695 | 2695 | *((@ftype@ *)op1) = hypot@c@(in1r, in1i); |
2696 | 2696 | } |
2697 | 2697 | } |
2698 | 2698 | if(ignore_fpstatus) { |
2699 | | - feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); |
| 2699 | + feclearexcept(FE_INVALID); |
2700 | 2700 | } |
2701 | 2701 | } |
2702 | 2702 |
|
|
0 commit comments