126126 } while(0)
127127
128128/* for pointers p1, and p2 pointing at contiguous arrays n-elements of size s, are arrays disjoint or same
129- * when these conditions are not met VML functions may product incorrect output
129+ * when these conditions are not met VML functions may produce incorrect output
130130 */
131131#define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )
132132
@@ -219,6 +219,7 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus)
219219 ** FLOAT LOOPS **
220220 *****************************************************************************
221221 */
222+ /* TODO: Use MKL for pow, arctan2, fmod, hypot, i0 */
222223
223224/**begin repeat
224225 * Float types
@@ -334,22 +335,26 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
334335 * #scalarf = exp2f, exp2#
335336 */
336337
337- /* TODO: Use VML */
338338void
339339mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
340340{
341341 const int contig = IS_UNARY_CONT(@type@, @type@);
342342 const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
343343 const int can_vectorize = contig && disjoint_or_same;
344344
345- UNARY_LOOP_DISPATCH(
346- @type@, @type@
347- ,
348- can_vectorize
349- ,
350- const @type@ in1 = *(@type@ *)ip1;
351- *(@type@ *)op1 = @scalarf@(in1);
352- )
345+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
346+ CHUNKED_VML_CALL2(v@c@Exp2, dimensions[0], @type@, args[0], args[1]);
347+ /* v@c@Exp2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
348+ } else {
349+ UNARY_LOOP_DISPATCH(
350+ @type@, @type@
351+ ,
352+ can_vectorize
353+ ,
354+ const @type@ in1 = *(@type@ *)ip1;
355+ *(@type@ *)op1 = @scalarf@(in1);
356+ )
357+ }
353358}
354359
355360/**end repeat**/
@@ -460,22 +465,27 @@ mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *st
460465 * #scalarf = log2f, log2#
461466 */
462467
463- /* TODO: Use VML */
464468void
465469mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
466470{
467471 const int contig = IS_UNARY_CONT(@type@, @type@);
468472 const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
469473 const int can_vectorize = contig && disjoint_or_same;
470474
471- UNARY_LOOP_DISPATCH(
472- @type@, @type@
473- ,
474- can_vectorize
475- ,
476- const @type@ in1 = *(@type@ *)ip1;
477- *(@type@ *)op1 = @scalarf@(in1);
478- )
475+ if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
476+ {
477+ CHUNKED_VML_CALL2(v@c@Log2, dimensions[0], @type@, args[0], args[1]);
478+ /* v@c@Log2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
479+ } else {
480+ UNARY_LOOP_DISPATCH(
481+ @type@, @type@
482+ ,
483+ can_vectorize
484+ ,
485+ const @type@ in1 = *(@type@ *)ip1;
486+ *(@type@ *)op1 = @scalarf@(in1);
487+ )
488+ }
479489}
480490
481491/**end repeat**/
@@ -957,14 +967,20 @@ mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *s
957967 const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
958968 const int can_vectorize = contig && disjoint_or_same;
959969
960- UNARY_LOOP_DISPATCH(
961- @type@, @type@
962- ,
963- can_vectorize
964- ,
965- const @type@ in1 = *(@type@ *)ip1;
966- *(@type@ *)op1 = @scalarf@(in1);
967- )
970+ if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
971+ {
972+ CHUNKED_VML_CALL2(v@c@Abs, dimensions[0], @type@, args[0], args[1]);
973+ /* v@c@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
974+ } else {
975+ UNARY_LOOP_DISPATCH(
976+ @type@, @type@
977+ ,
978+ can_vectorize
979+ ,
980+ const @type@ in1 = *(@type@ *)ip1;
981+ *(@type@ *)op1 = @scalarf@(in1);
982+ )
983+ }
968984}
969985
970986/**end repeat**/
@@ -1230,7 +1246,6 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride)
12301246 * #type = npy_float, npy_double#
12311247 * #TYPE = FLOAT, DOUBLE#
12321248 * #c = f, #
1233- * #C = F, #
12341249 * #s = s, d#
12351250 * #SUPPORTED_BY_VML = 1, 1#
12361251 */
@@ -1959,20 +1974,46 @@ mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp
19591974void
19601975mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
19611976{
1962- BINARY_LOOP {
1963- const @type@ in1 = *(@type@ *)ip1;
1964- const @type@ in2 = *(@type@ *)ip2;
1965- *((@type@ *)op1)= copysign@c@(in1, in2);
1977+ #if @SUPPORTED_BY_VML@
1978+ const int contig = IS_BINARY_CONT(@type@, @type@);
1979+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
1980+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
1981+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
1982+
1983+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
1984+ CHUNKED_VML_CALL3(v@s@CopySign, dimensions[0], @type@, args[0], args[1], args[2]);
1985+ /* v@s@CopySign(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
1986+ } else
1987+ #endif
1988+ {
1989+ BINARY_LOOP {
1990+ const @type@ in1 = *(@type@ *)ip1;
1991+ const @type@ in2 = *(@type@ *)ip2;
1992+ *((@type@ *)op1)= copysign@c@(in1, in2);
1993+ }
19661994 }
19671995}
19681996
19691997void
19701998mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
19711999{
1972- BINARY_LOOP {
1973- const @type@ in1 = *(@type@ *)ip1;
1974- const @type@ in2 = *(@type@ *)ip2;
1975- *((@type@ *)op1)= nextafter@c@(in1, in2);
2000+ #if @SUPPORTED_BY_VML@
2001+ const int contig = IS_BINARY_CONT(@type@, @type@);
2002+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
2003+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
2004+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
2005+
2006+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
2007+ CHUNKED_VML_CALL3(v@s@NextAfter, dimensions[0], @type@, args[0], args[1], args[2]);
2008+ /* v@s@NextAfter(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
2009+ } else
2010+ #endif
2011+ {
2012+ BINARY_LOOP {
2013+ const @type@ in1 = *(@type@ *)ip1;
2014+ const @type@ in2 = *(@type@ *)ip2;
2015+ *((@type@ *)op1)= nextafter@c@(in1, in2);
2016+ }
19762017 }
19772018}
19782019
@@ -2009,39 +2050,65 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
20092050
20102051/**begin repeat1
20112052 * #kind = fmax, fmin#
2053+ * #VML = Fmax, Fmin#
20122054 * #OP = >=, <=#
20132055 **/
20142056void
20152057mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
20162058{
2017- /* */
2018- if (IS_BINARY_REDUCE) {
2019- BINARY_REDUCE_LOOP(@type@) {
2020- const @type@ in2 = *(@type@ *)ip2;
2021- /* Order of operations important for MSVC 2015 */
2022- io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
2059+ #if @SUPPORTED_BY_VML@
2060+ const int contig = IS_BINARY_CONT(@type@, @type@);
2061+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
2062+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
2063+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
2064+
2065+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
2066+ CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
2067+ /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
2068+ } else
2069+ #endif
2070+ {
2071+ if (IS_BINARY_REDUCE) {
2072+ BINARY_REDUCE_LOOP(@type@) {
2073+ const @type@ in2 = *(@type@ *)ip2;
2074+ /* Order of operations important for MSVC 2015 */
2075+ io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
2076+ }
2077+ *((@type@ *)iop1) = io1;
20232078 }
2024- *((@type@ *)iop1) = io1;
2025- }
2026- else {
2027- BINARY_LOOP {
2028- const @type@ in1 = *(@type@ *)ip1;
2029- const @type@ in2 = *(@type@ *)ip2;
2030- /* Order of operations important for MSVC 2015 */
2031- *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
2079+ else {
2080+ BINARY_LOOP {
2081+ const @type@ in1 = *(@type@ *)ip1;
2082+ const @type@ in2 = *(@type@ *)ip2;
2083+ /* Order of operations important for MSVC 2015 */
2084+ *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
2085+ }
20322086 }
2087+ feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
20332088 }
2034- feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
20352089}
20362090/**end repeat1**/
20372091
20382092void
20392093mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
20402094{
2041- BINARY_LOOP {
2042- const @type@ in1 = *(@type@ *)ip1;
2043- const @type@ in2 = *(@type@ *)ip2;
2044- divmod@c@(in1, in2, (@type@ *)op1);
2095+ #if @SUPPORTED_BY_VML@
2096+ const int contig = IS_BINARY_CONT(@type@, @type@);
2097+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
2098+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
2099+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
2100+
2101+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
2102+ CHUNKED_VML_CALL3(v@s@Remainder, dimensions[0], @type@, args[0], args[1], args[2]);
2103+ /* v@s@Remainder(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
2104+ } else
2105+ #endif
2106+ {
2107+ BINARY_LOOP {
2108+ const @type@ in1 = *(@type@ *)ip1;
2109+ const @type@ in2 = *(@type@ *)ip2;
2110+ divmod@c@(in1, in2, (@type@ *)op1);
2111+ }
20452112 }
20462113}
20472114
@@ -2059,9 +2126,11 @@ void
20592126mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
20602127{
20612128#if @SUPPORTED_BY_VML@
2062- if(IS_UNARY_CONT(@type@, @type@) &&
2063- dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
2064- DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
2129+ const int contig = IS_UNARY_CONT(@type@, @type@);
2130+ const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
2131+ const int can_vectorize = contig && disjoint_or_same;
2132+
2133+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
20652134 CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, args[0], args[1]);
20662135 /* v@s@Sqr(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
20672136 } else
@@ -2078,9 +2147,11 @@ void
20782147mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
20792148{
20802149#if @SUPPORTED_BY_VML@
2081- if(IS_UNARY_CONT(@type@, @type@) &&
2082- dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
2083- DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
2150+ const int contig = IS_UNARY_CONT(@type@, @type@);
2151+ const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
2152+ const int can_vectorize = contig && disjoint_or_same;
2153+
2154+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
20842155 CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, args[0], args[1]);
20852156 /* v@s@Inv(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
20862157 } else
@@ -2114,9 +2185,11 @@ void
21142185mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
21152186{
21162187#if @SUPPORTED_BY_VML@
2117- if(IS_UNARY_CONT(@type@, @type@) &&
2118- dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
2119- DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
2188+ const int contig = IS_UNARY_CONT(@type@, @type@);
2189+ const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
2190+ const int can_vectorize = contig && disjoint_or_same;
2191+
2192+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
21202193 CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
21212194 /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
21222195 } else
@@ -2162,6 +2235,7 @@ mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *s
21622235 }
21632236}
21642237
2238+ /* TODO: USE MKL */
21652239void
21662240mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
21672241{
@@ -2261,6 +2335,7 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
22612335 ** COMPLEX LOOPS **
22622336 *****************************************************************************
22632337 */
2338+ /* TODO: USE MKL for pow, exp, ln, log10, sqrt, trigonometric functions and hyperbolic functions */
22642339
22652340#define CGE(xr,xi,yr,yi) ((xr > yr && !isnan(xi) && !isnan(yi)) \
22662341 || (xr == yr && xi >= yi))
@@ -2363,6 +2438,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n,
23632438 }
23642439}
23652440
2441+ /* TODO: USE MKL */
23662442/**begin repeat1
23672443 * arithmetic
23682444 * #kind = add, subtract#
@@ -2396,6 +2472,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
23962472}
23972473/**end repeat1**/
23982474
2475+ /* TODO: USE MKL */
23992476void
24002477mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
24012478{
@@ -2409,6 +2486,7 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
24092486 }
24102487}
24112488
2489+ /* TODO: USE MKL */
24122490void
24132491mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
24142492{
@@ -2557,6 +2635,7 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
25572635 }
25582636}
25592637
2638+ /* TODO: USE MKL */
25602639void
25612640mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
25622641 UNARY_LOOP {
@@ -2567,6 +2646,7 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in
25672646 }
25682647}
25692648
2649+ /* TODO: USE MKL */
25702650void
25712651mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
25722652{
0 commit comments