} while(0)

/* for pointers p1 and p2 pointing at contiguous arrays of n elements of size s, check whether the arrays are disjoint or the same
- * when these conditions are not met VML functions may product incorrect output
+ * when these conditions are not met VML functions may produce incorrect output
 */
#define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )

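
An aside, not part of the patch: a minimal, self-contained sketch of how this aliasing check behaves on plain buffers (the char-pointer arguments mirror how the loops pass args[]). Note the check is conservative: it only reports a pair as safe when the arrays are identical or strictly separated.

#include <stdio.h>

/* same macro as above, repeated so the sketch compiles on its own */
#define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )

int main(void)
{
    double buf[16];
    char *a = (char *)buf;          /* 8 doubles at buf[0..7]                      */
    char *b = (char *)(buf + 4);    /* 8 doubles at buf[4..11], overlaps a         */
    char *c = (char *)(buf + 8);    /* 4 doubles at buf[8..11], clear of buf[0..3] */

    printf("%d\n", DISJOINT_OR_SAME(a, a, 8, sizeof(double)));  /* 1: same array, in-place call allowed */
    printf("%d\n", DISJOINT_OR_SAME(a, b, 8, sizeof(double)));  /* 0: partial overlap, take scalar loop */
    printf("%d\n", DISJOINT_OR_SAME(a, c, 4, sizeof(double)));  /* 1: fully disjoint ranges             */
    return 0;
}
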
@@ -219,6 +219,7 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus)
 ** FLOAT LOOPS **
 *****************************************************************************
 */
+ /* TODO: Use MKL for pow, arctan2, fmod, hypot, i0 */

/**begin repeat
 * Float types
@@ -334,22 +335,26 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
 * #scalarf = exp2f, exp2#
 */

- /* TODO: Use VML */
void
mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

- UNARY_LOOP_DISPATCH(
- @type@, @type@
- ,
- can_vectorize
- ,
- const @type@ in1 = *(@type@ *)ip1;
- *(@type@ *)op1 = @scalarf@(in1);
- )
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+ CHUNKED_VML_CALL2(v@c@Exp2, dimensions[0], @type@, args[0], args[1]);
+ /* v@c@Exp2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+ } else {
+ UNARY_LOOP_DISPATCH(
+ @type@, @type@
+ ,
+ can_vectorize
+ ,
+ const @type@ in1 = *(@type@ *)ip1;
+ *(@type@ *)op1 = @scalarf@(in1);
+ )
+ }
}

/**end repeat**/
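
The CHUNKED_VML_CALL2 macro used above is defined earlier in this file and does not appear in the diff; the helper below is only a sketch of the shape it presumably has, assuming the chunking exists so that each VML call receives a count that fits in MKL_INT (chunked_vdExp2 is an illustrative name, not real API). The commented-out v@c@Exp2 line in the loop shows the unchunked call the macro would wrap.

#include <limits.h>
#include <stddef.h>
#include <mkl.h>   /* MKL_INT, vdExp2 */

/* hypothetical helper, double case only */
static void chunked_vdExp2(ptrdiff_t n, const double *src, double *dst)
{
    while (n > 0) {
        /* clamp each call so the element count fits in MKL_INT */
        MKL_INT m = (MKL_INT)(n > INT_MAX ? INT_MAX : n);
        vdExp2(m, src, dst);
        src += m;
        dst += m;
        n   -= m;
    }
}
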
@@ -460,22 +465,27 @@ mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *st
 * #scalarf = log2f, log2#
 */

- /* TODO: Use VML */
void
mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

- UNARY_LOOP_DISPATCH(
- @type@, @type@
- ,
- can_vectorize
- ,
- const @type@ in1 = *(@type@ *)ip1;
- *(@type@ *)op1 = @scalarf@(in1);
- )
+ if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
+ {
+ CHUNKED_VML_CALL2(v@c@Log2, dimensions[0], @type@, args[0], args[1]);
+ /* v@c@Log2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+ } else {
+ UNARY_LOOP_DISPATCH(
+ @type@, @type@
+ ,
+ can_vectorize
+ ,
+ const @type@ in1 = *(@type@ *)ip1;
+ *(@type@ *)op1 = @scalarf@(in1);
+ )
+ }
}

/**end repeat**/
@@ -957,14 +967,20 @@ mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *s
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

- UNARY_LOOP_DISPATCH(
- @type@, @type@
- ,
- can_vectorize
- ,
- const @type@ in1 = *(@type@ *)ip1;
- *(@type@ *)op1 = @scalarf@(in1);
- )
+ if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
+ {
+ CHUNKED_VML_CALL2(v@c@Abs, dimensions[0], @type@, args[0], args[1]);
+ /* v@c@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+ } else {
+ UNARY_LOOP_DISPATCH(
+ @type@, @type@
+ ,
+ can_vectorize
+ ,
+ const @type@ in1 = *(@type@ *)ip1;
+ *(@type@ *)op1 = @scalarf@(in1);
+ )
+ }
}

/**end repeat**/
@@ -1230,7 +1246,6 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride)
 * #type = npy_float, npy_double#
 * #TYPE = FLOAT, DOUBLE#
 * #c = f, #
- * #C = F, #
 * #s = s, d#
 * #SUPPORTED_BY_VML = 1, 1#
 */
@@ -1959,20 +1974,46 @@ mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp
void
mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
- BINARY_LOOP {
- const @type@ in1 = *(@type@ *)ip1;
- const @type@ in2 = *(@type@ *)ip2;
- *((@type@ *)op1)= copysign@c@(in1, in2);
+ #if @SUPPORTED_BY_VML@
+ const int contig = IS_BINARY_CONT(@type@, @type@);
+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+ CHUNKED_VML_CALL3(v@s@CopySign, dimensions[0], @type@, args[0], args[1], args[2]);
+ /* v@s@CopySign(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+ } else
+ #endif
+ {
+ BINARY_LOOP {
+ const @type@ in1 = *(@type@ *)ip1;
+ const @type@ in2 = *(@type@ *)ip2;
+ *((@type@ *)op1)= copysign@c@(in1, in2);
+ }
}
}

void
mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
- BINARY_LOOP {
- const @type@ in1 = *(@type@ *)ip1;
- const @type@ in2 = *(@type@ *)ip2;
- *((@type@ *)op1)= nextafter@c@(in1, in2);
+ #if @SUPPORTED_BY_VML@
+ const int contig = IS_BINARY_CONT(@type@, @type@);
+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+ CHUNKED_VML_CALL3(v@s@NextAfter, dimensions[0], @type@, args[0], args[1], args[2]);
+ /* v@s@NextAfter(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+ } else
+ #endif
+ {
+ BINARY_LOOP {
+ const @type@ in1 = *(@type@ *)ip1;
+ const @type@ in2 = *(@type@ *)ip2;
+ *((@type@ *)op1)= nextafter@c@(in1, in2);
+ }
}
}
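
For the binary loops, CHUNKED_VML_CALL3 presumably plays the same role with three arrays. Stripped of templating and chunking, the vectorized branch of copysign reduces to something like the sketch below for contiguous double data (illustrative helper, not the macro itself); the two DISJOINT_OR_SAME checks above guard both inputs against the output buffer before this path is taken.

#include <mkl.h>   /* MKL_INT, vdCopySign */

/* r[i] = copysign(a[i], b[i]) over contiguous, non-overlapping buffers */
static void copysign_contig_d(MKL_INT n, const double *a, const double *b, double *r)
{
    vdCopySign(n, a, b, r);
}
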

@@ -2009,39 +2050,65 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp

/**begin repeat1
 * #kind = fmax, fmin#
+ * #VML = Fmax, Fmin#
 * #OP = >=, <=#
 **/
void
mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
- /* */
- if (IS_BINARY_REDUCE) {
- BINARY_REDUCE_LOOP(@type@) {
- const @type@ in2 = *(@type@ *)ip2;
- /* Order of operations important for MSVC 2015 */
- io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
+ #if @SUPPORTED_BY_VML@
+ const int contig = IS_BINARY_CONT(@type@, @type@);
+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+ CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
+ /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+ } else
+ #endif
+ {
+ if (IS_BINARY_REDUCE) {
+ BINARY_REDUCE_LOOP(@type@) {
+ const @type@ in2 = *(@type@ *)ip2;
+ /* Order of operations important for MSVC 2015 */
+ io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
+ }
+ *((@type@ *)iop1) = io1;
}
- *((@type@ *)iop1) = io1;
- }
- else {
- BINARY_LOOP {
- const @type@ in1 = *(@type@ *)ip1;
- const @type@ in2 = *(@type@ *)ip2;
- /* Order of operations important for MSVC 2015 */
- *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
+ else {
+ BINARY_LOOP {
+ const @type@ in1 = *(@type@ *)ip1;
+ const @type@ in2 = *(@type@ *)ip2;
+ /* Order of operations important for MSVC 2015 */
+ *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
+ }
}
+ feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
}
- feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
}
/**end repeat1**/
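
A small standalone check, not part of the patch, of the scalar fallback's NaN handling for the fmax case (@OP@ is >=): a NaN in either operand yields the other operand, i.e. C fmax semantics. The v@s@Fmax branch is assumed to behave the same way; worth confirming against the VML documentation.

#include <math.h>
#include <stdio.h>

/* same expression as the fallback loop body, specialized to double and @OP@ = >= */
static double fmax_fallback(double in1, double in2)
{
    return (in1 >= in2 || isnan(in2)) ? in1 : in2;
}

int main(void)
{
    printf("%g\n", fmax_fallback(1.0, NAN));  /* 1: NaN in in2 keeps in1       */
    printf("%g\n", fmax_fallback(NAN, 2.0));  /* 2: comparison fails, in2 wins */
    printf("%g\n", fmax_fallback(1.0, 2.0));  /* 2: ordinary maximum           */
    return 0;
}
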

void
mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
- BINARY_LOOP {
- const @type@ in1 = *(@type@ *)ip1;
- const @type@ in2 = *(@type@ *)ip2;
- divmod@c@(in1, in2, (@type@ *)op1);
+ #if @SUPPORTED_BY_VML@
+ const int contig = IS_BINARY_CONT(@type@, @type@);
+ const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+ const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+ CHUNKED_VML_CALL3(v@s@Remainder, dimensions[0], @type@, args[0], args[1], args[2]);
+ /* v@s@Remainder(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+ } else
+ #endif
+ {
+ BINARY_LOOP {
+ const @type@ in1 = *(@type@ *)ip1;
+ const @type@ in2 = *(@type@ *)ip2;
+ divmod@c@(in1, in2, (@type@ *)op1);
+ }
}
}
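
One point worth double-checking in this hunk: the scalar fallback goes through divmod@c@, which follows NumPy's floor-mod convention (the result takes the sign of the divisor), while v@s@Remainder, if it matches the libm remainder() it is named after, rounds the quotient to nearest and can return a result of either sign. The two conventions differ for mixed-sign operands, as this small check illustrates:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double a = -7.0, b = 3.0;

    /* C remainder(): quotient rounded to nearest, result may be negative */
    printf("remainder(-7, 3) = %g\n", remainder(a, b));      /* -1 */

    /* floor-mod as used by NumPy's remainder: sign follows the divisor */
    printf("floor-mod(-7, 3) = %g\n", a - floor(a / b) * b); /*  2 */
    return 0;
}
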

@@ -2059,9 +2126,11 @@ void
mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
{
#if @SUPPORTED_BY_VML@
- if(IS_UNARY_CONT(@type@, @type@) &&
- dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
- DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+ const int contig = IS_UNARY_CONT(@type@, @type@);
+ const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, args[0], args[1]);
/* v@s@Sqr(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else
@@ -2078,9 +2147,11 @@ void
mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
{
#if @SUPPORTED_BY_VML@
- if(IS_UNARY_CONT(@type@, @type@) &&
- dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
- DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+ const int contig = IS_UNARY_CONT(@type@, @type@);
+ const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, args[0], args[1]);
/* v@s@Inv(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else
@@ -2114,9 +2185,11 @@ void
mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
#if @SUPPORTED_BY_VML@
- if(IS_UNARY_CONT(@type@, @type@) &&
- dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
- DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+ const int contig = IS_UNARY_CONT(@type@, @type@);
+ const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+ const int can_vectorize = contig && disjoint_or_same;
+
+ if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
/* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else
@@ -2162,6 +2235,7 @@ mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *s
}
}

+ /* TODO: USE MKL */
void
mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
@@ -2261,6 +2335,7 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
** COMPLEX LOOPS **
 *****************************************************************************
 */
+ /* TODO: USE MKL for pow, exp, ln, log10, sqrt, trigonometric functions and hyperbolic functions */
#define CGE(xr,xi,yr,yi) ((xr > yr && !isnan(xi) && !isnan(yi)) \
2266
2341
|| (xr == yr && xi >= yi))
@@ -2363,6 +2438,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n,
2363
2438
}
2364
2439
}
2365
2440
2441
+ /* TODO: USE MKL */
2366
2442
/**begin repeat1
2367
2443
* arithmetic
2368
2444
* #kind = add, subtract#
@@ -2396,6 +2472,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
2396
2472
}
2397
2473
/**end repeat1**/
2398
2474
2475
+ /* TODO: USE MKL */
2399
2476
void
2400
2477
mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
2401
2478
{
@@ -2409,6 +2486,7 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
2409
2486
}
2410
2487
}
2411
2488
2489
+ /* TODO: USE MKL */
2412
2490
void
2413
2491
mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
2414
2492
{
@@ -2557,6 +2635,7 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
2557
2635
}
2558
2636
}
2559
2637
2638
+ /* TODO: USE MKL */
2560
2639
void
2561
2640
mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
2562
2641
UNARY_LOOP {
@@ -2567,6 +2646,7 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in
2567
2646
}
2568
2647
}
2569
2648
2649
+ /* TODO: USE MKL */
2570
2650
void
2571
2651
mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
2572
2652
{
0 commit comments