
Commit 6c7c143

Author: Vahid Tavanashad (committed)

add new vm functions
1 parent aafc0cc

File tree: 3 files changed (+200 -91 lines)


CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [dev] (MM/DD/YYYY)
+
+### Added
+* Added mkl implementation for floating point data-types of `exp2`, `log2`, `fabs`, `copysign`, `nextafter`, `fmax`, `fmin` and `remainder` functions [gh-81](https://github.com/IntelPython/mkl_umath/pull/81)
+
 ## [0.2.0] (06/DD/2025)
 This release updates `mkl_umath` to be aligned with both numpy-1.26.x and numpy-2.x.x.

mkl_umath/src/mkl_umath_loops.c.src

Lines changed: 144 additions & 64 deletions
@@ -126,7 +126,7 @@
 } while(0)
 
 /* for pointers p1, and p2 pointing at contiguous arrays n-elements of size s, are arrays disjoint or same
- * when these conditions are not met VML functions may product incorrect output
+ * when these conditions are not met VML functions may produce incorrect output
 */
 #define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )
 
@@ -219,6 +219,7 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus)
 ** FLOAT LOOPS **
 *****************************************************************************
 */
+/* TODO: Use MKL for pow, arctan2, fmod, hypot, i0 */
 
 /**begin repeat
  * Float types
@@ -334,22 +335,26 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
  * #scalarf = exp2f, exp2#
  */
 
-/* TODO: Use VML */
 void
 mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
     const int contig = IS_UNARY_CONT(@type@, @type@);
     const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
     const int can_vectorize = contig && disjoint_or_same;
 
-    UNARY_LOOP_DISPATCH(
-        @type@, @type@
-        ,
-        can_vectorize
-        ,
-        const @type@ in1 = *(@type@ *)ip1;
-        *(@type@ *)op1 = @scalarf@(in1);
-    )
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL2(v@c@Exp2, dimensions[0], @type@, args[0], args[1]);
+        /* v@c@Exp2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+    } else {
+        UNARY_LOOP_DISPATCH(
+            @type@, @type@
+            ,
+            can_vectorize
+            ,
+            const @type@ in1 = *(@type@ *)ip1;
+            *(@type@ *)op1 = @scalarf@(in1);
+        )
+    }
 }
 
 /**end repeat**/
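Note: for orientation, the double-precision instantiation of the new `exp2` loop above reduces to the pattern sketched below. The direct `vdExp2` call and the threshold value are stand-ins for the commit's `CHUNKED_VML_CALL2` and `VML_TRANSCEDENTAL_THRESHOLD` macros, which are defined elsewhere in the file:

```c
#include <math.h>
#include <mkl.h>   /* vdExp2, MKL_INT */

#define EXAMPLE_VML_THRESHOLD 8192   /* hypothetical value; the real macro is VML_TRANSCEDENTAL_THRESHOLD */

/* Sketch of the DOUBLE case: batch the work into one VML call when the data is
 * large, contiguous and non-overlapping, otherwise run the scalar fallback. */
static void double_exp2(const double *in, double *out, MKL_INT n, int can_vectorize)
{
    if (can_vectorize && n > EXAMPLE_VML_THRESHOLD) {
        vdExp2(n, in, out);                 /* one batched MKL VM call */
    } else {
        for (MKL_INT i = 0; i < n; ++i) {   /* what UNARY_LOOP_DISPATCH boils down to here */
            out[i] = exp2(in[i]);
        }
    }
}
```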
@@ -460,22 +465,27 @@ mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *st
  * #scalarf = log2f, log2#
  */
 
-/* TODO: Use VML */
 void
 mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
     const int contig = IS_UNARY_CONT(@type@, @type@);
     const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
     const int can_vectorize = contig && disjoint_or_same;
 
-    UNARY_LOOP_DISPATCH(
-        @type@, @type@
-        ,
-        can_vectorize
-        ,
-        const @type@ in1 = *(@type@ *)ip1;
-        *(@type@ *)op1 = @scalarf@(in1);
-    )
+    if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
+    {
+        CHUNKED_VML_CALL2(v@c@Log2, dimensions[0], @type@, args[0], args[1]);
+        /* v@c@Log2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+    } else {
+        UNARY_LOOP_DISPATCH(
+            @type@, @type@
+            ,
+            can_vectorize
+            ,
+            const @type@ in1 = *(@type@ *)ip1;
+            *(@type@ *)op1 = @scalarf@(in1);
+        )
+    }
 }
 
 /**end repeat**/
@@ -957,14 +967,20 @@ mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *s
     const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
     const int can_vectorize = contig && disjoint_or_same;
 
-    UNARY_LOOP_DISPATCH(
-        @type@, @type@
-        ,
-        can_vectorize
-        ,
-        const @type@ in1 = *(@type@ *)ip1;
-        *(@type@ *)op1 = @scalarf@(in1);
-    )
+    if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
+    {
+        CHUNKED_VML_CALL2(v@c@Abs, dimensions[0], @type@, args[0], args[1]);
+        /* v@c@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+    } else {
+        UNARY_LOOP_DISPATCH(
+            @type@, @type@
+            ,
+            can_vectorize
+            ,
+            const @type@ in1 = *(@type@ *)ip1;
+            *(@type@ *)op1 = @scalarf@(in1);
+        )
+    }
 }
 
 /**end repeat**/
@@ -1230,7 +1246,6 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride)
  * #type = npy_float, npy_double#
  * #TYPE = FLOAT, DOUBLE#
  * #c = f, #
- * #C = F, #
  * #s = s, d#
  * #SUPPORTED_BY_VML = 1, 1#
  */
@@ -1959,20 +1974,46 @@ mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp
 void
 mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP {
-        const @type@ in1 = *(@type@ *)ip1;
-        const @type@ in2 = *(@type@ *)ip2;
-        *((@type@ *)op1)= copysign@c@(in1, in2);
+#if @SUPPORTED_BY_VML@
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@CopySign, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@CopySign(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else
+#endif
+    {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            *((@type@ *)op1)= copysign@c@(in1, in2);
+        }
     }
 }
 
 void
 mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP {
-        const @type@ in1 = *(@type@ *)ip1;
-        const @type@ in2 = *(@type@ *)ip2;
-        *((@type@ *)op1)= nextafter@c@(in1, in2);
+#if @SUPPORTED_BY_VML@
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@NextAfter, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@NextAfter(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else
+#endif
+    {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
            *((@type@ *)op1)= nextafter@c@(in1, in2);
+        }
     }
 }
 
@@ -2009,39 +2050,65 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
 
 /**begin repeat1
  * #kind = fmax, fmin#
+ * #VML = Fmax, Fmin#
  * #OP = >=, <=#
 **/
 void
 mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    /* */
-    if (IS_BINARY_REDUCE) {
-        BINARY_REDUCE_LOOP(@type@) {
-            const @type@ in2 = *(@type@ *)ip2;
-            /* Order of operations important for MSVC 2015 */
-            io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
+#if @SUPPORTED_BY_VML@
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else
+#endif
+    {
+        if (IS_BINARY_REDUCE) {
+            BINARY_REDUCE_LOOP(@type@) {
+                const @type@ in2 = *(@type@ *)ip2;
+                /* Order of operations important for MSVC 2015 */
+                io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
+            }
+            *((@type@ *)iop1) = io1;
         }
-        *((@type@ *)iop1) = io1;
-    }
-    else {
-        BINARY_LOOP {
-            const @type@ in1 = *(@type@ *)ip1;
-            const @type@ in2 = *(@type@ *)ip2;
-            /* Order of operations important for MSVC 2015 */
-            *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
+        else {
+            BINARY_LOOP {
+                const @type@ in1 = *(@type@ *)ip1;
+                const @type@ in2 = *(@type@ *)ip2;
+                /* Order of operations important for MSVC 2015 */
+                *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
+            }
         }
+        feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
     }
-    feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
 }
 /**end repeat1**/
 
 void
 mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP {
-        const @type@ in1 = *(@type@ *)ip1;
-        const @type@ in2 = *(@type@ *)ip2;
-        divmod@c@(in1, in2, (@type@ *)op1);
+#if @SUPPORTED_BY_VML@
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@Remainder, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@Remainder(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else
+#endif
+    {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            divmod@c@(in1, in2, (@type@ *)op1);
+        }
    }
 }
 
@@ -2059,9 +2126,11 @@ void
 mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
 {
 #if @SUPPORTED_BY_VML@
-    if(IS_UNARY_CONT(@type@, @type@) &&
-       dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
-       DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+    const int contig = IS_UNARY_CONT(@type@, @type@);
+    const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
         CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, args[0], args[1]);
         /* v@s@Sqr(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
     } else
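Note: the `CHUNKED_VML_CALL2`/`CHUNKED_VML_CALL3` macros used throughout this diff are defined earlier in the file and are not shown here; presumably the chunking exists so that an `npy_intp` element count that may exceed `MKL_INT` can still be handed to VML safely. A rough, hypothetical illustration of that idea (helper name and chunk size invented for this sketch):

```c
#include <mkl.h>   /* vdSqr, MKL_INT */

#define EXAMPLE_CHUNK ((long long)1 << 20)   /* hypothetical chunk length */

/* Feed a large buffer to a VML function one bounded chunk at a time, so each
 * call's element count always fits in MKL_INT. */
static void chunked_vdSqr(long long n, const double *in, double *out)
{
    long long offset = 0;
    while (offset < n) {
        long long remaining = n - offset;
        MKL_INT len = (MKL_INT)(remaining < EXAMPLE_CHUNK ? remaining : EXAMPLE_CHUNK);
        vdSqr(len, in + offset, out + offset);
        offset += len;
    }
}
```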
@@ -2078,9 +2147,11 @@ void
 mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
 {
 #if @SUPPORTED_BY_VML@
-    if(IS_UNARY_CONT(@type@, @type@) &&
-       dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
-       DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+    const int contig = IS_UNARY_CONT(@type@, @type@);
+    const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
         CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, args[0], args[1]);
         /* v@s@Inv(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
     } else
@@ -2114,9 +2185,11 @@ void
 mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
 #if @SUPPORTED_BY_VML@
-    if(IS_UNARY_CONT(@type@, @type@) &&
-       dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
-       DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+    const int contig = IS_UNARY_CONT(@type@, @type@);
+    const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
         CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
         /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
     } else
@@ -2162,6 +2235,7 @@ mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *s
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -2261,6 +2335,7 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
 ** COMPLEX LOOPS **
 *****************************************************************************
 */
+/* TODO: USE MKL for pow, exp, ln, log10, sqrt, trigonometric functions and hyperbolic functions */
 
 #define CGE(xr,xi,yr,yi) ((xr > yr && !isnan(xi) && !isnan(yi)) \
                        || (xr == yr && xi >= yi))
@@ -2363,6 +2438,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n,
     }
 }
 
+/* TODO: USE MKL */
 /**begin repeat1
  * arithmetic
  * #kind = add, subtract#
@@ -2396,6 +2472,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
 }
 /**end repeat1**/
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -2409,6 +2486,7 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -2557,6 +2635,7 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
     UNARY_LOOP {
@@ -2567,6 +2646,7 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {

0 commit comments
