Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [dev] (MM/DD/YYYY)

### Added
* Added mkl implementation for floating point data-types of `exp2`, `log2`, `fabs`, `copysign`, `nextafter`, `fmax`, `fmin` and `remainder` functions [gh-81](https://github.com/IntelPython/mkl_umath/pull/81)

## [0.2.0] (06/DD/2025)
This release updates `mkl_umath` to be aligned with both numpy-1.26.x and numpy-2.x.x.

Expand Down
208 changes: 144 additions & 64 deletions mkl_umath/src/mkl_umath_loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
} while(0)

/* for pointers p1, and p2 pointing at contiguous arrays n-elements of size s, are arrays disjoint or same
* when these conditions are not met VML functions may product incorrect output
* when these conditions are not met VML functions may produce incorrect output
*/
#define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )

Expand Down Expand Up @@ -219,6 +219,7 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus)
** FLOAT LOOPS **
*****************************************************************************
*/
/* TODO: Use MKL for pow, arctan2, fmod, hypot, i0 */

/**begin repeat
* Float types
Expand Down Expand Up @@ -334,22 +335,26 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
* #scalarf = exp2f, exp2#
*/

/* TODO: Use VML */
void
mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

UNARY_LOOP_DISPATCH(
@type@, @type@
,
can_vectorize
,
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
)
if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@c@Exp2, dimensions[0], @type@, args[0], args[1]);
/* v@c@Exp2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else {
UNARY_LOOP_DISPATCH(
@type@, @type@
,
can_vectorize
,
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
)
}
}

/**end repeat**/
Expand Down Expand Up @@ -460,22 +465,27 @@ mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *st
* #scalarf = log2f, log2#
*/

/* TODO: Use VML */
void
mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

UNARY_LOOP_DISPATCH(
@type@, @type@
,
can_vectorize
,
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
)
if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
{
CHUNKED_VML_CALL2(v@c@Log2, dimensions[0], @type@, args[0], args[1]);
/* v@c@Log2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else {
UNARY_LOOP_DISPATCH(
@type@, @type@
,
can_vectorize
,
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
)
}
}

/**end repeat**/
Expand Down Expand Up @@ -957,14 +967,20 @@ mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *s
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

UNARY_LOOP_DISPATCH(
@type@, @type@
,
can_vectorize
,
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
)
if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
{
CHUNKED_VML_CALL2(v@c@Abs, dimensions[0], @type@, args[0], args[1]);
/* v@c@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else {
UNARY_LOOP_DISPATCH(
@type@, @type@
,
can_vectorize
,
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = @scalarf@(in1);
)
}
}

/**end repeat**/
Expand Down Expand Up @@ -1230,7 +1246,6 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride)
* #type = npy_float, npy_double#
* #TYPE = FLOAT, DOUBLE#
* #c = f, #
* #C = F, #
* #s = s, d#
* #SUPPORTED_BY_VML = 1, 1#
*/
Expand Down Expand Up @@ -1959,20 +1974,46 @@ mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp
void
mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1)= copysign@c@(in1, in2);
#if @SUPPORTED_BY_VML@
const int contig = IS_BINARY_CONT(@type@, @type@);
const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL3(v@s@CopySign, dimensions[0], @type@, args[0], args[1], args[2]);
/* v@s@CopySign(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
} else
#endif
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1)= copysign@c@(in1, in2);
}
}
}

void
mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1)= nextafter@c@(in1, in2);
#if @SUPPORTED_BY_VML@
const int contig = IS_BINARY_CONT(@type@, @type@);
const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL3(v@s@NextAfter, dimensions[0], @type@, args[0], args[1], args[2]);
/* v@s@NextAfter(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
} else
#endif
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*((@type@ *)op1)= nextafter@c@(in1, in2);
}
}
}

Expand Down Expand Up @@ -2009,39 +2050,65 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp

/**begin repeat1
* #kind = fmax, fmin#
* #VML = Fmax, Fmin#
* #OP = >=, <=#
**/
void
mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
/* */
if (IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
/* Order of operations important for MSVC 2015 */
io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
#if @SUPPORTED_BY_VML@
const int contig = IS_BINARY_CONT(@type@, @type@);
const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
/* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
} else
#endif
{
if (IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
/* Order of operations important for MSVC 2015 */
io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
}
*((@type@ *)iop1) = io1;
}
*((@type@ *)iop1) = io1;
}
else {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
/* Order of operations important for MSVC 2015 */
*((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
else {
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
/* Order of operations important for MSVC 2015 */
*((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
}
}
feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
}
feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
}
/**end repeat1**/

void
mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
divmod@c@(in1, in2, (@type@ *)op1);
#if @SUPPORTED_BY_VML@
const int contig = IS_BINARY_CONT(@type@, @type@);
const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL3(v@s@Remainder, dimensions[0], @type@, args[0], args[1], args[2]);
/* v@s@Remainder(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
} else
#endif
{
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
divmod@c@(in1, in2, (@type@ *)op1);
}
}
}

Expand All @@ -2059,9 +2126,11 @@ void
mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
{
#if @SUPPORTED_BY_VML@
if(IS_UNARY_CONT(@type@, @type@) &&
dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, args[0], args[1]);
/* v@s@Sqr(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else
Expand All @@ -2078,9 +2147,11 @@ void
mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
{
#if @SUPPORTED_BY_VML@
if(IS_UNARY_CONT(@type@, @type@) &&
dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, args[0], args[1]);
/* v@s@Inv(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else
Expand Down Expand Up @@ -2114,9 +2185,11 @@ void
mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
#if @SUPPORTED_BY_VML@
if(IS_UNARY_CONT(@type@, @type@) &&
dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
const int contig = IS_UNARY_CONT(@type@, @type@);
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
const int can_vectorize = contig && disjoint_or_same;

if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
/* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
} else
Expand Down Expand Up @@ -2162,6 +2235,7 @@ mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *s
}
}

/* TODO: USE MKL */
void
mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
Expand Down Expand Up @@ -2261,6 +2335,7 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
** COMPLEX LOOPS **
*****************************************************************************
*/
/* TODO: USE MKL for pow, exp, ln, log10, sqrt, trigonometric functions and hyperbolic functions */

#define CGE(xr,xi,yr,yi) ((xr > yr && !isnan(xi) && !isnan(yi)) \
|| (xr == yr && xi >= yi))
Expand Down Expand Up @@ -2363,6 +2438,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n,
}
}

/* TODO: USE MKL */
/**begin repeat1
* arithmetic
* #kind = add, subtract#
Expand Down Expand Up @@ -2396,6 +2472,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
}
/**end repeat1**/

/* TODO: USE MKL */
void
mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
Expand All @@ -2409,6 +2486,7 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
}
}

/* TODO: USE MKL */
void
mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
Expand Down Expand Up @@ -2557,6 +2635,7 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
}
}

/* TODO: USE MKL */
void
mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
UNARY_LOOP {
Expand All @@ -2567,6 +2646,7 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in
}
}

/* TODO: USE MKL */
void
mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
{
Expand Down
Loading
Loading