IntelPython · vtavana · Jul 12, 2025 · May 9, 2025 · Jun 20, 2025 · Jun 26, 2025
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [dev] (MM/DD/YYYY)
+
+### Added
+* Added mkl implementation for floating point data-types of `exp2`, `log2`, `fabs`, `copysign`, `nextafter`, `fmax`, `fmin` and `remainder` functions [gh-81](https://github.com/IntelPython/mkl_umath/pull/81)
+
 ## [0.2.0] (06/DD/2025)
 This release updates `mkl_umath` to be aligned with both numpy-1.26.x and numpy-2.x.x.
 

@@ -126,7 +126,7 @@
     } while(0)
 
 /* for pointers p1, and p2 pointing at contiguous arrays n-elements of size s, are arrays disjoint or same
- *  when these conditions are not met VML functions may product incorrect output
+ *  when these conditions are not met VML functions may produce incorrect output
  */
 #define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )
 
@@ -219,6 +219,7 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus)
  **                             FLOAT LOOPS                                 **
  *****************************************************************************
  */
+/* TODO: Use MKL for pow, arctan2, fmod, hypot, i0 */
 
 /**begin repeat
  * Float types
@@ -334,22 +335,26 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
  *  #scalarf = exp2f, exp2#
  */
 
-/* TODO: Use VML */
 void
 mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
     const int contig = IS_UNARY_CONT(@type@, @type@);
     const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
     const int can_vectorize = contig && disjoint_or_same;
 
-    UNARY_LOOP_DISPATCH(
-        @type@, @type@
-        ,
-        can_vectorize
-        ,
-        const @type@ in1 = *(@type@ *)ip1;
-        *(@type@ *)op1 = @scalarf@(in1);
-    )
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL2(v@c@Exp2, dimensions[0], @type@, args[0], args[1]);
+        /* v@c@Exp2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+    } else {
+        UNARY_LOOP_DISPATCH(
+            @type@, @type@
+            ,
+            can_vectorize
+            ,
+            const @type@ in1 = *(@type@ *)ip1;
+            *(@type@ *)op1 = @scalarf@(in1);
+        )
+    }
 }
 
 /**end repeat**/
@@ -460,22 +465,27 @@ mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *st
  *  #scalarf = log2f, log2#
  */
 
-/* TODO: Use VML */
 void
 mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
     const int contig = IS_UNARY_CONT(@type@, @type@);
     const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
     const int can_vectorize = contig && disjoint_or_same;
 
-    UNARY_LOOP_DISPATCH(
-        @type@, @type@
-        ,
-        can_vectorize
-        ,
-        const @type@ in1 = *(@type@ *)ip1;
-        *(@type@ *)op1 = @scalarf@(in1);
-    )
+    if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
+    {
+        CHUNKED_VML_CALL2(v@c@Log2, dimensions[0], @type@, args[0], args[1]);
+        /* v@c@Log2(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+    } else {
+        UNARY_LOOP_DISPATCH(
+            @type@, @type@
+            ,
+            can_vectorize
+            ,
+            const @type@ in1 = *(@type@ *)ip1;
+            *(@type@ *)op1 = @scalarf@(in1);
+        )
+    }
 }
 
 /**end repeat**/
@@ -957,14 +967,20 @@ mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *s
     const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
     const int can_vectorize = contig && disjoint_or_same;
 
-    UNARY_LOOP_DISPATCH(
-        @type@, @type@
-        ,
-        can_vectorize
-        ,
-        const @type@ in1 = *(@type@ *)ip1;
-        *(@type@ *)op1 = @scalarf@(in1);
-    )
+    if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
+    {
+        CHUNKED_VML_CALL2(v@c@Abs, dimensions[0], @type@, args[0], args[1]);
+        /* v@c@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
+    } else {
+        UNARY_LOOP_DISPATCH(
+            @type@, @type@
+            ,
+            can_vectorize
+            ,
+            const @type@ in1 = *(@type@ *)ip1;
+            *(@type@ *)op1 = @scalarf@(in1);
+        )
+    }
 }
 
 /**end repeat**/
@@ -1230,7 +1246,6 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride)
  *  #type = npy_float, npy_double#
  *  #TYPE = FLOAT, DOUBLE#
  *  #c = f, #
- *  #C = F, #
  *  #s = s, d#
  *  #SUPPORTED_BY_VML = 1, 1#
  */
@@ -1959,20 +1974,46 @@ mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp
 void
 mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP {
-        const @type@ in1 = *(@type@ *)ip1;
-        const @type@ in2 = *(@type@ *)ip2;
-        *((@type@ *)op1)= copysign@c@(in1, in2);
+#if @SUPPORTED_BY_VML@    
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@CopySign, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@CopySign(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else    
+#endif    
+    {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            *((@type@ *)op1)= copysign@c@(in1, in2);
+        }
     }
 }
 
 void
 mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP {
-        const @type@ in1 = *(@type@ *)ip1;
-        const @type@ in2 = *(@type@ *)ip2;
-        *((@type@ *)op1)= nextafter@c@(in1, in2);
+#if @SUPPORTED_BY_VML@    
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@NextAfter, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@NextAfter(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else    
+#endif      
+    {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            *((@type@ *)op1)= nextafter@c@(in1, in2);
+        }
     }
 }
 
@@ -2009,39 +2050,65 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
 
 /**begin repeat1
  * #kind = fmax, fmin#
+ * #VML = Fmax, Fmin#
  * #OP =  >=, <=#
  **/
 void
 mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    /*  */
-    if (IS_BINARY_REDUCE) {
-        BINARY_REDUCE_LOOP(@type@) {
-            const @type@ in2 = *(@type@ *)ip2;
-            /* Order of operations important for MSVC 2015 */
-            io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
+#if @SUPPORTED_BY_VML@    
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else
+#endif      
+    {
+        if (IS_BINARY_REDUCE) {
+            BINARY_REDUCE_LOOP(@type@) {
+                const @type@ in2 = *(@type@ *)ip2;
+                /* Order of operations important for MSVC 2015 */
+                io1 = (io1 @OP@ in2 || isnan(in2)) ? io1 : in2;
+            }
+            *((@type@ *)iop1) = io1;
         }
-        *((@type@ *)iop1) = io1;
-    }
-    else {
-        BINARY_LOOP {
-            const @type@ in1 = *(@type@ *)ip1;
-            const @type@ in2 = *(@type@ *)ip2;
-            /* Order of operations important for MSVC 2015 */
-            *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
+        else {
+            BINARY_LOOP {
+                const @type@ in1 = *(@type@ *)ip1;
+                const @type@ in2 = *(@type@ *)ip2;
+                /* Order of operations important for MSVC 2015 */
+                *((@type@ *)op1) = (in1 @OP@ in2 || isnan(in2)) ? in1 : in2;
+            }
         }
+        feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
     }
-    feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */
 }
 /**end repeat1**/
 
 void
 mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP {
-        const @type@ in1 = *(@type@ *)ip1;
-        const @type@ in2 = *(@type@ *)ip2;
-        divmod@c@(in1, in2, (@type@ *)op1);
+#if @SUPPORTED_BY_VML@    
+    const int contig = IS_BINARY_CONT(@type@, @type@);
+    const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
+    const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
+        CHUNKED_VML_CALL3(v@s@Remainder, dimensions[0], @type@, args[0], args[1], args[2]);
+        /* v@s@Remainder(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
+    } else    
+#endif      
+    {    
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            divmod@c@(in1, in2, (@type@ *)op1);
+        }
     }
 }
 
@@ -2059,9 +2126,11 @@ void
 mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
 {
 #if @SUPPORTED_BY_VML@
-    if(IS_UNARY_CONT(@type@, @type@) &&
-           dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
-           DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+    const int contig = IS_UNARY_CONT(@type@, @type@);
+    const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
         CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, args[0], args[1]);
         /* v@s@Sqr(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
     } else
@@ -2078,9 +2147,11 @@ void
 mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data))
 {
 #if @SUPPORTED_BY_VML@
-    if(IS_UNARY_CONT(@type@, @type@) &&
-           dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
-           DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+    const int contig = IS_UNARY_CONT(@type@, @type@);
+    const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
         CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, args[0], args[1]);
         /* v@s@Inv(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
     } else
@@ -2114,9 +2185,11 @@ void
 mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
 #if @SUPPORTED_BY_VML@
-    if(IS_UNARY_CONT(@type@, @type@) &&
-           dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD &&
-           DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) {
+    const int contig = IS_UNARY_CONT(@type@, @type@);
+    const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
+    const int can_vectorize = contig && disjoint_or_same;
+
+    if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
         CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
         /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
     } else
@@ -2162,6 +2235,7 @@ mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *s
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -2261,6 +2335,7 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
  **                           COMPLEX LOOPS                                 **
  *****************************************************************************
  */
+/* TODO: USE MKL for pow, exp, ln, log10, sqrt, trigonometric functions and hyperbolic functions */
 
 #define CGE(xr,xi,yr,yi) ((xr > yr && !isnan(xi) && !isnan(yi)) \
                           || (xr == yr && xi >= yi))
@@ -2363,6 +2438,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n,
     }
 }
 
+/* TODO: USE MKL */
 /**begin repeat1
  * arithmetic
  * #kind = add, subtract#
@@ -2396,6 +2472,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
 }
 /**end repeat1**/
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -2409,6 +2486,7 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -2557,6 +2635,7 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
     UNARY_LOOP {
@@ -2567,6 +2646,7 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in
     }
 }
 
+/* TODO: USE MKL */
 void
 mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
 {