Merge branch 'master' into scipy-runtime

vtavana · web-flow · commit e809383a7f86 · 2025-05-30T10:28:18.000-05:00
diff --git a/README.md b/README.md
@@ -4,8 +4,17 @@
 [![Conda package with conda-forge channel only](https://github.com/IntelPython/mkl_fft/actions/workflows/conda-package-cf.yml/badge.svg)](https://github.com/IntelPython/mkl_fft/actions/workflows/conda-package-cf.yml)
 [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/IntelPython/mkl_fft/badge)](https://securityscorecards.dev/viewer/?uri=github.com/IntelPython/mkl_fft)
 
+# Introduction
 `mkl_fft` started as a part of Intel® Distribution for Python* optimizations to NumPy, and is now being released
-as a stand-alone package. It can be installed into conda environment from Intel's channel using:
+as a stand-alone package. It offers a thin layered interface for the Intel® oneAPI Math Kernel Library (OneMKL) FFT functionality that allows efficient access to native FFT optimizations from a range of NumPy and SciPy functions. As a result, its performance is close to the performance of native C/Intel® OneMKL. The optimizations are provided for real and complex data types in both single and double precision, for in-place and out-of-place modes of operation. To analyze the performance, use the [FFT benchmarks](https://github.com/intelpython/fft_benchmark).
+
+Thanks to Intel® OneMKL’s flexible support for arbitrarily strided input and output arrays, both one-dimensional and multi-dimensional Fast Fourier Transforms along distinct axes can be performed directly, without the need to copy the input into a contiguous array first. Furthermore, input strides can be arbitrary, including negative or zero, as long as the strides remain an integer multiple of the array’s item size; otherwise, a copy will be made.
+
+More details can be found in ["Accelerating Scientific Python with Intel Optimizations"](https://proceedings.scipy.org/articles/shinma-7f4c6e7-00f) from Proceedings of the 16th Python in Science Conference (SciPy 2017).
+
+---
+# Installation
+`mkl_fft` can be installed into a conda environment from Intel's channel using:
 
 ```
    conda install -c https://software.repos.intel.com/python/conda mkl_fft
@@ -34,22 +43,12 @@ If command above installs NumPy package from the PyPI, please use following comm
 Where `<numpy_version>` should be the latest version from https://software.repos.intel.com/python/conda/
 
 ---
+# How to use?
+## `mkl_fft.interfaces` module
+The recommended way to use the `mkl_fft` package is through the `mkl_fft.interfaces` module. These interfaces act as drop-in replacements for equivalent functions in NumPy and SciPy. Learn more about these interfaces [here](https://github.com/IntelPython/mkl_fft/blob/master/mkl_fft/interfaces/README.md).
 
-Since MKL FFT supports performing discrete Fourier transforms over non-contiguously laid out arrays, OneMKL can be directly
-used on any well-behaved floating point array with no internal overlaps for both in-place and not in-place transforms of
-arrays in single and double floating point precision.
-
-This eliminates the need to copy input array contiguously into an intermediate buffer.
-
-`mkl_fft` directly supports N-dimensional Fourier transforms.
-
-More details can be found in [SciPy 2017 conference proceedings](https://github.com/scipy-conference/scipy_proceedings/tree/2017/papers/oleksandr_pavlyk).
-
----
-
-The `mkl_fft` package offers interfaces that act as drop-in replacements for equivalent functions in NumPy and SciPy. Learn more about these interfaces [here](https://github.com/IntelPython/mkl_fft/blob/master/mkl_fft/interfaces/README.md).
-
-While using these interfaces is the easiest way to leverage `mk_fft`, one can also use `mkl_fft` directly with the following FFT functions:
+## `mkl_fft` package
+While using the interfaces module is the recommended way to leverage `mkl_fft`, one can also use `mkl_fft` directly with the following FFT functions:
 
 ### complex-to-complex (c2c) transforms:
 
@@ -84,6 +83,7 @@ numpy.allclose(mkl_res, np_res)
 ```
 
 ---
+# Building from source
 
 To build `mkl_fft` from sources on Linux with Intel® OneMKL:
   - create a virtual environment: `python3 -m venv fft_env`
diff --git a/mkl_fft/_pydfti.pyx b/mkl_fft/_pydfti.pyx
@@ -203,7 +203,7 @@ cdef cnp.ndarray _pad_array(
     b_shape[axis] = n
 
     # allocating temporary buffer
-    x_arr_is_fortran = cnp.PyArray_CHKFLAGS(x_arr, cnp.NPY_F_CONTIGUOUS)
+    x_arr_is_fortran = cnp.PyArray_CHKFLAGS(x_arr, cnp.NPY_ARRAY_F_CONTIGUOUS)
     b_arr = <cnp.ndarray> cnp.PyArray_EMPTY(
         b_ndim, b_shape, <cnp.NPY_TYPES> b_type, x_arr_is_fortran
     )  # 0 for C-contiguous
@@ -249,9 +249,12 @@ cdef cnp.ndarray _process_arguments(
 
     # convert x to ndarray, ensure that strides are multiples of itemsize
     x_arr = PyArray_CheckFromAny(
-          x, NULL, 0, 0,
-          cnp.NPY_ELEMENTSTRIDES | cnp.NPY_ENSUREARRAY | cnp.NPY_NOTSWAPPED,
-          NULL)
+        x, NULL, 0, 0,
+        cnp.NPY_ARRAY_ELEMENTSTRIDES |
+        cnp.NPY_ARRAY_ENSUREARRAY |
+        cnp.NPY_ARRAY_NOTSWAPPED,
+        NULL
+    )
 
     if (<void *> x_arr) is NULL:
         raise ValueError("An input argument x is not an array-like object")
@@ -319,7 +322,7 @@ cdef cnp.ndarray _allocate_result(
         f_shape[axis_] = n_
 
     # allocating output buffer
-    x_arr_is_fortran = cnp.PyArray_CHKFLAGS(x_arr, cnp.NPY_F_CONTIGUOUS)
+    x_arr_is_fortran = cnp.PyArray_CHKFLAGS(x_arr, cnp.NPY_ARRAY_F_CONTIGUOUS)
     f_arr = <cnp.ndarray> cnp.PyArray_EMPTY(
         f_ndim, f_shape, <cnp.NPY_TYPES> f_type, x_arr_is_fortran
     )  # 0 for C-contiguous
@@ -419,7 +422,9 @@ def _c2c_fft1d_impl(
         # so we cast to complex double and operate in place
         try:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_CDOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
+                x_arr, cnp.NPY_CDOUBLE,
+                cnp.NPY_ARRAY_BEHAVED | cnp.NPY_ARRAY_ENSURECOPY
+            )
         except:
             raise ValueError(
                 "First argument must be a complex "
@@ -601,9 +606,9 @@ def _r2c_fft1d_impl(
     else:
         # we must cast the input to doubles and allocate the output,
         try:
-            requirement = cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY
+            requirement = cnp.NPY_ARRAY_BEHAVED | cnp.NPY_ARRAY_ENSURECOPY
             if x_type is cnp.NPY_LONGDOUBLE:
-                requirement = requirement | cnp.NPY_FORCECAST
+                requirement = requirement | cnp.NPY_ARRAY_FORCECAST
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
                 x_arr, cnp.NPY_DOUBLE, requirement)
             x_type = cnp.PyArray_TYPE(x_arr)
@@ -705,11 +710,11 @@ def _c2r_fft1d_impl(
         # so we cast to complex double and operate in place
         if x_type is cnp.NPY_FLOAT:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_CFLOAT, cnp.NPY_BEHAVED
+                x_arr, cnp.NPY_CFLOAT, cnp.NPY_ARRAY_BEHAVED
             )
         else:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_CDOUBLE, cnp.NPY_BEHAVED
+                x_arr, cnp.NPY_CDOUBLE, cnp.NPY_ARRAY_BEHAVED
             )
         x_type = cnp.PyArray_TYPE(x_arr)
         in_place = 1
@@ -788,9 +793,12 @@ def _direct_fftnd(
 
     # convert x to ndarray, ensure that strides are multiples of itemsize
     x_arr = PyArray_CheckFromAny(
-          x, NULL, 0, 0,
-          cnp.NPY_ELEMENTSTRIDES | cnp.NPY_ENSUREARRAY | cnp.NPY_NOTSWAPPED,
-          NULL)
+        x, NULL, 0, 0,
+        cnp.NPY_ARRAY_ELEMENTSTRIDES |
+        cnp.NPY_ARRAY_ENSUREARRAY |
+        cnp.NPY_ARRAY_NOTSWAPPED,
+        NULL
+    )
 
     if <void *> x_arr is NULL:
         raise ValueError("An input argument x is not an array-like object")
@@ -808,7 +816,9 @@ def _direct_fftnd(
         pass
     else:
         x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-            x_arr, cnp.NPY_CDOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
+            x_arr, cnp.NPY_CDOUBLE,
+            cnp.NPY_ARRAY_BEHAVED | cnp.NPY_ARRAY_ENSURECOPY
+        )
         x_type = cnp.PyArray_TYPE(x_arr)
         assert x_type == cnp.NPY_CDOUBLE
         in_place = 1
@@ -1003,7 +1013,9 @@ def _rr_fft1d_impl(x, n=None, axis=-1, overwrite_x=False, double fsc=1.0):
     else:
         try:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_DOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
+                x_arr, cnp.NPY_DOUBLE,
+                cnp.NPY_ARRAY_BEHAVED | cnp.NPY_ARRAY_ENSURECOPY
+            )
         except:
             raise TypeError("1st argument must be a real sequence")
         x_type = cnp.PyArray_TYPE(x_arr)
@@ -1065,7 +1077,9 @@ def _rr_ifft1d_impl(x, n=None, axis=-1, overwrite_x=False, double fsc=1.0):
         # so we cast to complex double and operate in place
         try:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_DOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
+                x_arr, cnp.NPY_DOUBLE,
+                cnp.NPY_ARRAY_BEHAVED | cnp.NPY_ARRAY_ENSURECOPY
+            )
         except:
             raise ValueError(
                 "First argument should be a real "
diff --git a/mkl_fft/interfaces/numpy_fft.py b/mkl_fft/interfaces/numpy_fft.py
@@ -24,9 +24,6 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-# Added for completing the namespaces
-from numpy.fft import fftfreq, fftshift, ifftshift, rfftfreq
-
 # pylint: disable=no-name-in-module
 from ._numpy_fft import (
     fft,
@@ -60,8 +57,11 @@
     "irfftn",
     "hfft",
     "ihfft",
-    "fftshift",
-    "ifftshift",
-    "fftfreq",
-    "rfftfreq",
 ]
+
+# It is important to put the following import here to avoid circular imports
+# when patching numpy with mkl_fft
+# Added for completing the namespaces
+from numpy.fft import fftfreq, fftshift, ifftshift, rfftfreq
+
+__all__ += ["fftshift", "ifftshift", "fftfreq", "rfftfreq"]