diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..05c1669 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @oleksandr-pavlyk @xaleryb @ekomarova diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..5ace460 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml new file mode 100644 index 0000000..0c5bd0b --- /dev/null +++ b/.github/workflows/conda-package.yml @@ -0,0 +1,290 @@ +name: Conda package + +on: push + +env: + PACKAGE_NAME: mkl_umath + MODULE_NAME: mkl_umath + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10', '3.11', '3.12'] + steps: + - uses: actions/checkout@v4.1.7 + with: + fetch-depth: 0 + + - name: Set pkgs_dirs + run: | + echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + + - name: Cache conda packages + uses: actions/cache@v4 + env: + CACHE_NUMBER: 0 # Increase to reset cache + with: + path: ~/.conda/pkgs + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Add conda to system path + run: echo $CONDA/bin >> $GITHUB_PATH + + - name: Install conda-build + run: conda install conda-build + + - name: Build conda package + run: | + CHANNELS="-c conda-forge -c https://software.repos.intel.com/python/conda --override-channels" + VERSIONS="--python ${{ matrix.python }}" + TEST="--no-test" + echo "CONDA_BLD=${CONDA}/conda-bld/linux-64" >> $GITHUB_ENV + + conda build \ + $TEST \ + $VERSIONS \ + $CHANNELS \ + conda-recipe-cf + + - name: Upload artifact + uses: actions/upload-artifact@v4.4.0 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.CONDA_BLD }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 + + test: + needs: build + runs-on: ${{ matrix.runner }} + + strategy: + matrix: + python: ['3.10', '3.11', '3.12'] + experimental: [false] + runner: [ubuntu-latest] + continue-on-error: ${{ matrix.experimental }} + env: + CHANNELS: -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels + + steps: + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - name: Add conda to system path + run: echo $CONDA/bin >> $GITHUB_PATH + - name: Install conda-build + run: conda install conda-build + - name: Create conda channel + run: | + mkdir -p $GITHUB_WORKSPACE/channel/linux-64 + mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64 + conda index $GITHUB_WORKSPACE/channel + # Test channel + conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels + + - name: Collect dependencies + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + conda create -n test_mkl_umath $PACKAGE_NAME python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + - name: Display lockfile + run: cat lockfile + + - name: Set pkgs_dirs + run: | + echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + + - name: Cache conda packages + uses: actions/cache@v4 + env: + CACHE_NUMBER: 0 # Increase to reset cache + with: + path: ~/.conda/pkgs + key: + ${{ runner.os }}-conda-${{ 
env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Install mkl_umath + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + conda create -n test_mkl_umath python=${{ matrix.python }} $PACKAGE_NAME pytest $CHANNELS + # Test installed packages + conda list -n test_mkl_umath + + - name: Run tests + run: | + source $CONDA/etc/profile.d/conda.sh + conda activate test_mkl_umath + python -c "import mkl_umath, numpy as np; mkl_umath.use_in_numpy(); np.sin(np.linspace(0, 1, num=10**6));" + + build_windows: + runs-on: windows-2019 + + strategy: + matrix: + python: ['3.10', '3.11', '3.12'] + env: + conda-bld: C:\Miniconda\conda-bld\win-64\ + steps: + - uses: actions/checkout@v4.1.7 + with: + fetch-depth: 0 + + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: build + channels: conda-forge + python-version: ${{ matrix.python }} + + - name: Cache conda packages + uses: actions/cache@v4 + env: + CACHE_NUMBER: 3 # Increase to reset cache + with: + path: /home/runner/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('**/meta.yaml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + + - name: Store conda paths as envs + shell: bash -l {0} + run: | + echo "CONDA_BLD=$CONDA/conda-bld/win-64/" | tr "\\\\" '/' >> $GITHUB_ENV + + - name: Install conda build + run: | + conda activate + conda install -y conda-build + conda list -n base + + - name: Build conda package + run: | + conda activate + conda build --no-test --python ${{ matrix.python }} -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels conda-recipe-cf + + - name: Upload artifact + uses: actions/upload-artifact@v4.4.0 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 + + test_windows: + needs: build_windows + runs-on: ${{ matrix.runner }} + defaults: + run: + shell: cmd /C CALL {0} + strategy: + matrix: + python: ['3.10', '3.11', '3.12'] + experimental: [false] + runner: [windows-2019] + continue-on-error: ${{ matrix.experimental }} + env: + workdir: '${{ github.workspace }}' + CHANNELS: -c conda-forge -c https://software.repos.intel.com/python/conda --override-channels + + steps: + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + conda-build-version: '*' + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: mkl_umath_test + channels: conda-forge + python-version: ${{ matrix.python }} + + - name: Create conda channel with the artifact bit + shell: cmd /C CALL {0} + run: | + echo ${{ env.workdir }} + mkdir ${{ env.workdir }}\channel\win-64 + move ${{ env.PACKAGE_NAME }}-*.tar.bz2 ${{ env.workdir }}\channel\win-64 + dir ${{ env.workdir }}\channel\win-64 + + - name: Index the channel + shell: cmd /C CALL {0} + run: | + conda index ${{ env.workdir }}\channel + + - name: Dump mkl_umath version info from created channel to STDOUT + shell: cmd /C CALL {0} + run: | + conda search ${{ 
env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json + - name: Dump mkl_umath version info from created channel into ver.json + shell: cmd /C CALL {0} + run: | + conda search ${{ env.PACKAGE_NAME }} -c ${{ env.workdir }}/channel --override-channels --info --json > ${{ env.workdir }}\ver.json + - name: Output content of workdir + shell: pwsh + run: Get-ChildItem -Path ${{ env.workdir }} + - name: Output content of produced ver.json + shell: pwsh + run: Get-Content -Path ${{ env.workdir }}\ver.json + - name: Collect dependencies + shell: cmd /C CALL {0} + run: | + IF NOT EXIST ver.json ( + copy /Y ${{ env.workdir }}\ver.json . + ) + SET "SCRIPT=%VER_SCRIPT1% %VER_SCRIPT2%" + FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( + SET PACKAGE_VERSION=%%F + ) + conda install -n mkl_umath_test ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile + - name: Display lockfile content + shell: pwsh + run: Get-Content -Path .\lockfile + - name: Cache conda packages + uses: actions/cache@v4 + env: + CACHE_NUMBER: 0 # Increase to reset cache + with: + path: /home/runner/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + - name: Install mkl_umath + shell: cmd /C CALL {0} + run: | + @ECHO ON + IF NOT EXIST ver.json ( + copy /Y ${{ env.workdir }}\ver.json . + ) + set "SCRIPT=%VER_SCRIPT1% %VER_SCRIPT2%" + FOR /F "tokens=* USEBACKQ" %%F IN (`python -c "%SCRIPT%"`) DO ( + SET PACKAGE_VERSION=%%F + ) + SET "TEST_DEPENDENCIES=pytest pytest-cov" + conda install -n mkl_umath_test ${{ env.PACKAGE_NAME }}=%PACKAGE_VERSION% %TEST_DEPENDENCIES% python=${{ matrix.python }} -c ${{ env.workdir }}/channel ${{ env.CHANNELS }} + - name: Report content of test environment + shell: cmd /C CALL {0} + run: | + conda activate + echo "Value of CONDA enviroment variable was: " %CONDA% + echo "Value of CONDA_PREFIX enviroment variable was: " %CONDA_PREFIX% + conda info && conda list -n mkl_umath_test + - name: Run tests + shell: cmd /C CALL {0} + run: >- + conda activate mkl_umath_test && python -c "import mkl_umath, numpy as np; mkl_umath.use_in_numpy(); np.sin(np.linspace(0, 1, num=10**6));" + diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml new file mode 100644 index 0000000..586f7bc --- /dev/null +++ b/.github/workflows/openssf-scorecard.yml @@ -0,0 +1,74 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '28 2 * * 1' + - cron: '28 2 * * 4' + push: + branches: [ "master" ] + +# Declare default permissions as read only. 
+permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: SARIF file + path: results.sarif + retention-days: 14 + + # Upload the results to GitHub's code scanning dashboard. + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 + with: + sarif_file: results.sarif diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7cc71d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,96 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions and binary files +*.o +*.so +*.so.* +*.exe +*.lib +*.dll + +# CMake build and local install directory +build +_skbuild +build_cmake +install + +# Code project files +.vscode + +# Eclipse project files +.project +.pydevproject + +# Emacs temp files +*~ + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg +dpctl_conda_pkg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ +junit.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv python configuration file +.python-version + +_cmake_test_compile + +# generated numpy files +mkl_umath/src/__umath_generated.c +mkl_umath/src/mkl_umath_loops.c +mkl_umath/src/mkl_umath_loops.h +mkl_umath/src/_patch.c + +# moved cmake scripts +dpctl/resources/cmake diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..a4e3533 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,145 @@ +cmake_minimum_required(VERSION 3.27...3.28 FATAL_ERROR) + +cmake_policy(SET CMP0135 NEW) + +project(mkl_umath + LANGUAGES C + DESCRIPTION "mkl_umath module" +) + +option(OPTIMIZATION_REPORT + "Whether to generate optimization vectorization report" + OFF +) + +find_package(Python COMPONENTS Interpreter Development NumPy REQUIRED) + +# Print out the discovered paths +include(CMakePrintHelpers) +cmake_print_variables(Python_INCLUDE_DIRS) +cmake_print_variables(Python_LIBRARIES) +cmake_print_variables(Python_NumPy_INCLUDE_DIRS) + +set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") +find_package(Cython REQUIRED) + +set(MKL_LINK sdl) +find_package(MKL REQUIRED) + +if(WIN32) + string(CONCAT WARNING_FLAGS + "-Wall " + "-Wextra " + "-Winit-self " + "-Wunused-function " + "-Wuninitialized " + "-Wmissing-declarations " + "-Wstrict-prototypes " + "-Wno-unused-parameter " + "-Wno-implicit-function-declaration " + ) + string(CONCAT SDL_FLAGS + "/GS " + "/DynamicBase " + ) + string(CONCAT PRECISION_FLAGS + "/fp:fast=2 " + "/Qimf-precision=high " + "/Qprec-sqrt " + "/Qprotect-parens " + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS} ${PRECISION_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" + ) + set(MKL_UMATH_LINKER_OPTIONS "LINKER:/NXCompat;LINKER:/DynamicBase") +elseif(UNIX) + string(CONCAT WARNING_FLAGS + "-Wall " + "-Wextra " + "-Winit-self " + "-Wunused-function " + "-Wuninitialized " + "-Wmissing-declarations " + "-Wstrict-prototypes " + "-Wno-unused-parameter " + "-fdiagnostics-color=auto " + ) + string(CONCAT SDL_FLAGS + "-fstack-protector " + "-fstack-protector-all " + "-fpic " + "-fPIC " + "-D_FORTIFY_SOURCE=2 " + "-Wformat " + "-Wformat-security " +# "-fno-strict-overflow " # no-strict-overflow is implied by -fwrapv + "-fno-delete-null-pointer-checks " + "-fwrapv " + ) + string(CONCAT CFLAGS + "${WARNING_FLAGS}" + "${SDL_FLAGS}" + ) + string(CONCAT PRECISION_FLAGS + "-prec-sqrt " + "-fprotect-parens " + "-fimf-precision=high " + "-fp-model fast=2 " + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS} ${PRECISION_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -O0 -g1 -DDEBUG" + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-incompatible-function-pointer-types ${CFLAGS}") + set(MKL_UMATH_LINKER_OPTIONS "LINKER:-z,noexecstack,-z,relro,-z,now") +else() + message(FATAL_ERROR "Unsupported system.") +endif() + +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +# set_property(GLOBAL PROPERTY GLOBAL_DEPENDS_DEBUG_MODE 1) +set(_linker_options ${MKL_UMATH_LINKER_OPTIONS}) + +set(_trgt mkl_umath_loops) +add_library(${_trgt} SHARED mkl_umath/src/mkl_umath_loops.c) +set_target_properties(${_trgt} 
PROPERTIES + CMAKE_POSITION_INDEPENDENT_CODE ON + C_STANDARD 99 +) +target_include_directories(${_trgt} PUBLIC mkl_umath/src/ ${Python_NumPy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) +target_link_libraries(${_trgt} PUBLIC MKL::MKL ${Python_LIBRARIES}) +target_link_options(${_trgt} PUBLIC ${_linker_options}) +target_compile_options(${_trgt} PUBLIC -fveclib=SVML) +target_compile_options(${_trgt} PUBLIC -fvectorize) +if(OPTIMIZATION_REPORT) + target_compile_options(${_trgt} PRIVATE -qopt-report=3) +endif() +install(TARGETS ${_trgt} + LIBRARY DESTINATION mkl_umath + ARCHIVE DESTINATION mkl_umath + RUNTIME DESTINATION mkl_umath +) + +python_add_library(_ufuncs MODULE WITH_SOABI "mkl_umath/src/ufuncsmodule.c" "mkl_umath/src/__umath_generated.c") +target_include_directories(_ufuncs PRIVATE "mkl_umath/src" ${Python_NumPy_INCLUDE_DIRS} ${MKL_INCLUDE_DIR}) +target_compile_definitions(_ufuncs PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) +target_link_options(_ufuncs PRIVATE ${_linker_options}) +target_link_libraries(_ufuncs PRIVATE mkl_umath_loops) +set_target_properties(_ufuncs PROPERTIES C_STANDARD 99) +if (UNIX) + set_target_properties(_ufuncs PROPERTIES INSTALL_RPATH "$ORIGIN") +endif() +install(TARGETS _ufuncs LIBRARY DESTINATION mkl_umath) + +add_cython_target(_patch "mkl_umath/src/_patch.pyx" C OUTPUT_VAR _generated_src) +Python_add_library(_patch MODULE WITH_SOABI ${_generated_src}) +target_include_directories(_patch PRIVATE "mkl_umath/src/" ${Python_NumPy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) +target_compile_definitions(_patch PUBLIC NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) +target_link_libraries(_patch PRIVATE mkl_umath_loops) +set_target_properties(_patch PROPERTIES C_STANDARD 99) +if (UNIX) + set_target_properties(_patch PROPERTIES INSTALL_RPATH "$ORIGIN") +endif() +install(TARGETS _patch LIBRARY DESTINATION mkl_umath) diff --git a/README.md b/README.md index a9f571c..0e2bd0b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Patches were factored out per community feedback ([NEP-36](https://numpy.org/nep as a stand-alone package. It can be installed into conda environment using ``` - conda install -c intel mkl_umath + conda install -c https://software.repos.intel.com/python/conda mkl_umath ``` --- @@ -18,17 +18,9 @@ as a stand-alone package. It can be installed into conda environment using To install mkl_umath Pypi package please use following command: ``` - python -m pip install --i https://pypi.anaconda.org/intel/simple -extra-index-url https://pypi.org/simple mkl_umath + python -m pip install mkl_umath ``` -If command above installs NumPy package from the Pypi, please use following command to install Intel optimized NumPy wheel package from Anaconda Cloud: - -``` - python -m pip install --i https://pypi.anaconda.org/intel/simple -extra-index-url https://pypi.org/simple mkl_umath numpy== -``` - -Where `` should be the latest version from https://anaconda.org/intel/numpy - --- ## Building @@ -36,7 +28,7 @@ Where `` should be the latest version from https://anaconda.org/i Intel(R) C compiler and Intel(R) Math Kernel Library are required to build `mkl_umath` from source: ```sh -# ensure that MKL is installed, icc is activated +# ensure that MKL is installed into Python prefix, Intel LLVM compiler is activated export MKLROOT=$CONDA_PREFIX -python setup.py config_cc --compiler=intelem build_ext --inplace +CC=icx pip install --no-build-isolation --no-deps -e . 
``` diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..556938b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,12 @@ +# Security Policy + +## Report a Vulnerability + +Please report security issues or vulnerabilities to the [Intel® Security Center]. + +For more information on how Intel® works to resolve security issues, see +[Vulnerability Handling Guidelines]. + +[Intel® Security Center]:https://www.intel.com/content/www/us/en/security-center/default.html + +[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html diff --git a/_vendored/README.md b/_vendored/README.md new file mode 100644 index 0000000..0ebafcb --- /dev/null +++ b/_vendored/README.md @@ -0,0 +1,5 @@ +## Vendored files + +File `conv_template.py` is copied from NumPy's numpy/distutils folder, since +`numpy.distutils` is absent from the installation layout starting with +Python 3.12 \ No newline at end of file diff --git a/_vendored/__init__.py b/_vendored/__init__.py new file mode 100644 index 0000000..fa81ada --- /dev/null +++ b/_vendored/__init__.py @@ -0,0 +1 @@ +# empty file diff --git a/_vendored/conv_template.py b/_vendored/conv_template.py new file mode 100644 index 0000000..c8933d1 --- /dev/null +++ b/_vendored/conv_template.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +""" +takes templated file .xxx.src and produces .xxx file where .xxx is +.i or .c or .h, using the following template rules + +/**begin repeat -- on a line by itself marks the start of a repeated code + segment +/**end repeat**/ -- on a line by itself marks it's end + +After the /**begin repeat and before the */, all the named templates are placed +these should all have the same number of replacements + +Repeat blocks can be nested, with each nested block labeled with its depth, +i.e. +/**begin repeat1 + *.... + */ +/**end repeat1**/ + +When using nested loops, you can optionally exclude particular +combinations of the variables using (inside the comment portion of the inner loop): + + :exclude: var1=value1, var2=value2, ... + +This will exclude the pattern where var1 is value1 and var2 is value2 when +the result is being generated. + + +In the main body each replace will use one entry from the list of named replacements + + Note that all #..# forms in a block must have the same number of + comma-separated entries. + +Example: + + An input file containing + + /**begin repeat + * #a = 1,2,3# + * #b = 1,2,3# + */ + + /**begin repeat1 + * #c = ted, jim# + */ + @a@, @b@, @c@ + /**end repeat1**/ + + /**end repeat**/ + + produces + + line 1 "template.c.src" + + /* + ********************************************************************* + ** This file was autogenerated from a template DO NOT EDIT!!** + ** Changes should be made to the original source (.src) file ** + ********************************************************************* + */ + + #line 9 + 1, 1, ted + + #line 9 + 1, 1, jim + + #line 9 + 2, 2, ted + + #line 9 + 2, 2, jim + + #line 9 + 3, 3, ted + + #line 9 + 3, 3, jim + +""" + +__all__ = ['process_str', 'process_file'] + +import os +import sys +import re + +# names for replacement that are already global. +global_names = {} + +# header placed at the front of head processed file +header =\ +""" +/* + ***************************************************************************** + ** This file was autogenerated from a template DO NOT EDIT!!!! 
** + ** Changes should be made to the original source (.src) file ** + ***************************************************************************** + */ + +""" +# Parse string for repeat loops +def parse_structure(astr, level): + """ + The returned line number is from the beginning of the string, starting + at zero. Returns an empty list if no loops found. + + """ + if level == 0 : + loopbeg = "/**begin repeat" + loopend = "/**end repeat**/" + else : + loopbeg = "/**begin repeat%d" % level + loopend = "/**end repeat%d**/" % level + + ind = 0 + line = 0 + spanlist = [] + while True: + start = astr.find(loopbeg, ind) + if start == -1: + break + start2 = astr.find("*/", start) + start2 = astr.find("\n", start2) + fini1 = astr.find(loopend, start2) + fini2 = astr.find("\n", fini1) + line += astr.count("\n", ind, start2+1) + spanlist.append((start, start2+1, fini1, fini2+1, line)) + line += astr.count("\n", start2+1, fini2) + ind = fini2 + spanlist.sort() + return spanlist + + +def paren_repl(obj): + torep = obj.group(1) + numrep = obj.group(2) + return ','.join([torep]*int(numrep)) + +parenrep = re.compile(r"\(([^)]*)\)\*(\d+)") +plainrep = re.compile(r"([^*]+)\*(\d+)") +def parse_values(astr): + # replaces all occurrences of '(a,b,c)*4' in astr + # with 'a,b,c,a,b,c,a,b,c,a,b,c'. Empty braces generate + # empty values, i.e., ()*4 yields ',,,'. The result is + # split at ',' and a list of values returned. + astr = parenrep.sub(paren_repl, astr) + # replaces occurrences of xxx*3 with xxx, xxx, xxx + astr = ','.join([plainrep.sub(paren_repl, x.strip()) + for x in astr.split(',')]) + return astr.split(',') + + +stripast = re.compile(r"\n\s*\*?") +named_re = re.compile(r"#\s*(\w*)\s*=([^#]*)#") +exclude_vars_re = re.compile(r"(\w*)=(\w*)") +exclude_re = re.compile(":exclude:") +def parse_loop_header(loophead) : + """Find all named replacements in the header + + Returns a list of dictionaries, one for each loop iteration, + where each key is a name to be substituted and the corresponding + value is the replacement string. + + Also return a list of exclusions. The exclusions are dictionaries + of key value pairs. There can be more than one exclusion. + [{'var1':'value1', 'var2', 'value2'[,...]}, ...] + + """ + # Strip out '\n' and leading '*', if any, in continuation lines. + # This should not effect code previous to this change as + # continuation lines were not allowed. 
+ loophead = stripast.sub("", loophead) + # parse out the names and lists of values + names = [] + reps = named_re.findall(loophead) + nsub = None + for rep in reps: + name = rep[0] + vals = parse_values(rep[1]) + size = len(vals) + if nsub is None : + nsub = size + elif nsub != size : + msg = "Mismatch in number of values, %d != %d\n%s = %s" + raise ValueError(msg % (nsub, size, name, vals)) + names.append((name, vals)) + + + # Find any exclude variables + excludes = [] + + for obj in exclude_re.finditer(loophead): + span = obj.span() + # find next newline + endline = loophead.find('\n', span[1]) + substr = loophead[span[1]:endline] + ex_names = exclude_vars_re.findall(substr) + excludes.append(dict(ex_names)) + + # generate list of dictionaries, one for each template iteration + dlist = [] + if nsub is None : + raise ValueError("No substitution variables found") + for i in range(nsub): + tmp = {name: vals[i] for name, vals in names} + dlist.append(tmp) + return dlist + +replace_re = re.compile(r"@(\w+)@") +def parse_string(astr, env, level, line) : + lineno = "#line %d\n" % line + + # local function for string replacement, uses env + def replace(match): + name = match.group(1) + try : + val = env[name] + except KeyError: + msg = 'line %d: no definition of key "%s"'%(line, name) + raise ValueError(msg) from None + return val + + code = [lineno] + struct = parse_structure(astr, level) + if struct : + # recurse over inner loops + oldend = 0 + newlevel = level + 1 + for sub in struct: + pref = astr[oldend:sub[0]] + head = astr[sub[0]:sub[1]] + text = astr[sub[1]:sub[2]] + oldend = sub[3] + newline = line + sub[4] + code.append(replace_re.sub(replace, pref)) + try : + envlist = parse_loop_header(head) + except ValueError as e: + msg = "line %d: %s" % (newline, e) + raise ValueError(msg) + for newenv in envlist : + newenv.update(env) + newcode = parse_string(text, newenv, newlevel, newline) + code.extend(newcode) + suff = astr[oldend:] + code.append(replace_re.sub(replace, suff)) + else : + # replace keys + code.append(replace_re.sub(replace, astr)) + code.append('\n') + return ''.join(code) + +def process_str(astr): + code = [header] + code.extend(parse_string(astr, global_names, 0, 1)) + return ''.join(code) + + +include_src_re = re.compile(r"(\n|\A)#include\s*['\"]" + r"(?P[\w\d./\\]+[.]src)['\"]", re.I) + +def resolve_includes(source): + d = os.path.dirname(source) + with open(source) as fid: + lines = [] + for line in fid: + m = include_src_re.match(line) + if m: + fn = m.group('name') + if not os.path.isabs(fn): + fn = os.path.join(d, fn) + if os.path.isfile(fn): + lines.extend(resolve_includes(fn)) + else: + lines.append(line) + else: + lines.append(line) + return lines + +def process_file(source): + lines = resolve_includes(source) + sourcefile = os.path.normcase(source).replace("\\", "\\\\") + try: + code = process_str(''.join(lines)) + except ValueError as e: + raise ValueError('In "%s" loop at %s' % (sourcefile, e)) from None + return '#line 1 "%s"\n%s' % (sourcefile, code) + + +def unique_key(adict): + # this obtains a unique key given a dictionary + # currently it works by appending together n of the letters of the + # current keys and increasing n until a unique key is found + # -- not particularly quick + allkeys = list(adict.keys()) + done = False + n = 1 + while not done: + newkey = "".join([x[:n] for x in allkeys]) + if newkey in allkeys: + n += 1 + else: + done = True + return newkey + + +def main(): + try: + file = sys.argv[1] + except IndexError: + fid = sys.stdin + 
outfile = sys.stdout + else: + fid = open(file, 'r') + (base, ext) = os.path.splitext(file) + newname = base + outfile = open(newname, 'w') + + allstr = fid.read() + try: + writestr = process_str(allstr) + except ValueError as e: + raise ValueError("In %s loop at %s" % (file, e)) from None + + outfile.write(writestr) + +if __name__ == "__main__": + main() diff --git a/conda-recipe-cf/bld.bat b/conda-recipe-cf/bld.bat new file mode 100644 index 0000000..e27318d --- /dev/null +++ b/conda-recipe-cf/bld.bat @@ -0,0 +1,25 @@ +REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 +set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" +set "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" + +"%PYTHON%" setup.py clean --all +set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @( + REM set DIR_HINT if directory exists + IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( + SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" + ) +) + +if NOT "%WHEELS_OUTPUT_FOLDER%"=="" ( + rem Install and assemble wheel package from the build bits + "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS% + if errorlevel 1 exit 1 + copy dist\mkl_umath*.whl %WHEELS_OUTPUT_FOLDER% + if errorlevel 1 exit 1 +) ELSE ( + rem Only install + "%PYTHON%" setup.py install %SKBUILD_ARGS% + if errorlevel 1 exit 1 +) diff --git a/conda-recipe-cf/build.sh b/conda-recipe-cf/build.sh new file mode 100644 index 0000000..2792f27 --- /dev/null +++ b/conda-recipe-cf/build.sh @@ -0,0 +1,19 @@ +# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc in build prefix +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" + +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icx_for_conda.cfg +export ICXCFG="$(pwd)/icx_for_conda.cfg" + +export CMAKE_GENERATOR="Ninja" +SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then + # Install packages and assemble wheel package from built bits + WHEELS_BUILD_ARGS="-p manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" + ${PYTHON} setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} + cp dist/mkl_umath*.whl ${WHEELS_OUTPUT_FOLDER} +else + # Perform regular install + ${PYTHON} setup.py install ${SKBUILD_ARGS} +fi diff --git a/conda-recipe-cf/meta.yaml b/conda-recipe-cf/meta.yaml new file mode 100644 index 0000000..4ecf657 --- /dev/null +++ b/conda-recipe-cf/meta.yaml @@ -0,0 +1,54 @@ +{% set version = "0.1.2" %} +{% set buildnumber = 0 %} + +package: + name: mkl_umath + version: {{ version }} + +source: + path: ../ + +build: + number: {{ buildnumber }} + ignore_run_exports: + - blas + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('dpcpp') }} >=2024.2 # [not osx] + - sysroot_linux-64 >=2.28 # [linux] + host: + - setuptools + - cmake + - ninja + - git + - cython + - scikit-build + - python + - mkl-devel + - numpy + run: + - python + - mkl + - mkl-service + - {{ pin_compatible('intel-cmplr-lib-rt') }} + +test: + requires: + - pytest + source_files: + - mkl_umath/tests/test_basic.py + commands: + - pytest mkl_umath/tests/test_basic.py + imports: + - mkl_umath + - mkl_umath._ufuncs + - mkl_umath._patch + +about: + home: http://github.com/IntelPython/mkl_umath + license: BSD-3 + license_file: LICENSE.txt + 
summary: Universal functions for real and complex floating point arrays powered by Intel(R) Math Kernel Library Vector (Intel(R) MKL) and Intel(R) Short Vector Math Library (Intel(R) SVML) diff --git a/conda-recipe-cf/run_tests.bat b/conda-recipe-cf/run_tests.bat new file mode 100644 index 0000000..590db89 --- /dev/null +++ b/conda-recipe-cf/run_tests.bat @@ -0,0 +1 @@ +%PYTHON% tests\test_basic.py \ No newline at end of file diff --git a/conda-recipe-cf/run_tests.sh b/conda-recipe-cf/run_tests.sh new file mode 100644 index 0000000..7bfca5d --- /dev/null +++ b/conda-recipe-cf/run_tests.sh @@ -0,0 +1 @@ +$PYTHON tests/test_basic.py diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat new file mode 100644 index 0000000..e27318d --- /dev/null +++ b/conda-recipe/bld.bat @@ -0,0 +1,25 @@ +REM A workaround for activate-dpcpp.bat issue to be addressed in 2021.4 +set "LIB=%BUILD_PREFIX%\Library\lib;%BUILD_PREFIX%\compiler\lib;%LIB%" +set "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" + +"%PYTHON%" setup.py clean --all +set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @( + REM set DIR_HINT if directory exists + IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( + SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" + ) +) + +if NOT "%WHEELS_OUTPUT_FOLDER%"=="" ( + rem Install and assemble wheel package from the build bits + "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS% + if errorlevel 1 exit 1 + copy dist\mkl_umath*.whl %WHEELS_OUTPUT_FOLDER% + if errorlevel 1 exit 1 +) ELSE ( + rem Only install + "%PYTHON%" setup.py install %SKBUILD_ARGS% + if errorlevel 1 exit 1 +) diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh new file mode 100644 index 0000000..2792f27 --- /dev/null +++ b/conda-recipe/build.sh @@ -0,0 +1,19 @@ +# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc in build prefix +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" + +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icx_for_conda.cfg +export ICXCFG="$(pwd)/icx_for_conda.cfg" + +export CMAKE_GENERATOR="Ninja" +SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then + # Install packages and assemble wheel package from built bits + WHEELS_BUILD_ARGS="-p manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" + ${PYTHON} setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} + cp dist/mkl_umath*.whl ${WHEELS_OUTPUT_FOLDER} +else + # Perform regular install + ${PYTHON} setup.py install ${SKBUILD_ARGS} +fi diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml new file mode 100644 index 0000000..dcafd45 --- /dev/null +++ b/conda-recipe/meta.yaml @@ -0,0 +1,55 @@ +{% set version = "0.1.2" %} +{% set buildnumber = 0 %} + +package: + name: mkl_umath + version: {{ version }} + +source: + path: ../ + +build: + number: {{ buildnumber }} + ignore_run_exports: + - blas + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('dpcpp') }} >=2024.2 # [not osx] + - sysroot_linux-64 >=2.28 # [linux] + host: + - setuptools + - cmake + - ninja + - git + - cython + - scikit-build + - python + - mkl-devel + - numpy-base + run: + - python + - mkl + - mkl-service + - {{ pin_compatible('intel-cmplr-lib-rt') }} + - {{ pin_compatible('numpy') }} + +test: + 
requires: + - pytest + source_files: + - mkl_umath/tests/test_basic.py + commands: + - pytest mkl_umath/tests/test_basic.py + imports: + - mkl_umath + - mkl_umath._ufuncs + - mkl_umath._patch + +about: + home: http://github.com/IntelPython/mkl_umath + license: BSD-3 + license_file: LICENSE.txt + summary: Universal functions for real and complex floating point arrays powered by Intel(R) Math Kernel Library Vector (Intel(R) MKL) and Intel(R) Short Vector Math Library (Intel(R) SVML) diff --git a/conda-recipe/run_tests.bat b/conda-recipe/run_tests.bat new file mode 100644 index 0000000..590db89 --- /dev/null +++ b/conda-recipe/run_tests.bat @@ -0,0 +1 @@ +%PYTHON% tests\test_basic.py \ No newline at end of file diff --git a/conda-recipe/run_tests.sh b/conda-recipe/run_tests.sh new file mode 100644 index 0000000..7bfca5d --- /dev/null +++ b/conda-recipe/run_tests.sh @@ -0,0 +1 @@ +$PYTHON tests/test_basic.py diff --git a/mkl_umath/__init__.py b/mkl_umath/__init__.py index 92960ad..a6e2927 100644 --- a/mkl_umath/__init__.py +++ b/mkl_umath/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: diff --git a/mkl_umath/_version.py b/mkl_umath/_version.py index df9144c..10939f0 100644 --- a/mkl_umath/_version.py +++ b/mkl_umath/_version.py @@ -1 +1 @@ -__version__ = '0.1.1' +__version__ = '0.1.2' diff --git a/mkl_umath/generate_umath.py b/mkl_umath/generate_umath.py index 7ff39b2..e6609ab 100644 --- a/mkl_umath/generate_umath.py +++ b/mkl_umath/generate_umath.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -343,12 +343,6 @@ def english_upper(s): None, TD(inexactvec + cmplxvec), ), -'floor_divide': - Ufunc(2, 1, None, - docstrings.get('numpy.core.umath.floor_divide'), - None, - TD(inexactvec + cmplxvec), - ), 'true_divide': Ufunc(2, 1, None, docstrings.get('numpy.core.umath.true_divide'), @@ -797,16 +791,16 @@ def make_arrays(funcdict): tname = english_upper(chartoname[t.type]) datalist.append('(void *)NULL') funclist.append( - '%s_%s_%s_%s' % (tname, t.in_, t.out, name)) + 'mkl_umath_%s_%s_%s_%s' % (tname, t.in_, t.out, name)) elif isinstance(t.func_data, FuncNameSuffix): datalist.append('(void *)NULL') tname = english_upper(chartoname[t.type]) funclist.append( - '%s_%s_%s' % (tname, name, t.func_data.suffix)) + 'mkl_umath_%s_%s_%s' % (tname, name, t.func_data.suffix)) elif t.func_data is None: datalist.append('(void *)NULL') tname = english_upper(chartoname[t.type]) - funclist.append('%s_%s' % (tname, name)) + funclist.append('mkl_umath_%s_%s' % (tname, name)) if t.simd is not None: for vt in t.simd: code2list.append(textwrap.dedent("""\ @@ -936,8 +930,10 @@ def make_code(funcdict, filename): Please make changes to the code generator program (%s) **/ #include "Python.h" + #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + #include "numpy/arrayobject.h" #include "numpy/ufuncobject.h" - #include "loops_intel.h" + #include "mkl_umath_loops.h" %s static int diff --git a/mkl_umath/setup.py b/mkl_umath/setup.py deleted file mode 100644 index 81a77bf..0000000 --- a/mkl_umath/setup.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019-2021, Intel Corporation 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sys -from os import (getcwd, environ, makedirs) -from os.path import join, exists, abspath, dirname -import importlib.machinery # requires Python >= 3.4 -from distutils.dep_util import newer - -from numpy.distutils.ccompiler import new_compiler -from distutils.sysconfig import customize_compiler -import platform -from numpy import get_include as get_numpy_include -from distutils.sysconfig import get_python_inc as get_python_include - -def ensure_Intel_compiler(): - ccompiler = new_compiler() - customize_compiler(ccompiler) - if hasattr(ccompiler, 'compiler'): - compiler_name = ccompiler.compiler[0] - else: - compiler_name = ccompiler.__class__.__name__ - - assert ('icl' in compiler_name or 'icc' in compiler_name), \ - "Intel(R) C Compiler is required to build mkl_umath, found {}".format(compiler_name) - - -def load_module(name, fn): - """ - Credit: numpy.compat.npy_load_module - """ - return importlib.machinery.SourceFileLoader(name, fn).load_module() - - -def separator_join(sep, strs): - """ - Joins non-empty arguments strings with dot. 
- - Credit: numpy.distutils.misc_util.dot_join - """ - assert isinstance(strs, (list, tuple)) - assert isinstance(sep, str) - return sep.join([si for si in strs if si]) - - -def configuration(parent_package='',top_path=None): - from numpy.distutils.misc_util import Configuration - from numpy.distutils.system_info import get_info - config = Configuration('mkl_umath', parent_package, top_path) - - mkl_root = environ.get('MKLROOT', None) - if mkl_root: - mkl_info = { - 'include_dirs': [join(mkl_root, 'include')], - 'library_dirs': [join(mkl_root, 'lib'), join(mkl_root, 'lib', 'intel64')], - 'libraries': ['mkl_rt'] - } - else: - mkl_info = get_info('mkl') - - print(mkl_info) - mkl_include_dirs = mkl_info.get('include_dirs', []) - mkl_library_dirs = mkl_info.get('library_dirs', []) - mkl_libraries = mkl_info.get('libraries', ['mkl_rt']) - - pdir = dirname(__file__) - wdir = join(pdir, 'src') - mkl_info = get_info('mkl') - - generate_umath_py = join(pdir, 'generate_umath.py') - n = separator_join('_', (config.name, 'generate_umath')) - generate_umath = load_module(n, generate_umath_py) - del n - - def generate_umath_c(ext, build_dir): - target_dir = join(build_dir, 'src') - target = join(target_dir, '__umath_generated.c') - if not exists(target_dir): - print("Folder {} was expected to exist, but creating".format(target_dir)) - makedirs(target_dir) - script = generate_umath_py - if newer(script, target): - with open(target, 'w') as f: - f.write(generate_umath.make_code(generate_umath.defdict, - generate_umath.__file__)) - config.add_include_dirs(target_dir) - return [] - - sources = [generate_umath_c] - - # ensure_Intel_compiler() - - if platform.system() == "Windows": - eca = ['/fp:fast=2', '/Qimf-precision=high', '/Qprec-sqrt', '/Qstd=c99', '/Qprotect-parens'] - else: - eca = ['-fp-model', 'fast=2', '-fimf-precision=high', '-prec-sqrt', '-fprotect-parens'] - - numpy_include_dir = get_numpy_include() - python_include_dir = get_python_include() - config.add_library( - 'loops_intel', - sources = [ - join(wdir, 'loops_intel.h.src'), - join(wdir, 'loops_intel.c.src'), - ], - include_dirs = [wdir] + mkl_include_dirs + [numpy_include_dir, python_include_dir], - depends = [ - join(wdir, 'blocking_utils.h'), - join(wdir, 'fast_loop_macros.h'), - join(numpy_include_dir, 'numpy', '*object.h'), - join(python_include_dir, "Python.h") - ], - libraries=mkl_libraries, - extra_compiler_args=eca, - macros=getattr(config, 'define_macros', getattr(config.get_distribution(), 'define_macros', [])) - ) - - config.add_extension( - name = '_ufuncs', - sources = [ - join(wdir, 'ufuncsmodule.c'), - ] + sources, - depends = [ - join(wdir, 'loops_intel.c.src'), - join(wdir, 'loops_intel.h.src'), - ], - include_dirs = [wdir] + mkl_include_dirs, - libraries = mkl_libraries + ['loops_intel'], - library_dirs = mkl_library_dirs, - extra_compile_args = [ - '-DNDEBUG', - # '-ggdb', '-O0', '-Wall', '-Wextra', '-DDEBUG', - ] - ) - - from Cython.Build import cythonize - from setuptools import Extension - cythonize(Extension('_patch', sources=[join(wdir, 'patch.pyx'),])) - - config.add_extension( - name = '_patch', - sources = [ - join(wdir, 'patch.c'), - ], - libraries = mkl_libraries + ['loops_intel'], - library_dirs = mkl_library_dirs, - extra_compile_args = [ - '-DNDEBUG', - #'-ggdb', '-O0', '-Wall', '-Wextra', '-DDEBUG', - ] - ) - - config.add_data_dir('tests') - -# if have_cython: -# config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) - - return config - -if __name__ == '__main__': - from 
numpy.distutils.core import setup - setup(configuration=configuration) diff --git a/mkl_umath/src/patch.pyx b/mkl_umath/src/_patch.pyx similarity index 99% rename from mkl_umath/src/patch.pyx rename to mkl_umath/src/_patch.pyx index 5814d54..fd78f8d 100644 --- a/mkl_umath/src/patch.pyx +++ b/mkl_umath/src/_patch.pyx @@ -24,7 +24,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # distutils: language = c -# cython: language_level=2 +# cython: language_level=3 import mkl_umath._ufuncs as mu import numpy.core.umath as nu diff --git a/mkl_umath/src/fast_loop_macros.h b/mkl_umath/src/fast_loop_macros.h index 50f9d41..12ef2e1 100644 --- a/mkl_umath/src/fast_loop_macros.h +++ b/mkl_umath/src/fast_loop_macros.h @@ -41,6 +41,10 @@ #define NPY_PRAGMA_VECTOR _Pragma("vector") #define NPY_PRAGMA_NOVECTOR _Pragma("novector") #define NPY_ASSUME_ALIGNED(p, b) __assume_aligned((p), (b)); +#elif defined(__clang__) +#define NPY_PRAGMA_VECTOR _Pragma("clang loop vectorize(enable)") +#define NPY_PRAGMA_NOVECTOR _Pragma("clang loop vectorize(disable)") +#define NPY_ASSUME_ALIGNED(p, b) #else #define NPY_PRAGMA_VECTOR _Pragma("GCC ivdep") #define NPY_PRAGMA_NOVECTOR @@ -70,19 +74,19 @@ npy_intp is1 = steps[0], os1 = steps[1];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, op1 += os1) + for(i = 0; i < n; ++i, ip1 += is1, op1 += os1) -#define UNARY_LOOP_VECTORIZED\ - char *ip1 = args[0], *op1 = args[1];\ - npy_intp is1 = steps[0], os1 = steps[1];\ +#define UNARY_LOOP_VECTORIZED(tin, tout)\ + tin *ip1 = (tin *) args[0];\ + tout *op1 = (tout *) args[1]; \ npy_intp n = dimensions[0];\ npy_intp i;\ NPY_PRAGMA_VECTOR\ - for(i = 0; i < n; i++, ip1 += is1, op1 += os1) + for(i = 0; i < n; ++i, ++ip1, ++op1) -#define UNARY_LOOP_DISPATCH(cond, body)\ +#define UNARY_LOOP_DISPATCH(tin, tout, cond, body)\ if (cond) {\ - UNARY_LOOP_VECTORIZED { body; }\ + UNARY_LOOP_VECTORIZED(tin, tout) { body; }\ } else {\ UNARY_LOOP { body; }\ } @@ -93,7 +97,7 @@ npy_intp is1 = steps[0], os1 = steps[1], os2 = steps[2];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2) + for(i = 0; i < n; ++i, ip1 += is1, op1 += os1, op2 += os2) /** (ip1, ip2) -> (op1) */ #define BINARY_LOOP\ @@ -101,7 +105,7 @@ npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) + for(i = 0; i < n; ++i, ip1 += is1, ip2 += is2, op1 += os1) /** (ip1, ip2) -> (op1, op2) */ #define BINARY_LOOP_TWO_OUT\ @@ -109,7 +113,7 @@ npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2], os2 = steps[3];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2) + for(i = 0; i < n; ++i, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2) /** (ip1, ip2, ip3) -> (op1) */ #define TERNARY_LOOP\ @@ -117,7 +121,7 @@ npy_intp is1 = steps[0], is2 = steps[1], is3 = steps[2], os1 = steps[3];\ npy_intp n = dimensions[0];\ npy_intp i;\ - for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1) + for(i = 0; i < n; ++i, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1) /** @} */ diff --git a/mkl_umath/src/loops_intel.h.src b/mkl_umath/src/loops_intel.h.src deleted file mode 100644 index c45bab4..0000000 --- a/mkl_umath/src/loops_intel.h.src +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright (c) 2019-2021, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are 
permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _MKL_UMATH_LOOPS_H_ -#define _MKL_UMATH_LOOPS_H_ - -#include "numpy/ndarraytypes.h" - -#include - -/**begin repeat - * Float types - * #TYPE = FLOAT, DOUBLE# - */ - -NPY_NO_EXPORT void -@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_invsqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_exp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_exp2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_expm1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_erf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log10(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_log1p(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_cos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_sin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_tan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arccos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arcsin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arctan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_cosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_sinh(char **args, 
npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_tanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arccosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arcsinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_arctanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_fabs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_floor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_ceil(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_rint(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_trunc(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_cbrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -/**begin repeat1 - * Arithmetic - * # kind = add, subtract, multiply, divide# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -/**begin repeat1 - * Arithmetic - * # kind = equal, not_equal, less, less_equal, greater, greater_equal, - * logical_and, logical_or# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -/**begin repeat1 - * #kind = isnan, isinf, isfinite, signbit# - **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - - -NPY_NO_EXPORT void -@TYPE@_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -/**begin repeat1 - * #kind = maximum, minimum, fmax, fmin# - **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp 
*dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -#define @TYPE@_true_divide @TYPE@_divide - -/**end repeat**/ - -/* - ***************************************************************************** - ** COMPLEX LOOPS ** - ***************************************************************************** - */ - -#define CGE(xr,xi,yr,yi) (xr > yr || (xr == yr && xi >= yi)); -#define CLE(xr,xi,yr,yi) (xr < yr || (xr == yr && xi <= yi)); -#define CGT(xr,xi,yr,yi) (xr > yr || (xr == yr && xi > yi)); -#define CLT(xr,xi,yr,yi) (xr < yr || (xr == yr && xi < yi)); -#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi); -#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi); - -/**begin repeat - * complex types - * #TYPE = CFLOAT, CDOUBLE# - */ - -/**begin repeat1 - * arithmetic - * #kind = add, subtract# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); - - -/**begin repeat1 - * arithmetic - * #kind = greater, greater_equal, less, less_equal, equal, - not_equal, logical_and, logical_or, logical_xor, logical_not, - isnan, isinf, isfinite# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); - -/**begin repeat1 - * arithmetic - * #kind = maximum, minimum, fmax, fmin# - */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -/**end repeat1**/ - -#define @TYPE@_true_divide @TYPE@_divide - -/**end repeat**/ - -#undef CGE -#undef CLE -#undef CGT -#undef CLT -#undef CEQ 
-#undef CNE - -#endif diff --git a/mkl_umath/src/loops_intel.c.src b/mkl_umath/src/mkl_umath_loops.c.src similarity index 74% rename from mkl_umath/src/loops_intel.c.src rename to mkl_umath/src/mkl_umath_loops.c.src index 0a199dc..86a62c4 100644 --- a/mkl_umath/src/loops_intel.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, Intel Corporation + * Copyright (c) 2019-2023, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,10 +29,10 @@ #include "mkl.h" #include #include -#include "mathimf.h" #include "Python.h" #define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define NP_IMPORT_ARRAY #include "numpy/npy_common.h" #include "numpy/ndarraytypes.h" @@ -40,9 +40,9 @@ #include "numpy/ufuncobject.h" #include "numpy/npy_math.h" #include "blocking_utils.h" -#include "loops_intel.h" +#include "mkl_umath_loops.h" -/* Adapated from NumPy's source code. +/* Adapated from NumPy's source code. * https://github.com/numpy/numpy/blob/main/LICENSE.txt */ /* @@ -143,17 +143,22 @@ static inline npy_double spacing(npy_double x) { if (isinf(x)) - return ((npy_double) NAN); + return ((npy_double) NAN); return copysign(nextafter(fabs(x), ((npy_double) INFINITY)), x) - x; } static inline npy_float spacingf(npy_float x) { if (isinff(x)) - return ((npy_float) NAN); + return ((npy_float) NAN); return copysignf(nextafterf(fabsf(x), INFINITY), x) - x; } +#if defined(_MSC_VER) && defined(__INTEL_COMPILER) +extern __inline float __cdecl ldexpf( float _X, int _Y) { + return (float)ldexp(_X, _Y); +} +#endif /**begin repeat * Float types @@ -223,21 +228,26 @@ divmod@c@(@type@ a, @type@ b, @type@ *modulus) * #scalarf = sqrtf, sqrt# */ -NPY_NO_EXPORT void -@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@))) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Sqrt, dimensions[0], @type@, args[0], args[1]); /* v@c@Sqrt(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -251,21 +261,26 @@ NPY_NO_EXPORT void * #scalarf = (1.0f)/sqrtf, (1.0)/sqrt# */ -NPY_NO_EXPORT void -@TYPE@_invsqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > 
VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@InvSqrt, dimensions[0], @type@, args[0], args[1]); /* v@c@InvSqrt(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -280,27 +295,29 @@ NPY_NO_EXPORT void * #scalarf = expf, exp# */ -NPY_NO_EXPORT void -@TYPE@_exp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; int ignore_fpstatus = 0; - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@))) { + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { ignore_fpstatus = 1; CHUNKED_VML_CALL2(v@c@Exp, dimensions[0], @type@, args[0], args[1]); /* v@c@Exp(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; - if(in1 == -NPY_INFINITY@A@){ - ignore_fpstatus = 1; - } + ignore_fpstatus |= ((in1 == -NPY_INFINITY@A@) ? 1 : 0); *(@type@ *)op1 = @scalarf@(in1); - ) + ) } if(ignore_fpstatus) { feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); @@ -318,11 +335,17 @@ NPY_NO_EXPORT void */ /* TODO: Use VML */ -NPY_NO_EXPORT void -@TYPE@_exp2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); @@ -339,21 +362,25 @@ NPY_NO_EXPORT void * #scalarf = expm1f, expm1# */ -NPY_NO_EXPORT void -@TYPE@_expm1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { CHUNKED_VML_CALL2(v@c@Expm1, dimensions[0], @type@, args[0], args[1]); /* v@c@Expm1(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -367,21 +394,26 @@ 
NPY_NO_EXPORT void * #scalarf = erff, erf# */ -NPY_NO_EXPORT void -@TYPE@_erf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Erf, dimensions[0], @type@, args[0], args[1]); /* v@c@Erf(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -395,21 +427,26 @@ NPY_NO_EXPORT void * #scalarf = logf, log# */ -NPY_NO_EXPORT void -@TYPE@_log(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Ln, dimensions[0], @type@, args[0], args[1]); /* v@c@Ln(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -424,11 +461,17 @@ NPY_NO_EXPORT void */ /* TODO: Use VML */ -NPY_NO_EXPORT void -@TYPE@_log2(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); @@ -445,21 +488,26 @@ NPY_NO_EXPORT void * #scalarf = log10f, log10# */ -NPY_NO_EXPORT void -@TYPE@_log10(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && 
dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Log10, dimensions[0], @type@, args[0], args[1]); /* v@c@Log10(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -473,21 +521,26 @@ NPY_NO_EXPORT void * #scalarf = log1pf, log1p# */ -NPY_NO_EXPORT void -@TYPE@_log1p(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Log1p, dimensions[0], @type@, args[0], args[1]); /* v@c@Log1p(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -501,21 +554,26 @@ NPY_NO_EXPORT void * #scalarf = cosf, cos# */ -NPY_NO_EXPORT void -@TYPE@_cos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Cos, dimensions[0], @type@, args[0], args[1]); /* v@c@Cos(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -529,21 +587,26 @@ NPY_NO_EXPORT void * #scalarf = sinf, sin# */ -NPY_NO_EXPORT void -@TYPE@_sin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Sin, dimensions[0], @type@, args[0], args[1]); /* v@c@Sin(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], 
dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -557,21 +620,26 @@ NPY_NO_EXPORT void * #scalarf = tanf, tan# */ -NPY_NO_EXPORT void -@TYPE@_tan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Tan, dimensions[0], @type@, args[0], args[1]); /* v@c@Tan(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -585,21 +653,26 @@ NPY_NO_EXPORT void * #scalarf = acosf, acos# */ -NPY_NO_EXPORT void -@TYPE@_arccos(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arccos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Acos, dimensions[0], @type@, args[0], args[1]); /* v@c@Acos(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -613,21 +686,26 @@ NPY_NO_EXPORT void * #scalarf = asinf, asin# */ -NPY_NO_EXPORT void -@TYPE@_arcsin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Asin, dimensions[0], @type@, args[0], args[1]); /* v@c@Asin(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -641,21 +719,26 @@ NPY_NO_EXPORT void * #scalarf = atanf, atan# */ -NPY_NO_EXPORT void 
-@TYPE@_arctan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Atan, dimensions[0], @type@, args[0], args[1]); /* v@c@Atan(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -669,21 +752,26 @@ NPY_NO_EXPORT void * #scalarf = coshf, cosh# */ -NPY_NO_EXPORT void -@TYPE@_cosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Cosh, dimensions[0], @type@, args[0], args[1]); /* v@c@Cosh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -697,21 +785,26 @@ NPY_NO_EXPORT void * #scalarf = sinhf, sinh# */ -NPY_NO_EXPORT void -@TYPE@_sinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Sinh, dimensions[0], @type@, args[0], args[1]); /* v@c@Sinh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -725,21 +818,26 @@ NPY_NO_EXPORT void * #scalarf = tanhf, tanh# */ -NPY_NO_EXPORT void -@TYPE@_tanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_tanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - 
dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Tanh, dimensions[0], @type@, args[0], args[1]); /* v@c@Tanh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -753,21 +851,26 @@ NPY_NO_EXPORT void * #scalarf = acoshf, acosh# */ -NPY_NO_EXPORT void -@TYPE@_arccosh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Acosh, dimensions[0], @type@, args[0], args[1]); /* v@c@Acosh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -781,21 +884,26 @@ NPY_NO_EXPORT void * #scalarf = asinhf, asinh# */ -NPY_NO_EXPORT void -@TYPE@_arcsinh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Asinh, dimensions[0], @type@, args[0], args[1]); /* v@c@Asinh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -809,21 +917,26 @@ NPY_NO_EXPORT void * #scalarf = atanhf, atanh# */ -NPY_NO_EXPORT void -@TYPE@_arctanh(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], 
args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Atanh, dimensions[0], @type@, args[0], args[1]); /* v@c@Atanh(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -837,11 +950,17 @@ NPY_NO_EXPORT void * #scalarf = fabsf, fabs# */ -NPY_NO_EXPORT void -@TYPE@_fabs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) + @type@, @type@ + , + can_vectorize , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); @@ -858,21 +977,26 @@ NPY_NO_EXPORT void * #scalarf = floorf, floor# */ -NPY_NO_EXPORT void -@TYPE@_floor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_floor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(steps[0] == sizeof(@type@) && steps[1] == sizeof(@type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Floor, dimensions[0], @type@, args[0], args[1]); /* v@c@Floor(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -886,21 +1010,26 @@ NPY_NO_EXPORT void * #scalarf = ceilf, ceil# */ -NPY_NO_EXPORT void -@TYPE@_ceil(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Ceil, dimensions[0], @type@, args[0], args[1]); /* v@c@Ceil(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -914,21 +1043,26 @@ NPY_NO_EXPORT void * #scalarf = rintf, rint# */ -NPY_NO_EXPORT 
void -@TYPE@_rint(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(steps[0] == sizeof(@type@) && steps[1] == sizeof(@type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Rint, dimensions[0], @type@, args[0], args[1]); /* v@c@Rint(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -942,21 +1076,26 @@ NPY_NO_EXPORT void * #scalarf = truncf, trunc# */ -NPY_NO_EXPORT void -@TYPE@_trunc(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if( can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Trunc, dimensions[0], @type@, args[0], args[1]); /* v@c@Trunc(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -970,21 +1109,26 @@ NPY_NO_EXPORT void * #scalarf = cbrtf, cbrt# */ -NPY_NO_EXPORT void -@TYPE@_cbrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_cbrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_UNARY_CONT(@type@, @type@) && - dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD && - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) ) { + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) + { CHUNKED_VML_CALL2(v@c@Cbrt, dimensions[0], @type@, args[0], args[1]); /* v@c@Cbrt(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { UNARY_LOOP_DISPATCH( - DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)) - , + @type@, @type@ + , + can_vectorize + , const @type@ in1 = *(@type@ *)ip1; *(@type@ *)op1 = @scalarf@(in1); - ) + ) } } @@ -1094,8 +1238,8 @@ pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride) * # PW = 1# * # VML = Add# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp 
*steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1127,19 +1271,19 @@ NPY_NO_EXPORT void @type@ *op1_shifted = op1 + peel; @type@ *ip2_shifted = ip2 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } @@ -1262,8 +1406,8 @@ NPY_NO_EXPORT void * # PW = 0# * # VML = Sub# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1295,19 +1439,19 @@ NPY_NO_EXPORT void @type@ *ip2_shifted = ip2 + peel; @type@ *op1_shifted = op1 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } @@ -1430,8 +1574,8 @@ NPY_NO_EXPORT void * # PW = 0# * # VML = Mul# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1463,19 +1607,19 @@ NPY_NO_EXPORT void @type@ *ip2_shifted = ip2 + peel; @type@ *op1_shifted = op1 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + 
NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } @@ -1598,8 +1742,8 @@ NPY_NO_EXPORT void * # PW = 0# * # VML = Div# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_CONT(@type@, @type@)) { #if @SUPPORTED_BY_VML@ @@ -1620,37 +1764,37 @@ NPY_NO_EXPORT void const npy_intp blocked_end = npy_blocked_end(peel, sizeof(@type@), vsize, n); npy_intp i; - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(i = 0; i < peel; i++) { op1[i] = ip1[i] @OP@ ip2[i]; } { npy_intp j, j_max = blocked_end - peel; - j_max &= (~0xf); - const npy_intp blocked_end = j_max + peel; + j_max &= (~0xf); + const npy_intp blocked_end = j_max + peel; if (j_max > 0) { @type@ *ip1_aligned = ip1 + peel, *op1_shifted = op1 + peel, *ip2_shifted = ip2 + peel; - if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && - DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - NPY_PRAGMA_VECTOR - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } else { - NPY_ASSUME_ALIGNED(ip1_aligned, 64) - for(j = 0; j < j_max; j++) { - op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; - } - } + if( DISJOINT_OR_SAME(op1_shifted, ip1_aligned, j_max, 1) && + DISJOINT_OR_SAME(op1_shifted, ip2_shifted, j_max, 1) ) { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + NPY_PRAGMA_VECTOR + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } else { + NPY_ASSUME_ALIGNED(ip1_aligned, 64) + for(j = 0; j < j_max; j++) { + op1_shifted[j] = ip1_aligned[j] @OP@ ip2_shifted[j]; + } + } i = blocked_end; } } - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(; i < n; i++) { op1[i] = ip1[i] @OP@ ip2[i]; } @@ -1666,7 +1810,7 @@ NPY_NO_EXPORT void npy_intp i; const @type@ ip1c = ip1[0]; - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(i = 0; i < peel; i++) { op1[i] = ip1c @OP@ ip2[i]; } @@ -1685,7 +1829,7 @@ NPY_NO_EXPORT void } } - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(; i < n; i++) { op1[i] = ip1c @OP@ ip2[i]; } @@ -1700,7 +1844,7 @@ NPY_NO_EXPORT void npy_intp i; const @type@ ip2c = ip2[0]; - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(i = 0; i < peel; i++) { op1[i] = ip1[i] @OP@ ip2c; } @@ -1719,7 +1863,7 @@ NPY_NO_EXPORT void } } - NPY_PRAGMA_NOVECTOR + NPY_PRAGMA_NOVECTOR for(; i < n; i++) { op1[i] = ip1[i] @OP@ ip2c; } @@ -1750,8 +1894,8 @@ NPY_NO_EXPORT void * logical_and, logical_or# * #OP = ==, !=, <, <=, >, >=, &&, ||# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { { BINARY_LOOP { @@ -1763,8 +1907,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const int t1 = !!*(@type@ *)ip1; @@ -1773,8 +1917,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const 
npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1786,8 +1930,8 @@ NPY_NO_EXPORT void * #kind = isnan, isinf, isfinite, signbit# * #func = isnan, isinf, isfinite, signbit# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { { UNARY_LOOP { @@ -1799,8 +1943,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1808,8 +1952,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1818,8 +1962,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1832,8 +1976,8 @@ NPY_NO_EXPORT void * #kind = maximum, minimum# * #OP = >=, <=# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* */ if (IS_BINARY_REDUCE) { @@ -1863,8 +2007,8 @@ NPY_NO_EXPORT void * #kind = fmax, fmin# * #OP = >=, <=# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* */ if (IS_BINARY_REDUCE) { @@ -1887,19 +2031,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) -{ - BINARY_LOOP { - const @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - @type@ mod; - *((@type@ *)op1) = divmod@c@(in1, in2, &mod); - } -} - -NPY_NO_EXPORT void -@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1908,8 +2041,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_divmod(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP_TWO_OUT { const @type@ in1 = *(@type@ *)ip1; @@ -1918,8 +2051,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { #if @SUPPORTED_BY_VML@ if(IS_UNARY_CONT(@type@, @type@) && @@ -1937,8 +2070,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void 
-@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { #if @SUPPORTED_BY_VML@ if(IS_UNARY_CONT(@type@, @type@) && @@ -1956,16 +2089,16 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { OUTPUT_LOOP { *((@type@ *)op1) = 1; } } -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -1973,8 +2106,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { #if @SUPPORTED_BY_VML@ if(IS_UNARY_CONT(@type@, @type@) && @@ -1995,8 +2128,8 @@ NPY_NO_EXPORT void feclearexcept(FE_ALL_EXCEPT); /* clear floatstatus */ } -NPY_NO_EXPORT void -@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_negative(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { { UNARY_LOOP { @@ -2006,8 +2139,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_positive(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -2015,8 +2148,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* Sign of nan is nan */ UNARY_LOOP { @@ -2025,8 +2158,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_TWO_OUT { const @type@ in1 = *(@type@ *)ip1; @@ -2034,8 +2167,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_frexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP_TWO_OUT { const @type@ in1 = *(@type@ *)ip1; @@ -2043,8 +2176,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_ldexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; @@ -2053,8 +2186,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* * Additional loop to handle npy_long integer inputs (cf. 
#866, #1633). @@ -2083,7 +2216,7 @@ NPY_NO_EXPORT void } } -#define @TYPE@_true_divide @TYPE@_divide +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide /**end repeat**/ @@ -2159,13 +2292,13 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, for (i = 8; i < n - (n % 8); i += 8) { /* small blocksizes seems to mess with hardware prefetch */ NPY_PREFETCH(a + (i + 512 /(npy_intp)sizeof(@ftype@))*stride, 0, 3); - r[0] += *((@ftype@ *)(a + (i + 0) * stride)); + r[0] += *((@ftype@ *)(a + (i + 0) * stride)); r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@))); - r[2] += *((@ftype@ *)(a + (i + 2) * stride)); + r[2] += *((@ftype@ *)(a + (i + 2) * stride)); r[3] += *((@ftype@ *)(a + (i + 2) * stride + sizeof(@ftype@))); - r[4] += *((@ftype@ *)(a + (i + 4) * stride)); + r[4] += *((@ftype@ *)(a + (i + 4) * stride)); r[5] += *((@ftype@ *)(a + (i + 4) * stride + sizeof(@ftype@))); - r[6] += *((@ftype@ *)(a + (i + 6) * stride)); + r[6] += *((@ftype@ *)(a + (i + 6) * stride)); r[7] += *((@ftype@ *)(a + (i + 6) * stride + sizeof(@ftype@))); } @@ -2200,8 +2333,8 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, * #OP = +, -# * #PW = 1, 0# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { if (IS_BINARY_REDUCE && @PW@) { npy_intp n = dimensions[0]; @@ -2227,8 +2360,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2240,8 +2373,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2272,33 +2405,12 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) -{ - BINARY_LOOP { - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - const @ftype@ in2r = ((@ftype@ *)ip2)[0]; - const @ftype@ in2i = ((@ftype@ *)ip2)[1]; - if (fabs@c@(in2r) >= fabs@c@(in2i)) { - const @ftype@ rat = in2i/in2r; - ((@ftype@ *)op1)[0] = floor@c@((in1r + in1i*rat)/(in2r + in2i*rat)); - ((@ftype@ *)op1)[1] = 0; - } - else { - const @ftype@ rat = in2r/in2i; - ((@ftype@ *)op1)[0] = floor@c@((in1r*rat + in1i)/(in2i + in2r*rat)); - ((@ftype@ *)op1)[1] = 0; - } - } -} - /**begin repeat1 * #kind= greater, greater_equal, less, less_equal, equal, not_equal# * #OP = CGT, CGE, CLT, CLE, CEQ, CNE# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2315,8 +2427,8 @@ NPY_NO_EXPORT void #OP1 = ||, ||# #OP2 = &&, ||# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const 
npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2328,8 +2440,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2342,8 +2454,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2357,8 +2469,8 @@ NPY_NO_EXPORT void * #func = isnan, isinf, isfinite# * #OP = ||, ||, &&# **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2369,8 +2481,8 @@ NPY_NO_EXPORT void } /**end repeat1**/ -NPY_NO_EXPORT void -@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2380,8 +2492,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2400,8 +2512,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)) { OUTPUT_LOOP { ((@ftype@ *)op1)[0] = 1; @@ -2409,8 +2521,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { +void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; const @ftype@ in1i = ((@ftype@ *)ip1)[1]; @@ -2419,8 +2531,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { int ignore_fpstatus = 0; @@ -2449,8 +2561,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@__arg(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2459,8 +2571,8 @@ NPY_NO_EXPORT void } } -NPY_NO_EXPORT void -@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { /* fixme: sign of nan is currently 0 */ UNARY_LOOP { @@ -2478,8 
+2590,8 @@ NPY_NO_EXPORT void * #kind = maximum, minimum# * #OP = CGE, CLE# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2501,8 +2613,8 @@ NPY_NO_EXPORT void * #kind = fmax, fmin# * #OP = CGE, CLE# */ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { BINARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; @@ -2522,7 +2634,7 @@ NPY_NO_EXPORT void } /**end repeat1**/ -#define @TYPE@_true_divide @TYPE@_divide +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide /**end repeat**/ diff --git a/mkl_umath/src/mkl_umath_loops.h.src b/mkl_umath/src/mkl_umath_loops.h.src new file mode 100644 index 0000000..c643c20 --- /dev/null +++ b/mkl_umath/src/mkl_umath_loops.h.src @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2019-2023, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MKL_UMATH_LOOPS_H_ +#define _MKL_UMATH_LOOPS_H_ + +#include "numpy/ndarraytypes.h" + +#include + +#ifdef _WIN32 +#ifdef mkl_umath_loops_EXPORTS +#define MKL_UMATH_API __declspec(dllexport) +#else +#define MKL_UMATH_API __declspec(dllimport) +#endif +#else +#define MKL_UMATH_API +#endif + +/**begin repeat + * Float types + * #TYPE = FLOAT, DOUBLE# + */ + +MKL_UMATH_API +void +mkl_umath_@TYPE@_sqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_invsqrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_expm1(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_erf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_log(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_log10(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_log1p(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_cos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_sin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_tan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_arccos(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_arcsin(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_arctan(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_cosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_sinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_tanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_arccosh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_arcsinh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_arctanh(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_fabs(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_floor(char **args, 
const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_ceil(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_rint(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_trunc(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_cbrt(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * Arithmetic + * # kind = add, subtract, multiply, divide# + */ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * Arithmetic + * # kind = equal, not_equal, less, less_equal, greater, greater_equal, + * logical_and, logical_or# + */ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +MKL_UMATH_API +void +mkl_umath_@TYPE@_logical_xor(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_logical_not(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind = isnan, isinf, isfinite, signbit# + **/ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +MKL_UMATH_API +void +mkl_umath_@TYPE@_spacing(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + + +MKL_UMATH_API +void +mkl_umath_@TYPE@_copysign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_nextafter(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind = maximum, minimum, fmax, fmin# + **/ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +MKL_UMATH_API +void +mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_divmod(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_negative(char **args, const npy_intp *dimensions, const npy_intp *steps, void 
*NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_positive(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_modf(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_frexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_ldexp(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide + +/**end repeat**/ + +/* + ***************************************************************************** + ** COMPLEX LOOPS ** + ***************************************************************************** + */ + +#define CGE(xr,xi,yr,yi) (xr > yr || (xr == yr && xi >= yi)); +#define CLE(xr,xi,yr,yi) (xr < yr || (xr == yr && xi <= yi)); +#define CGT(xr,xi,yr,yi) (xr > yr || (xr == yr && xi > yi)); +#define CLT(xr,xi,yr,yi) (xr < yr || (xr == yr && xi < yi)); +#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi); +#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi); + +/**begin repeat + * complex types + * #TYPE = CFLOAT, CDOUBLE# + */ + +/**begin repeat1 + * arithmetic + * #kind = add, subtract# + */ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +MKL_UMATH_API +void +mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_floor_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); + + +/**begin repeat1 + * arithmetic + * #kind = greater, greater_equal, less, less_equal, equal, + not_equal, logical_and, logical_or, logical_xor, logical_not, + isnan, isinf, isfinite# + */ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +MKL_UMATH_API +void +mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@__arg(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +MKL_UMATH_API +void +mkl_umath_@TYPE@_sign(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(data)); + +/**begin repeat1 + * arithmetic + * #kind = maximum, 
minimum, fmax, fmin# + */ +MKL_UMATH_API +void +mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +#define mkl_umath_@TYPE@_true_divide mkl_umath_@TYPE@_divide + +/**end repeat**/ + +#undef CGE +#undef CLE +#undef CGT +#undef CLT +#undef CEQ +#undef CNE + +#endif diff --git a/mkl_umath/src/ufuncsmodule.h b/mkl_umath/src/ufuncsmodule.h index 2526763..acb6bbd 100644 --- a/mkl_umath/src/ufuncsmodule.h +++ b/mkl_umath/src/ufuncsmodule.h @@ -25,6 +25,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "Python.h" +#define PY_ARRAY_UNIQUE_SYMBOL mkl_umath_ufunc_ext #include "numpy/arrayobject.h" #include "numpy/ndarraytypes.h" #include "numpy/ufuncobject.h" diff --git a/mkl_umath/tests/test_basic.py b/mkl_umath/tests/test_basic.py index 14e5ded..88770a9 100644 --- a/mkl_umath/tests/test_basic.py +++ b/mkl_umath/tests/test_basic.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -23,6 +23,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest import numpy as np import mkl_umath._ufuncs as mu import numpy.core.umath as nu @@ -41,19 +42,16 @@ def get_args(args_str): elif s == 'D': args.append(np.double(np.random.random_sample()) + np.double(np.random.random_sample()) * 1j) elif s == 'i': - args.append(np.int(np.random.randint(low=1, high=10))) + args.append(np.int_(np.random.randint(low=1, high=10))) elif s == 'l': - args.append(np.long(np.random.randint(low=1, high=10))) + args.append(np.dtype('long').type(np.random.randint(low=1, high=10))) else: raise ValueError("Unexpected type specified!") return tuple(args) umaths = [i for i in dir(mu) if isinstance(getattr(mu, i), np.ufunc)] - umaths.remove('arccosh') # expects input greater than 1 -# dictionary with test cases -# (umath, types) : args generated_cases = {} for umath in umaths: mkl_umath = getattr(mu, umath) @@ -64,29 +62,30 @@ def get_args(args_str): generated_cases[(umath, type)] = args additional_cases = { -('arccosh', 'f->f') : (np.single(np.random.random_sample() + 1),), -('arccosh', 'd->d') : (np.double(np.random.random_sample() + 1),), + ('arccosh', 'f->f'): (np.single(np.random.random_sample() + 1),), + ('arccosh', 'd->d'): (np.double(np.random.random_sample() + 1),), } -test_cases = {} -for d in (generated_cases, additional_cases): - test_cases.update(d) +test_cases = {**generated_cases, **additional_cases} -for case in test_cases: - umath = case[0] - type = case[1] +@pytest.mark.parametrize("case", list(test_cases.keys())) +def test_umath(case): + umath, type = case args = test_cases[case] mkl_umath = getattr(mu, umath) np_umath = getattr(nu, umath) print('*'*80) - print(umath, type) - print("args", args) + print(f"Testing {umath} with type {type}") + print("args:", args) + mkl_res = mkl_umath(*args) np_res = np_umath(*args) - print("mkl res", mkl_res) - print("npy res", np_res) - - assert np.array_equal(mkl_res, np_res) + + print("mkl res:", mkl_res) + print("npy res:", np_res) + + assert np.allclose(mkl_res, np_res), f"Results for {umath} do not match" -print("Test cases count:", len(test_cases)) -print("All looks good!") +def test_cases_count(): + print("Test cases count:", len(test_cases)) 
+ assert len(test_cases) > 0, "No test cases found" diff --git a/mkl_umath/ufunc_docstrings.py b/mkl_umath/ufunc_docstrings.py index 5abc3af..79877e2 100644 --- a/mkl_umath/ufunc_docstrings.py +++ b/mkl_umath/ufunc_docstrings.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: diff --git a/setup.py b/setup.py index 1ab571e..0ee7fa6 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2019-2021, Intel Corporation +# Copyright (c) 2019-2023, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -24,8 +24,23 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import importlib.machinery import io +import os import re +from distutils.dep_util import newer +from _vendored.conv_template import process_file as process_c_file +from os import (getcwd, environ, makedirs) +from os.path import join, exists, abspath, dirname +from setuptools import Extension + +import skbuild +import skbuild.setuptools_wrap +import skbuild.utils +from skbuild.command.build_py import build_py as _skbuild_build_py +from skbuild.command.install import install as _skbuild_install + +# import versioneer with io.open('mkl_umath/_version.py', 'rt', encoding='utf8') as f: version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) @@ -54,46 +69,86 @@ Operating System :: MacOS """ -def configuration(parent_package='',top_path=None): - from numpy.distutils.misc_util import Configuration - - config = Configuration(None, parent_package, top_path) - config.set_options(ignore_setup_xxx_py=True, - assume_default_configuration=True, - delegate_options_to_subpackages=True, - quiet=True) - - config.add_subpackage('mkl_umath') - - config.version = VERSION - - return config - - -def setup_package(): - from setuptools import setup - from numpy.distutils.core import setup - metadata = dict( - name = 'mkl_umath', - maintainer = "Intel Corp.", - maintainer_email = "scripting@intel.com", - description = "MKL-based universal functions for NumPy arrays", - long_description = long_description, - long_description_content_type="text/markdown", - url = "http://github.com/IntelPython/mkl_umath", - author = "Intel Corporation", - download_url = "http://github.com/IntelPython/mkl_umath", - license = 'BSD', - classifiers = [_f for _f in CLASSIFIERS.split('\n') if _f], - platforms = ["Windows", "Linux", "Mac OS-X"], - test_suite = 'nose.collector', - python_requires = '>=3.6', - install_requires = ['numpy'], - configuration = configuration - ) - setup(**metadata) - - return None - -if __name__ == '__main__': - setup_package() + +def load_module(name, fn): + """ + Credit: numpy.compat.npy_load_module + """ + return importlib.machinery.SourceFileLoader(name, fn).load_module() + +def separator_join(sep, strs): + """ + Joins non-empty arguments strings with dot. 
+ + Credit: numpy.distutils.misc_util.dot_join + """ + assert isinstance(strs, (list, tuple)) + assert isinstance(sep, str) + return sep.join([si for si in strs if si]) + +pdir = join(dirname(__file__), 'mkl_umath') +wdir = join(pdir, 'src') + +generate_umath_py = join(pdir, 'generate_umath.py') +n = separator_join('_', ('mkl_umath', 'generate_umath')) +generate_umath = load_module(n, generate_umath_py) +del n + + +def generate_umath_c(build_dir): + target_dir = join(build_dir, 'src') + target = join(target_dir, '__umath_generated.c') + if not exists(target_dir): + print("Folder {} was expected to exist, but creating".format(target_dir)) + makedirs(target_dir) + script = generate_umath_py + if newer(script, target): + with open(target, 'w') as f: + f.write(generate_umath.make_code(generate_umath.defdict, + generate_umath.__file__)) + return [] + + +generate_umath_c(pdir) + +loops_header_templ = join(wdir, "mkl_umath_loops.h.src") +processed_loops_h_fn = join(wdir, "mkl_umath_loops.h") +loops_header_processed = process_c_file(loops_header_templ) + +with open(processed_loops_h_fn, 'w') as fid: + fid.write(loops_header_processed) + +loops_src_templ = join(wdir, "mkl_umath_loops.c.src") +processed_loops_src_fn = join(wdir, "mkl_umath_loops.c") +loops_src_processed = process_c_file(loops_src_templ) + +with open(processed_loops_src_fn, 'w') as fid: + fid.write(loops_src_processed) + + +skbuild.setup( + name="mkl_umath", + version=VERSION, + maintainer = "Intel Corp.", + maintainer_email = "scripting@intel.com", + description = "MKL-based universal functions for NumPy arrays", + long_description = long_description, + long_description_content_type="text/markdown", + license = 'BSD', + author="Intel Corporation", + url="http://github.com/IntelPython/mkl_umath", + download_url="http://github.com/IntelPython/mkl_umath", + packages=[ + "mkl_umath", + ], + package_data={"mkl_umath": ["tests/*.*"]}, + include_package_data=True, + zip_safe=False, + setup_requires=["Cython"], + install_requires=[ + "numpy", + ], + keywords="mkl_umath", + classifiers=[_f for _f in CLASSIFIERS.split("\n") if _f], + platforms=["Linux", "Windows"] +)
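Editor's note (not part of the patch): the mkl_umath_loops.h.src hunks above are conv_template sources; each /**begin repeat ... @TYPE@ / @kind@ ... **/ block is expanded by the vendored conv_template processor (invoked from setup.py as process_c_file) into one concrete declaration per listed type, and MKL_UMATH_API resolves to __declspec(dllexport/dllimport) on Windows and to nothing elsewhere. The sketch below is illustrative only: it shows roughly what one expansion of the header template looks like and how a loop with NumPy's standard inner-loop signature walks its strided arguments. The name my_DOUBLE_square is a hypothetical stand-in, and the scalar body is only an assumption for illustration; the package's real loops are expected to call into MKL's vector math where they can.

    /* Illustrative sketch -- assumes NumPy headers are on the include path. */
    #include "numpy/ndarraytypes.h"

    /* Roughly what one repeat expansion of the header template yields,
     * e.g. for TYPE=DOUBLE, kind=square (mkl_umath_@TYPE@_@kind@ pattern): */
    void
    mkl_umath_DOUBLE_square(char **args, const npy_intp *dimensions,
                            const npy_intp *steps, void *func);

    /* A minimal strided inner loop with the same contract: args[0]/args[1]
     * are the input/output base pointers, dimensions[0] is the element
     * count, and steps[] holds the byte strides of each operand. */
    static void
    my_DOUBLE_square(char **args, const npy_intp *dimensions,
                     const npy_intp *steps, void *func)
    {
        char *in = args[0], *out = args[1];
        npy_intp n = dimensions[0];
        npy_intp is = steps[0], os = steps[1];
        (void)func;  /* unused, kept to match the ufunc loop signature */

        for (npy_intp i = 0; i < n; ++i, in += is, out += os) {
            const double x = *(const double *)in;
            *(double *)out = x * x;  /* scalar stand-in for a vectorized kernel */
        }
    }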