diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..bf4ce5a --- /dev/null +++ b/.flake8 @@ -0,0 +1,15 @@ +[flake8] +ignore = + E741, + W503, + E203, + D100, + D401, + D200, + D205, + D400, + D301 +max-line-length = 88 +docstring-convention=numpy +per-file-ignores = + ./test.py:D101,D102,D103 \ No newline at end of file diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml new file mode 100644 index 0000000..abcccff --- /dev/null +++ b/.github/workflows/github-actions.yml @@ -0,0 +1,60 @@ +name: Tests +on: + push: + branches: main + pull_request: + branches: main + +jobs: + unit-testing: + defaults: + run: + # Set default shell to login-bash + # This is required to properly activate the conda environment + # https://github.com/conda-incubator/setup-miniconda?tab=readme-ov-file#use-a-default-shell + shell: bash -el {0} + + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + + steps: + - name: Install xmllint on Linux + if: runner.os == 'Linux' + run: sudo apt-get install libxml2-utils + + - name: Get Safe PYTHON_VERSION + run: echo "PYTHON_VERSION=$(curl --location https://devguide.python.org/versions | xmllint --html --xpath '//section[@id="supported-versions"]//table/tbody/tr[count(//section[@id="supported-versions"]//table/tbody/tr[td[.="security"]]/preceding-sibling::*)]/td[1]/p/text()' - 2> /dev/null)" >> $GITHUB_ENV + + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Update environment.yml with PYTHON_VERSION + run: sed -r 's/- python[>=]+[0-9]+\.[0-9]+/- python==${{ env.PYTHON_VERSION }}/' environment.yml > environment_new.yml + + - name: Display environment_new.yml + run: cat environment_new.yml + + - name: Set up Miniconda and Install New Dependencies + uses: conda-incubator/setup-miniconda@v3 + with: + environment-file: environment_new.yml + + - name : Remove environment_new.yml + run: rm -rf environment_new.yml + + - name : Display Conda 
Environments + run: conda env list + + - name: Display All Installed Packages + run: conda list + + - name: Display Python Version + run: python -c "import sys; print(sys.version)" + + - name: Show Full Numba Environment + run: python -m numba -s + + - name: Run Unit Tests and Coverage + run: ./test.sh \ No newline at end of file diff --git a/.gitignore b/.gitignore index 0473167..6f5d8ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__ *csv .ipynb_checkpoints +.coverage \ No newline at end of file diff --git a/environment.yml b/environment.yml index b720b8d..c5d39d9 100644 --- a/environment.yml +++ b/environment.yml @@ -8,11 +8,11 @@ dependencies: - numba>=0.59.1 - fftw - pyfftw - - jax - panel - pandas>=0.20.0 - flake8>=3.7.7 - black>=22.1.0 - jupyterlab>=3.0 - matplotlib>=3.3.0 - - isort>=5.11.0 + - pytest>=4.4.1 + - coverage>=4.5.3 diff --git a/sdp/njit_sdp.py b/sdp/njit_sdp.py index 2b9c73b..ca88ee9 100644 --- a/sdp/njit_sdp.py +++ b/sdp/njit_sdp.py @@ -1,6 +1,5 @@ import numpy as np from numba import njit -import sdp @njit(fastmath=True) diff --git a/sdp/numpy_fft_sdp.py b/sdp/numpy_fft_sdp.py index 167f8bf..ae8c560 100644 --- a/sdp/numpy_fft_sdp.py +++ b/sdp/numpy_fft_sdp.py @@ -14,7 +14,8 @@ def sliding_dot_product(Q, T, order="F"): tmp = np.empty((2, shape), order=order) tmp[0, :m] = Q[::-1] tmp[0, m:] = 0.0 - tmp[1, :] = T + tmp[1, :n] = T + tmp[1, n:] = 0.0 fft_2d = np.fft.rfft(tmp, axis=-1) - return np.fft.irfft(np.multiply(fft_2d[0], fft_2d[1]))[m - 1 : n] + return np.fft.irfft(np.multiply(fft_2d[0], fft_2d[1]), n=shape)[m - 1 : n] diff --git a/test.py b/test.py old mode 100755 new mode 100644 index d37a52a..c72a6b3 --- a/test.py +++ b/test.py @@ -1,131 +1,194 @@ -#!/usr/bin/env python - -import argparse -import pkgutil -import ast import importlib import numpy as np -import numpy.testing as npt +import pkgutil +import pytest import sdp -import time -import warnings +import utils + +from numpy import testing as npt +from operator 
import eq, lt +from scipy.fft import next_fast_len + +# README +# The real FFT algorithm performs more efficiently when the length +# of the input array `arr` is composed of small prime factors. +# The next_fast_len(len(arr), real=True) function from SciPy returns +# the same length if len(arr) is composed of a subset of the +# prime factors 2, 3, 5. Therefore, these radices are +# considered the most efficient for the real FFT algorithm. + +# To ensure that the tests cover different cases, the following cases +# are considered: +# 1. len(T) is even, and len(T) == next_fast_len(len(T), real=True) +# 2. len(T) is odd, and len(T) == next_fast_len(len(T), real=True) +# 3. len(T) is even, and len(T) < next_fast_len(len(T), real=True) +# 4. len(T) is odd, and len(T) < next_fast_len(len(T), real=True) +# And 5. a special case of 1, where len(T) is a power of 2. + +# Therefore: +# 1. len(T) is composed of 2 and a subset of {3, 5} +# 2. len(T) is composed of a subset of {3, 5} +# 3. len(T) is composed of a subset of {7, 11, 13, ...} and 2 +# 4. len(T) is composed of a subset of {7, 11, 13, ...} +# 5. len(T) is a power of 2 + +# In some cases, the prime factors are raised to a power of a +# certain degree to increase the length of the array to be around +# 1000-2000. This allows us to test sliding_dot_product for a +# wider range of query lengths. + +test_inputs = [ + # Input format: + # ( + # len(T), + # remainder, # from `len(T) % 2` + # comparator, # for len(T) comparator next_fast_len(len(T), real=True) + # ) + ( + 2 * (3**2) * (5**3), + 0, + eq, + ), # = 2250, Even `len(T)`, and `len(T) == next_fast_len(len(T), real=True)` + ( + (3**2) * (5**3), + 1, + eq, + ), # = 1125, Odd `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`.
+ ( + 2 * 7 * 11 * 13, + 0, + lt, + ), # = 2002, Even `len(T)`, and `len(T) < next_fast_len(len(T), real=True)` + ( + 7 * 11 * 13, + 1, + lt, + ), # = 1001, Odd `len(T)`, and `len(T) < next_fast_len(len(T), real=True)` +] + + +def naive_sliding_dot_product(Q, T): + m = len(Q) + l = T.shape[0] - m + 1 + out = np.empty(l) + for i in range(l): + out[i] = np.dot(Q, T[i : i + m]) + return out + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_remainder(n_T, remainder, comparator): + assert n_T % 2 == remainder + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_comparator(n_T, remainder, comparator): + shape = next_fast_len(n_T, real=True) + assert comparator(n_T, shape) + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_sdp(n_T, remainder, comparator): + # test_sdp for cases 1-4 + + n_Q_prime = [ + 2, + 3, + 5, + 7, + 11, + 13, + 17, + 19, + 23, + 29, + 31, + 37, + 41, + 43, + 47, + 53, + 59, + 61, + 67, + 71, + 73, + 79, + 83, + 89, + 97, + ] + n_Q_power2 = [2, 4, 8, 16, 32, 64] + n_Q_values = n_Q_prime + n_Q_power2 + [n_T] + n_Q_values = sorted(n_Q for n_Q in set(n_Q_values) if n_Q <= n_T) + + modules = utils.import_sdp_mods() + for n_Q in n_Q_values: + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + ref = naive_sliding_dot_product(Q, T) + for mod in modules: + try: + comp = mod.sliding_dot_product(Q, T) + npt.assert_allclose(comp, ref) + except Exception as e: # pragma: no cover + msg = f"Error in {mod.__name__}, with n_Q={n_Q} and n_T={n_T}" + print(msg) + raise e + + return + + +def test_sdp_power2(): + # test for case 5. 
len(T) is power of 2 + pmin = 3 + pmax = 13 + + modules = utils.import_sdp_mods() + for mod in modules: + try: + for q in range(pmin, pmax + 1): + n_Q = 2**q + for p in range(q, pmax + 1): + n_T = 2**p + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + ref = naive_sliding_dot_product(Q, T) + comp = mod.sliding_dot_product(Q, T) + npt.assert_allclose(comp, ref) -def func_exists(mod_path, func_name): - try: - with open(mod_path, "r") as file: - module_content = file.read() - except FileNotFoundError: - return False # Module file not found + except Exception as e: # pragma: no cover + msg = f"Error in {mod.__name__}, with q={q} and p={p}" + print(msg) + raise e - try: - tree = ast.parse(module_content) - except SyntaxError: - return False # Syntax error in module + return - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef) and node.name == func_name: - return True - return False +def test_setup(): + Q = np.random.rand(3) + T = np.random.rand(10) -def import_sdp_mods(include=None, ignore=None): - mods = [] for m in sorted(list(pkgutil.iter_modules(sdp.__path__))): - mod_path = f"sdp/{m[1]}.py" - if ( - include is not None - and len(include) - and not any(mod in mod_path for mod in include) - ): - continue - if ( - ignore is not None - and len(ignore) - and any(mod in mod_path for mod in ignore) - ): - continue - - if ( - "sdp" in m[1] - and func_exists(mod_path, "sliding_dot_product") - and func_exists(mod_path, "setup") - ): + if "sdp" in m[1]: + # test if the module has the setup function + mod_path = f"sdp/{m[1]}.py" + try: + assert utils.func_exists(mod_path, "setup") + except AssertionError as e: # pragma: no cover + msg = f"Error in {mod_path}" + print(msg) + raise e + + # test if setup function returns None mod_name = f"sdp.{m[1]}" mod = importlib.import_module(mod_name) - mods.append(mod) - - return mods - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="./test.py -noheader -pmin 6 -pmax 23 -pdiff 3 pyfftw 
challenger" - ) - parser.add_argument("-noheader", default=False, action="store_true") - parser.add_argument("-timeout", default=5.0, type=float, help="Number of seconds to wait for a run before timing out") - parser.add_argument("-pequal", default=False, action="store_true", help="Compute `len(Q) == len(T)`") - parser.add_argument("-niter", default=4, type=int, help="Number of iterations to run") - parser.add_argument("-pmin", default=6, type=int, help="Minimum 2^p to use") - parser.add_argument("-pmax", default=27, type=int, help="Maximum 2^p to use") - parser.add_argument("-pdiff", default=100, type=int, help="Maximum deviation from the minimum 2^p allowed") - parser.add_argument("-ignore", default=None, nargs="*", help="Keyword of modules to match and ignore") - parser.add_argument("include", default=None, nargs="*", help="Keyword of modules to match and include") - args = parser.parse_args() - - modules = import_sdp_mods(args.include, args.ignore) - - noheader = args.noheader - timeout = args.timeout - if args.pequal: - skip_p_equal = 0 - else: - skip_p_equal = 1 - n_iter = args.niter - p_min = args.pmin - p_max = args.pmax - p_diff = args.pdiff - - if not noheader: - print(f"module,len_Q,len_T,n_iter,time", flush=True) - - start_timing = time.time() - for mod in modules: - mod_name = mod.__name__.removeprefix("sdp.").removesuffix("_sdp") - for i in range(p_min, p_max + 1): - Q = np.random.rand(2**i) - break_Q = False - for j in range(i + skip_p_equal, min(i + p_diff + 1, p_max + 1)): - T = np.random.rand(2**j) - break_T = False - - mod.setup(Q, T) - - elapsed_times = [] - for _ in range(n_iter): - start = time.time() - mod.sliding_dot_product(Q, T) - diff = time.time() - start - if diff > timeout: - break_T = True - warnings.warn(f"SKIPPED: {mod_name},{len(Q)},{len(T)},{diff})") - break - else: - elapsed_times.append(diff) - - if break_T: - if j == i + 1: - break_Q = True - break - - print( - 
f"{mod_name},{len(Q)},{len(T)},{len(elapsed_times)},{sum(elapsed_times) / len(elapsed_times)}", - flush=True, - ) - - if break_Q: - warnings.warn(f"SKIPPED: {mod_name},{len(Q)},>{len(T)},{diff})") - break - - elapsed_timing = np.round((time.time() - start_timing) / 60.0, 2) - warnings.warn(f"Test completed in {elapsed_timing} min") + try: + assert mod.setup(Q, T) is None + except AssertionError as e: # pragma: no cover + msg = f"Error in {mod_name}" + print(msg) + raise e + + return diff --git a/test.sh b/test.sh index 7b853ba..de3ad46 100755 --- a/test.sh +++ b/test.sh @@ -1,5 +1,72 @@ #!/bin/bash -rm -rf sdp/__pycache__ -./test.py > timing.csv -rm -rf sdp/__pycache__ +check_errs() +{ + # Function. Parameter 1 is the return code + if [[ $1 -ne "0" && $1 -ne "5" ]]; then + echo "Error: Test execution encountered exit code $1" + # as a bonus, make our script exit with the right error code. + exit $1 + fi +} + +clean_up() +{ + echo "Cleaning Up" + rm -rf "__pycache__/" + rm -rf "sdp/__pycache__/" +} + + +check_black() +{ + echo "Checking Black Code Formatting" + black --check --exclude=".*\.ipynb" --extend-exclude=".venv" --diff ./ + check_errs $? +} + +check_flake() +{ + echo "Checking Flake8 Style Guide Enforcement" + flake8 --extend-exclude=.venv ./ + check_errs $? +} + + +test_unit() +{ + echo "Testing Functions" + SECONDS=0 + pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning -W ignore::UserWarning test.py + check_errs $? + duration=$SECONDS + echo "Elapsed Time: $((duration / 60)) minutes and $((duration % 60)) seconds" +} + +test_coverage() +{ + echo "Disabling Numba JIT and CUDA Compiled Functions" + export NUMBA_DISABLE_JIT=1 + + echo "Testing Code Coverage" + coverage erase + + SECONDS=0 + coverage run --append --source=. -m pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning -W ignore::UserWarning test.py + check_errs $? 
+ duration=$SECONDS + + echo "Elapsed Time: $((duration / 60)) minutes and $((duration % 60)) seconds" + coverage report -m --fail-under=100 --skip-covered --omit=timing.py,utils.py || check_errs $? # propagate a failed coverage threshold as the script's exit code +} + + +clean_up +check_black +check_flake +test_unit + +clean_up +test_coverage + +clean_up \ No newline at end of file diff --git a/timing.py b/timing.py new file mode 100755 index 0000000..701e1c5 --- /dev/null +++ b/timing.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python + +import argparse +import numpy as np +import time +import warnings + +import utils + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="./timing.py -noheader -pmin 6 -pmax 23 -pdiff 3 pyfftw challenger" + ) + parser.add_argument("-noheader", default=False, action="store_true") + parser.add_argument( + "-timeout", + default=5.0, + type=float, + help="Number of seconds to wait for a run before timing out", + ) + parser.add_argument( + "-pequal", default=False, action="store_true", help="Compute `len(Q) == len(T)`" + ) + parser.add_argument( + "-niter", default=4, type=int, help="Number of iterations to run" + ) + parser.add_argument("-pmin", default=6, type=int, help="Minimum 2^p to use") + parser.add_argument("-pmax", default=27, type=int, help="Maximum 2^p to use") + parser.add_argument( + "-pdiff", + default=100, + type=int, + help="Maximum deviation from the minimum 2^p allowed", + ) + parser.add_argument( + "-ignore", + default=None, + nargs="*", + help="Keyword of modules to match and ignore", + ) + parser.add_argument( + "include", + default=None, + nargs="*", + help="Keyword of modules to match and include", + ) + args = parser.parse_args() + + modules = utils.import_sdp_mods(args.include, args.ignore) + + noheader = args.noheader + timeout = args.timeout + if args.pequal: + skip_p_equal = 0 + else: + skip_p_equal = 1 + n_iter = args.niter + p_min = args.pmin + p_max = args.pmax + p_diff = args.pdiff + + if not noheader: + print("module,len_Q,len_T,n_iter,time", flush=True) + +
start_timing = time.time() + for mod in modules: + mod_name = mod.__name__.removeprefix("sdp.").removesuffix("_sdp") + for i in range(p_min, p_max + 1): + Q = np.random.rand(2**i) + break_Q = False + for j in range(i + skip_p_equal, min(i + p_diff + 1, p_max + 1)): + T = np.random.rand(2**j) + break_T = False + + mod.setup(Q, T) + + elapsed_times = [] + for _ in range(n_iter): + start = time.time() + mod.sliding_dot_product(Q, T) + diff = time.time() - start + if diff > timeout: + break_T = True + warnings.warn(f"SKIPPED: {mod_name},{len(Q)},{len(T)},{diff})") + break + else: + elapsed_times.append(diff) + + if break_T: + if j == i + skip_p_equal: # first T tried for this Q timed out + break_Q = True + break + + info = ( + f"{mod_name},{len(Q)},{len(T)},{len(elapsed_times)}" + + f",{sum(elapsed_times) / len(elapsed_times)}" + ) + print(info, flush=True) + + if break_Q: + warnings.warn(f"SKIPPED: {mod_name},{len(Q)},>{len(T)},{diff})") + break + + elapsed_timing = np.round((time.time() - start_timing) / 60.0, 2) + warnings.warn(f"Test completed in {elapsed_timing} min") diff --git a/timing.sh b/timing.sh new file mode 100755 index 0000000..2e125c6 --- /dev/null +++ b/timing.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +rm -rf sdp/__pycache__ +./timing.py > timing.csv +rm -rf sdp/__pycache__ diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..15c9e77 --- /dev/null +++ b/utils.py @@ -0,0 +1,57 @@ +import ast +import importlib +import pkgutil +import warnings + +import sdp + + +def func_exists(mod_path, func_name): + try: + with open(mod_path, "r") as file: + module_content = file.read() + except FileNotFoundError as e: + warnings.warn(f"SKIPPED: {mod_path},{func_name}: \n{e}") + return False # Module file not found + + try: + tree = ast.parse(module_content) + except SyntaxError as e: + warnings.warn(f"SKIPPED: {mod_path},{func_name}: \n{e}") + return False # Syntax error in module + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name == func_name: + return True + e = f"Function
{func_name} not found in {mod_path}" + warnings.warn(f"SKIPPED: {mod_path},{func_name}: \n{e}") + return False + + +def import_sdp_mods(include=None, ignore=None): + mods = [] + for m in sorted(list(pkgutil.iter_modules(sdp.__path__))): + mod_path = f"sdp/{m[1]}.py" + if ( + include is not None + and len(include) + and not any(mod in mod_path for mod in include) + ): + continue + if ( + ignore is not None + and len(ignore) + and any(mod in mod_path for mod in ignore) + ): + continue + + if ( + "sdp" in m[1] + and func_exists(mod_path, "sliding_dot_product") + and func_exists(mod_path, "setup") + ): + mod_name = f"sdp.{m[1]}" + mod = importlib.import_module(mod_name) + mods.append(mod) + + return mods