diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..3d9bbfbab --- /dev/null +++ b/.flake8 @@ -0,0 +1,12 @@ +[flake8] +ignore = + # whitespace before ':' - doesn't work well with black + E203 + E402 + # line too long - let black worry about that + E501 + # do not assign a lambda expression, use a def + E731 + # line break before binary operator + W503 + diff --git a/docs/contributing.rst b/docs/contributing.rst index 03ccb5ecc..ee8baf562 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -94,6 +94,11 @@ You can install the necessary requirements using pip:: pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt +Then install ``sgkit`` in `editable mode <https://pip.pypa.io/en/stable/cli/pip_install/#editable-installs>`_:: + + pip install -e . + + If you have a Nvidia GPU you will need to make sure that it is configured properly, as in you have cudatoolkit installed, the instructions for the same can be found on `nvidia docs. `_ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..be634352c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,213 @@ +[build-system] +requires = ["setuptools >= 41.2", "setuptools_scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "sgkit" +authors = [{ name = "sgkit Developers", email = "project@sgkit.dev" }] +license = { text = "Apache" } +description = "Statistical genetics toolkit" +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering", +] +urls = { Homepage = "https://github.com/sgkit-dev/sgkit" } +requires-python = ">=3.10" +dependencies = [ + "numpy < 2.2", + 
"xarray < 2025.03.1", + "dask[array,dataframe] >= 2022.01.0, <= 2024.8.0", + "distributed >= 2022.01.0, <= 2024.8.0", + "scipy", + "zarr >= 2.10.0, != 2.11.0, != 2.11.1, != 2.11.2, < 3", + "numba", + "typing-extensions", + "fsspec != 2021.6.*", + "scikit-learn", + "pandas", + "setuptools >= 41.2", # For pkg_resources +] +dynamic = ["version"] + +[project.readme] +text = """ +**sgkit** is an open source project for analyzing and manipulating genetic +variation data.""" +content-type = "text/x-rst" + +[project.optional-dependencies] +# For plink we need dask[dataframe], we already have +# dask[array] in install_requires, and since +# https://github.com/pypa/pip/issues/4957, pip +# will essentially ignore dask[dataframe] in the extras. +# We can workaround this by either adding pip flag +# --use-feature 2020-resolver, or installing +# dask[dataframe] in the install_requires, or just listing +# the 2 missing dependencies from dataframe, the way we do +# here, when pip finally gets a resolver, this won't be +# a problem. Here we opt for listing the 2 dependencies +# since this is the least user invasive solution. 
+plink = ["partd", "bed-reader"] +bgen = ["rechunker", "cbgen > 1.0.5"] + +[tool.setuptools] +packages = ["sgkit"] +zip-safe = false # https://mypy.readthedocs.io/en/latest/installed_packages.html +include-package-data = true + +[tool.coverage.report] +fail_under = 100 + +[tool.pytest.ini_options] +addopts = "--doctest-modules --ignore=validation --cov-fail-under=100" +norecursedirs = [".eggs", "build", "docs"] +filterwarnings = ["error", "ignore::DeprecationWarning"] + + +[tool.isort] +profile = "black" +default_section = "THIRDPARTY" +known_first_party = ["sgkit"] +known_third_party = [ + "allel", + "dask", + "fire", + "glow", + "hail", + "hypothesis", + "invoke", + "msprime", + "numba", + "numpy", + "pandas", + "pkg_resources", + "pyspark", + "pytest", + "setuptools", + "sgkit_plink", + "sklearn", + "sphinx", + "typing_extensions", + "xarray", + "yaml", + "zarr", +] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +line_length = 88 + +[[tool.mypy.overrides]] +module = ["callee.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["dask.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["fsspec.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["dask_ml.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["numpy.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["pandas.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["numba.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["pytest.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["statsmodels.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["hypothesis.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["zarr.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["numcodecs.*"] +ignore_missing_imports = true + 
+[[tool.mypy.overrides]] +module = ["setuptools"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["sklearn.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["cbgen.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["rechunker.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["bed_reader.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["sphinx.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["yarl.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["allel.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["networkx.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["toolz.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["scipy.*"] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["sgkit.*"] +allow_redefinition = true + +[[tool.mypy.overrides]] +module = ["sgkit.*.tests.*"] +disallow_untyped_calls = false +disallow_untyped_defs = false +disallow_untyped_decorators = false + +[[tool.mypy.overrides]] +module = ["validation.*"] +ignore_errors = true diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 411c7febf..000000000 --- a/setup.cfg +++ /dev/null @@ -1,152 +0,0 @@ -[metadata] -name = sgkit -author = sgkit Developers -author_email = project@sgkit.dev -license = Apache -description = Statistical genetics toolkit -long_description_content_type=text/x-rst -long_description = - **sgkit** is an open source project for analyzing and manipulating genetic - variation data. 
-url = https://github.com/sgkit-dev/sgkit -classifiers = - Development Status :: 3 - Alpha - License :: OSI Approved :: Apache Software License - Operating System :: OS Independent - Intended Audience :: Science/Research - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Programming Language :: Python :: 3.12 - Topic :: Scientific/Engineering - -[options] -packages = sgkit -zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html -include_package_data = True -python_requires = >=3.10 -install_requires = - numpy < 2.2 - xarray < 2025.03.1 - dask[array,dataframe] >= 2022.01.0, <= 2024.8.0 - distributed >= 2022.01.0, <= 2024.8.0 - scipy - zarr >= 2.10.0, != 2.11.0, != 2.11.1, != 2.11.2, < 3 - numba - typing-extensions - fsspec != 2021.6.* - scikit-learn - pandas - setuptools >= 41.2 # For pkg_resources -setup_requires = - setuptools >= 41.2 - setuptools_scm - -[options.extras_require] -# For plink we need dask[dataframe], we already have -# dask[array] in install_requires, and since -# https://github.com/pypa/pip/issues/4957, pip -# will essentially ignore dask[dataframe] in the extras. -# We can workaround this by either adding pip flag -# --use-feature 2020-resolver, or installing -# dask[dataframe] in the install_requires, or just listing -# the 2 missing dependencies from dataframe, the way we do -# here, when pip finally gets a resolver, this won't be -# a problem. Here we opt for listing the 2 dependencies -# since this is the least user invasive solution. 
-plink = - partd - bed-reader -bgen = - rechunker - cbgen > 1.0.5 - -[coverage:report] -fail_under = 100 - -[tool:pytest] -addopts = --doctest-modules --ignore=validation --cov-fail-under=100 -norecursedirs = .eggs build docs -filterwarnings = - error - ignore::DeprecationWarning - -[flake8] -ignore = - # whitespace before ':' - doesn't work well with black - E203 - E402 - # line too long - let black worry about that - E501 - # do not assign a lambda expression, use a def - E731 - # line break before binary operator - W503 - -[isort] -profile = black -default_section = THIRDPARTY -known_first_party = sgkit -known_third_party = allel,dask,fire,glow,hail,hypothesis,invoke,msprime,numba,numpy,pandas,pkg_resources,pyspark,pytest,setuptools,sgkit_plink,sklearn,sphinx,typing_extensions,xarray,yaml,zarr -multi_line_output = 3 -include_trailing_comma = True -force_grid_wrap = 0 -use_parentheses = True -line_length = 88 - - -[mypy-callee.*] -ignore_missing_imports = True -[mypy-dask.*] -ignore_missing_imports = True -[mypy-fsspec.*] -ignore_missing_imports = True -[mypy-dask_ml.*] -ignore_missing_imports = True -[mypy-numpy.*] -ignore_missing_imports = True -[mypy-pandas.*] -ignore_missing_imports = True -[mypy-numba.*] -ignore_missing_imports = True -[mypy-pytest.*] -ignore_missing_imports = True -[mypy-statsmodels.*] -ignore_missing_imports = True -[mypy-hypothesis.*] -ignore_missing_imports = True -[mypy-zarr.*] -ignore_missing_imports = True -[mypy-numcodecs.*] -ignore_missing_imports = True -[mypy-setuptools] -ignore_missing_imports = True -[mypy-sklearn.*] -ignore_missing_imports = True -[mypy-cbgen.*] -ignore_missing_imports = True -[mypy-rechunker.*] -ignore_missing_imports = True -[mypy-bed_reader.*] -ignore_missing_imports = True -[mypy-sphinx.*] -ignore_missing_imports = True -[mypy-yarl.*] -ignore_missing_imports = True -[mypy-allel.*] -ignore_missing_imports = True -[mypy-networkx.*] -ignore_missing_imports = True -[mypy-toolz.*] -ignore_missing_imports = True 
-[mypy-scipy.*] -ignore_missing_imports = True -[mypy-sgkit.*] -allow_redefinition = True -[mypy-sgkit.*.tests.*] -disallow_untyped_calls = False -disallow_untyped_defs = False -disallow_untyped_decorators = False -[mypy-validation.*] -ignore_errors = True diff --git a/setup.py b/setup.py deleted file mode 100644 index 42c7d6404..000000000 --- a/setup.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env python -from setuptools import setup - -setup( - # The package name along with all the other metadata is specified in setup.cfg - # However, GitHub's dependency graph can't see the package unless we put this here. - name="sgkit", - use_scm_version=True, -) diff --git a/sgkit/stats/hwe.py b/sgkit/stats/hwe.py index 3dbfae8a6..cd42f25d3 100644 --- a/sgkit/stats/hwe.py +++ b/sgkit/stats/hwe.py @@ -137,7 +137,7 @@ def hardy_weinberg_test( genotype_count: Optional[Hashable] = variables.variant_genotype_count, ploidy: Optional[int] = None, alleles: Optional[int] = None, - merge: bool = True + merge: bool = True, ) -> Dataset: """Exact test for HWE as described in Wigginton et al. 2005 [1]. diff --git a/sgkit/stats/pc_relate.py b/sgkit/stats/pc_relate.py index 994fa6feb..c8a35dcde 100644 --- a/sgkit/stats/pc_relate.py +++ b/sgkit/stats/pc_relate.py @@ -39,7 +39,7 @@ def pc_relate( call_genotype: Hashable = variables.call_genotype, call_genotype_mask: Hashable = variables.call_genotype_mask, sample_pc: Hashable = variables.sample_pca_projection, - merge: bool = True + merge: bool = True, ) -> xr.Dataset: """Compute PC-Relate as described in Conomos, et al. 2016 [1].