Skip to content

Commit 9b3ca36

Browse files
committed
Deprecate functions for reading VCF
1 parent 155fbb7 commit 9b3ca36

File tree

5 files changed

+71
-0
lines changed

5 files changed

+71
-0
lines changed

docs/api.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ PLINK
3535
VCF (reading)
3636
-------------
3737

38+
.. deprecated:: 0.9.0
39+
Functions for reading VCF are deprecated; please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package instead.
40+
3841
.. currentmodule:: sgkit.io.vcf
3942
.. autosummary::
4043
:toctree: generated/

docs/vcf.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
Reading VCF
44
===========
55

6+
.. deprecated:: 0.9.0
7+
Functions for reading VCF are deprecated; please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package instead.
8+
69
.. contents:: Table of contents:
710
:local:
811

sgkit/io/vcf/vcf_partition.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from typing import Any, Dict, Optional, Sequence, Union
23

34
import dask
@@ -78,6 +79,9 @@ def partition_into_regions(
7879
"""
7980
Calculate genomic region strings to partition a compressed VCF or BCF file into roughly equal parts.
8081
82+
.. deprecated:: 0.9.0
83+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
84+
8185
A ``.tbi`` or ``.csi`` file is used to find BGZF boundaries in the compressed VCF file, which are then
8286
used to divide the file into parts.
8387
@@ -118,6 +122,13 @@ def partition_into_regions(
118122
ValueError
119123
If either of ``num_parts`` or ``target_part_size`` is not a positive integer.
120124
"""
125+
126+
warnings.warn(
127+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
128+
DeprecationWarning,
129+
stacklevel=2,
130+
)
131+
121132
if num_parts is None and target_part_size is None:
122133
raise ValueError("One of num_parts or target_part_size must be specified")
123134

sgkit/io/vcf/vcf_reader.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,9 @@ def vcf_to_zarrs(
679679
) -> Sequence[str]:
680680
"""Convert VCF files to multiple Zarr on-disk stores, one per region.
681681
682+
.. deprecated:: 0.9.0
683+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
684+
682685
Parameters
683686
----------
684687
input
@@ -754,6 +757,12 @@ def vcf_to_zarrs(
754757
A list of URLs to the Zarr outputs.
755758
"""
756759

760+
warnings.warn(
761+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
762+
DeprecationWarning,
763+
stacklevel=2,
764+
)
765+
757766
output_storage_options = output_storage_options or {}
758767

759768
tasks = []
@@ -798,6 +807,9 @@ def concat_zarrs(
798807
) -> None:
799808
"""Concatenate multiple Zarr stores into a single Zarr store.
800809
810+
.. deprecated:: 0.9.0
811+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
812+
801813
The Zarr stores are concatenated and rechunked to produce a single combined store.
802814
803815
Parameters
@@ -814,6 +826,12 @@ def concat_zarrs(
814826
the chunk length of the first input Zarr store is used.
815827
"""
816828

829+
warnings.warn(
830+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
831+
DeprecationWarning,
832+
stacklevel=2,
833+
)
834+
817835
vars_to_rechunk = []
818836
vars_to_copy = []
819837
storage_options = storage_options or {}
@@ -856,6 +874,9 @@ def vcf_to_zarr(
856874
) -> None:
857875
"""Convert VCF files to a single Zarr on-disk store.
858876
877+
.. deprecated:: 0.9.0
878+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
879+
859880
By default, the conversion is carried out in parallel, by writing the output for each
860881
part to a separate, intermediate Zarr store in ``tempdir``. Then, in a second step
861882
the intermediate outputs are concatenated and rechunked into the final output Zarr
@@ -955,6 +976,12 @@ def vcf_to_zarr(
955976
so for large VCF files this can be slow.
956977
"""
957978

979+
warnings.warn(
980+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
981+
DeprecationWarning,
982+
stacklevel=2,
983+
)
984+
958985
if temp_chunk_length is not None:
959986
if chunk_length % temp_chunk_length != 0:
960987
raise ValueError(
@@ -1039,6 +1066,9 @@ def read_vcf(
10391066
) -> xr.Dataset:
10401067
"""Read VCF dataset.
10411068
1069+
.. deprecated:: 0.9.0
1070+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
1071+
10421072
A convenience for :func:`vcf_to_zarr` followed by :func:`sgkit.load_dataset`.
10431073
Note that the output Zarr store in ``tempdir`` is not deleted after this function
10441074
returns, so must be deleted manually by the user.
@@ -1119,6 +1149,12 @@ def read_vcf(
11191149
11201150
"""
11211151

1152+
warnings.warn(
1153+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
1154+
DeprecationWarning,
1155+
stacklevel=2,
1156+
)
1157+
11221158
# Need to retain zarr file backing the returned dataset
11231159
with temporary_directory(
11241160
prefix="read_vcf_",
@@ -1166,6 +1202,9 @@ def zarr_array_sizes(
11661202
) -> Dict[str, Any]:
11671203
"""Make a pass through a VCF/BCF file to determine sizes for storage in Zarr.
11681204
1205+
.. deprecated:: 0.9.0
1206+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
1207+
11691208
By default, the input is processed in parts in parallel. However, if the input
11701209
is a single file, ``target_part_size`` is None, and ``regions`` is None,
11711210
then the operation will be carried out sequentially.
@@ -1188,6 +1227,12 @@ def zarr_array_sizes(
11881227
are not None.
11891228
"""
11901229

1230+
warnings.warn(
1231+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
1232+
DeprecationWarning,
1233+
stacklevel=2,
1234+
)
1235+
11911236
return process_vcfs(
11921237
input,
11931238
zarr_array_sizes_sequential,

sgkit/io/vcfzarr_reader.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ def read_scikit_allel_vcfzarr(
4949
) -> xr.Dataset:
5050
"""Read a VCF Zarr file created using scikit-allel.
5151
52+
.. deprecated:: 0.9.0
53+
Functions for reading VCF are deprecated, please use the `bio2zarr <https://github.com/sgkit-dev/bio2zarr>`_ package.
54+
5255
Loads VCF variant, sample, and genotype data as Dask arrays within a Dataset
5356
from a Zarr file created using scikit-allel's ``vcf_to_zarr`` function.
5457
@@ -90,6 +93,12 @@ def read_scikit_allel_vcfzarr(
9093
- :data:`sgkit.variables.call_genotype_mask_spec` (variants, samples, ploidy)
9194
"""
9295

96+
warnings.warn(
97+
"Functions for reading VCF are deprecated, please use the bio2zarr package.",
98+
DeprecationWarning,
99+
stacklevel=2,
100+
)
101+
93102
vcfzarr = zarr.open_group(str(path), mode="r")
94103

95104
# don't fix strings since it requires a pass over the whole dataset

0 commit comments

Comments
 (0)