Skip to content

Commit 97578d1

Browse files
tomwhite authored and jeromekelleher committed
Remove index creation as it is now done by bio2zarr
1 parent 598c441 commit 97578d1

File tree

5 files changed

+7
-53
lines changed

5 files changed

+7
-53
lines changed

tests/test_stats.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from io import StringIO
33

44
import pytest
5+
import zarr
56
from bio2zarr import vcf2zarr
67

78
from vcztools.stats import nrecords, stats
@@ -40,5 +41,9 @@ def test_stats__no_index(tmp_path):
4041
vcz = tmp_path.joinpath("intermediate.vcz")
4142
vcf2zarr.convert([original], vcz, worker_processes=0, local_alleles=False)
4243

44+
# delete the index created by vcf2zarr
45+
root = zarr.open(vcz, mode="a")
46+
del root["region_index"]
47+
4348
with pytest.raises(ValueError, match="Could not load 'region_index' variable."):
4449
stats(vcz, StringIO())

tests/utils.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
from bio2zarr import vcf2zarr
99

10-
from vcztools.regions import create_index
11-
1210

1311
@contextmanager
1412
def open_vcf(path) -> Iterator[cyvcf2.VCF]:
@@ -152,5 +150,4 @@ def vcz_path_cache(vcf_path):
152150
vcf2zarr.convert(
153151
[vcf_path], cached_vcz_path, worker_processes=0, local_alleles=False
154152
)
155-
create_index(cached_vcz_path)
156153
return cached_vcz_path

vcztools/cli.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@
33
import click
44

55
from . import query as query_module
6-
from . import regions, vcf_writer
76
from . import stats as stats_module
7+
from . import vcf_writer
88

99
include = click.option(
1010
"-i", "--include", type=str, help="Filter expression to include variant sites."
@@ -42,8 +42,6 @@ def index(path, nrecords, stats):
4242
stats_module.nrecords(path, sys.stdout)
4343
elif stats:
4444
stats_module.stats(path, sys.stdout)
45-
else:
46-
regions.create_index(path)
4745

4846

4947
@click.command

vcztools/regions.py

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,57 +1,11 @@
11
import re
22
from typing import Any
33

4-
import numcodecs
54
import numpy as np
65
import pandas as pd
7-
import zarr
86
from pyranges import PyRanges
97

108

11-
def create_index(vcz) -> None:
12-
"""Create an index to support efficient region queries."""
13-
14-
root = zarr.open(vcz, mode="r+")
15-
16-
contig = root["variant_contig"]
17-
pos = root["variant_position"]
18-
length = root["variant_length"]
19-
20-
assert contig.cdata_shape == pos.cdata_shape
21-
22-
index = []
23-
24-
for v_chunk in range(pos.cdata_shape[0]):
25-
c = contig.blocks[v_chunk]
26-
p = pos.blocks[v_chunk]
27-
e = p + length.blocks[v_chunk] - 1
28-
29-
# create a row for each contig in the chunk
30-
d = np.diff(c, append=-1)
31-
c_start_idx = 0
32-
for c_end_idx in np.nonzero(d)[0]:
33-
assert c[c_start_idx] == c[c_end_idx]
34-
index.append(
35-
(
36-
v_chunk, # chunk index
37-
c[c_start_idx], # contig ID
38-
p[c_start_idx], # start
39-
p[c_end_idx], # end
40-
np.max(e[c_start_idx : c_end_idx + 1]), # max end
41-
c_end_idx - c_start_idx + 1, # num records
42-
)
43-
)
44-
c_start_idx = c_end_idx + 1
45-
46-
index = np.array(index, dtype=np.int32)
47-
root.array(
48-
"region_index",
49-
data=index,
50-
compressor=numcodecs.Blosc("zstd", clevel=9, shuffle=0),
51-
overwrite=True,
52-
)
53-
54-
559
def parse_region_string(region: str) -> tuple[str, int | None, int | None]:
5610
"""Return the contig, start position and end position from a region string."""
5711
if re.search(r":\d+-\d*$", region):

vcztools/stats.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def stats(vcz, output):
1818
if "region_index" not in root:
1919
raise ValueError(
2020
"Could not load 'region_index' variable. "
21-
"Use 'vcztools index' to create an index."
21+
"Use 'vcz2zarr' to create an index."
2222
)
2323

2424
with open_file_like(output) as output:

0 commit comments

Comments
 (0)