Skip to content

Commit 0fd66c2

Browse files
Switch to "/" as the default dimension separator
Aligns with the Zarr v3 defaults. This seems to require zarr >= 2.17.
1 parent 6b682a6 commit 0fd66c2

File tree

4 files changed

+31
-8
lines changed

4 files changed

+31
-8
lines changed

bio2zarr/vcf.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,15 +1512,13 @@ def __init__(self, path, icf, schema, dimension_separator=None):
15121512
self.path = pathlib.Path(path)
15131513
self.icf = icf
15141514
self.schema = schema
1515-
store = zarr.DirectoryStore(self.path)
15161515
# Default to using nested directories following the Zarr v3 default.
1517-
self.dimension_separator = (
1518-
"/" if dimension_separator is None else dimension_separator
1519-
)
1516+
# This seems to require version 2.17+ to work properly
1517+
self.dimension_separator = "/" if dimension_separator is None else dimension_separator
1518+
store = zarr.DirectoryStore(self.path)
15201519
self.root = zarr.group(store=store)
15211520

15221521
def init_array(self, variable):
1523-
# print("CREATE", variable)
15241522
object_codec = None
15251523
if variable.dtype == "O":
15261524
object_codec = numcodecs.VLenUTF8()
@@ -1865,6 +1863,7 @@ def encode(
18651863
variants_chunk_size=None,
18661864
samples_chunk_size=None,
18671865
max_v_chunks=None,
1866+
dimension_separator=None,
18681867
max_memory=None,
18691868
worker_processes=1,
18701869
show_progress=False,
@@ -1888,7 +1887,7 @@ def encode(
18881887
if zarr_path.exists():
18891888
logger.warning(f"Deleting existing {zarr_path}")
18901889
shutil.rmtree(zarr_path)
1891-
vzw = VcfZarrWriter(zarr_path, icf, schema)
1890+
vzw = VcfZarrWriter(zarr_path, icf, schema, dimension_separator=dimension_separator)
18921891
vzw.init()
18931892
vzw.encode(
18941893
max_v_chunks=max_v_chunks,

requirements/development.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ pytest-coverage
88
pytest-xdist
99
sgkit
1010
tabulate
11-
tqdm
11+
tqdm
12+
zarr>=2.17

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ include_package_data = True
2828
python_requires = >=3.9
2929
install_requires =
3030
numpy
31-
zarr >= 2.10.0, != 2.11.0, != 2.11.1, != 2.11.2
31+
zarr >= 2.17
3232
click
3333
tabulate
3434
tqdm

tests/test_vcf.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,29 @@ def test_exploded_metadata_mismatch(self, tmpdir, icf_path, version):
9999
vcf.IcfMetadata.fromdict(d)
100100

101101

102+
class TestEncodeDimensionSeparator:
103+
104+
@pytest.mark.parametrize("dimension_separator", [None, "/"])
105+
def test_directories(self, tmp_path, icf_path, dimension_separator):
106+
zarr_path = tmp_path / "zarr"
107+
vcf.encode(icf_path, zarr_path, dimension_separator=dimension_separator)
108+
# print(zarr_path)
109+
chunk_file = zarr_path / "call_genotype" / "0" / "0" / "0"
110+
assert chunk_file.exists()
111+
112+
def test_files(self, tmp_path, icf_path):
113+
zarr_path = tmp_path / "zarr"
114+
vcf.encode(icf_path, zarr_path, dimension_separator=".")
115+
chunk_file = zarr_path / "call_genotype" / "0.0.0"
116+
assert chunk_file.exists()
117+
118+
@pytest.mark.parametrize("dimension_separator", ["\\", "X", []])
119+
def test_bad_value(self, tmp_path, icf_path, dimension_separator):
120+
zarr_path = tmp_path / "zarr"
121+
with pytest.raises(ValueError):
122+
vcf.encode(icf_path, zarr_path, dimension_separator=dimension_separator)
123+
124+
102125
class TestDefaultSchema:
103126
def test_format_version(self, schema):
104127
assert schema["format_version"] == vcf.ZARR_SCHEMA_FORMAT_VERSION

0 commit comments

Comments
 (0)