Skip to content

Commit aece28f

Browse files
Revert to Zstd as ICF encoder.
1 parent f04ba58 commit aece28f

File tree

4 files changed

+20
-7
lines changed

4 files changed

+20
-7
lines changed

bio2zarr/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def list_commands(self, ctx):
7575
"--compressor",
7676
type=click.Choice(["lz4", "zstd"]),
7777
default=None,
78-
help="Codec to use for compressing column chunks",
78+
help="Codec to use for compressing column chunks (Default=zstd)."
7979
)
8080

8181
# Note: -l and -w were chosen when these were called "width" and "length".

bio2zarr/vcf.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ class VcfPartition:
151151

152152
ICF_METADATA_FORMAT_VERSION = "0.2"
153153
ICF_DEFAULT_COMPRESSOR = numcodecs.Blosc(
154-
cname="lz4", clevel=7, shuffle=numcodecs.Blosc.NOSHUFFLE
154+
cname="zstd", clevel=7, shuffle=numcodecs.Blosc.NOSHUFFLE
155155
)
156156

157157

@@ -1505,7 +1505,9 @@ def __init__(self, path, icf, schema, dimension_separator=None):
15051505
self.schema = schema
15061506
store = zarr.DirectoryStore(self.path)
15071507
# Default to using nested directories following the Zarr v3 default.
1508-
self.dimension_separator = "/" if dimension_separator is None else dimension_separator
1508+
self.dimension_separator = (
1509+
"/" if dimension_separator is None else dimension_separator
1510+
)
15091511
self.root = zarr.group(store=store)
15101512

15111513
def init_array(self, variable):
@@ -1521,7 +1523,7 @@ def init_array(self, variable):
15211523
compressor=numcodecs.get_codec(variable.compressor),
15221524
filters=[numcodecs.get_codec(filt) for filt in variable.filters],
15231525
object_codec=object_codec,
1524-
dimension_separator=self.dimension_separator
1526+
dimension_separator=self.dimension_separator,
15251527
)
15261528
# Dimension names are part of the spec in Zarr v3
15271529
a.attrs["_ARRAY_DIMENSIONS"] = variable.dimensions

tests/test_icf.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,16 @@ def test_compressor_explode(self, tmp_path, compressor):
275275
icf = self.run_explode(tmp_path, compressor=compressor)
276276
assert icf.metadata.compressor == compressor.get_config()
277277

278+
def test_default_compressor_explode(self, tmp_path):
279+
icf = self.run_explode(tmp_path)
280+
assert icf.metadata.compressor == vcf.ICF_DEFAULT_COMPRESSOR.get_config()
281+
assert icf.metadata.compressor["cname"] == "zstd"
282+
283+
def test_default_compressor_dexplode(self, tmp_path):
284+
icf = self.run_dexplode(tmp_path)
285+
assert icf.metadata.compressor == vcf.ICF_DEFAULT_COMPRESSOR.get_config()
286+
assert icf.metadata.compressor["cname"] == "zstd"
287+
278288
@pytest.mark.parametrize(
279289
"compressor",
280290
[
@@ -340,7 +350,8 @@ def test_empty_chunk_file(self, tmp_path):
340350
with pytest.raises(RuntimeError, match="blosc"):
341351
icf["POS"].values
342352

343-
@pytest.mark.parametrize("length", [10, 100, 200, 210])
353+
# Chunk file is 195 long
354+
@pytest.mark.parametrize("length", [10, 100, 190, 194])
344355
def test_truncated_chunk_file(self, tmp_path, length):
345356
icf_path = tmp_path / "icf"
346357
vcf.explode(icf_path, [self.data_path])
@@ -359,7 +370,7 @@ def test_chunk_incorrect_length(self, tmp_path):
359370
icf_path = tmp_path / "icf"
360371
vcf.explode(icf_path, [self.data_path])
361372
chunk_file = icf_path / "POS" / "p0" / "2"
362-
compressor = numcodecs.Blosc(cname="lz4")
373+
compressor = numcodecs.Blosc(cname="zstd")
363374
with open(chunk_file, "rb") as f:
364375
pkl = compressor.decode(f.read())
365376
x = pickle.loads(pkl)

tests/test_vcf_examples.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -841,7 +841,7 @@ def test_split_explode(tmp_path):
841841
pcvcf = vcf.IntermediateColumnarFormat(out)
842842
assert pcvcf.columns["POS"].vcf_field.summary.asdict() == {
843843
"num_chunks": 3,
844-
"compressed_size": 630,
844+
"compressed_size": 587,
845845
"uncompressed_size": 1008,
846846
"max_number": 1,
847847
"max_value": 1235237,

0 commit comments

Comments
 (0)