Skip to content

Commit 00140b0

Browse files
committed
Remove redundant size from INT32_{FILL,MISSING}
1 parent 0e20acb commit 00140b0

File tree

5 files changed: 15 additions and 15 deletions.

sgkit/io/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from ..typing import ArrayLike, DType
99
from ..utils import encode_array, max_str_len
1010

11-
INT32_MISSING, INT32_FILL = -1, -2
11+
INT_MISSING, INT_FILL = -1, -2
1212

1313
FLOAT32_MISSING, FLOAT32_FILL = np.array([0x7F800001, 0x7F800002], dtype=np.int32).view(
1414
np.float32

sgkit/io/vcf/vcf_reader.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525

2626
from sgkit import variables
2727
from sgkit.io.dataset import load_dataset
28-
from sgkit.io.utils import FLOAT32_FILL, INT32_FILL, INT32_MISSING, STR_FILL
28+
from sgkit.io.utils import FLOAT32_FILL, INT_FILL, INT_MISSING, STR_FILL
2929
from sgkit.io.vcf import partition_into_regions
3030
from sgkit.io.vcf.utils import build_url, chunks, temporary_directory, url_filename
3131
from sgkit.io.vcfzarr_reader import (
@@ -136,7 +136,7 @@ def _vcf_type_to_numpy_type_and_fill_value(
136136
if vcf_type == "Flag":
137137
return "bool", False
138138
elif vcf_type == "Integer":
139-
return "i4", INT32_FILL
139+
return "i4", INT_FILL
140140
# the VCF spec defines Float as 32 bit, and in BCF is stored as 32 bit
141141
elif vcf_type == "Float":
142142
return "f4", FLOAT32_FILL
@@ -265,7 +265,7 @@ def update_dataset(self, ds: xr.Dataset) -> None:
265265
# cyvcf2 represents missing Integer values as the minimum int32 value
266266
# so change these to be the missing value
267267
if self.array.dtype == np.int32:
268-
self.array[self.array == np.iinfo(np.int32).min] = INT32_MISSING
268+
self.array[self.array == np.iinfo(np.int32).min] = INT_MISSING
269269

270270
ds[self.variable_name] = (self.dims, self.array)
271271
if len(self.description) > 0:

sgkit/io/vcfzarr_reader.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
from fsspec import get_mapper
2525
from scipy.special import comb
2626

27-
from sgkit.io.utils import INT32_FILL, concatenate_and_rechunk, str_is_int
27+
from sgkit.io.utils import INT_FILL, concatenate_and_rechunk, str_is_int
2828

2929
from ..model import DIM_SAMPLE, DIM_VARIANT, create_genotype_call_dataset
3030
from ..typing import ArrayLike, PathType
@@ -300,7 +300,7 @@ def _add_field_to_dataset(
300300

301301
def _replace_fill_values(arr: ArrayLike) -> ArrayLike:
302302
if arr.dtype == np.int32:
303-
arr[arr == -1] = INT32_FILL
303+
arr[arr == -1] = INT_FILL
304304
return arr
305305

306306

sgkit/tests/io/vcf/test_vcf_reader.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from numpy.testing import assert_allclose, assert_array_equal
99

1010
from sgkit import load_dataset
11-
from sgkit.io.utils import FLOAT32_FILL, INT32_FILL, INT32_MISSING
11+
from sgkit.io.utils import FLOAT32_FILL, INT_FILL, INT_MISSING
1212
from sgkit.io.vcf import (
1313
MaxAltAllelesExceededWarning,
1414
partition_into_regions,
@@ -751,7 +751,7 @@ def test_vcf_to_zarr__fields(shared_datadir, tmp_path):
751751
)
752752
ds = xr.open_zarr(output)
753753

754-
missing, fill = INT32_MISSING, INT32_FILL
754+
missing, fill = INT_MISSING, INT_FILL
755755
assert_array_equal(ds["variant_DP"], [fill, fill, 14, 11, 10, 13, 9, fill, fill])
756756
assert ds["variant_DP"].attrs["comment"] == "Total Depth"
757757

@@ -832,7 +832,7 @@ def test_vcf_to_zarr__field_defs(shared_datadir, tmp_path):
832832
)
833833
ds = xr.open_zarr(output)
834834

835-
fill = INT32_FILL
835+
fill = INT_FILL
836836
assert_array_equal(ds["variant_DP"], [fill, fill, 14, 11, 10, 13, 9, fill, fill])
837837
assert ds["variant_DP"].attrs["comment"] == "Combined depth across samples"
838838

@@ -862,7 +862,7 @@ def test_vcf_to_zarr__field_number_A(shared_datadir, tmp_path):
862862
)
863863
ds = xr.open_zarr(output)
864864

865-
fill = INT32_FILL
865+
fill = INT_FILL
866866
assert_array_equal(
867867
ds["variant_AC"],
868868
[
@@ -901,7 +901,7 @@ def test_vcf_to_zarr__field_number_R(shared_datadir, tmp_path):
901901
variants=slice(10002764, 10002793)
902902
)
903903

904-
fill = INT32_FILL
904+
fill = INT_FILL
905905
ad = np.array(
906906
[
907907
[[40, 14, 0, fill]],
@@ -930,7 +930,7 @@ def test_vcf_to_zarr__field_number_G(shared_datadir, tmp_path):
930930
variants=slice(10002764, 10002793)
931931
)
932932

933-
fill = INT32_FILL
933+
fill = INT_FILL
934934
pl = np.array(
935935
[
936936
[[319, 0, 1316, 440, 1358, 1798, fill, fill, fill, fill]],
@@ -973,7 +973,7 @@ def test_vcf_to_zarr__field_number_fixed(shared_datadir, tmp_path):
973973
)
974974
ds = xr.open_zarr(output)
975975

976-
missing, fill = INT32_MISSING, INT32_FILL
976+
missing, fill = INT_MISSING, INT_FILL
977977
assert_array_equal(
978978
ds["call_HQ"],
979979
[

sgkit/tests/io/vcf/test_vcf_roundtrip.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
from xarray import Dataset
3030

3131
import sgkit as sg
32-
from sgkit.io.utils import INT32_FILL, INT32_MISSING
32+
from sgkit.io.utils import INT_FILL, INT_MISSING
3333
from sgkit.io.vcf import vcf_to_zarr
3434

3535

@@ -78,7 +78,7 @@ def fix_missing_fields(ds: Dataset) -> Dataset:
7878
# scikit-allel doesn't distinguish between missing and fill fields, so set all to fill
7979
for var in ds.data_vars:
8080
if ds[var].dtype == np.int32: # type: ignore[comparison-overlap]
81-
ds[var] = ds[var].where(ds[var] != INT32_MISSING, INT32_FILL)
81+
ds[var] = ds[var].where(ds[var] != INT_MISSING, INT_FILL)
8282
return ds
8383

8484

0 commit comments

Comments
 (0)