Skip to content

Commit 0e20acb

Browse files
committed
Use small negative sentinels for int
1 parent 217fd16 commit 0e20acb

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

sgkit/io/utils.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,7 @@
88
from ..typing import ArrayLike, DType
99
from ..utils import encode_array, max_str_len
1010

11-
# These values are based on BCF conventions
12-
INT32_MISSING, INT32_FILL = np.array(
13-
# Equivalent to 0x80000000, 0x80000001
14-
[np.iinfo(np.int32).min, np.iinfo(np.int32).min + 1],
15-
dtype=np.int32,
16-
)
11+
INT32_MISSING, INT32_FILL = -1, -2
1712

1813
FLOAT32_MISSING, FLOAT32_FILL = np.array([0x7F800001, 0x7F800002], dtype=np.int32).view(
1914
np.float32

sgkit/io/vcf/vcf_reader.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525

2626
from sgkit import variables
2727
from sgkit.io.dataset import load_dataset
28-
from sgkit.io.utils import FLOAT32_FILL, INT32_FILL, STR_FILL
28+
from sgkit.io.utils import FLOAT32_FILL, INT32_FILL, INT32_MISSING, STR_FILL
2929
from sgkit.io.vcf import partition_into_regions
3030
from sgkit.io.vcf.utils import build_url, chunks, temporary_directory, url_filename
3131
from sgkit.io.vcfzarr_reader import (
@@ -262,6 +262,11 @@ def truncate_array(self, length: int) -> None:
262262
self.array = self.array[:length]
263263

264264
def update_dataset(self, ds: xr.Dataset) -> None:
265+
# cyvcf2 represents missing Integer values as the minimum int32 value
266+
# so change these to be the missing value
267+
if self.array.dtype == np.int32:
268+
self.array[self.array == np.iinfo(np.int32).min] = INT32_MISSING
269+
265270
ds[self.variable_name] = (self.dims, self.array)
266271
if len(self.description) > 0:
267272
ds[self.variable_name].attrs["comment"] = self.description

0 commit comments

Comments
 (0)