Skip to content

Commit ebfbbf0

Browse files
benjeffery authored and jeromekelleher committed
vcf_field -> source
1 parent 9afe435 commit ebfbbf0

File tree

4 files changed

+17
-25
lines changed

4 files changed

+17
-25
lines changed

bio2zarr/icf.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -844,7 +844,6 @@ def convert_local_allele_field_types(fields):
844844
dimensions = gt.dimensions[:-1]
845845

846846
la = vcz.ZarrArraySpec.new(
847-
vcf_field=None,
848847
name="call_LA",
849848
dtype="i1",
850849
shape=gt.shape,
@@ -859,7 +858,7 @@ def convert_local_allele_field_types(fields):
859858
if ad is not None:
860859
# TODO check if call_LAD is in the list already
861860
ad.name = "call_LAD"
862-
ad.vcf_field = None
861+
ad.source = None
863862
ad.shape = (*shape, 2)
864863
ad.chunks = (*chunks, 2)
865864
ad.dimensions = (*dimensions, "local_alleles")
@@ -869,7 +868,7 @@ def convert_local_allele_field_types(fields):
869868
if pl is not None:
870869
# TODO check if call_LPL is in the list already
871870
pl.name = "call_LPL"
872-
pl.vcf_field = None
871+
pl.source = None
873872
pl.shape = (*shape, 3)
874873
pl.chunks = (*chunks, 3)
875874
pl.description += " (local-alleles)"
@@ -1060,13 +1059,13 @@ def spec_from_field(field, array_name=None):
10601059
def fixed_field_spec(
10611060
name,
10621061
dtype,
1063-
vcf_field=None,
1062+
source=None,
10641063
shape=(m,),
10651064
dimensions=("variants",),
10661065
chunks=None,
10671066
):
10681067
return vcz.ZarrArraySpec.new(
1069-
vcf_field=vcf_field,
1068+
source=source,
10701069
name=name,
10711070
dtype=dtype,
10721071
shape=shape,
@@ -1137,7 +1136,6 @@ def fixed_field_spec(
11371136
dimensions = ["variants", "samples"]
11381137
array_specs.append(
11391138
vcz.ZarrArraySpec.new(
1140-
vcf_field=None,
11411139
name="call_genotype_phased",
11421140
dtype="bool",
11431141
shape=list(shape),
@@ -1151,7 +1149,6 @@ def fixed_field_spec(
11511149
dimensions += ["ploidy"]
11521150
array_specs.append(
11531151
vcz.ZarrArraySpec.new(
1154-
vcf_field=None,
11551152
name="call_genotype",
11561153
dtype=gt_field.smallest_dtype(),
11571154
shape=list(shape),
@@ -1162,7 +1159,6 @@ def fixed_field_spec(
11621159
)
11631160
array_specs.append(
11641161
vcz.ZarrArraySpec.new(
1165-
vcf_field=None,
11661162
name="call_genotype_mask",
11671163
dtype="bool",
11681164
shape=list(shape),

bio2zarr/plink.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def generate_schema(
8383

8484
array_specs = [
8585
vcz.ZarrArraySpec.new(
86-
vcf_field="position",
86+
source="position",
8787
name="variant_position",
8888
dtype="i4",
8989
shape=[m],
@@ -92,7 +92,6 @@ def generate_schema(
9292
description=None,
9393
),
9494
vcz.ZarrArraySpec.new(
95-
vcf_field=None,
9695
name="variant_allele",
9796
dtype="O",
9897
shape=[m, 2],
@@ -101,7 +100,6 @@ def generate_schema(
101100
description=None,
102101
),
103102
vcz.ZarrArraySpec.new(
104-
vcf_field=None,
105103
name="call_genotype_phased",
106104
dtype="bool",
107105
shape=[m, n],
@@ -113,7 +111,6 @@ def generate_schema(
113111
description=None,
114112
),
115113
vcz.ZarrArraySpec.new(
116-
vcf_field=None,
117114
name="call_genotype",
118115
dtype="i1",
119116
shape=[m, n, 2],
@@ -126,7 +123,6 @@ def generate_schema(
126123
description=None,
127124
),
128125
vcz.ZarrArraySpec.new(
129-
vcf_field=None,
130126
name="call_genotype_mask",
131127
dtype="bool",
132128
shape=[m, n, 2],

bio2zarr/vcz.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ class ZarrArraySpec:
9393
chunks: tuple
9494
dimensions: tuple
9595
description: str
96-
vcf_field: str
9796
compressor: dict
9897
filters: list
98+
source: str = None
9999

100100
def __post_init__(self):
101101
if self.name in _fixed_field_descriptions:
@@ -151,7 +151,7 @@ def from_field(
151151
else:
152152
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
153153
return ZarrArraySpec.new(
154-
vcf_field=vcf_field.full_name,
154+
source=vcf_field.full_name,
155155
name=array_name,
156156
dtype=vcf_field.smallest_dtype(),
157157
shape=shape,
@@ -465,7 +465,7 @@ def has_genotypes(self):
465465

466466
def has_local_alleles(self):
467467
for field in self.schema.fields:
468-
if field.name == "call_LA" and field.vcf_field is None:
468+
if field.name == "call_LA" and field.source is None:
469469
return True
470470
return False
471471

@@ -673,7 +673,7 @@ def encode_partition(self, partition_index):
673673
self.encode_contig_partition(partition_index)
674674
self.encode_alleles_partition(partition_index)
675675
for array_spec in self.schema.fields:
676-
if array_spec.vcf_field is not None:
676+
if array_spec.source is not None:
677677
self.encode_array_partition(array_spec, partition_index)
678678
if self.has_genotypes():
679679
self.encode_genotypes_partition(partition_index)
@@ -717,7 +717,7 @@ def encode_array_partition(self, array_spec, partition_index):
717717
partition = self.metadata.partitions[partition_index]
718718
ba = self.init_partition_array(partition_index, array_spec.name)
719719
for value in self.source.iter_field(
720-
array_spec.vcf_field,
720+
array_spec.source,
721721
ba.buff.shape[1:],
722722
partition.start,
723723
partition.stop,
@@ -789,7 +789,7 @@ def encode_local_allele_fields_partition(self, partition_index):
789789
for descriptor in localisable_fields:
790790
if descriptor.array_name not in field_map:
791791
continue
792-
assert field_map[descriptor.array_name].vcf_field is None
792+
assert field_map[descriptor.array_name].source is None
793793

794794
buff = self.init_partition_array(partition_index, descriptor.array_name)
795795
source = self.source.fields[descriptor.vcf_field].iter_values(

tests/test_vcz.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def test_variant_contig(self, schema):
340340
"dimensions": ("variants",),
341341
"description": "An identifier from the reference genome or an "
342342
"angle-bracketed ID string pointing to a contig in the assembly file",
343-
"vcf_field": None,
343+
"source": None,
344344
"compressor": {
345345
"id": "blosc",
346346
"cname": "zstd",
@@ -359,7 +359,7 @@ def test_call_genotype(self, schema):
359359
"chunks": (1000, 10000, 2),
360360
"dimensions": ("variants", "samples", "ploidy"),
361361
"description": "",
362-
"vcf_field": None,
362+
"source": None,
363363
"compressor": {
364364
"id": "blosc",
365365
"cname": "zstd",
@@ -378,7 +378,7 @@ def test_call_genotype_mask(self, schema):
378378
"chunks": (1000, 10000, 2),
379379
"dimensions": ("variants", "samples", "ploidy"),
380380
"description": "",
381-
"vcf_field": None,
381+
"source": None,
382382
"compressor": {
383383
"id": "blosc",
384384
"cname": "zstd",
@@ -397,7 +397,7 @@ def test_call_genotype_phased(self, schema):
397397
"chunks": (1000, 10000, 2),
398398
"dimensions": ("variants", "samples", "ploidy"),
399399
"description": "",
400-
"vcf_field": None,
400+
"source": None,
401401
"compressor": {
402402
"id": "blosc",
403403
"cname": "zstd",
@@ -416,7 +416,7 @@ def test_call_GQ(self, schema):
416416
"chunks": (1000, 10000),
417417
"dimensions": ("variants", "samples"),
418418
"description": "Genotype Quality",
419-
"vcf_field": "FORMAT/GQ",
419+
"source": "FORMAT/GQ",
420420
"compressor": {
421421
"id": "blosc",
422422
"cname": "zstd",
@@ -437,6 +437,7 @@ def test_differences(self, schema, local_alleles_schema):
437437
def test_call_LA(self, local_alleles_schema):
438438
d = get_field_dict(local_alleles_schema, "call_LA")
439439
assert d == {
440+
"source": None,
440441
"name": "call_LA",
441442
"dtype": "i1",
442443
"shape": (9, 3, 2),
@@ -446,7 +447,6 @@ def test_call_LA(self, local_alleles_schema):
446447
"0-based indices into REF+ALT, indicating which alleles"
447448
" are relevant (local) for the current sample"
448449
),
449-
"vcf_field": None,
450450
"compressor": {
451451
"id": "blosc",
452452
"cname": "zstd",

0 commit comments

Comments
 (0)