Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions bio2zarr/icf.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,6 @@ def convert_local_allele_field_types(fields):
dimensions = gt.dimensions[:-1]

la = vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_LA",
dtype="i1",
shape=gt.shape,
Expand All @@ -859,7 +858,7 @@ def convert_local_allele_field_types(fields):
if ad is not None:
# TODO check if call_LAD is in the list already
ad.name = "call_LAD"
ad.vcf_field = None
ad.source = None
ad.shape = (*shape, 2)
ad.chunks = (*chunks, 2)
ad.dimensions = (*dimensions, "local_alleles")
Expand All @@ -869,7 +868,7 @@ def convert_local_allele_field_types(fields):
if pl is not None:
# TODO check if call_LPL is in the list already
pl.name = "call_LPL"
pl.vcf_field = None
pl.source = None
pl.shape = (*shape, 3)
pl.chunks = (*chunks, 3)
pl.description += " (local-alleles)"
Expand Down Expand Up @@ -1060,13 +1059,13 @@ def spec_from_field(field, array_name=None):
def fixed_field_spec(
name,
dtype,
vcf_field=None,
source=None,
shape=(m,),
dimensions=("variants",),
chunks=None,
):
return vcz.ZarrArraySpec.new(
vcf_field=vcf_field,
source=source,
name=name,
dtype=dtype,
shape=shape,
Expand Down Expand Up @@ -1137,7 +1136,6 @@ def fixed_field_spec(
dimensions = ["variants", "samples"]
array_specs.append(
vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_genotype_phased",
dtype="bool",
shape=list(shape),
Expand All @@ -1151,7 +1149,6 @@ def fixed_field_spec(
dimensions += ["ploidy"]
array_specs.append(
vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_genotype",
dtype=gt_field.smallest_dtype(),
shape=list(shape),
Expand All @@ -1162,7 +1159,6 @@ def fixed_field_spec(
)
array_specs.append(
vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_genotype_mask",
dtype="bool",
shape=list(shape),
Expand Down
6 changes: 1 addition & 5 deletions bio2zarr/plink.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def generate_schema(

array_specs = [
vcz.ZarrArraySpec.new(
vcf_field="position",
source="position",
name="variant_position",
dtype="i4",
shape=[m],
Expand All @@ -92,7 +92,6 @@ def generate_schema(
description=None,
),
vcz.ZarrArraySpec.new(
vcf_field=None,
name="variant_allele",
dtype="O",
shape=[m, 2],
Expand All @@ -101,7 +100,6 @@ def generate_schema(
description=None,
),
vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_genotype_phased",
dtype="bool",
shape=[m, n],
Expand All @@ -113,7 +111,6 @@ def generate_schema(
description=None,
),
vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_genotype",
dtype="i1",
shape=[m, n, 2],
Expand All @@ -126,7 +123,6 @@ def generate_schema(
description=None,
),
vcz.ZarrArraySpec.new(
vcf_field=None,
name="call_genotype_mask",
dtype="bool",
shape=[m, n, 2],
Expand Down
12 changes: 6 additions & 6 deletions bio2zarr/vcz.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ class ZarrArraySpec:
chunks: tuple
dimensions: tuple
description: str
vcf_field: str
compressor: dict
filters: list
source: str = None

def __post_init__(self):
if self.name in _fixed_field_descriptions:
Expand Down Expand Up @@ -151,7 +151,7 @@ def from_field(
else:
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
return ZarrArraySpec.new(
vcf_field=vcf_field.full_name,
source=vcf_field.full_name,
name=array_name,
dtype=vcf_field.smallest_dtype(),
shape=shape,
Expand Down Expand Up @@ -465,7 +465,7 @@ def has_genotypes(self):

def has_local_alleles(self):
for field in self.schema.fields:
if field.name == "call_LA" and field.vcf_field is None:
if field.name == "call_LA" and field.source is None:
return True
return False

Expand Down Expand Up @@ -667,7 +667,7 @@ def encode_partition(self, partition_index):
self.encode_contig_partition(partition_index)
self.encode_alleles_partition(partition_index)
for array_spec in self.schema.fields:
if array_spec.vcf_field is not None:
if array_spec.source is not None:
self.encode_array_partition(array_spec, partition_index)
if self.has_genotypes():
self.encode_genotypes_partition(partition_index)
Expand Down Expand Up @@ -711,7 +711,7 @@ def encode_array_partition(self, array_spec, partition_index):
partition = self.metadata.partitions[partition_index]
ba = self.init_partition_array(partition_index, array_spec.name)
for value in self.source.iter_field(
array_spec.vcf_field,
array_spec.source,
ba.buff.shape[1:],
partition.start,
partition.stop,
Expand Down Expand Up @@ -783,7 +783,7 @@ def encode_local_allele_fields_partition(self, partition_index):
for descriptor in localisable_fields:
if descriptor.array_name not in field_map:
continue
assert field_map[descriptor.array_name].vcf_field is None
assert field_map[descriptor.array_name].source is None

buff = self.init_partition_array(partition_index, descriptor.array_name)
source = self.source.fields[descriptor.vcf_field].iter_values(
Expand Down
12 changes: 6 additions & 6 deletions tests/test_vcz.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def test_variant_contig(self, schema):
"dimensions": ("variants",),
"description": "An identifier from the reference genome or an "
"angle-bracketed ID string pointing to a contig in the assembly file",
"vcf_field": None,
"source": None,
"compressor": {
"id": "blosc",
"cname": "zstd",
Expand All @@ -359,7 +359,7 @@ def test_call_genotype(self, schema):
"chunks": (1000, 10000, 2),
"dimensions": ("variants", "samples", "ploidy"),
"description": "",
"vcf_field": None,
"source": None,
"compressor": {
"id": "blosc",
"cname": "zstd",
Expand All @@ -378,7 +378,7 @@ def test_call_genotype_mask(self, schema):
"chunks": (1000, 10000, 2),
"dimensions": ("variants", "samples", "ploidy"),
"description": "",
"vcf_field": None,
"source": None,
"compressor": {
"id": "blosc",
"cname": "zstd",
Expand All @@ -397,7 +397,7 @@ def test_call_genotype_phased(self, schema):
"chunks": (1000, 10000, 2),
"dimensions": ("variants", "samples", "ploidy"),
"description": "",
"vcf_field": None,
"source": None,
"compressor": {
"id": "blosc",
"cname": "zstd",
Expand All @@ -416,7 +416,7 @@ def test_call_GQ(self, schema):
"chunks": (1000, 10000),
"dimensions": ("variants", "samples"),
"description": "Genotype Quality",
"vcf_field": "FORMAT/GQ",
"source": "FORMAT/GQ",
"compressor": {
"id": "blosc",
"cname": "zstd",
Expand All @@ -437,6 +437,7 @@ def test_differences(self, schema, local_alleles_schema):
def test_call_LA(self, local_alleles_schema):
d = get_field_dict(local_alleles_schema, "call_LA")
assert d == {
"source": None,
"name": "call_LA",
"dtype": "i1",
"shape": (9, 3, 2),
Expand All @@ -446,7 +447,6 @@ def test_call_LA(self, local_alleles_schema):
"0-based indices into REF+ALT, indicating which alleles"
" are relevant (local) for the current sample"
),
"vcf_field": None,
"compressor": {
"id": "blosc",
"cname": "zstd",
Expand Down
Loading