Skip to content

Commit e873d89

Browse files
committed
Add iter_contig
1 parent fc2b074 commit e873d89

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

bio2zarr/vcf2zarr/icf.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,6 @@ def sanitise_value_float_1d(shape, value):
424424
value = drop_empty_second_dim(value)
425425
result = np.full(shape, constants.FLOAT32_FILL, dtype=np.float32)
426426
result[: value.shape[0]] = value
427-
print(result)
428427
return result
429428

430429

@@ -435,7 +434,6 @@ def sanitise_value_float_2d(shape, value):
435434
value = np.array(value, ndmin=2, dtype=np.float32, copy=True)
436435
result = np.full(shape, constants.FLOAT32_FILL, dtype=np.float32)
437436
result[:, : value.shape[1]] = value
438-
print(result)
439437
return result
440438

441439

@@ -943,6 +941,19 @@ def iter_filters(self, start, stop):
943941
) from None
944942
yield filters
945943

944+
def iter_contig(self, start, stop):
945+
source_field = self.fields["CHROM"]
946+
lookup = {
947+
contig.id: index for index, contig in enumerate(self.metadata.contigs)
948+
}
949+
950+
for value in source_field.iter_values(start, stop):
951+
# Note: because we are using the indexes to define the lookups
952+
# and we always have an index, it seems that the contig lookup
953+
# will always succeed. However, if anyone ever does hit a KeyError
954+
# here, please do open an issue with a reproducible example!
955+
yield lookup[value[0]]
956+
946957
def iter_field(self, field_name, shape, start, stop):
947958
source_field = self.fields[field_name]
948959
sanitiser = source_field.sanitiser_factory(shape)

bio2zarr/writer.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -557,18 +557,12 @@ def encode_filters_partition(self, partition_index):
557557
self.finalise_partition_array(partition_index, var_filter)
558558

559559
def encode_contig_partition(self, partition_index):
560-
lookup = {contig.id: index for index, contig in enumerate(self.schema.contigs)}
561560
contig = self.init_partition_array(partition_index, "variant_contig")
562561
partition = self.metadata.partitions[partition_index]
563-
field = self.source.fields["CHROM"]
564562

565-
for value in field.iter_values(partition.start, partition.stop):
563+
for contig_index in self.source.iter_contig(partition.start, partition.stop):
566564
j = contig.next_buffer_row()
567-
# Note: because we are using the indexes to define the lookups
568-
# and we always have an index, it seems that the contig lookup
569-
# will always succeed. However, if anyone ever does hit a KeyError
570-
# here, please do open an issue with a reproducible example!
571-
contig.buff[j] = lookup[value[0]]
565+
contig.buff[j] = contig_index
572566

573567
self.finalise_partition_array(partition_index, contig)
574568

0 commit comments

Comments
 (0)