Skip to content

Commit 4da9bca

Browse files
Fix bed reading problems
1 parent 413a963 commit 4da9bca

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

bio2zarr/cli.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,14 @@ def vcf2zarr():
155155
@click.argument("in_path", type=click.Path())
156156
@click.argument("out_path", type=click.Path())
157157
@worker_processes
158-
@click.option("--chunk-width", type=int, default=None)
159-
@click.option("--chunk-length", type=int, default=None)
160-
def convert_plink(in_path, out_path, worker_processes, chunk_width, chunk_length):
158+
@verbose
159+
@chunk_length
160+
@chunk_width
161+
def convert_plink(in_path, out_path, verbose, worker_processes, chunk_length, chunk_width):
161162
"""
162163
In development; DO NOT USE!
163164
"""
165+
setup_logging(verbose)
164166
plink.convert(
165167
in_path,
166168
out_path,

bio2zarr/plink.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22

3+
import humanfriendly
34
import numpy as np
45
import zarr
56
import bed_reader
@@ -21,12 +22,13 @@ def encode_genotypes_slice(bed_path, zarr_path, start, stop):
2122
n = gt.array.shape[1]
2223
assert start % chunk_length == 0
2324

24-
B = bed.read(dtype=np.int8).T
25-
25+
logger.debug(f"Reading slice {start}:{stop}")
2626
chunk_start = start
2727
while chunk_start < stop:
2828
chunk_stop = min(chunk_start + chunk_length, stop)
29-
bed_chunk = bed.read(index=np.s_[:, chunk_start:chunk_stop], dtype=np.int8).T
29+
logger.debug(f"Reading bed slice {chunk_start}:{chunk_stop}")
30+
bed_chunk = bed.read(slice(chunk_start, chunk_stop), dtype=np.int8).T
31+
logger.debug(f"Got bed slice {humanfriendly.format_size(bed_chunk.nbytes)}")
3032
# Probably should do this without iterating over rows, but it's a bit
3133
# simpler and lines up better with the array buffering API. The bottleneck
3234
# is in the encoding anyway.
@@ -61,6 +63,7 @@ def convert(
6163
n = bed.iid_count
6264
m = bed.sid_count
6365
del bed
66+
logging.info(f"Scanned plink with {n} samples and {m} variants")
6467

6568
# FIXME
6669
if chunk_width is None:

0 commit comments

Comments
 (0)