Skip to content

Commit 6e5feb5

Browse files
Basic docs
1 parent dbd8e1d commit 6e5feb5

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,22 @@ python3 -m bio2zarr vcf2zarr explode tests/data/vcf/sample.vcf.gz tmp/sample.exp
2929

3030
Then, (optionally) inspect this representation to get a feel for your dataset
3131
```
32-
python3 -m bio2zarr vcf2zarr summarise tmp/sample.exploded
32+
python3 -m bio2zarr vcf2zarr inspect tmp/sample.exploded
3333
```
3434

3535
Then, (optionally) generate a conversion schema to describe the corresponding
3636
Zarr arrays:
3737

3838
```
39-
python3 -m bio2zarr vcf2zarr genspec tmp/sample.exploded > sample.schema.json
39+
python3 -m bio2zarr vcf2zarr mkschema tmp/sample.exploded > sample.schema.json
4040
```
4141

4242
View and edit the schema, deleting any columns you don't want.
4343

4444
Finally, convert to Zarr
4545

4646
```
47-
python3 -m bio2zarr vcf2zarr to-zarr tmp/sample.exploded tmp/sample.zarr -s sample.schema.json
47+
python3 -m bio2zarr vcf2zarr encode tmp/sample.exploded tmp/sample.zarr -s sample.schema.json
4848
```
4949

5050
Use the ``-p, --worker-processes`` argument to control the number of workers used

bio2zarr/cli.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ def setup_logging(verbosity):
3535
@worker_processes
3636
@click.option("-c", "--column-chunk-size", type=int, default=64)
3737
def explode(vcfs, out_path, verbose, worker_processes, column_chunk_size):
38+
"""
39+
Convert VCF(s) to columnar intermediate format
40+
"""
3841
setup_logging(verbose)
3942
vcf.explode(
4043
vcfs,
@@ -49,6 +52,9 @@ def explode(vcfs, out_path, verbose, worker_processes, column_chunk_size):
4952
@click.argument("if_path", type=click.Path())
5053
@verbose
5154
def inspect(if_path, verbose):
55+
"""
56+
Inspect an intermediate format file
57+
"""
5258
setup_logging(verbose)
5359
data = vcf.inspect(if_path)
5460
click.echo(tabulate.tabulate(data, headers="keys"))
@@ -57,6 +63,9 @@ def inspect(if_path, verbose):
5763
@click.command
5864
@click.argument("if_path", type=click.Path())
5965
def mkschema(if_path):
66+
"""
67+
Generate a schema for zarr encoding
68+
"""
6069
stream = click.get_text_stream("stdout")
6170
vcf.mkschema(if_path, stream)
6271

@@ -68,6 +77,9 @@ def mkschema(if_path):
6877
@click.option("-s", "--schema", default=None)
6978
@worker_processes
7079
def encode(if_path, zarr_path, verbose, schema, worker_processes):
80+
"""
81+
Encode intermediate format (see explode) to vcfzarr
82+
"""
7183
setup_logging(verbose)
7284
vcf.encode(
7385
if_path,
@@ -84,6 +96,9 @@ def encode(if_path, zarr_path, verbose, schema, worker_processes):
8496
@verbose
8597
@worker_processes
8698
def convert_vcf(vcfs, out_path, verbose, worker_processes):
99+
"""
100+
Convert input VCF(s) directly to vcfzarr (not recommended for large files)
101+
"""
87102
setup_logging(verbose)
88103
vcf.convert(vcfs, out_path, show_progress=True, worker_processes=worker_processes)
89104

@@ -101,6 +116,7 @@ def vcf2zarr():
101116
pass
102117

103118

119+
# TODO figure out how to get click to list these in the given order.
104120
vcf2zarr.add_command(explode)
105121
vcf2zarr.add_command(inspect)
106122
vcf2zarr.add_command(mkschema)
@@ -116,6 +132,9 @@ def vcf2zarr():
116132
@click.option("--chunk-width", type=int, default=None)
117133
@click.option("--chunk-length", type=int, default=None)
118134
def convert_plink(in_path, out_path, worker_processes, chunk_width, chunk_length):
135+
"""
136+
In development; DO NOT USE!
137+
"""
119138
plink.convert(
120139
in_path,
121140
out_path,

0 commit comments

Comments
 (0)