File tree Expand file tree Collapse file tree 7 files changed +84
-6
lines changed Expand file tree Collapse file tree 7 files changed +84
-6
lines changed Original file line number Diff line number Diff line change @@ -241,8 +241,8 @@ def generate_schema(
241
241
242
242
243
243
def convert (
244
- ts_path ,
245
- zarr_path ,
244
+ ts_or_path ,
245
+ vcz_path ,
246
246
* ,
247
247
model_mapping = None ,
248
248
contig_id = None ,
@@ -252,8 +252,14 @@ def convert(
252
252
worker_processes = 1 ,
253
253
show_progress = False ,
254
254
):
255
+ """
256
+ Convert a :class:`tskit.TreeSequence` (or path to a tree sequence
257
+ file) to VCF Zarr format stored at the specified path.
258
+
259
+ .. todo:: Document parameters
260
+ """
255
261
tskit_format = TskitFormat (
256
- ts_path ,
262
+ ts_or_path ,
257
263
model_mapping = model_mapping ,
258
264
contig_id = contig_id ,
259
265
isolated_as_missing = isolated_as_missing ,
@@ -262,7 +268,7 @@ def convert(
262
268
variants_chunk_size = variants_chunk_size ,
263
269
samples_chunk_size = samples_chunk_size ,
264
270
)
265
- zarr_path = pathlib .Path (zarr_path )
271
+ zarr_path = pathlib .Path (vcz_path )
266
272
vzw = vcz .VcfZarrWriter (TskitFormat , zarr_path )
267
273
# Rough heuristic to split work up enough to keep utilisation high
268
274
target_num_partitions = max (1 , worker_processes * 4 )
Original file line number Diff line number Diff line change @@ -24,14 +24,15 @@ html:
24
24
extra_footer : |
25
25
<p>
26
26
Documentation available under the terms of the
27
- <a href="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0</a>
27
+ <a href="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0</a>
28
28
license.
29
29
</p>
30
30
31
31
sphinx :
32
32
extra_extensions :
33
33
- sphinx_click.ext
34
34
- sphinx.ext.todo
35
+ - sphinx.ext.autodoc
35
36
config :
36
37
html_show_copyright : false
37
38
# This is needed to make sure that text is output in single block from
@@ -40,3 +41,6 @@ sphinx:
40
41
todo_include_todos : true
41
42
myst_enable_extensions :
42
43
- colon_fence
44
+ intersphinx_mapping :
45
+ python : ["https://docs.python.org/3/", null]
46
+ tskit : ["https://tskit.dev/tskit/docs/stable", null]
Original file line number Diff line number Diff line change @@ -9,6 +9,10 @@ chapters:
9
9
- file : plink2zarr/overview
10
10
sections :
11
11
- file : plink2zarr/cli_ref
12
+ - file : tskit2zarr/overview
13
+ sections :
14
+ - file : tskit2zarr/python_api
15
+ - file : tskit2zarr/cli_ref
12
16
- file : vcfpartition/overview
13
17
sections :
14
18
- file : vcfpartition/cli_ref
Original file line number Diff line number Diff line change 14
14
.. click:: bio2zarr.cli:convert_plink
15
15
:prog: plink2zarr convert
16
16
:nested: full
17
-
17
+ ```
Original file line number Diff line number Diff line change
1
+ (sec-tskit2zarr-cli-ref)=
2
+ # CLI Reference
3
+
4
+ % A note on cross references... There's some weird long-standing problem with
5
+ % cross referencing program values in Sphinx, which means that we can't use
6
+ % the built-in labels generated by sphinx-click. We can make our own explicit
7
+ % targets, but these have to have slightly weird names to avoid conflicting
8
+ % with what sphinx-click is doing. So, hence the cmd- prefix.
9
+ % Based on: https://github.com/skypilot-org/skypilot/pull/2834
10
+
11
+ ``` {eval-rst}
12
+
13
+ .. _cmd-tskit2zarr-convert:
14
+ .. click:: bio2zarr.cli:convert_tskit
15
+ :prog: tskit2zarr convert
16
+ :nested: full
17
+
18
+ ```
Original file line number Diff line number Diff line change
1
+ (sec-tskit2zarr)=
2
+ # tskit2zarr
3
+
4
+ Convert tskit data to the
5
+ [ VCF Zarr specification] ( https://github.com/sgkit-dev/vcf-zarr-spec/ )
6
+ reliably in parallel.
7
+
8
+ See {ref}` sec-tskit2zarr-cli-ref ` for detailed documentation on
9
+ command line options.
10
+
Original file line number Diff line number Diff line change
1
+ (sec-tskit2zarr-python-api)=
2
+ # Python API
3
+
4
+ Basic usage:
5
+ ``` python
6
+ import bio2zarr.tskit as ts2z
7
+
8
+ ts2z.convert(ts_path, vcz_path, worker_processes = 8 )
9
+ ```
10
+
11
+ This will convert the [ tskit] ( https://tskit.dev ) tree sequence stored
12
+ at `` ts_path `` to VCF Zarr stored at `` vcz_path `` using 8 worker processes.
13
+ The details of how we map from the
14
+ tskit {ref}` tskit:sec_data_model ` to VCF Zarr are taken care of by
15
+ the ``TreeSequence.map_to_vcf_model`` method, which is called with no
16
+ parameters by default if the `` model_mapping `` parameter to
17
+ {func}` ~bio2zarr.tskit.convert ` is not specified.
18
+
19
+ For more control over the properties of the output, for example
20
+ to pick a specific subset of individuals, you can use
21
+ ``TreeSequence.map_to_vcf_model``
22
+ to return the required mapping:
23
+
24
+ ``` python
25
+ model_mapping = ts.map_to_vcf_model(individuals=[0, 1])
26
+ ts2z.convert(ts, vcz_path, model_mapping = model_mapping)
27
+ ```
28
+
29
+
30
+ ## API reference
31
+
32
+ ``` {eval-rst}
33
+
34
+ .. autofunction:: bio2zarr.tskit.convert
35
+
36
+ ```
You can’t perform that action at this time.
0 commit comments