@@ -1891,7 +1891,7 @@ def aslist(self):
18911891 @classmethod
18921892 def load (cls , path ):
18931893 ts = _tskit .TreeSequence ()
1894- ts .load (path )
1894+ ts .load (str ( path ) )
18951895 return TreeSequence (ts )
18961896
18971897 @classmethod
@@ -1911,7 +1911,8 @@ def dump(self, path, zlib_compression=False):
19111911 warnings .warn (
19121912 "The zlib_compression option is no longer supported and is ignored" ,
19131913 RuntimeWarning )
1914- self ._ll_tree_sequence .dump (path )
1914+ # Convert the path to str so that pathlib.Path inputs are accepted
1915+ self ._ll_tree_sequence .dump (str (path ))
19151916
19161917 @property
19171918 def tables (self ):
@@ -2774,17 +2775,56 @@ def write_vcf(self, output, ploidy=1, contig_id="1"):
27742775 to the prefix ``msp_`` such that we would have the sample names
27752776 ``msp_0``, ``msp_1`` and ``msp_2`` in the running example.
27762777
2777- Example usage:
2778-
2779- >>> with open("output.vcf", "w") as vcf_file:
2780- >>> tree_sequence.write_vcf(vcf_file, 2)
2781-
27822778 .. warning::
27832779 This output function does not currently use information in the
27842780 :class:`IndividualTable`, and so will only correctly produce
27852781 non-haploid output if the nodes corresponding to each individual
27862782 are contiguous as described above.
27872783
2784+ Example usage:
2785+
2786+ .. code-block:: python
2787+
2788+ with open("output.vcf", "w") as vcf_file:
2789+ tree_sequence.write_vcf(vcf_file, 2)
2790+
2791+ The VCF output can also be compressed using the :mod:`gzip` module, if you wish:
2792+
2793+ .. code-block:: python
2794+
2795+ import gzip
2796+ with gzip.open("output.vcf.gz", "wt") as f:
2797+ ts.write_vcf(f)
2798+
2799+ However, this gzipped VCF may not be fully compatible with downstream tools
2800+ such as tabix, which may require the VCF to use the specialised bgzip format.
2801+ A general way to convert VCF data to various formats is to pipe the text
2802+ produced by ``tskit`` into ``bcftools``, as done here:
2803+
2804+ .. code-block:: python
2805+
2806+ import os
2807+ import subprocess
2808+
2809+ read_fd, write_fd = os.pipe()
2810+ write_pipe = os.fdopen(write_fd, "w")
2811+ with open("output.bcf", "w") as bcf_file:
2812+ proc = subprocess.Popen(
2813+ ["bcftools", "view", "-O", "b"], stdin=read_fd, stdout=bcf_file)
2814+ ts.write_vcf(write_pipe)
2815+ write_pipe.close()
2816+ os.close(read_fd)
2817+ proc.wait()
2818+ if proc.returncode != 0:
2819+ raise RuntimeError("bcftools failed with status:", proc.returncode)
2820+
2821+ This can also be achieved on the command line using the ``tskit vcf`` command,
2822+ e.g.:
2823+
2824+ .. code-block:: bash
2825+
2826+ $ tskit vcf example.trees | bcftools view -O b > example.bcf
2827+
27882828 :param File output: The file-like object to write the VCF output.
27892829 :param int ploidy: The ploidy of the individuals to be written to
27902830 VCF. The sample size must be evenly divisible by ploidy.
0 commit comments