@@ -232,29 +232,27 @@ def field_descriptors(self):
232232
233233@dataclasses .dataclass
234234class Variant :
235+ """
236+ Represents a single variant, including the genomic position, the
237+ integer-encoded genotypes, and the allele labels.
238+ """
235239 position : int
236240 genotypes : np .ndarray
237241 alleles : list
238242
239243
240244class Dataset (collections .abc .Mapping ):
245+ """
246+ Open a sc2ts VCF Zarr dataset for convenient access to alignments and metadata.
241247
248+ The dataset is opened read-only from ``path``, which may be either a
249+ directory store or a consolidated ``.zip`` file.
250+
251+ :param str path: Path to a directory or ``.zip`` Zarr store.
252+ :param int chunk_cache_size: Maximum number of chunks to cache for
253+ alignments and metadata. Defaults to 1.
254+ """
242255 def __init__ (self , path , chunk_cache_size = 1 , date_field = None ):
243- """
244- Open a sc2ts VCF Zarr dataset for convenient access to alignments and metadata.
245-
246- The dataset is opened read-only from ``path``, which may be either a
247- directory store or a consolidated ``.zip`` file. The ``date_field``
248- argument specifies which metadata field should be interpreted as the
249- sample date when constructing :attr:`metadata`.
250-
251- :param str path: Path to a directory or ``.zip`` Zarr store.
252- :param int chunk_cache_size: Maximum number of chunks to cache for
253- alignments and metadata. Defaults to 1.
254- :param str date_field: Name of the metadata field to use as the
255- sample date, or ``None`` to disable date handling. Defaults
256- to ``None``.
257- """
258256 logger .info (f"Loading dataset @{ path } using { date_field } as date field" )
259257 self .date_field = date_field
260258 self .path = pathlib .Path (path )
@@ -310,6 +308,17 @@ def metadata(self):
310308 """
311309 return self ._metadata
312310
311+ def metadata_dataframe (self , fields = None ):
312+ """
313+ Returns the metadata in this dataset as a Pandas dataframe,
314+ indexed by sample_id.
315+
316+ :param fields: List of metadata fields to include; if ``None``,
317+ all fields are used.
318+ :return: Pandas dataframe
319+ """
320+ return self .metadata .as_dataframe (fields )
321+
313322 @property
314323 def sample_id (self ):
315324 """
@@ -356,7 +365,7 @@ def variants(self, sample_id=None, position=None):
356365 """
357366 Iterate over variants at the specified positions for the given samples.
358367
359- Yields Variant objects containing the genomic position, encoded
368+ Yields :class:`.Variant` objects containing the genomic position, encoded
360369 genotypes, and allele labels for each requested site.
361370
362371 :param sample_id: Iterable of sample IDs to include; if ``None``,
0 commit comments