Skip to content

Commit d640186

Browse files
committed
feat: allow for non-diploid genotypes
1 parent 8741513 commit d640186

File tree

1 file changed

+78
-3
lines changed

1 file changed

+78
-3
lines changed

src/bcf/record.rs

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
// except according to those terms.
55

66
use std::borrow::{Borrow, BorrowMut};
7-
use std::ffi;
87
use std::fmt;
98
use std::marker::PhantomData;
109
use std::ops::Deref;
@@ -13,6 +12,7 @@ use std::ptr;
1312
use std::rc::Rc;
1413
use std::slice;
1514
use std::str;
15+
use std::{ffi, iter};
1616

1717
use bio_types::genome;
1818
use derive_new::new;
@@ -692,6 +692,76 @@ impl Record {
692692
self.push_format_integer(b"GT", &encoded)
693693
}
694694

695+
/// Add/replace genotypes in FORMAT GT tag by providing a list of genotypes.
696+
///
697+
/// # Arguments
698+
///
699+
/// - `genotypes` - a two-dimensional array of GenotypeAllele
700+
/// - `max_ploidy` - the maximum number of alleles allowed for any genotype on any sample.
701+
///
702+
/// # Errors
703+
///
704+
/// Returns an error if any genotype has more allelles than `max_ploidy` or if the GT tag is not present in the header.
705+
///
706+
/// # Example
707+
///
708+
/// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag and three samples.
709+
/// See [module documentation](../index.html#example-writing) for how to set up
710+
/// VCF, header, and record.
711+
///
712+
/// ```
713+
/// # use rust_htslib::bcf::{Format, Writer};
714+
/// # use rust_htslib::bcf::header::Header;
715+
/// # use rust_htslib::bcf::record::GenotypeAllele;
716+
/// # use std::iter;
717+
/// # let mut header = Header::new();
718+
/// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
719+
/// # header.push_record(header_contig_line.as_bytes());
720+
/// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
721+
/// # header.push_record(header_gt_line.as_bytes());
722+
/// # header.push_sample("first_sample".as_bytes());
723+
/// # header.push_sample("second_sample".as_bytes());
724+
/// # header.push_sample("third_sample".as_bytes());
725+
/// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf)?;
726+
/// # let mut record = vcf.empty_record();
727+
/// let alleles = vec![
728+
/// vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
729+
/// vec![GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)],
730+
/// vec![GenotypeAllele::Unphased(0)],
731+
/// ];
732+
/// record.push_genotype_structured(&alleles, 2);
733+
/// let gts = record.genotypes()?;
734+
/// assert_eq!("1/1", &format!("{}", gts.get(0)));
735+
/// assert_eq!("0|1", &format!("{}", gts.get(1)));
736+
/// assert_eq!("0", &format!("{}", gts.get(2)));
737+
/// # Ok::<(), rust_htslib::errors::Error>(())
738+
/// ```
739+
pub fn push_genotype_structured<GT>(
740+
&mut self,
741+
genotypes: &[GT],
742+
max_ploidy: usize,
743+
) -> Result<()>
744+
where
745+
GT: AsRef<[GenotypeAllele]>,
746+
{
747+
let mut data = Vec::with_capacity(max_ploidy * genotypes.len());
748+
for gt in genotypes {
749+
if gt.as_ref().len() > max_ploidy {
750+
return Err(Error::BcfSetValues);
751+
}
752+
data.extend(
753+
gt.as_ref()
754+
.iter()
755+
.map(|gta| i32::from(*gta))
756+
.chain(iter::repeat_n(
757+
VECTOR_END_INTEGER,
758+
max_ploidy - gt.as_ref().len(),
759+
)),
760+
);
761+
}
762+
self.push_format_integer(b"GT", &data)
763+
}
764+
695765
/// Get genotypes as vector of one `Genotype` per sample.
696766
///
697767
/// # Example
@@ -1249,14 +1319,19 @@ where
12491319
}
12501320

12511321
impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1252-
/// Get genotype of ith sample. So far, only supports diploid genotypes.
1322+
/// Get genotype of ith sample.
12531323
///
12541324
/// Note that the result complies with the BCF spec. This means that the
12551325
/// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
12561326
/// this method will return `[Unphased(1), Phased(1)]`.
12571327
pub fn get(&self, i: usize) -> Genotype {
12581328
let igt = self.encoded[i];
1259-
Genotype(igt.iter().map(|&e| GenotypeAllele::from(e)).collect())
1329+
let allelles = igt
1330+
.iter()
1331+
.take_while(|&&i| i != VECTOR_END_INTEGER)
1332+
.map(|&i| GenotypeAllele::from(i))
1333+
.collect();
1334+
Genotype(allelles)
12601335
}
12611336
}
12621337

0 commit comments

Comments
 (0)