Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dabuild"
version = "0.3.0"
version = "0.3.1"
description = "Access genome build metadata"
keywords = ["reference", "genome", "build", "bioinformatics"]
edition = "2021"
Expand Down
6 changes: 4 additions & 2 deletions src/builds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ const GRCh38_p13: &[u8] = include_bytes!("data/GCF_000001405.39_GRCh38.p13_assem
/// If the builtin assembly report cannot be parsed (should not happen).
pub fn get_grch37_p13() -> GenomeBuild {
// Identifier created from the ("GRCh37", "p13") pair.
let id = GenomeBuildIdentifier::from(("GRCh37", "p13"));
// A parse failure is not propagated to the caller: `expect` panics with the message below.
// NOTE(review): `GRCh37_p13` is presumably embedded with `include_bytes!` like `GRCh38_p13`;
// its definition is outside this view - confirm.
parse_assembly_report(id, GRCh37_p13).expect("The embedded assembly report for GRCh37.p13 should be valid")
parse_assembly_report(id, GRCh37_p13)
.expect("The embedded assembly report for GRCh37.p13 should be valid")
}

/// Get the *GRCh38.p13* build.
Expand All @@ -70,7 +71,8 @@ pub fn get_grch37_p13() -> GenomeBuild {
/// If the builtin assembly report cannot be parsed (should not really happen).
pub fn get_grch38_p13() -> GenomeBuild {
// Identifier created from the ("GRCh38", "p13") pair.
let id = GenomeBuildIdentifier::from(("GRCh38", "p13"));
// `GRCh38_p13` holds the assembly report bytes pulled in with `include_bytes!`.
// A parse failure is not propagated to the caller: `expect` panics with the message below.
parse_assembly_report(id, GRCh38_p13).expect("The embedded assembly report for GRCh38.p13 should be valid")
parse_assembly_report(id, GRCh38_p13)
.expect("The embedded assembly report for GRCh38.p13 should be valid")
}

/// Parse an assembly report into a [`GenomeBuild`].
Expand Down
265 changes: 259 additions & 6 deletions src/genome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,44 @@ impl Contig {
&self.name
}

/// Start building a [`Contig`] step by step.
///
/// The returned builder has neither a name nor a length yet. Both have to be
/// provided, in either order, before `build()` becomes available. The GenBank,
/// RefSeq, and UCSC accessions are optional and can be added afterwards.
///
/// # Examples
///
/// Only the required attributes:
///
/// ```
/// use dabuild::Contig;
///
/// let contig = Contig::builder().name("Y").length(57_227_415u32).build();
///
/// assert_eq!(contig.name(), "Y");
/// assert_eq!(contig.length(), 57_227_415);
/// ```
///
/// Required attributes together with the GenBank, RefSeq, and UCSC accessions:
///
/// ```
/// use dabuild::Contig;
///
/// let contig = Contig::builder()
///     .length(57_227_415u32)
///     .name("Y")
///     .genbank_name("CM000686.2")
///     .refseq_name("NC_000024.10")
///     .ucsc_name("chrY")
///     .build();
///
/// assert_eq!(contig.name(), "Y");
/// assert_eq!(contig.length(), 57_227_415);
/// assert_eq!(contig.genbank_name().unwrap(), "CM000686.2");
/// assert_eq!(contig.refseq_name().unwrap(), "NC_000024.10");
/// assert_eq!(contig.ucsc_name().unwrap(), "chrY");
/// ```
pub fn builder() -> ContigBuilder<Uninit> {
    // Nothing has been configured yet, hence the empty marker state.
    let state = Uninit;
    ContigBuilder { state }
}

/// Get the alternative contig identifiers.
///
/// For instance, `CM000686.2`, `NC_000024.10`, and `chrY` for chromosome `Y`.
Expand All @@ -32,7 +70,13 @@ impl Contig {
/// ```
/// use dabuild::Contig;
///
/// let contig = Contig::new("Y", &["CM000686.2", "NC_000024.10", "chrY"], 57_227_415).expect("The contig data are valid");
/// let contig = Contig::builder()
/// .length(57_227_415u32)
/// .name("Y")
/// .genbank_name("CM000686.2")
/// .refseq_name("NC_000024.10")
/// .ucsc_name("chrY")
/// .build();
///
/// let alt_names: Vec<_> = contig.alt_names().collect();
/// assert_eq!(&alt_names, &["CM000686.2", "NC_000024.10", "chrY"]);
Expand All @@ -52,7 +96,11 @@ impl Contig {
/// ```
/// use dabuild::Contig;
///
/// let contig = Contig::new("Y", &["CM000686.2", "NC_000024.10", "chrY"], 57_227_415).expect("The contig data are valid");
/// let contig = Contig::builder()
/// .name("Y")
/// .length(57_227_415u32)
/// .genbank_name("CM000686.2")
/// .build();
///
/// assert_eq!(contig.genbank_name(), Some("CM000686.2"));
/// ```
Expand All @@ -68,7 +116,11 @@ impl Contig {
/// ```
/// use dabuild::Contig;
///
/// let contig = Contig::new("Y", &["CM000686.2", "NC_000024.10", "chrY"], 57_227_415).expect("The contig data are valid");
/// let contig = Contig::builder()
/// .name("Y")
/// .length(57_227_415u32)
/// .refseq_name("NC_000024.10")
/// .build();
///
/// assert_eq!(contig.refseq_name(), Some("NC_000024.10"));
/// ```
Expand All @@ -84,7 +136,11 @@ impl Contig {
/// ```
/// use dabuild::Contig;
///
/// let contig = Contig::new("Y", &["CM000686.2", "NC_000024.10", "chrY"], 57_227_415).expect("The contig data are valid");
/// let contig = Contig::builder()
/// .name("Y")
/// .length(57_227_415u32)
/// .ucsc_name("chrY")
/// .build();
///
/// assert_eq!(contig.ucsc_name(), Some("chrY"));
/// ```
Expand Down Expand Up @@ -143,7 +199,9 @@ impl Contig {
/// * UCSC accession
///
/// An accession equal to an empty string or `"na"` is filtered out.
#[deprecated(since = "0.3.1", note = "use `Contig::builder()` instead")]
pub fn new(name: impl ToString, alt_names: &[impl ToString], length: u32) -> Option<Self> {
// TODO: remove in `0.4.0`.
const NON_EMPTY_NON_NA_STRING: fn(&String) -> bool = |v| !v.is_empty() && v != "na";

Some(Self {
Expand All @@ -166,21 +224,216 @@ impl Contig {
}
}

/// Step-wise builder of a [`Contig`].
///
/// The `State` type parameter records which attributes have been supplied so far
/// and therefore which methods can be called next. A builder starts as
/// `ContigBuilder<Uninit>` (see [`Contig::builder`]); `build()` is only offered
/// once both the name and the length are present.
///
/// The builder does nothing on its own: a chain that never reaches `build()`
/// produces no [`Contig`], which is why the type is marked `#[must_use]`.
#[must_use = "a contig builder does nothing unless `build()` is called"]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContigBuilder<State> {
    /// Attributes collected so far.
    state: State,
}

/// Builder state in which neither the contig name nor the length has been set.
///
/// This is the state of the builder returned by [`Contig::builder`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Uninit;

/// Builder state in which only the contig name has been set.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithName {
    /// The contig name, already converted to an owned string.
    name: String,
}

/// Builder state in which only the contig length has been set.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithLength {
    /// The contig length.
    length: u32,
}

/// Supply the first of the two required attributes.
impl ContigBuilder<Uninit> {
    /// Set the contig name (e.g. `"Y"` for chromosome Y).
    ///
    /// The name is stored as an owned `String` obtained via `to_string()`.
    /// The length still has to be provided before the contig can be built.
    pub fn name(self, name: impl ToString) -> ContigBuilder<WithName> {
        let name = name.to_string();
        ContigBuilder {
            state: WithName { name },
        }
    }

    /// Set the contig length (e.g. `57_227_415` for chromosome Y of GRCh38.p13).
    ///
    /// Any value convertible into `u32` is accepted.
    /// The name still has to be provided before the contig can be built.
    pub fn length(self, length: impl Into<u32>) -> ContigBuilder<WithLength> {
        let length = length.into();
        ContigBuilder {
            state: WithLength { length },
        }
    }
}

/// Builder state in which both required attributes, the name and the length, are set
/// but no alternative accession has been added.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithNameAndLength {
    /// The contig name.
    name: String,
    /// The contig length.
    length: u32,
}

impl ContigBuilder<WithLength> {
/// Set the contig name (e.g. `"Y"` for chromosome Y).
pub fn name(self, name: impl ToString) -> ContigBuilder<WithNameAndLength> {
ContigBuilder {
state: WithNameAndLength {
name: name.to_string(),
length: self.state.length,
},
}
}
}

impl ContigBuilder<WithName> {
/// Set the contig length (e.g. `57_227_415` for chromosome Y of GRCh38.p13).
pub fn length(self, length: impl Into<u32>) -> ContigBuilder<WithNameAndLength> {
ContigBuilder {
state: WithNameAndLength {
name: self.state.name,
length: length.into(),
},
}
}
}

/// Builder state holding the required attributes plus the optional accessions.
///
/// The builder enters this state as soon as the first of the GenBank, RefSeq,
/// or UCSC accessions is set; the accessions that were not set stay `None`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithNameLengthAndAltNames {
    /// The contig name.
    name: String,
    /// The contig length.
    length: u32,
    /// GenBank accession, if one was set.
    genbank_name: Option<String>,
    /// RefSeq accession, if one was set.
    refseq_name: Option<String>,
    /// UCSC accession, if one was set.
    ucsc_name: Option<String>,
}

/// Add optional contig attributes or finalize the build.
impl ContigBuilder<WithNameAndLength> {
/// Build the complete [`Contig`].
pub fn build(self) -> Contig {
Contig {
name: self.state.name,
genbank_name: None,
refseq_name: None,
ucsc_name: None,
length: self.state.length,
}
}

/// Set the GenBank accession (e.g. `"CM000686.2"` for chromosome Y).
pub fn genbank_name(
self,
genbank_name: impl ToString,
) -> ContigBuilder<WithNameLengthAndAltNames> {
ContigBuilder {
state: WithNameLengthAndAltNames {
name: self.state.name,
length: self.state.length,
genbank_name: Some(genbank_name.to_string()),
refseq_name: None,
ucsc_name: None,
},
}
}

/// Set the RefSeq accession (e.g. `"NC_000024.10"` for chromosome Y).
pub fn refseq_name(
self,
refseq_name: impl ToString,
) -> ContigBuilder<WithNameLengthAndAltNames> {
ContigBuilder {
state: WithNameLengthAndAltNames {
name: self.state.name,
length: self.state.length,
genbank_name: None,
refseq_name: Some(refseq_name.to_string()),
ucsc_name: None,
},
}
}

/// Set the UCSC accession (e.g. `"chrY"` for chromosome Y).
pub fn ucsc_name(self, ucsc_name: impl ToString) -> ContigBuilder<WithNameLengthAndAltNames> {
ContigBuilder {
state: WithNameLengthAndAltNames {
name: self.state.name,
length: self.state.length,
genbank_name: None,
refseq_name: None,
ucsc_name: Some(ucsc_name.to_string()),
},
}
}
}

/// Keep adding optional accessions or finalize the build.
///
/// Setting an accession that is already present replaces the earlier value;
/// the other attributes are carried over untouched.
impl ContigBuilder<WithNameLengthAndAltNames> {
    /// Build the complete [`Contig`] from everything collected so far.
    pub fn build(self) -> Contig {
        let WithNameLengthAndAltNames {
            name,
            length,
            genbank_name,
            refseq_name,
            ucsc_name,
        } = self.state;

        Contig {
            name,
            length,
            genbank_name,
            refseq_name,
            ucsc_name,
        }
    }

    /// Set the GenBank accession (e.g. `"CM000686.2"` for chromosome Y).
    pub fn genbank_name(
        self,
        genbank_name: impl ToString,
    ) -> ContigBuilder<WithNameLengthAndAltNames> {
        ContigBuilder {
            state: WithNameLengthAndAltNames {
                genbank_name: Some(genbank_name.to_string()),
                ..self.state
            },
        }
    }

    /// Set the RefSeq accession (e.g. `"NC_000024.10"` for chromosome Y).
    pub fn refseq_name(
        self,
        refseq_name: impl ToString,
    ) -> ContigBuilder<WithNameLengthAndAltNames> {
        ContigBuilder {
            state: WithNameLengthAndAltNames {
                refseq_name: Some(refseq_name.to_string()),
                ..self.state
            },
        }
    }

    /// Set the UCSC accession (e.g. `"chrY"` for chromosome Y).
    pub fn ucsc_name(self, ucsc_name: impl ToString) -> ContigBuilder<WithNameLengthAndAltNames> {
        ContigBuilder {
            state: WithNameLengthAndAltNames {
                ucsc_name: Some(ucsc_name.to_string()),
                ..self.state
            },
        }
    }
}

#[cfg(test)]
mod contig_tests {
use super::Contig;

#[test]
fn test_transpose_coordinate() {
// Both bindings describe a contig named "X" of length 10; the second `let` shadows the first,
// so the assertions below run against the builder-made contig.
let contig = Contig::new("X", &["Y"], 10).unwrap();
let contig = Contig::builder().name("X").length(10u8).build();

// On this length-10 contig, coordinate 10 transposes to 0 and coordinate 8 transposes to 2.
assert_eq!(contig.transpose_coordinate(10).unwrap(), 0);
assert_eq!(contig.transpose_coordinate(8).unwrap(), 2);
}

#[test]
fn test_transpose_coordinate_panics() {
// Both bindings describe a contig named "X" of length 10; the second `let` shadows the first.
let contig = Contig::new("X", &["Y"], 10).unwrap();
let contig = Contig::builder().name("X").length(10u8).build();

// Despite the test name, no panic is expected here: for coordinate 11 on this
// length-10 contig, `transpose_coordinate` is asserted to return `None`.
assert!(contig.transpose_coordinate(11).is_none())
}
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
//!
//! ## Use genome build
//!
//! Genome build is basically a data container and the usage involves accessing the data.
//! [`GenomeBuild`] is basically a data container and the usage involves accessing the data.
//!
//! We show several examples with the *GRCh38.p13* genome build.
//!
Expand All @@ -39,7 +39,7 @@
//!
//! ## Access contigs
//!
//! The genome build contains one or more contigs.
//! The genome build contains one or more [`Contig`]s.
//!
//! We can iterate over all contigs, e.g. to count them:
//!
Expand All @@ -51,7 +51,7 @@
//! assert_eq!(count, 640);
//! ```
//!
//! and we can also access a contig (e.g. `chrY`) by one of its names:
//! and we can also access a specific [`Contig`] (e.g. for `chrY`) by one of its names:
//!
//! ```rust
//! # use dabuild::{GenomeBuild, GenomeBuildIdentifier};
Expand Down
Loading