From c02400fa557f54be8865034eea704c7e0e075a6b Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 6 Dec 2024 12:03:00 +0100 Subject: [PATCH 1/4] Set vendor centroiding for thermo raw files, fixes reading of thermo raw files that do not yet include centroided peaks. --- Cargo.toml | 10 ++++++---- src/lib.rs | 21 +++++++++++---------- src/parse_mzdata.rs | 41 +++++++++++++++++++++++------------------ 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 994665b..716e740 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ms2rescore-rs" -version = "0.4.0-1" +version = "0.4.1" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -10,10 +10,12 @@ crate-type = ["cdylib"] [features] default = ["thermo"] - thermo = ["mzdata/thermo"] [dependencies] -pyo3 = "0.20.0" -mzdata = "0.33.0" +mzdata = "0.39.0" timsrust = "0.3.0" + +[dependencies.pyo3] +version = "0.23.3" +features = ["anyhow"] diff --git a/src/lib.rs b/src/lib.rs index 0bdcc22..f096566 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,8 @@ mod file_types; +mod ms2_spectrum; mod parse_mzdata; mod parse_timsrust; mod precursor; -mod ms2_spectrum; use std::collections::HashMap; @@ -10,8 +10,8 @@ use pyo3::exceptions::{PyException, PyValueError}; use pyo3::prelude::*; use file_types::{match_file_type, SpectrumFileType}; -use precursor::Precursor; use ms2_spectrum::MS2Spectrum; +use precursor::Precursor; /// Check if spectrum path matches a supported file type. #[pyfunction] @@ -27,9 +27,10 @@ pub fn get_precursor_info(spectrum_path: String) -> PyResult { - parse_mzdata::parse_precursor_info(&spectrum_path) - } + SpectrumFileType::MascotGenericFormat + | SpectrumFileType::MzML + | SpectrumFileType::MzMLb + | SpectrumFileType::ThermoRaw => parse_mzdata::parse_precursor_info(&spectrum_path), SpectrumFileType::BrukerRaw => parse_timsrust::parse_precursor_info(&spectrum_path), SpectrumFileType::Unknown => return Err(PyValueError::new_err("Unsupported file type")), }; @@ -46,9 +47,10 @@ pub fn get_ms2_spectra(spectrum_path: String) -> PyResult { - parse_mzdata::read_ms2_spectra(&spectrum_path) - } + SpectrumFileType::MascotGenericFormat + | SpectrumFileType::MzML + | SpectrumFileType::MzMLb + | SpectrumFileType::ThermoRaw => parse_mzdata::read_ms2_spectra(&spectrum_path), SpectrumFileType::BrukerRaw => parse_timsrust::read_ms2_spectra(&spectrum_path), SpectrumFileType::Unknown => return Err(PyValueError::new_err("Unsupported file type")), }; @@ -59,10 +61,9 @@ pub fn get_ms2_spectra(spectrum_path: String) -> PyResult PyResult<()> { +fn ms2rescore_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_function(wrap_pyfunction!(is_supported_file_type, m)?)?; diff --git a/src/parse_mzdata.rs b/src/parse_mzdata.rs index b4247e0..12975dd 100644 --- a/src/parse_mzdata.rs +++ b/src/parse_mzdata.rs @@ -1,7 +1,6 @@ use std::collections::HashMap; -use mzdata::params::ParamValue; -use mzdata::mz_read; +use mzdata::{params::ParamValue, prelude::*, MZReader}; use crate::ms2_spectrum::MS2Spectrum; use crate::precursor::Precursor; @@ -49,25 +48,31 @@ impl From for MS2Spectrum { pub fn parse_precursor_info( spectrum_path: &str, ) -> Result, std::io::Error> { - mz_read!(spectrum_path.as_ref(), reader => { - reader.filter(|spectrum| spectrum.description.ms_level == 2) - .filter_map(|spectrum| { - spectrum.description.precursor.as_ref()?; - Some((spectrum.description.id.clone(), Precursor::from(&spectrum))) - }) - .collect::>() - }) + let mut reader = MZReader::open_path(spectrum_path)?; + if let MZReader::ThermoRaw(inner) = &mut reader { + inner.set_centroiding(true); + } + + Ok(reader + .filter(|spectrum| spectrum.description.ms_level == 2) + .filter_map(|spectrum| { + spectrum.description.precursor.as_ref()?; + Some((spectrum.description.id.clone(), Precursor::from(&spectrum))) + }) + .collect::>()) } /// Read MS2 spectra from spectrum files with mzdata -pub fn read_ms2_spectra( - spectrum_path: &str, -) -> Result, std::io::Error> { - mz_read!(spectrum_path.as_ref(), reader => { - reader.filter(|spectrum| spectrum.description.ms_level == 2) - .map(MS2Spectrum::from) - .collect::>() - }) +pub fn read_ms2_spectra(spectrum_path: &str) -> Result, std::io::Error> { + let mut reader = MZReader::open_path(spectrum_path)?; + if let MZReader::ThermoRaw(inner) = &mut reader { + inner.set_centroiding(true); + } + + Ok(reader + .filter(|spectrum| spectrum.description.ms_level == 2) + .map(MS2Spectrum::from) + .collect::>()) } fn get_charge_from_spectrum(spectrum: &mzdata::spectrum::MultiLayerSpectrum) -> Option { From e5ad3d68289eba8b53d406483ea2072c4e90076f Mon Sep 17 00:00:00 2001 From: RalfG Date: Sat, 7 Dec 2024 16:09:03 +0100 Subject: [PATCH 2/4] Update to timsrust 0.4 --- Cargo.toml | 2 +- src/parse_timsrust.rs | 51 ++++++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 716e740..8449a1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ thermo = ["mzdata/thermo"] [dependencies] mzdata = "0.39.0" -timsrust = "0.3.0" +timsrust = "0.4.1" [dependencies.pyo3] version = "0.23.3" diff --git a/src/parse_timsrust.rs b/src/parse_timsrust.rs index bf7fc69..5f12b5f 100644 --- a/src/parse_timsrust.rs +++ b/src/parse_timsrust.rs @@ -3,8 +3,8 @@ use std::collections::HashMap; use crate::ms2_spectrum::MS2Spectrum; use crate::precursor::Precursor; -impl From for Precursor { - fn from(precursor: timsrust::ms_data::Precursor) -> Self { +impl From for Precursor { + fn from(precursor: timsrust::Precursor) -> Self { Precursor { mz: precursor.mz, rt: precursor.rt, @@ -15,8 +15,8 @@ impl From for Precursor { } } -impl From for MS2Spectrum { - fn from(spectrum: timsrust::ms_data::Spectrum) -> Self { +impl From for MS2Spectrum { + fn from(spectrum: timsrust::Spectrum) -> Self { MS2Spectrum::new( spectrum.index.to_string(), spectrum.mz_values.iter().map(|mz| *mz as f32).collect(), @@ -25,7 +25,7 @@ impl From for MS2Spectrum { .iter() .map(|intensity| *intensity as f32) .collect(), - Some(Precursor::from(spectrum.precursor)), + spectrum.precursor.map(Precursor::from), ) } } @@ -34,35 +34,36 @@ impl From for MS2Spectrum { pub fn parse_precursor_info( spectrum_path: &str, ) -> Result, std::io::Error> { - let reader = timsrust::FileReader::new(spectrum_path) + let reader = timsrust::readers::SpectrumReader::new(spectrum_path) .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; - Ok(reader - .read_all_spectra() + let spectra = reader + .get_all() .into_iter() - .filter(|spectrum| { - matches!( - spectrum.precursor, - timsrust::ms_data::Precursor { .. } - ) - }) - .map(|spectrum| { - ( - spectrum.index.to_string(), - Precursor::from(spectrum.precursor), - ) + .collect::, _>>() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; + + let precursor_info = spectra + .into_iter() + .filter_map(|spectrum| match spectrum.precursor { + Some(precursor) => Some((spectrum.index.to_string(), Precursor::from(precursor))), + None => None, }) - .collect::>()) + .collect::>(); + + Ok(precursor_info) } /// Read MS2 spectra from spectrum files with timsrust pub fn read_ms2_spectra(spectrum_path: &str) -> Result, std::io::Error> { - let reader = timsrust::FileReader::new(spectrum_path) + let reader = timsrust::readers::SpectrumReader::new(spectrum_path) .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; - Ok(reader - .read_all_spectra() + let spectra = reader + .get_all() .into_iter() - .map(MS2Spectrum::from) - .collect()) + .collect::, _>>() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; + + Ok(spectra.into_iter().map(MS2Spectrum::from).collect()) } From f8782144433d8cb61e573534bfe465dbf2b3af6a Mon Sep 17 00:00:00 2001 From: Ralf Gabriels Date: Wed, 15 Jan 2025 11:12:54 +0100 Subject: [PATCH 3/4] Cargo.toml: Keep dependencies in one list Co-authored-by: Kevin Velghe --- Cargo.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8449a1c..bd9ec92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,6 @@ default = ["thermo"] thermo = ["mzdata/thermo"] [dependencies] +pyo3 = { version = "0.23.3", features = ["anyhow"] } mzdata = "0.39.0" timsrust = "0.4.1" - -[dependencies.pyo3] -version = "0.23.3" -features = ["anyhow"] From 1cce4e567b02c5336abc1e94fd8fd6266bba1db5 Mon Sep 17 00:00:00 2001 From: Ralf Gabriels Date: Wed, 15 Jan 2025 11:13:21 +0100 Subject: [PATCH 4/4] Update src/parse_mzdata.rs Co-authored-by: Kevin Velghe --- src/parse_mzdata.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/parse_mzdata.rs b/src/parse_mzdata.rs index 12975dd..7b340c0 100644 --- a/src/parse_mzdata.rs +++ b/src/parse_mzdata.rs @@ -48,11 +48,7 @@ impl From for MS2Spectrum { pub fn parse_precursor_info( spectrum_path: &str, ) -> Result, std::io::Error> { - let mut reader = MZReader::open_path(spectrum_path)?; - if let MZReader::ThermoRaw(inner) = &mut reader { - inner.set_centroiding(true); - } - + let reader = MZReader::open_path(spectrum_path)?; Ok(reader .filter(|spectrum| spectrum.description.ms_level == 2) .filter_map(|spectrum| {