Skip to content

Commit 0bcf835

Browse files
committed
feature: add attribute to hold data processing method references to DataArray instances.
1 parent ccc12d8 commit 0bcf835

File tree

5 files changed

+122
-1
lines changed

5 files changed

+122
-1
lines changed

src/io/mzml/async_reader.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,9 @@ impl<
290290
let mut reader = Reader::from_reader(&mut self.handle);
291291
reader.trim_text(true);
292292
accumulator.instrument_id_map = Some(&mut self.instrument_id_map);
293+
if let Some(val) = self.run.default_data_processing_id.as_ref() {
294+
accumulator.set_run_data_processing(Some(val.clone().into()));
295+
}
293296
let mut offset: usize = 0;
294297
loop {
295298
let event = reader.read_event_into_async(&mut self.buffer).await;

src/io/mzml/reader.rs

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,46 @@ pub trait SpectrumBuilding<'a, C: CentroidLike, D: DeconvolutedCentroidLike, S:
5858
fn isolation_window_mut(&mut self) -> &mut IsolationWindow;
5959
/// Get the last scan window being constructed.
6060
fn scan_window_mut(&mut self) -> &mut ScanWindow;
61+
62+
/// Get the current [`SelectedIon`] being built.
6163
fn selected_ion_mut(&mut self) -> &mut SelectedIon;
64+
65+
/// Add a new [`SelectedIon`] to the stack for the current [`Precursor`].
66+
/// This will be the current [`SelectedIon`].
6267
fn new_selected_ion(&mut self) -> &mut SelectedIon;
68+
69+
/// Add a new [`Precursor`] to the stack. This will be the current [`Precursor`].
6370
fn new_precursor_mut(&mut self) -> &mut Precursor;
71+
72+
/// Get the current [`Precursor`] being built.
6473
fn precursor_mut(&mut self) -> &mut Precursor;
74+
75+
/// Get the current [`DataArray`] being built. This may be an empty instance if
76+
/// there is no array being built currently.
6577
fn current_array_mut(&mut self) -> &mut DataArray;
78+
6679
/// Move all the data into the provided `spectrum` reference
6780
fn into_spectrum(self, spectrum: &mut S);
6881

82+
/// Optionally set the global data processing identifier for the run to be used if
83+
/// a data processing reference isn't specified locally.
84+
///
85+
/// This is a no-op if not explicitly implemented.
86+
fn set_run_data_processing(&mut self, _identifier: Option<Box<str>>) {}
87+
88+
/// Move all the data into the provided `chromatogram` reference
6989
fn into_chromatogram(self, chromatogram: &mut Chromatogram);
7090

91+
/// Put a parameter-like instance into the current top-level instance
7192
fn fill_spectrum<P: ParamLike + Into<Param> + ParamValue>(&mut self, param: P);
7293

94+
/// Set the compression method for the current [`DataArray`]
7395
fn set_current_compressiion(&mut self, compression: BinaryCompressionType) {
7496
trace!("Setting current compression method for {:?} to {compression:?}", self.current_array_mut().name());
7597
self.current_array_mut().compression = compression;
7698
}
7799

100+
/// Put a parameter-like instance into the current [`DataArray`]
78101
fn fill_binary_data_array<P: ParamLike + Into<Param> + ParamValue>(&mut self, param: P) {
79102
if param.is_ms() {
80103
match param.accession().unwrap() {
@@ -229,6 +252,7 @@ pub trait SpectrumBuilding<'a, C: CentroidLike, D: DeconvolutedCentroidLike, S:
229252
}
230253
}
231254

255+
/// Put a parameter-like instance into the current [`SelectedIon`]
232256
fn fill_selected_ion(&mut self, param: Param) {
233257
match param.name.as_ref() {
234258
"selected ion m/z" => {
@@ -248,6 +272,7 @@ pub trait SpectrumBuilding<'a, C: CentroidLike, D: DeconvolutedCentroidLike, S:
248272
};
249273
}
250274

275+
/// Put a parameter-like instance into the current [`IsolationWindow`]
251276
fn fill_isolation_window(&mut self, param: Param) {
252277
let window = self.isolation_window_mut();
253278
match param.name.as_ref() {
@@ -324,6 +349,7 @@ pub trait SpectrumBuilding<'a, C: CentroidLike, D: DeconvolutedCentroidLike, S:
324349
}
325350
}
326351

352+
/// Put a parameter-like instance into the current [`ScanWindow`]
327353
fn fill_scan_window(&mut self, param: Param) {
328354
let window = self.scan_window_mut();
329355
match param.name.as_ref() {
@@ -355,7 +381,10 @@ macro_rules! xml_error {
355381
const BUFFER_SIZE: usize = 10000;
356382

357383
/// An accumulator for the attributes of a spectrum as it is read from an
358-
/// mzML document
384+
/// mzML document.
385+
///
386+
/// While this type is public, it is unnecessary for most users. Instead
387+
/// just use [`MzMLReaderType::read_next`].
359388
pub struct MzMLSpectrumBuilder<
360389
'a,
361390
C: CentroidLike = CentroidPeak,
@@ -376,6 +405,8 @@ pub struct MzMLSpectrumBuilder<
376405
pub has_precursor: bool,
377406
pub detail_level: DetailLevel,
378407
pub instrument_id_map: Option<&'a mut IncrementingIdMap>,
408+
pub run_level_data_processing: Option<Box<str>>,
409+
pub spectrum_data_processing_ref: Option<Box<str>>,
379410
entry_type: EntryType,
380411
centroid_type: PhantomData<C>,
381412
deconvoluted_type: PhantomData<D>,
@@ -397,6 +428,8 @@ impl<C: CentroidLike, D: DeconvolutedCentroidLike> Default for MzMLSpectrumBuild
397428
has_precursor: Default::default(),
398429
detail_level: Default::default(),
399430
instrument_id_map: Default::default(),
431+
run_level_data_processing: None,
432+
spectrum_data_processing_ref: None,
400433
entry_type: Default::default(),
401434
centroid_type: PhantomData,
402435
deconvoluted_type: PhantomData,
@@ -410,6 +443,10 @@ impl<C: CentroidLike, D: DeconvolutedCentroidLike> CVParamParse for MzMLSpectrum
410443
impl<'inner, C: CentroidLike, D: DeconvolutedCentroidLike>
411444
SpectrumBuilding<'inner, C, D, MultiLayerSpectrum<C, D>> for MzMLSpectrumBuilder<'inner, C, D>
412445
{
446+
fn set_run_data_processing(&mut self, identifier: Option<Box<str>>) {
447+
self.run_level_data_processing = identifier;
448+
}
449+
413450
fn isolation_window_mut(&mut self) -> &mut IsolationWindow {
414451
&mut self.precursor_mut().isolation_window
415452
}
@@ -542,6 +579,10 @@ impl<
542579
Self::with_detail_level(DetailLevel::Full)
543580
}
544581

582+
pub fn set_run_data_processing(&mut self, identifier: Option<Box<str>>) {
583+
self.run_level_data_processing = identifier;
584+
}
585+
545586
pub fn with_detail_level(detail_level: DetailLevel) -> MzMLSpectrumBuilder<'inner, C, D> {
546587
Self {
547588
detail_level,
@@ -707,6 +748,10 @@ impl<C: CentroidLike + BuildFromArrayMap, D: DeconvolutedCentroidLike + BuildFro
707748
.expect("Failed to parse index");
708749
trace!("Stored spectrum index = {}", self.index);
709750
}
751+
b"dataProcessingRef" => {
752+
let ident: Box<str> = String::from_utf8_lossy(&attr.value).into();
753+
self.spectrum_data_processing_ref = Some(ident);
754+
}
710755
_ => {}
711756
},
712757
Err(msg) => {
@@ -799,6 +844,38 @@ impl<C: CentroidLike + BuildFromArrayMap, D: DeconvolutedCentroidLike + BuildFro
799844
return Ok(MzMLParserState::BinaryDataArrayList);
800845
}
801846
b"binaryDataArray" => {
847+
let mut dp_set = false;
848+
for attr_parsed in event.attributes() {
849+
match attr_parsed {
850+
Ok(attr) => {
851+
match attr.key.as_ref() {
852+
b"dataProcessingRef" => {
853+
match attr.unescape_value() {
854+
Ok(v) => {
855+
self.current_array.set_data_processing_reference(Some(v.into()));
856+
dp_set = true;
857+
break;
858+
},
859+
Err(msg) => return Err(self.handle_xml_error(msg.into(), state))
860+
}
861+
}
862+
_ => {}
863+
}
864+
},
865+
Err(msg) => {
866+
return Err(self.handle_xml_error(msg.into(), state));
867+
},
868+
}
869+
}
870+
if !dp_set {
871+
if let Some(dp_ref) = self.spectrum_data_processing_ref.as_ref() {
872+
self.current_array.set_data_processing_reference(Some(dp_ref.clone()));
873+
}
874+
else if let Some(dp_ref) = self.run_level_data_processing.as_ref() {
875+
self.current_array.set_data_processing_reference(Some(dp_ref.clone()));
876+
}
877+
}
878+
802879
return Ok(MzMLParserState::BinaryDataArray);
803880
}
804881
b"binary" => {
@@ -1317,6 +1394,7 @@ impl<
13171394
let mut reader = Reader::from_reader(&mut self.handle);
13181395
reader.trim_text(true);
13191396
accumulator = accumulator.borrow_instrument_configuration(&mut self.instrument_id_map);
1397+
accumulator.set_run_data_processing(self.run.default_data_processing_id.clone().map(|v| v.into_boxed_str()));
13201398
let mut offset: usize = 0;
13211399

13221400
macro_rules! err_state {

src/io/mzml/writer.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,6 +1752,13 @@ where
17521752

17531753
let encoded_len = encoded_array.len().to_string();
17541754
attrib!("encodedLength", encoded_len, outer);
1755+
if let Some(dp_id) = array.data_processing_reference() {
1756+
if let Some(dp_global) = self.run.default_data_processing_id.as_deref() {
1757+
if dp_id.as_ref() != dp_global {
1758+
attrib!("dataProcessingRef", dp_id, outer);
1759+
}
1760+
}
1761+
}
17551762
let array_len = array.data_len()?;
17561763
if array_len != default_array_len {
17571764
let array_len = array_len.to_string();

src/spectrum/bindata/array.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pub struct DataArray {
5252
pub params: Option<Box<ParamList>>,
5353
pub unit: Unit,
5454
item_count: Option<usize>,
55+
data_processing_reference: Option<Box<str>>,
5556
}
5657

5758
impl core::fmt::Debug for DataArray {
@@ -63,6 +64,7 @@ impl core::fmt::Debug for DataArray {
6364
.field("compression", &self.compression)
6465
.field("params", &self.params)
6566
.field("unit", &self.unit)
67+
.field("data_processing_ref", &self.data_processing_reference)
6668
.finish()
6769
}
6870
}
@@ -890,6 +892,16 @@ impl<'transient, 'lifespan: 'transient> DataArray {
890892
pub fn raw_len(&self) -> usize {
891893
self.data.len()
892894
}
895+
896+
/// Get the identifier referencing a [`DataProcessing`](crate::meta::DataProcessing)
897+
pub fn data_processing_reference(&self) -> Option<&Box<str>> {
898+
self.data_processing_reference.as_ref()
899+
}
900+
901+
/// Set the identifier referencing a [`DataProcessing`](crate::meta::DataProcessing)
902+
pub fn set_data_processing_reference(&mut self, data_processing_reference: Option<Box<str>>) {
903+
self.data_processing_reference = data_processing_reference;
904+
}
893905
}
894906

895907
impl<'transient, 'lifespan: 'transient> ByteArrayView<'transient, 'lifespan> for DataArray {
@@ -915,6 +927,10 @@ impl<'transient, 'lifespan: 'transient> ByteArrayView<'transient, 'lifespan> for
915927
self.unit
916928
}
917929

930+
fn data_processing_reference(&self) -> Option<&Box<str>> {
931+
self.data_processing_reference()
932+
}
933+
918934
fn name(&self) -> &ArrayType {
919935
&self.name
920936
}
@@ -928,6 +944,10 @@ impl<'transient, 'lifespan: 'transient> ByteArrayViewMut<'transient, 'lifespan>
928944
fn unit_mut(&mut self) -> &mut Unit {
929945
&mut self.unit
930946
}
947+
948+
fn set_data_processing_reference(&mut self, data_processing_reference: Option<Box<str>>) {
949+
self.set_data_processing_reference(data_processing_reference);
950+
}
931951
}
932952

933953
impl_param_described_deferred!(DataArray);
@@ -975,6 +995,10 @@ impl<'transient, 'lifespan: 'transient> ByteArrayView<'transient, 'lifespan>
975995
fn name(&self) -> &ArrayType {
976996
self.source.name()
977997
}
998+
999+
fn data_processing_reference(&self) -> Option<&Box<str>> {
1000+
self.source.data_processing_reference()
1001+
}
9781002
}
9791003

9801004
#[cfg(test)]

src/spectrum/bindata/traits.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ pub trait ByteArrayView<'transient, 'lifespan: 'transient> {
7979
/// The unit of measurement each data point is in
8080
fn unit(&self) -> Unit;
8181

82+
/// Get the identifier referencing a [`DataProcessing`](crate::meta::DataProcessing)
83+
fn data_processing_reference(&self) -> Option<&Box<str>> {
84+
None
85+
}
86+
8287
fn to_f32(&'lifespan self) -> Result<Cow<'transient, [f32]>, ArrayRetrievalError> {
8388
type D = f32;
8489
match self.dtype() {
@@ -224,6 +229,10 @@ pub trait ByteArrayViewMut<'transient, 'lifespan: 'transient>:
224229
let view = self.view_mut()?;
225230
Self::coerce_from_mut(view)
226231
}
232+
233+
#[allow(unused)]
234+
/// Set the identifier referencing a [`DataProcessing`](crate::meta::DataProcessing)
235+
fn set_data_processing_reference(&mut self, data_processing_reference: Option<Box<str>>) {}
227236
}
228237

229238
#[derive(Debug)]

0 commit comments

Comments
 (0)