Merge pull request #586 from bigbio/dev

ypriverol · web-flow · commit 891614fc1b9d · 2026-01-22T06:57:20.000Z
Improvements in Big data experiments
diff --git a/docs/PXD010899/multiqc_report.html b/docs/PXD010899/multiqc_report.html
diff --git a/docs/PXD010899_disable_hoverinfo/multiqc_report.html b/docs/PXD010899_disable_hoverinfo/multiqc_report.html
diff --git a/docs/README.md b/docs/README.md
@@ -253,7 +253,8 @@ You can find example reports on the [docs page](https://bigbio.github.io/pmultiq
 
 | Example Type | Description | Link | Dataset Download |
 |---|---|---|---|
-| Big quantms DIA | Data-independent acquisition | [Big quantms DIA - 165 samples](https://pmultiqc.quantms.org/PXD062383/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD062383_disable_hoverinfo/multiqc_report.html)) | [PXD062383.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD062383.zip) |
+| Big LFQ | Label-free quantification | [Big LFQ (1808 runs)](https://pmultiqc.quantms.org/PXD010899/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD010899_disable_hoverinfo/multiqc_report.html)) | [PXD010899.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD010899.zip) |
+| Big quantms DIA | Data-independent acquisition | [Big quantms DIA (160 runs)](https://pmultiqc.quantms.org/PXD062383/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD062383_disable_hoverinfo/multiqc_report.html)) | [PXD062383.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD062383.zip) |
 
 ## 👥 Contributing
 
diff --git a/pmultiqc/modules/common/ms/msinfo.py b/pmultiqc/modules/common/ms/msinfo.py
@@ -16,6 +16,9 @@
 )
 
 
+log = get_logger("pmultiqc.modules.common.ms")
+
+
 class MsInfoReader(BaseParser):
     def __init__(
         self,
@@ -116,9 +119,11 @@ def parse(self, **_kwargs) -> None:
                     )
 
                 if m_name not in self.identified_spectrum:
-                    raise ValueError(
+                    log.warning(
                         f"identified_spectrum missing entries for '{m_name}'. Check your mzTab file."
                     )
+                    continue
+
                 identified_spectrum_scan_id = [
                     spectra_ref_check(spectrum_id)
                     for spectrum_id in self.identified_spectrum[m_name]
diff --git a/pmultiqc/modules/common/ms/mztab.py b/pmultiqc/modules/common/ms/mztab.py
@@ -3,7 +3,6 @@
 from datetime import datetime
 from pyteomics import mztab
 import pandas as pd
-import os
 import re
 
 from multiqc import config
@@ -69,16 +68,14 @@ def parse(self, **_kwargs) -> None:
                 lambda x: 1 if self.dis_decoy(x["accession"]) == "DECOY" else 0, axis=1
             )
         # map to spectrum file name in experimental design file
-        psm["stand_spectra_ref"] = psm.apply(
-            lambda x: os.path.basename(meta_data[x.spectra_ref.split(":")[0] + "-location"])
-            + ":"
-            + x.spectra_ref.split(":")[1],
-            axis=1,
-        )
-        psm["filename"] = psm.apply(
-            lambda x: file_prefix(meta_data[x.spectra_ref.split(":")[0] + "-location"]),
-            axis=1,
-        )
+        spectra_ref_parts = psm["spectra_ref"].str.split(":", n=1, expand=True)
+        spectra_ref_key = spectra_ref_parts[0] + "-location"
+        spectra_ref_path = spectra_ref_key.map(meta_data)
+
+        psm["stand_spectra_ref"] = spectra_ref_path.map(file_prefix) + ":" + spectra_ref_parts[1]
+        psm["filename"] = spectra_ref_path.map(file_prefix)
+        del spectra_ref_parts, spectra_ref_key, spectra_ref_path
+
         self.ms_with_psm = psm["filename"].unique().tolist()
 
         prot = mztab_data.protein_table
diff --git a/pmultiqc/modules/quantms/quantms.py b/pmultiqc/modules/quantms/quantms.py
@@ -375,7 +375,7 @@ def draw_plots(self):
         # quantms: LFQ or TMT
         else:
 
-            if not config.kwargs["ignored_idxml"]:
+            if not config.kwargs["ignored_idxml"] and self.idx_paths:
                 self.parse_idxml(self.mzml_table)
             self.cal_heat_map_score()
 
@@ -426,7 +426,7 @@ def draw_plots(self):
                 header_cols=spectrum_tracking_headers
             )
 
-            if not config.kwargs["ignored_idxml"]:
+            if not config.kwargs["ignored_idxml"] and self.idx_paths:
                 self.draw_search_engine()
 
             draw_precursor_charge_distribution(
@@ -1096,10 +1096,9 @@ def cal_heat_map_score(self):
             pep_df_need_cols = ["accession", "opt_global_cv_MS:1002217_decoy_peptide", "spectra_ref"] + study_variables
             pep_table = pep_table[pep_df_need_cols].copy()
 
-            pep_table.loc[:, "stand_spectra_ref"] = pep_table.apply(
-                lambda x: file_prefix(meta_data[x.spectra_ref.split(":")[0] + "-location"]),
-                axis=1,
-            )
+            spectra_file_map = pep_table["spectra_ref"].str.split(":", n=1).str[0] + "-location"
+            pep_table["stand_spectra_ref"] = spectra_file_map.map(meta_data).map(file_prefix)
+            del spectra_file_map
 
             pep_table["average_intensity"] = pep_table[study_variables].mean(axis=1, skipna=True)
 
@@ -1402,7 +1401,6 @@ def get_unimod_modification(modifis):
             psm = psm[psm["opt_global_cv_MS:1002217_decoy_peptide"] == 0].copy()
 
         for m, group in psm.groupby("filename"):
-            # m = os.path.basename(m)
 
             # Modifications
             mod_plot_dict, modified_cat = summarize_modifications(
@@ -2351,11 +2349,10 @@ def aggregate_spectrum_tracking(
         "MS1_Num", "MS2_Num", "MSGF", "Comet", "Sage", "num_quant_psms", "num_quant_peps"
     ]
 
-    for i in header_cols:
-        if any([i in v for k, v in mzml_table.items()]):
-            pass
-        else:
-            header_cols.remove(i)
+    header_cols = [
+        i for i in header_cols
+        if any(i in v for v in mzml_table.values())
+    ]
 
     if sdrf_file_df.empty:
 

Original file line number	Diff line number	Diff line change
`@@ -16,6 +16,9 @@`
`16`	`16`	`)`
`17`	`17`
`18`	`18`
	`19`	`+log = get_logger("pmultiqc.modules.common.ms")`
	`20`	`+`
	`21`	`+`
`19`	`22`	`class MsInfoReader(BaseParser):`
`20`	`23`	`def __init__(`
`21`	`24`	`self,`
`@@ -116,9 +119,11 @@ def parse(self, **_kwargs) -> None:`
`116`	`119`	`)`
`117`	`120`
`118`	`121`	`if m_name not in self.identified_spectrum:`
`119`		`- raise ValueError(`
	`122`	`+ log.warning(`
`120`	`123`	`f"identified_spectrum missing entries for '{m_name}'. Check your mzTab file."`
`121`	`124`	`)`
	`125`	`+ continue`
	`126`	`+`
`122`	`127`	`identified_spectrum_scan_id = [`
`123`	`128`	`spectra_ref_check(spectrum_id)`
`124`	`129`	`for spectrum_id in self.identified_spectrum[m_name]`