Skip to content

Commit 891614f

Browse files
authored
Merge pull request #586 from bigbio/dev
Improvements in Big data experiments
2 parents 28956d4 + 5d8e9e0 commit 891614f

File tree

6 files changed

+18017
-25
lines changed

6 files changed

+18017
-25
lines changed

docs/PXD010899/multiqc_report.html

Lines changed: 8996 additions & 0 deletions
Large diffs are not rendered by default.

docs/PXD010899_disable_hoverinfo/multiqc_report.html

Lines changed: 8996 additions & 0 deletions
Large diffs are not rendered by default.

docs/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,8 @@ You can find example reports on the [docs page](https://bigbio.github.io/pmultiq
253253

254254
| Example Type | Description | Link | Dataset Download |
255255
|---|---|---|---|
256-
| Big quantms DIA | Data-independent acquisition | [Big quantms DIA - 165 samples](https://pmultiqc.quantms.org/PXD062383/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD062383_disable_hoverinfo/multiqc_report.html)) | [PXD062383.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD062383.zip) |
256+
| Big LFQ | Label-free quantification | [Big LFQ (1808 runs)](https://pmultiqc.quantms.org/PXD010899/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD010899_disable_hoverinfo/multiqc_report.html)) | [PXD010899.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD010899.zip) |
257+
| Big quantms DIA | Data-independent acquisition | [Big quantms DIA (160 runs)](https://pmultiqc.quantms.org/PXD062383/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD062383_disable_hoverinfo/multiqc_report.html)) | [PXD062383.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD062383.zip) |
257258

258259
## 👥 Contributing
259260

pmultiqc/modules/common/ms/msinfo.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
)
1717

1818

19+
log = get_logger("pmultiqc.modules.common.ms")
20+
21+
1922
class MsInfoReader(BaseParser):
2023
def __init__(
2124
self,
@@ -116,9 +119,11 @@ def parse(self, **_kwargs) -> None:
116119
)
117120

118121
if m_name not in self.identified_spectrum:
119-
raise ValueError(
122+
log.warning(
120123
f"identified_spectrum missing entries for '{m_name}'. Check your mzTab file."
121124
)
125+
continue
126+
122127
identified_spectrum_scan_id = [
123128
spectra_ref_check(spectrum_id)
124129
for spectrum_id in self.identified_spectrum[m_name]

pmultiqc/modules/common/ms/mztab.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from datetime import datetime
44
from pyteomics import mztab
55
import pandas as pd
6-
import os
76
import re
87

98
from multiqc import config
@@ -69,16 +68,14 @@ def parse(self, **_kwargs) -> None:
6968
lambda x: 1 if self.dis_decoy(x["accession"]) == "DECOY" else 0, axis=1
7069
)
7170
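# Map each PSM's spectra_ref prefix to its spectrum file name (looked up via the "<prefix>-location" key in meta_data), as used in the experimental design file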
72-
psm["stand_spectra_ref"] = psm.apply(
73-
lambda x: os.path.basename(meta_data[x.spectra_ref.split(":")[0] + "-location"])
74-
+ ":"
75-
+ x.spectra_ref.split(":")[1],
76-
axis=1,
77-
)
78-
psm["filename"] = psm.apply(
79-
lambda x: file_prefix(meta_data[x.spectra_ref.split(":")[0] + "-location"]),
80-
axis=1,
81-
)
71+
spectra_ref_parts = psm["spectra_ref"].str.split(":", n=1, expand=True)
72+
spectra_ref_key = spectra_ref_parts[0] + "-location"
73+
spectra_ref_path = spectra_ref_key.map(meta_data)
74+
75+
psm["stand_spectra_ref"] = spectra_ref_path.map(file_prefix) + ":" + spectra_ref_parts[1]
76+
psm["filename"] = spectra_ref_path.map(file_prefix)
77+
del spectra_ref_parts, spectra_ref_key, spectra_ref_path
78+
8279
self.ms_with_psm = psm["filename"].unique().tolist()
8380

8481
prot = mztab_data.protein_table

pmultiqc/modules/quantms/quantms.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ def draw_plots(self):
375375
# quantms: LFQ or TMT
376376
else:
377377

378-
if not config.kwargs["ignored_idxml"]:
378+
if not config.kwargs["ignored_idxml"] and self.idx_paths:
379379
self.parse_idxml(self.mzml_table)
380380
self.cal_heat_map_score()
381381

@@ -426,7 +426,7 @@ def draw_plots(self):
426426
header_cols=spectrum_tracking_headers
427427
)
428428

429-
if not config.kwargs["ignored_idxml"]:
429+
if not config.kwargs["ignored_idxml"] and self.idx_paths:
430430
self.draw_search_engine()
431431

432432
draw_precursor_charge_distribution(
@@ -1096,10 +1096,9 @@ def cal_heat_map_score(self):
10961096
pep_df_need_cols = ["accession", "opt_global_cv_MS:1002217_decoy_peptide", "spectra_ref"] + study_variables
10971097
pep_table = pep_table[pep_df_need_cols].copy()
10981098

1099-
pep_table.loc[:, "stand_spectra_ref"] = pep_table.apply(
1100-
lambda x: file_prefix(meta_data[x.spectra_ref.split(":")[0] + "-location"]),
1101-
axis=1,
1102-
)
1099+
spectra_file_map = pep_table["spectra_ref"].str.split(":", n=1).str[0] + "-location"
1100+
pep_table["stand_spectra_ref"] = spectra_file_map.map(meta_data).map(file_prefix)
1101+
del spectra_file_map
11031102

11041103
pep_table["average_intensity"] = pep_table[study_variables].mean(axis=1, skipna=True)
11051104

@@ -1402,7 +1401,6 @@ def get_unimod_modification(modifis):
14021401
psm = psm[psm["opt_global_cv_MS:1002217_decoy_peptide"] == 0].copy()
14031402

14041403
for m, group in psm.groupby("filename"):
1405-
# m = os.path.basename(m)
14061404

14071405
# Modifications
14081406
mod_plot_dict, modified_cat = summarize_modifications(
@@ -2351,11 +2349,10 @@ def aggregate_spectrum_tracking(
23512349
"MS1_Num", "MS2_Num", "MSGF", "Comet", "Sage", "num_quant_psms", "num_quant_peps"
23522350
]
23532351

2354-
for i in header_cols:
2355-
if any([i in v for k, v in mzml_table.items()]):
2356-
pass
2357-
else:
2358-
header_cols.remove(i)
2352+
header_cols = [
2353+
i for i in header_cols
2354+
if any(i in v for v in mzml_table.values())
2355+
]
23592356

23602357
if sdrf_file_df.empty:
23612358

0 commit comments

Comments
 (0)