Skip to content

Commit 2204635

Browse files
authored
Merge pull request #618 from bigbio/dev
Include the metadata and version of DIANN in the reports from log files
2 parents 9a043e7 + f5f969a commit 2204635

File tree

9 files changed

+168
-3
lines changed

9 files changed

+168
-3
lines changed

.github/workflows/python-app.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ jobs:
9898
run: |
9999
wget -nv https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/dia/dia.zip
100100
unzip -d ./dia dia.zip
101-
multiqc --quantms-plugin ./dia --config ./dia/multiqc_config.yml -o ./results_dia
101+
multiqc --quantms-plugin ./dia --config ./dia/multiqc_config.yml -o ./results_dia
102102
- uses: actions/upload-artifact@v4
103103
if: always()
104104
name: Upload results

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pmultiqc supports the following data sources:
5050

5151
3. **[DIA-NN](https://aptila.bio)** result files:
5252
- `report.tsv` or `report.parquet`: DIA-NN main report
53+
- `report.log.txt` or `diannsummary.log`: DIA-NN log, used to report the DIA-NN version (optional)
5354
- `*sdrf.tsv`: SDRF-Proteomics (optional)
5455
- `*ms_info.parquet`: mzML statistics after RAW-to-mzML conversion (using **[quantms-utils](https://github.com/bigbio/quantms-utils)**) (optional)
5556

docs/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ pmultiqc supports the following data sources:
7070

7171
### 3. **[DIA-NN](https://aptila.bio)** result files:
7272
- `report.tsv` or `report.parquet`: DIA-NN main report
73+
- `report.log.txt` or `diannsummary.log`: DIA-NN log, used to report the DIA-NN version (optional)
7374
- `*sdrf.tsv`: SDRF-Proteomics (optional)
7475
- `*ms_info.parquet`: mzML statistics after RAW-to-mzML conversion (using **[quantms-utils](https://github.com/bigbio/quantms-utils)**) (optional)
7576

pmultiqc/main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,12 @@ def pmultiqc_plugin_execution_start():
135135
config.sp, {"pmultiqc/diann_report_parquet": {"fn": "report.parquet", "num_lines": 0}}
136136
)
137137

138+
if "pmultiqc/diann_log_txt" not in config.sp:
139+
config.update_dict(config.sp, {"pmultiqc/diann_log_txt": {"fn": "report.log.txt", "num_lines": 0}})
140+
141+
if "pmultiqc/diann_log" not in config.sp:
142+
config.update_dict(config.sp, {"pmultiqc/diann_log": {"fn": "diannsummary.log", "num_lines": 0}})
143+
138144
if "pmultiqc/maxquant_result" not in config.sp:
139145
config.update_dict(
140146
config.sp, {"pmultiqc/maxquant_result": {"fn": "*.txt", "num_lines": 0}}

pmultiqc/modules/common/dia_utils.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from collections import OrderedDict
66
from sdrf_pipelines.openms.openms import UnimodDatabase
77
from multiqc.plots import table
8+
import os
89

910
from pmultiqc.modules.common.histogram import Histogram
1011
from pmultiqc.modules.common.stats import (
@@ -984,3 +985,77 @@ def dia_sample_level_modifications(df, sdrf_file_df):
984985
mod_plot[f"Sample {str(sample)}"] = mod_plot_dict
985986

986987
return mod_plot
988+
989+
990+
def parse_diann_version(log_file_path, max_lines=20):
    """Parse the DIA-NN version from the header of a DIA-NN log file.

    The log (e.g. ``report.log.txt`` or ``diannsummary.log``) typically
    starts with a line like:
        DIA-NN 1.8.1 (Data-Independent Acquisition by Neural Networks)

    Args:
        log_file_path: Path to the DIA-NN log file.
        max_lines: Number of leading lines to inspect. The version line is
            expected near the top, so scanning stops after this many lines.

    Returns:
        Version string (e.g. "1.8.1"), or None if no version line is found
        within the first ``max_lines`` lines or the file cannot be read.
    """
    version_pattern = re.compile(r"^DIA-NN\s+(\d+(?:\.\d+)*)", re.IGNORECASE)
    try:
        # "utf-8-sig" decodes plain UTF-8 identically but also drops a leading
        # byte-order mark, which would otherwise defeat the "^DIA-NN" anchor
        # (str.strip() does not remove U+FEFF).
        with open(log_file_path, "r", encoding="utf-8-sig", errors="replace") as f:
            for line_number, line in enumerate(f, start=1):
                # Check the limit first so exactly `max_lines` lines are scanned.
                if line_number > max_lines:
                    break
                match = version_pattern.match(line.strip())
                if match:
                    return match.group(1)
    except Exception as e:
        # Best effort: a missing or unreadable log must not abort the report.
        log.warning(f"Could not parse DIA-NN version from {log_file_path}: {e}")
    return None
1015+
1016+
1017+
def draw_diann_metadata_table(sub_section, diann_version):
    """Add a one-row metadata table showing the DIA-NN software version.

    The table is rendered with ``multiqc.plots.table`` and appended to the
    given sub-section via ``add_sub_section``. Nothing is drawn when no
    version is available.

    Args:
        sub_section: The sub-section list to add the table to.
        diann_version: DIA-NN version string to display; a falsy value
            (None or "") makes this function a no-op.
    """
    if not diann_version:
        return

    # Single row keyed by its row number, shown under the "No." column.
    table_data = {
        1: {
            "parameter": "DIA-NN Version",
            "value": diann_version,
        }
    }

    headers = {
        "parameter": {"title": "Parameter"},
        "value": {"title": "Value"},
    }

    draw_config = {
        "id": "diann_metadata",
        "title": "DIA-NN Metadata",
        "save_file": False,
        "sort_rows": False,
        "only_defined_headers": True,
        "col1_header": "No.",
        "no_violin": True,
        "save_data_file": False,
    }

    table_html = table.plot(data=table_data, headers=headers, pconfig=draw_config)

    add_sub_section(
        sub_section=sub_section,
        plot=table_html,
        order=0,
        description="This table presents the DIA-NN software version used for the analysis.",
        # The version may come from either supported log file name, so the
        # help text names both rather than report.log.txt only.
        helptext="""
            DIA-NN metadata, extracted from the DIA-NN log file (report.log.txt or
            diannsummary.log), shows the version of DIA-NN used for
            data-independent acquisition analysis.
            """,
    )

pmultiqc/modules/diann/diann.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
parse_mzml,
1414
aggregate_general_stats
1515
)
16-
from pmultiqc.modules.common.dia_utils import parse_diann_report
16+
from pmultiqc.modules.common.dia_utils import parse_diann_report, parse_diann_version, draw_diann_metadata_table
1717
from pmultiqc.modules.common.plots.general import draw_exp_design
1818
from pmultiqc.modules.common.plots.id import (
1919
draw_summary_protein_ident_table,
@@ -98,6 +98,20 @@ def get_data(self):
9898
log.error("DIANN report not found. Please check your data!")
9999
return False
100100

101+
# DIA-NN log file for version extraction
102+
self.diann_version = None
103+
for key in ["pmultiqc/diann_log_txt", "pmultiqc/diann_log"]:
104+
for f in self.find_log_files(key, filecontents=False):
105+
log_path = os.path.join(f["root"], f["fn"])
106+
self.diann_version = parse_diann_version(log_path)
107+
108+
if self.diann_version:
109+
log.info(f"DIA-NN version detected: {self.diann_version}")
110+
break
111+
112+
if self.diann_version:
113+
break
114+
101115
(
102116
self.mzml_table,
103117
self.mzml_peaks_ms2_plot,
@@ -133,6 +147,10 @@ def draw_plots(self):
133147
self.cal_num_table_data = {}
134148
self.quantms_modified = {}
135149

150+
# Draw DIA-NN metadata table (version info) in the experiment section
151+
if self.diann_version:
152+
draw_diann_metadata_table(self.sub_sections["experiment"], self.diann_version)
153+
136154
general_stats_data = aggregate_general_stats(
137155
ms1_general_stats=self.ms1_general_stats,
138156
current_sum_by_run=self.current_sum_by_run,

pmultiqc/modules/quantms/quantms.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from . import sparklines
3030

3131
from pmultiqc.modules.base import BasePMultiqcModule
32-
from pmultiqc.modules.common.dia_utils import parse_diann_report
32+
from pmultiqc.modules.common.dia_utils import parse_diann_report, parse_diann_version, draw_diann_metadata_table
3333
from pmultiqc.modules.common.common_utils import (
3434
parse_sdrf,
3535
get_ms_path,
@@ -255,6 +255,15 @@ def get_data(self):
255255
self.diann_report_path = diann_report_path
256256
self.enable_dia = True
257257

258+
# DIA-NN log file for version extraction
259+
self.diann_version = None
260+
for f in self.find_log_files("pmultiqc/diann_log", filecontents=False):
261+
log_path = os.path.join(f["root"], f["fn"])
262+
self.diann_version = parse_diann_version(log_path)
263+
if self.diann_version:
264+
log.info(f"DIA-NN version detected: {self.diann_version}")
265+
break
266+
258267
if not self.enable_dia:
259268
for f in self.find_log_files("pmultiqc/mztab", filecontents=False):
260269
self.out_mztab_path = os.path.join(f["root"], f["fn"])
@@ -316,6 +325,10 @@ def draw_plots(self):
316325
# quantms: DIA
317326
if self.enable_dia:
318327

328+
# Draw DIA-NN metadata table (version info) in the experiment section
329+
if self.diann_version:
330+
draw_diann_metadata_table(self.sub_sections["experiment"], self.diann_version)
331+
319332
(
320333
self.total_protein_quantified,
321334
self.total_peptide_count,
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
DIA-NN 1.8.1 (Data-Independent Acquisition by Neural Networks)
2+
Compiled on Jun 23 2022 at 15:23:37
3+
4+
Thread number set to 1
5+
Library-free mode is not active
6+
Mass accuracy settings detected
7+
Loading FASTA /data/uniprot_human.fasta

tests/test_diann.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Tests for DIA-NN module functionality."""
2+
3+
import os
4+
from pathlib import Path
5+
6+
import pytest
7+
8+
from pmultiqc.modules.common.dia_utils import parse_diann_version
9+
10+
TEST_DATA_DIR = Path(os.path.dirname(__file__)) / "resources" / "diann"
11+
12+
13+
class TestDiann:
    """Unit tests for DIA-NN version extraction from log files."""

    def test_parse_diann_version_from_log(self):
        """The bundled sample log yields the version on its first line."""
        log_file = TEST_DATA_DIR.joinpath("report.log.txt")
        assert log_file.exists(), f"Test log file {log_file} does not exist"

        version = parse_diann_version(os.fspath(log_file))
        assert version is not None, "DIA-NN version should be parsed from log file"
        assert version == "1.8.1", f"Expected version '1.8.1', got '{version}'"

    def test_parse_diann_version_nonexistent_file(self):
        """A path that does not exist is reported as None, not an exception."""
        missing_path = "/nonexistent/path/report.log.txt"
        assert parse_diann_version(missing_path) is None, "Should return None for nonexistent file"

    def test_parse_diann_version_no_version_line(self, tmp_path):
        """A log without any DIA-NN header line yields None."""
        log_file = tmp_path.joinpath("report.log.txt")
        log_file.write_text("This log file has no version line.\nAnother line.\n")

        assert (
            parse_diann_version(os.fspath(log_file)) is None
        ), "Should return None when no version line is present"

    def test_parse_diann_version_different_formats(self, tmp_path):
        """A two-component version number (e.g. 2.0) is parsed as-is."""
        log_file = tmp_path.joinpath("report.log.txt")
        log_file.write_text("DIA-NN 2.0 (Data-Independent Acquisition by Neural Networks)\n")

        version = parse_diann_version(os.fspath(log_file))
        assert version == "2.0", f"Expected version '2.0', got '{version}'"

0 commit comments

Comments
 (0)