Skip to content

Commit 07ad2d0

Browse files
committed
update conversion script and yaml definitions
1 parent 4cefb47 commit 07ad2d0

File tree

2 files changed

+47
-20
lines changed

2 files changed

+47
-20
lines changed

tests/datasets/1744801204_SecA_cluster_Krishnamurthy/convert/convert.py

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# %%
22
"""Convert v0.2.x datasets to the v0.3.x dataset format."""
33

4-
from hdxms_datasets.v2.models import (
4+
from hdxms_datasets.database import submit_dataset
5+
from hdxms_datasets.models import (
56
Author,
67
HDXDataSet,
78
HDXState,
@@ -14,7 +15,9 @@
1415

1516
from pathlib import Path
1617
import yaml
17-
from hdxms_datasets.v2.migration.v020 import get_peptides
18+
from hdxms_datasets.migration.v020 import get_peptides
19+
from hdxms_datasets.verification import verify_dataset
20+
from hdxms_datasets.view import StructureView
1821
# %%
1922

2023
root = Path(__file__).parent
@@ -44,50 +47,60 @@
4447
)
4548

4649
structure = Structure(
47-
data_file=Path("data/SecA_monomer.pdb"),
50+
data_file=root / "SecA_monomer.pdb",
4851
format="pdb",
4952
description="NMR structure with ligand removed in sillico",
5053
pdb_id="2VDA",
5154
)
5255

5356
# %%
54-
states = list(hdx_spec["peptides"].keys())
57+
StructureView(structure)
5558

5659
# %%
60+
orig_states = list(hdx_spec["peptides"].keys())
61+
# states = list(hdx_spec["peptides"].keys())
62+
orig_states
5763

64+
# %%
5865

5966
states_mapping = {
60-
"WT ADP": "SecA-WT_ADP",
61-
"Monomer ADP": "SecA-monomer_ADP",
62-
"1-834 ADP": "SecA-1-834_ADP",
6367
"WT apo": "SecA-WT_apo",
6468
"Monomer apo": "SecA-monomer_apo",
6569
"1-834 apo": "SecA-1-834_apo",
70+
"WT ADP": "SecA-WT_ADP",
71+
"Monomer ADP": "SecA-monomer_ADP",
72+
"1-834 ADP": "SecA-1-834_ADP",
6673
}
6774

68-
6975
# %%
7076

7177
# load the seca state data as reference for protein data
7278
seca_state_dir = root.parent.parent / "1665149400_SecA_Krishnamurthy"
7379
json_str = (seca_state_dir / "dataset.json").read_text()
7480

7581
# %%
76-
ds_state = HDXDataSet.model_validate_json(json_str)
77-
wt_state = ds_state.get_state("SecA-WT_ADP")
82+
ref_dataset = HDXDataSet.model_validate_json(json_str)
83+
wt_state = ref_dataset.get_state("SecA-WT_apo")
7884
wt_state.protein_state
7985
# %%
8086

8187
hdx_states = []
82-
for state in states:
83-
peptide_spec = hdx_spec["peptides"][state]
84-
peptides = get_peptides(peptide_spec, hdx_spec["data_files"])
88+
for state in states_mapping:
8589
new_state_name = states_mapping[state]
8690

87-
ref_state = ds_state.get_state(new_state_name)
91+
peptide_spec = hdx_spec["peptides"][state]
92+
peptides = get_peptides(peptide_spec, hdx_spec["data_files"], root_dir=root.parent, chain=["B"])
93+
# Keep the fully deuterated (FD) control only for the WT apo state; the other states should reuse that same control.
94+
if new_state_name != "SecA-WT_apo":
95+
peptides = [p for p in peptides if p.deuteration_type != "fully_deuterated"]
96+
print(state)
97+
for peptide in peptides:
98+
print("state filter:", peptide.filters["State"])
99+
100+
ref_state = ref_dataset.get_state(new_state_name)
88101

89102
hdx_state = HDXState(
90-
name=state,
103+
name=new_state_name,
91104
protein_state=ref_state.protein_state,
92105
peptides=peptides,
93106
)
@@ -96,15 +109,29 @@
96109

97110
# %%
98111

112+
113+
# %%
99114
dataset = HDXDataSet(
100-
description="SecA quiescent states dataset",
115+
description="SecA quiescent states dataset (cluster data)",
101116
states=hdx_states,
102117
structure=structure,
103118
protein_identifiers=protein_info,
104119
metadata=metadata,
105120
)
106121

107-
s = dataset.model_dump_json(indent=2, exclude_none=True)
108-
Path(root.parent / "dataset.json").write_text(s)
122+
# %%
123+
124+
offending_state = dataset.get_state("SecA-1-834_apo")
125+
offending_state.peptides
126+
127+
# %%
128+
verify_dataset(dataset)
129+
database_dir = root.parent
130+
# submit the dataset to our database
131+
success, msg_or_id = submit_dataset(dataset, database_dir)
132+
if success:
133+
print(f"Dataset submitted successfully with ID: {msg_or_id}")
134+
else:
135+
print(f"Failed to submit dataset: {msg_or_id}")
109136

110137
# %%

tests/datasets/1744801204_SecA_cluster_Krishnamurthy/convert/hdx_spec.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,12 @@ peptides:
9797
non_deuterated:
9898
data_file: SecA_cluster
9999
filters:
100-
State: SecA1-834 apo
100+
State: SecA1-901 wt apo
101101
Exposure: 0
102102
partially_deuterated:
103103
data_file: SecA_cluster
104104
filters:
105-
State: SecA1-834 apo
105+
State: SecA1-901 wt apo
106106
Exposure:
107107
- 0.167
108108
- 0.5

0 commit comments

Comments
 (0)