|
1 | 1 | # %% |
2 | 2 | """Convert a v0.2.x dataset to a v0.3.x dataset.""" |
3 | 3 |
|
4 | | -from hdxms_datasets.v2.models import ( |
| 4 | +from hdxms_datasets.database import submit_dataset |
| 5 | +from hdxms_datasets.models import ( |
5 | 6 | Author, |
6 | 7 | HDXDataSet, |
7 | 8 | HDXState, |
|
14 | 15 |
|
15 | 16 | from pathlib import Path |
16 | 17 | import yaml |
17 | | -from hdxms_datasets.v2.migration.v020 import get_peptides |
| 18 | +from hdxms_datasets.migration.v020 import get_peptides |
| 19 | +from hdxms_datasets.verification import verify_dataset |
| 20 | +from hdxms_datasets.view import StructureView |
18 | 21 | # %% |
19 | 22 |
|
20 | 23 | root = Path(__file__).parent |
|
44 | 47 | ) |
45 | 48 |
|
46 | 49 | structure = Structure( |
47 | | - data_file=Path("data/SecA_monomer.pdb"), |
| 50 | + data_file=root / "SecA_monomer.pdb", |
48 | 51 | format="pdb", |
49 | 52 | description="NMR structure with ligand removed in sillico", |
50 | 53 | pdb_id="2VDA", |
51 | 54 | ) |
52 | 55 |
|
53 | 56 | # %% |
54 | | -states = list(hdx_spec["peptides"].keys()) |
| 57 | +StructureView(structure) |
55 | 58 |
|
56 | 59 | # %% |
| 60 | +orig_states = list(hdx_spec["peptides"].keys()) |
| 61 | +# states = list(hdx_spec["peptides"].keys()) |
| 62 | +orig_states |
57 | 63 |
|
| 64 | +# %% |
58 | 65 |
|
59 | 66 | states_mapping = { |
60 | | - "WT ADP": "SecA-WT_ADP", |
61 | | - "Monomer ADP": "SecA-monomer_ADP", |
62 | | - "1-834 ADP": "SecA-1-834_ADP", |
63 | 67 | "WT apo": "SecA-WT_apo", |
64 | 68 | "Monomer apo": "SecA-monomer_apo", |
65 | 69 | "1-834 apo": "SecA-1-834_apo", |
| 70 | + "WT ADP": "SecA-WT_ADP", |
| 71 | + "Monomer ADP": "SecA-monomer_ADP", |
| 72 | + "1-834 ADP": "SecA-1-834_ADP", |
66 | 73 | } |
67 | 74 |
|
68 | | - |
69 | 75 | # %% |
70 | 76 |
|
71 | 77 | # load the SecA state dataset as the reference for protein state data
72 | 78 | seca_state_dir = root.parent.parent / "1665149400_SecA_Krishnamurthy" |
73 | 79 | json_str = (seca_state_dir / "dataset.json").read_text() |
74 | 80 |
|
75 | 81 | # %% |
76 | | -ds_state = HDXDataSet.model_validate_json(json_str) |
77 | | -wt_state = ds_state.get_state("SecA-WT_ADP") |
| 82 | +ref_dataset = HDXDataSet.model_validate_json(json_str) |
| 83 | +wt_state = ref_dataset.get_state("SecA-WT_apo") |
78 | 84 | wt_state.protein_state |
79 | 85 | # %% |
80 | 86 |
|
81 | 87 | hdx_states = [] |
82 | | -for state in states: |
83 | | - peptide_spec = hdx_spec["peptides"][state] |
84 | | - peptides = get_peptides(peptide_spec, hdx_spec["data_files"]) |
| 88 | +for state in states_mapping: |
85 | 89 | new_state_name = states_mapping[state] |
86 | 90 |
|
87 | | - ref_state = ds_state.get_state(new_state_name) |
| 91 | + peptide_spec = hdx_spec["peptides"][state] |
| 92 | + peptides = get_peptides(peptide_spec, hdx_spec["data_files"], root_dir=root.parent, chain=["B"]) |
| 93 | + # keep the fully deuterated (FD) control only on the WT apo state; the other states are expected to use that same control
| 94 | + if new_state_name != "SecA-WT_apo": |
| 95 | + peptides = [p for p in peptides if p.deuteration_type != "fully_deuterated"] |
| 96 | + print(state) |
| 97 | + for peptide in peptides: |
| 98 | + print("state filter:", peptide.filters["State"]) |
| 99 | + |
| 100 | + ref_state = ref_dataset.get_state(new_state_name) |
88 | 101 |
|
89 | 102 | hdx_state = HDXState( |
90 | | - name=state, |
| 103 | + name=new_state_name, |
91 | 104 | protein_state=ref_state.protein_state, |
92 | 105 | peptides=peptides, |
93 | 106 | ) |
|
96 | 109 |
|
97 | 110 | # %% |
98 | 111 |
|
| 112 | + |
| 113 | +# %% |
99 | 114 | dataset = HDXDataSet( |
100 | | - description="SecA quiescent states dataset", |
| 115 | + description="SecA quiescent states dataset (cluster data)", |
101 | 116 | states=hdx_states, |
102 | 117 | structure=structure, |
103 | 118 | protein_identifiers=protein_info, |
104 | 119 | metadata=metadata, |
105 | 120 | ) |
106 | 121 |
|
107 | | -s = dataset.model_dump_json(indent=2, exclude_none=True) |
108 | | -Path(root.parent / "dataset.json").write_text(s) |
| 122 | +# %% |
| 123 | + |
| 124 | +offending_state = dataset.get_state("SecA-1-834_apo") |
| 125 | +offending_state.peptides |
| 126 | + |
| 127 | +# %% |
| 128 | +verify_dataset(dataset) |
| 129 | +database_dir = root.parent |
| 130 | +# submit the dataset to our database |
| 131 | +success, msg_or_id = submit_dataset(dataset, database_dir) |
| 132 | +if success: |
| 133 | + print(f"Dataset submitted successfully with ID: {msg_or_id}") |
| 134 | +else: |
| 135 | + print(f"Failed to submit dataset: {msg_or_id}") |
109 | 136 |
|
110 | 137 | # %% |
0 commit comments