Skip to content

Commit 9eee115

Browse files
committed
minor refactoring
1 parent 84266eb commit 9eee115

File tree

4 files changed

+31
-19
lines changed

4 files changed

+31
-19
lines changed

examples/create_dataset.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from hdxms_datasets.models import (
77
DatasetMetadata,
88
HDXDataSet,
9-
HDXState,
9+
State,
1010
DeuterationType,
1111
Author,
1212
PeptideFormat,
@@ -16,6 +16,7 @@
1616
Publication,
1717
Structure,
1818
)
19+
from hdxms_datasets.utils import verify_sequence
1920

2021
# %%
2122

@@ -76,10 +77,16 @@
7677
),
7778
]
7879

80+
# %%
81+
# %%
82+
# test loading the peptides and verifying the sequence
83+
# by comparing sequences of peptides to the protein state sequence
84+
for peptide in peptides:
85+
verify_sequence(peptide.load(), protein_state.sequence, n_term=protein_state.n_term)
7986

8087
# %%
8188
states = [
82-
HDXState(
89+
State(
8390
name="Tetramer",
8491
description="SecB WT in tetrameric state",
8592
protein_state=protein_state,
@@ -99,7 +106,7 @@
99106
"Y109A",
100107
"T115A",
101108
"S119A",
102-
], # this information is also deducible from comparing sequences
109+
], # this information is also deducible by comparing sequences between states
103110
)
104111

105112
peptides = [
@@ -118,16 +125,22 @@
118125
)
119126
]
120127

128+
# %%
129+
# test loading the peptides and verifying the sequence
130+
# by comparing sequences of peptides to the protein state sequence
131+
for peptide in peptides:
132+
verify_sequence(peptide.load(), protein_state.sequence, n_term=protein_state.n_term)
133+
134+
# %%
135+
121136
states.append(
122-
HDXState(
137+
State(
123138
name="Dimer",
124139
description="SecB mutatant in dimeric state",
125140
protein_state=protein_state,
126141
peptides=peptides,
127142
)
128143
)
129-
states
130-
131144

132145
# %%
133146
pub = Publication(

hdxms_datasets/database.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -207,14 +207,14 @@ def submit_dataset(
207207

208208

209209
class DataBase:
210-
def __init__(self, database_root: Path | str):
211-
self.database_root = Path(database_root)
212-
self.database_root.mkdir(exist_ok=True, parents=True)
210+
def __init__(self, database_dir: Path | str):
211+
self.database_dir = Path(database_dir)
212+
self.database_dir.mkdir(exist_ok=True, parents=True)
213213

214214
@property
215215
def datasets(self) -> list[str]:
216216
"""List of available datasets in the cache dir"""
217-
return [d.stem for d in self.database_root.iterdir() if self.is_dataset(d)]
217+
return [d.stem for d in self.database_dir.iterdir() if self.is_dataset(d)]
218218

219219
@staticmethod
220220
def is_dataset(path: Path) -> bool:
@@ -225,11 +225,11 @@ def is_dataset(path: Path) -> bool:
225225
return (path / "dataset.json").exists()
226226

227227
def clear_cache(self) -> None:
228-
for dir in self.database_root.iterdir():
228+
for dir in self.database_dir.iterdir():
229229
shutil.rmtree(dir)
230230

231231
def load_dataset(self, dataset_id: str) -> HDXDataSet:
232-
dataset_root = self.database_root / dataset_id
232+
dataset_root = self.database_dir / dataset_id
233233
dataset = HDXDataSet.model_validate_json(
234234
Path(dataset_root, "dataset.json").read_text(),
235235
context={"dataset_root": dataset_root},
@@ -249,8 +249,8 @@ class RemoteDataBase(DataBase):
249249
remote_url: URL of the remote repository (default: DATABASE_URL).
250250
"""
251251

252-
def __init__(self, data_root_path: Path | str, remote_url: str = DATABASE_URL):
253-
super().__init__(data_root_path)
252+
def __init__(self, database_dir: Path | str, remote_url: str = DATABASE_URL):
253+
super().__init__(database_dir)
254254
self.remote_url = remote_url
255255

256256
def get_index(self) -> nw.DataFrame:

hdxms_datasets/loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def read_csv(source: Path | str | IO | bytes) -> nw.DataFrame:
7171
try:
7272
import pandas as pd
7373

74-
return nw.from_native(pd.read_csv(source))
74+
return nw.from_native(pd.read_csv(source)) # type: ignore
7575
except ImportError:
7676
raise ValueError("No suitable backend found for reading file-like objects or bytes.")
7777

hdxms_datasets/process.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@
22

33
from pathlib import Path
44
import warnings
5-
from collections import defaultdict
65
from functools import reduce
76
from operator import and_
8-
from typing import Literal, Optional, TypedDict, Union
9-
7+
from typing import Optional
108
import narwhals as nw
119
from statsmodels.stats.weightstats import DescrStatsW
1210
from uncertainties import Variable, ufloat
@@ -139,9 +137,10 @@ def apply_filters(df, **filters):
139137
return df.filter(f_expr)
140138

141139

140+
@nw.narwhalify
142141
def aggregate_columns(
143142
df: nw.DataFrame, columns: list[str], by: list[str] = ["start", "end", "exposure"]
144-
):
143+
) -> nw.DataFrame:
145144
"""
146145
Aggregate the specified columns of the DataFrame by intensity-weighted average.
147146
"""

0 commit comments

Comments
 (0)