Skip to content

Commit b8d5869

Browse files
committed
assume start end column names
1 parent f81fa93 commit b8d5869

File tree

1 file changed

+7
-11
lines changed

1 file changed

+7
-11
lines changed

hdxms_datasets/utils.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -102,19 +102,19 @@ def verify_sequence(
102102

103103

104104
@nw.narwhalify
105-
def contiguous_peptides(df: IntoFrame, start="start", end="end") -> list[tuple[int, int]]:
105+
def contiguous_peptides(df: IntoFrame) -> list[tuple[int, int]]:
106106
"""
107107
Given a dataframe with 'start' and 'end' columns, each describing a range,
108108
(inclusive intervals), this function returns a list of tuples
109109
representing contiguous regions.
110110
"""
111111
# cast to ensure df is a narwhals DataFrame
112-
df = cast(nw.DataFrame, df).select([start, end]).unique().sort(by=[start, end])
112+
df = cast(nw.DataFrame, df).select(["start", "end"]).unique().sort(by=["start", "end"])
113113

114114
regions = []
115115
current_start, current_end = None, 0
116116

117-
for start_val, end_val in df.select([nw.col(start), nw.col(end)]).iter_rows(named=False):
117+
for start_val, end_val in df.select([nw.col("start"), nw.col("end")]).iter_rows(named=False):
118118
if current_start is None:
119119
# Initialize the first region
120120
current_start, current_end = start_val, end_val
@@ -136,15 +136,13 @@ def contiguous_peptides(df: IntoFrame, start="start", end="end") -> list[tuple[i
136136
@nw.narwhalify
137137
def non_overlapping_peptides(
138138
df: IntoFrame,
139-
start: str = "start",
140-
end: str = "end",
141139
) -> list[tuple[int, int]]:
142140
"""
143141
Given a dataframe with 'start' and 'end' columns, each describing a range,
144142
(inclusive intervals), this function returns a list of tuples
145143
representing non-overlapping peptides.
146144
"""
147-
df = cast(nw.DataFrame, df).select([start, end]).unique().sort(by=[start, end])
145+
df = cast(nw.DataFrame, df).select(["start", "end"]).unique().sort(by=["start", "end"])
148146

149147
regions = df.rows()
150148
out = [regions[0]]
@@ -158,9 +156,7 @@ def non_overlapping_peptides(
158156

159157

160158
@nw.narwhalify
161-
def peptide_redundancy(
162-
df: IntoFrame, start: str = "start", end: str = "end"
163-
) -> tuple[np.ndarray, np.ndarray]:
159+
def peptide_redundancy(df: IntoFrame) -> tuple[np.ndarray, np.ndarray]:
164160
"""
165161
Compute the redundancy of peptides in a DataFrame based on their start and end positions.
166162
Redundancy is defined as the number of peptides overlapping at each position.
@@ -176,8 +172,8 @@ def peptide_redundancy(
176172
- redundancy: An array of redundancy counts for each position in r_number.
177173
178174
"""
179-
df = cast(nw.DataFrame, df).select([start, end]).unique().sort(by=[start, end])
180-
vmin, vmax = df[start][0], df[end][-1]
175+
df = cast(nw.DataFrame, df).select(["start", "end"]).unique().sort(by=["start", "end"])
176+
vmin, vmax = df["start"][0], df["end"][-1]
181177

182178
r_number = np.arange(vmin, vmax + 1, dtype=int)
183179
redundancy = np.zeros_like(r_number, dtype=int)

0 commit comments

Comments
 (0)