Skip to content

Commit 3de2f24

Browse files
authored
Max cutoff fix27 (#31)
* fix #31 and #27 * corrected cuttoff attribution MapContacts (#27) * corrected cutoff saved to metadata * distinguish between max_cutoff and cutoff * removed cutoff argument from MapContacts since it is used only in ProcessContacts * changed MapContacts output file to distinguish max_cutoff used * made map_name arg of ProcessContacts mandatory * fixed MapContacts file write/read error for numpy>2 * added data files for testing * added tests * updated docs * updated CHANGELOG
1 parent d6fdece commit 3de2f24

File tree

8 files changed

+2385
-21
lines changed

8 files changed

+2385
-21
lines changed

.github/workflows/gh-ci.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,9 @@ jobs:
103103
conda info
104104
conda list
105105
106-
107106
- name: Run tests
108107
run: |
109-
pytest -n auto -v --cov=basicrta --cov-report=xml --color=yes basicrta/tests/
108+
pytest -n auto -v --cov=basicrta --cov-report=xml --color=yes basicrta/tests/
110109
111110
- name: codecov
112111
if: github.repository == 'becksteinlab/basicrta' && github.event_name != 'schedule'

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ The rules for this file:
3030
and trajectory source tracking (Issue #16)
3131

3232
### Changed
33+
* distinguished max_cutoff from cutoff in contact file metadata (Issues #27 #32)
3334
* package has final paper citation
3435

3536

basicrta/contacts.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,27 @@ class MapContacts(object):
3636
:param frames: List of frames to use in computing contacts (default is
3737
None, meaning all frames are used).
3838
:type frames: list or np.array, optional
39-
:param cutoff: Maximum cutoff to use in computing contacts. A primary
39+
:param max_cutoff: Maximum cutoff to use in computing contacts. A primary
4040
contact map is created upon which multiple cutoffs can be
4141
imposed, i.e. in the case where a proper cutoff is being
4242
determined. This can typically be left at its default value,
4343
unless a greater value is needed (default is 10.0).
44-
:type cutoff: float, optional
44+
:type max_cutoff: float, optional
4545
:param nslices: Number of slices to break the trajectory into for
4646
processing. If device memory is limited, try increasing
4747
`nslices` (default is 100).
4848
:type nslices: int, optional
4949
"""
5050

51-
def __init__(self, u, ag1, ag2, nproc=1, frames=None, max_cutoff=10.0,
52-
nslices=100):
51+
def __init__(self, u, ag1, ag2, nproc=1, frames=None,
52+
max_cutoff=10.0, nslices=100):
5353
self.u, self.nproc = u, nproc
5454
self.ag1, self.ag2 = ag1, ag2
55-
self.cutoff, self.frames, self.nslices = max_cutoff, frames, nslices
55+
self.max_cutoff = max_cutoff
56+
self.frames, self.nslices = frames, nslices
5657

5758
def run(self):
58-
"""Run contact analysis and save to `contacts.pkl`
59+
"""Run contact analysis and save to `contacts_max{max_cutoff}.pkl`
5960
"""
6061
if self.frames is not None:
6162
sliced_frames = np.array_split(self.frames, self.nslices)
@@ -81,7 +82,7 @@ def run(self):
8182
'traj': self.u.trajectory.filename,
8283
'ag1': self.ag1, 'ag2': self.ag2,
8384
'ts': self.u.trajectory.dt/1000,
84-
'cutoff': self.max_cutoff})
85+
'max_cutoff': self.max_cutoff})
8586

8687
contact_map = np.memmap('.tmpmap', mode='w+',
8788
shape=(mapsize, 5), dtype=dtype)
@@ -91,11 +92,11 @@ def run(self):
9192
delimiter=',')
9293
contact_map.flush()
9394

94-
contact_map.dump('contacts.pkl', protocol=5)
95+
contact_map.dump(f'contacts_max{self.max_cutoff}.pkl', protocol=5)
9596
os.remove('.tmpmap')
9697
cfiles = glob.glob('.contacts*')
9798
[os.remove(f) for f in cfiles]
98-
print('\nSaved contacts as "contacts.pkl"')
99+
print(f'\nSaved contacts as "contacts_max{self.max_cutoff}.pkl')
99100

100101
def _run_contacts(self, i, sliced_traj):
101102
from basicrta.util import get_dec
@@ -114,17 +115,17 @@ def _run_contacts(self, i, sliced_traj):
114115
dset = []
115116
b = distances.capped_distance(self.ag1.positions,
116117
self.ag2.positions,
117-
max_cutoff=self.cutoff)
118+
max_cutoff=self.max_cutoff)
118119
pairlist = [(self.ag1.resids[b[0][i, 0]],
119120
self.ag2.resids[b[0][i, 1]]) for i in
120121
range(len(b[0]))]
121122
pairdir = collections.Counter(a for a in pairlist)
122123
lsum = 0
123124
for j in pairdir:
124125
temp = pairdir[j]
125-
dset.append([ts.frame, j[0], j[1],
126-
min(b[1][lsum:lsum+temp]),
127-
np.round(ts.time, dec)/1000]) # convert to ns
126+
dset.append([int(ts.frame), int(j[0]), int(j[1]),
127+
float(min(b[1][lsum:lsum+temp])),
128+
float(np.round(ts.time, dec)/1000)]) # convert to ns
128129
lsum += temp
129130
[f.write(f"{line}".strip('[]') + "\n") for line in dset]
130131
data_len += len(dset)
@@ -134,19 +135,21 @@ def _run_contacts(self, i, sliced_traj):
134135

135136
class ProcessContacts(object):
136137
"""The :class:`ProcessProtein` class takes the primary contact map
137-
(default is `contacts.pkl`) and collects contacts based on a prescribed
138+
(i.e. `contacts_max10.0.pkl`) and collects contacts based on a prescribed
138139
cutoff.
139140
140141
:param cutoff: Collect all contacts between `ag1` and `ag2` within this
141142
value.
142143
:type cutoff: float
144+
:param map_name: Name of primary contact map. The default produced by
145+
MapContacts is `contacts_max10.0.pkl`.
146+
:type map_name: str
143147
:param nproc: Number of processes to use in collecting contacts (default is
144148
1).
145149
:type nproc: int, optional
146-
:param map_name: Name of primary contact map (default is `contacts.pkl`)
147-
:type map_name: str, optional
148150
"""
149-
def __init__(self, cutoff, nproc=1, map_name='contacts.pkl'):
151+
152+
def __init__(self, cutoff, map_name, nproc=1):
150153
self.nproc = nproc
151154
self.map_name = map_name
152155
self.cutoff = cutoff
@@ -160,6 +163,9 @@ def run(self):
160163
memmap = pickle.load(f)
161164
# memmap = np.load(self.map_name, mmap_mode='r')
162165
dtype = memmap.dtype
166+
new_metadata = dtype.metadata.copy()
167+
new_metadata['cutoff'] = self.cutoff
168+
new_dtype = np.dtype(np.float64, metadata=new_metadata)
163169

164170
memmap = memmap[memmap[:, -2] <= self.cutoff]
165171
else:
@@ -182,7 +188,7 @@ def run(self):
182188
bounds = np.concatenate([[0], np.cumsum(lens)]).astype(int)
183189
mapsize = sum(lens)
184190
contact_map = np.memmap('.tmpmap', mode='w+',
185-
shape=(mapsize, 4), dtype=dtype)
191+
shape=(mapsize, 4), dtype=new_dtype)
186192

187193
for i in range(len(lresids)):
188194
contact_map[bounds[i]:bounds[i+1]] = np.load(f'.contacts_{i:04}.'
@@ -384,4 +390,4 @@ def run(self):
384390
if not os.path.exists('contacts.pkl'):
385391
MapContacts(u, ag1, ag2, nproc=nproc, nslices=nslices).run()
386392

387-
ProcessContacts(cutoff, nproc).run()
393+
ProcessContacts(cutoff, nproc, map_name='contacts_max10.0.pkl').run()

0 commit comments

Comments
 (0)