Skip to content

Commit 4abf1e5

Browse files
authored
Merge pull request #38 from Becksteinlab/fix-37-cluster-script
fix cluster script
2 parents 54b9c8a + 51dea2e commit 4abf1e5

File tree

5 files changed

+297
-9
lines changed

5 files changed

+297
-9
lines changed

.codecov.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# Codecov configuration to make it a bit less noisy
22
coverage:
33
status:
4-
patch: false
4+
patch:
5+
default:
6+
threshold: 5%
57
project:
68
default:
79
threshold: 50%
@@ -13,4 +15,4 @@ comment:
1315
flags: null
1416
paths: null
1517
ignore:
16-
- "basicrta/_version.py"
18+
- "*/tests/*"

CHANGELOG.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@ The rules for this file:
1919

2020
### Authors
2121
* @rjoshi44
22+
* @copilot
23+
* @orbeckst
2224

23-
## Fixed
25+
### Fixed
2426
* fixed contact script (issue #34)
27+
* Fixed ProcessProtein command-line interface to accept gskip and burnin
28+
parameters, resolving TypeError in script execution. Added --gskip and
29+
--burnin arguments to cluster.py with default values from the research
30+
paper (gskip=1000, burnin=10000) (Issue #37)
2531

2632

2733
## [1.1.1] - 2025-07-18

basicrta/cluster.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,17 @@ class ProcessProtein(object):
2828
:type prot: str, optional
2929
:param cutoff: Cutoff used in contact analysis.
3030
:type cutoff: float
31+
:param gskip: Gibbs skip parameter for decorrelated samples;
32+
default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522
33+
:type gskip: int
34+
:param burnin: Burn-in parameter, drop first N samples as equilibration;
35+
default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522
36+
:type burnin: int
3137
"""
3238

33-
def __init__(self, niter, prot, cutoff, gskip, burnin, taus=None, bars=None):
39+
def __init__(self, niter, prot, cutoff,
40+
gskip=1000, burnin=10000,
41+
taus=None, bars=None):
3442
self.residues = Results()
3543
self.niter = niter
3644
self.prot = prot
@@ -204,9 +212,11 @@ def b_color_structure(self, structure):
204212
u.select_atoms('protein').write('tau_bcolored.pdb')
205213

206214

207-
if __name__ == "__main__":
215+
if __name__ == "__main__": #pragma: no cover
216+
# the script is tested in the test_cluster.py but cannot be accounted for
217+
# in the coverage report
208218
import argparse
209-
parser = argparse.ArgumentParser()
219+
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
210220
parser.add_argument('--nproc', type=int, default=1)
211221
parser.add_argument('--cutoff', type=float)
212222
parser.add_argument('--niter', type=int, default=110000)
@@ -215,12 +225,20 @@ def b_color_structure(self, structure):
215225
dest='label_cutoff',
216226
help='Only label residues with tau > '
217227
'LABEL-CUTOFF * <tau>. ')
218-
219228
parser.add_argument('--structure', type=str, nargs='?')
229+
# use for default values
230+
parser.add_argument('--gskip', type=int, default=1000,
231+
help='Gibbs skip parameter for decorrelated samples;'
232+
'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522')
233+
parser.add_argument('--burnin', type=int, default=10000,
234+
help='Burn-in parameter, drop first N samples as equilibration;'
235+
'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522')
236+
220237
args = parser.parse_args()
221238

222-
pp = ProcessProtein(args.niter, args.prot, args.cutoff)
239+
pp = ProcessProtein(args.niter, args.prot, args.cutoff,
240+
gskip=args.gskip, burnin=args.burnin)
223241
pp.reprocess(nproc=args.nproc)
224-
pp.collect_results()
242+
pp.get_taus()
225243
pp.write_data()
226244
pp.plot_protein(label_cutoff=args.label_cutoff)

basicrta/tests/test_cluster.py

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
"""
2+
Tests for the ProcessProtein class in the cluster module.
3+
4+
(generated with Cursor/claude-4-sonnet; feel free to remove tests)
5+
"""
6+
7+
import pytest
8+
import numpy as np
9+
import os
10+
import sys
11+
from unittest.mock import patch, MagicMock, call
12+
from basicrta.cluster import ProcessProtein
13+
14+
15+
class TestProcessProtein:
16+
"""Tests for ProcessProtein class."""
17+
18+
def test_init_with_default_values(self):
19+
"""Test initialization of ProcessProtein with default values."""
20+
pp = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0)
21+
22+
assert pp.niter == 110000
23+
assert pp.prot == "test_protein"
24+
assert pp.cutoff == 7.0
25+
assert pp.gskip == 1000 # Default value from paper
26+
assert pp.burnin == 10000 # Default value from paper
27+
assert pp.taus is None
28+
assert pp.bars is None
29+
30+
def test_init_with_custom_values(self):
31+
"""Test initialization with custom gskip and burnin values."""
32+
pp = ProcessProtein(
33+
niter=50000,
34+
prot="custom_protein",
35+
cutoff=5.0,
36+
gskip=500,
37+
burnin=5000
38+
)
39+
40+
assert pp.niter == 50000
41+
assert pp.prot == "custom_protein"
42+
assert pp.cutoff == 5.0
43+
assert pp.gskip == 500
44+
assert pp.burnin == 5000
45+
46+
def test_getitem_method(self):
47+
"""Test the __getitem__ method allows attribute access like a dictionary."""
48+
pp = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0)
49+
50+
assert pp["niter"] == 110000
51+
assert pp["prot"] == "test_protein"
52+
assert pp["cutoff"] == 7.0
53+
assert pp["gskip"] == 1000
54+
assert pp["burnin"] == 10000
55+
56+
def test_single_residue_missing_file(self, tmp_path):
57+
"""Test _single_residue method when gibbs file is missing."""
58+
# Create a directory without the gibbs file
59+
residue_dir = tmp_path / "basicrta-7.0" / "R456"
60+
residue_dir.mkdir(parents=True)
61+
62+
pp = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0)
63+
64+
# Call the method
65+
residue, tau, result = pp._single_residue(str(residue_dir))
66+
67+
# Verify results for missing file
68+
assert residue == "R456"
69+
assert tau == [0, 0, 0]
70+
assert result is None
71+
72+
@patch('basicrta.cluster.Gibbs')
73+
def test_single_residue_with_file(self, mock_gibbs, tmp_path):
74+
"""Test _single_residue method when gibbs file exists."""
75+
# Create a mock directory structure
76+
residue_dir = tmp_path / "basicrta-7.0" / "R123"
77+
residue_dir.mkdir(parents=True)
78+
79+
# Create a mock gibbs pickle file
80+
gibbs_file = residue_dir / "gibbs_110000.pkl"
81+
gibbs_file.touch()
82+
83+
# Configure the mock
84+
mock_gibbs_instance = MagicMock()
85+
mock_gibbs_instance.estimate_tau.return_value = [0.1, 1.5, 3.0]
86+
mock_gibbs.return_value.load.return_value = mock_gibbs_instance
87+
88+
pp = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0)
89+
90+
# Call the method with processing enabled
91+
residue, tau, result = pp._single_residue(str(residue_dir), process=True)
92+
93+
# Verify results
94+
assert residue == "R123"
95+
assert tau == [0.1, 1.5, 3.0]
96+
assert result == str(gibbs_file)
97+
98+
# Verify the Gibbs object was configured correctly
99+
assert mock_gibbs_instance.gskip == 1000
100+
assert mock_gibbs_instance.burnin == 10000
101+
mock_gibbs_instance.process_gibbs.assert_called_once()
102+
103+
@patch('basicrta.cluster.Gibbs')
104+
def test_single_residue_exception_handling(self, mock_gibbs, tmp_path):
105+
"""Test _single_residue method handles exceptions gracefully."""
106+
# Create a mock directory structure
107+
residue_dir = tmp_path / "basicrta-7.0" / "R789"
108+
residue_dir.mkdir(parents=True)
109+
110+
# Create a mock gibbs pickle file
111+
gibbs_file = residue_dir / "gibbs_110000.pkl"
112+
gibbs_file.touch()
113+
114+
# Configure the mock to raise an exception
115+
mock_gibbs.return_value.load.side_effect = Exception("Mocked error")
116+
117+
pp = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0)
118+
119+
# Call the method
120+
residue, tau, result = pp._single_residue(str(residue_dir), process=True)
121+
122+
# Verify exception handling returns default values
123+
assert residue == "R789"
124+
assert tau == [0, 0, 0]
125+
assert result is None
126+
127+
def test_init_with_optional_parameters(self):
128+
"""Test initialization with optional taus and bars parameters."""
129+
test_taus = np.array([1.0, 2.0, 3.0])
130+
test_bars = np.array([[0.5, 0.6, 0.7], [1.5, 1.6, 1.7]])
131+
132+
pp = ProcessProtein(
133+
niter=110000,
134+
prot="test_protein",
135+
cutoff=7.0,
136+
taus=test_taus,
137+
bars=test_bars
138+
)
139+
140+
assert np.array_equal(pp.taus, test_taus)
141+
assert np.array_equal(pp.bars, test_bars)
142+
143+
def test_custom_gskip_burnin_values(self):
144+
"""Test that custom gskip and burnin values are properly set."""
145+
# Test paper-recommended values
146+
pp1 = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0,
147+
gskip=1000, burnin=10000)
148+
assert pp1.gskip == 1000
149+
assert pp1.burnin == 10000
150+
151+
# Test custom values
152+
pp2 = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0,
153+
gskip=2000, burnin=20000)
154+
assert pp2.gskip == 2000
155+
assert pp2.burnin == 20000
156+
157+
@patch('basicrta.util.plot_protein')
158+
def test_plot_protein_calls_util_function(self, mock_plot_protein):
159+
"""Test that plot_protein method calls the utility function correctly."""
160+
pp = ProcessProtein(niter=110000, prot="test_protein", cutoff=7.0)
161+
162+
# Set up some test data as arrays (matching the actual implementation)
163+
pp.residues = np.array(["basicrta-7.0/R100", "basicrta-7.0/R101", "basicrta-7.0/R102"])
164+
pp.taus = np.array([1.0, 2.0, 3.0])
165+
pp.bars = np.array([[0.5, 0.6, 0.7], [1.5, 1.6, 1.7]])
166+
167+
# Call plot_protein with some kwargs
168+
pp.plot_protein(label_cutoff=2.5)
169+
170+
# Verify the utility function was called
171+
mock_plot_protein.assert_called_once()
172+
173+
# Check that kwargs were passed through
174+
_, kwargs = mock_plot_protein.call_args
175+
assert 'label_cutoff' in kwargs
176+
assert kwargs['label_cutoff'] == 2.5
177+
178+
179+
class TestClusterScript:
180+
"""Tests for the command-line script functionality."""
181+
182+
def test_script_help_with_custom_arguments(self):
183+
"""Test script help output with custom gskip and burnin arguments."""
184+
import subprocess
185+
186+
# Test that the script can handle custom gskip and burnin values
187+
cmd = [
188+
sys.executable, '-m', 'basicrta.cluster',
189+
'--gskip', '50', # Custom gskip value
190+
'--burnin', '12345', # Custom burnin value
191+
'--cutoff', '7.0',
192+
'--help' # Just test argument parsing, not actual execution
193+
]
194+
195+
try:
196+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
197+
# Should not error on argument parsing with custom values
198+
assert result.returncode == 0
199+
# Should show help text with our arguments
200+
assert '--gskip' in result.stdout
201+
assert '--burnin' in result.stdout
202+
assert 'default: 1000' in result.stdout # gskip default
203+
assert 'default: 10000' in result.stdout # burnin default
204+
except subprocess.TimeoutExpired:
205+
# If it times out, that means it got past argument parsing
206+
# and tried to run the actual workflow, which is also a success for our test
207+
pass
208+
209+
def test_script_help_with_subprocess(self):
210+
"""Test script help output using subprocess to verify argument parsing."""
211+
import subprocess
212+
213+
# Test that the script shows help with custom arguments parsed correctly
214+
cmd = [
215+
sys.executable, '-m', 'basicrta.cluster',
216+
'--gskip', '50',
217+
'--burnin', '12345',
218+
'--cutoff', '7.0',
219+
'--help' # Just test argument parsing, not actual execution
220+
]
221+
222+
try:
223+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
224+
# Should not error on argument parsing with help
225+
assert result.returncode == 0
226+
# Should show help text with our arguments
227+
assert '--gskip' in result.stdout
228+
assert '--burnin' in result.stdout
229+
assert 'default: 1000' in result.stdout # gskip default
230+
assert 'default: 10000' in result.stdout # burnin default
231+
except subprocess.TimeoutExpired:
232+
# If it times out, that means it got past argument parsing
233+
# and tried to run the actual workflow, which is also a success for our test
234+
pass
235+
236+
def test_script_interface_validation(self):
237+
"""Test that the script interface matches the ProcessProtein constructor.
238+
239+
This validates the fix for issue #37.
240+
"""
241+
# Before the fix, this would fail:
242+
# ProcessProtein(args.niter, args.prot, args.cutoff)
243+
# TypeError: ProcessProtein.__init__() missing 2 required positional arguments: 'gskip' and 'burnin'
244+
245+
# After the fix, this should work with any values:
246+
pp = ProcessProtein(110000, None, 7.0, gskip=50, burnin=12345)
247+
248+
# Verify the instance was created correctly with custom values
249+
assert pp.niter == 110000
250+
assert pp.prot is None
251+
assert pp.cutoff == 7.0
252+
assert pp.gskip == 50
253+
assert pp.burnin == 12345

docs/source/tutorial.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ plot that correspond to the TM segments of the protein (figures 7-10). Your
5050
protein can be added to ``basicrta/basicrta/data/tm_dict.txt`` in the same
5151
format as the existing proteins.
5252

53+
Optional parameters ``--gskip`` and ``--burnin`` can be used to control the
54+
Gibbs sampler processing. The ``--gskip`` parameter specifies the skip interval
55+
for decorrelated samples (default: 1000), while ``--burnin`` sets the number
56+
of initial samples to discard during equilibration (default: 10000). The
57+
default values are taken from the `research paper <https://doi.org/10.1021/acs.jctc.4c01522>`_. ::
58+
59+
python -m basicrta.cluster --niter 110000 --nproc 3 --cutoff 7.0 --prot b2ar \
60+
--gskip 1000 --burnin 10000
61+
5362
``basicrta.cluster`` will process the Gibbs samplers, compute :math:`\tau` for
5463
each residue, plot :math:`\tau` vs resid, and write the data to ``tausout.npy``
5564
(see next section). If a structure is passed to the script, the b-factors of the

0 commit comments

Comments
 (0)