Skip to content

Commit 19e675b

Browse files
gitzhangchorbeckstIAlibay
authored
improve performance of InterRDF_s (#5073)
* Fixes #5067 * Refactored bin index calculation in analysis.rdf.InterRDF_s._single_frame to avoid multiple redundant np.histogram calls, improving performance; at least 20x for small systems, potentially >100x for realistic systems. * fixed change entries for PR #4884 on RDF parallelization - corrected version changed dates for PR #4884 in analysis.rdf to 2.10.0 - moved entry in CHANGELOG to 2.10.0 * add benchmark for InterRDF_s * Update CHANGELOG * update AUTHORS --------- Co-authored-by: Oliver Beckstein <[email protected]> Co-authored-by: Irfan Alibay <[email protected]>
1 parent 58b872b commit 19e675b

File tree

4 files changed

+48
-10
lines changed

4 files changed

+48
-10
lines changed

benchmarks/benchmarks/analysis/rdf.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
pass
77

88
try:
9-
from MDAnalysis.analysis.rdf import InterRDF
9+
from MDAnalysis.analysis.rdf import InterRDF, InterRDF_s
1010
except:
1111
pass
1212

1313

1414
class SimpleRdfBench(object):
15-
"""Benchmarks for MDAnalysis.analysis.rdf"""
15+
"""Benchmarks for MDAnalysis.analysis.rdf.InterRDF"""
1616

1717
params = ([20, 75, 200], [[0, 5], [0, 15], [0, 20]], [1, 100, 1000, 10000])
1818

@@ -39,3 +39,31 @@ def time_interrdf(self, nbins, range_val, natoms):
3939
by MDAnalysis.analysis.rdf.InterRDF
4040
"""
4141
self.rdf.run()
42+
43+
class SimpleRdfsBench(object):
44+
"""Benchmarks for MDAnalysis.analysis.rdf.InterRDF_s"""
45+
46+
params = ([20, 75, 200], [[0, 5], [0, 15], [0, 20]], [10, 100], [1, 3, 9])
47+
48+
param_names = ["nbins", "range_val", "natoms", "npairs"]
49+
50+
def setup(self, nbins, range_val, natoms, npairs):
51+
52+
self.sel_str = "name OW"
53+
54+
self.u = MDAnalysis.Universe(TPR, XTC)
55+
56+
try:
57+
self.sel = self.u.select_atoms(self.sel_str)[:natoms]
58+
except AttributeError:
59+
self.sel = self.u.selectAtoms(self.sel_str)[:natoms]
60+
61+
ags = [[self.sel, self.sel]] * npairs
62+
self.rdf_s = InterRDF_s(self.u, ags, nbins=nbins, range=range_val)
63+
64+
65+
def time_interrdfs(self, nbins, range_val, natoms, npairs):
66+
"""Benchmark a full trajectory parse
67+
by MDAnalysis.analysis.rdf.InterRDF_s
68+
"""
69+
self.rdf_s.run()

package/AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ Chronological list of authors
261261
- Marc Schuh
262262
- Sirsha Ganguly
263263
- Amruthesh Thirumalaiswamy
264+
- Ch Zhang
264265

265266
External code
266267
-------------

package/CHANGELOG

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ The rules for this file:
1616
-------------------------------------------------------------------------------
1717
??/??/?? IAlibay, orbeckst, BHM-Bob, TRY-ER, Abdulrahman-PROG, pbuslaev,
1818
yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn, Gareth-elliott,
19-
hmacdope, tylerjereddy, cbouy, talagayev, DrDomenicoMarson, amruthesht,
20-
p-j-smith
19+
hmacdope, tylerjereddy, cbouy, talagayev, DrDomenicoMarson, amruthesht,
20+
p-j-smith, gitzhangch
2121

2222

2323
* 2.10.0
@@ -47,6 +47,9 @@ Fixes
4747
and does not result in an infinite loop (Issue #5112, PR #5113)
4848

4949
Enhancements
50+
* Improved performance of `analysis.rdf.InterRDF_s` > 20-fold
51+
by using vectorized NumPy calculations. (Issue #5067, PR #5073)
52+
* Parallelize `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` (Issue #4675)
5053
* Support for Python 3.14 was added (PR #5121).
5154
* Added support for reading and processing streamed data in `coordinates.base`
5255
with new `StreamFrameIteratorSliced` and `StreamReaderBase` (Issue #4827, PR #4923)
@@ -125,7 +128,6 @@ Enhancements
125128
* Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
126129
* Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
127130
* Added `precision` for XYZWriter (Issue #4775, PR #4771)
128-
* Parallelize `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` (Issue #4675)
129131
* Added `precision` for XYZWriter (Issue #4775, PR #4771)
130132
* Add check and warning for empty (all zero) coordinates in RDKit converter
131133
(PR #4824)

package/MDAnalysis/analysis/rdf.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ class InterRDF(AnalysisBase):
258258
of the `results` attribute of
259259
:class:`~MDAnalysis.analysis.AnalysisBase`.
260260
261-
.. versionchanged:: 2.9.0
261+
.. versionchanged:: 2.10.0
262262
Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
263263
backends; use the new method :meth:`get_supported_backends` to see all
264264
supported backends.
@@ -638,7 +638,7 @@ class InterRDF_s(AnalysisBase):
638638
Instead of `density=True` use `norm='density'`
639639
.. deprecated:: 2.3.0
640640
The `universe` parameter is superflous.
641-
.. versionchanged:: 2.9.0
641+
.. versionchanged:: 2.10.0
642642
Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
643643
backends; use the new method :meth:`get_supported_backends` to see all
644644
supported backends.
@@ -731,9 +731,16 @@ def _single_frame(self):
731731
backend=self.backend,
732732
)
733733

734-
for j, (idx1, idx2) in enumerate(pairs):
735-
count, _ = np.histogram(dist[j], **self.rdf_settings)
736-
self.results.count[i][idx1, idx2, :] += count
734+
# Fast manual implementation for distance histogram (equidistant bins)
735+
nbins = self.rdf_settings["bins"]
736+
rmin, rmax = self.rdf_settings["range"]
737+
bin_indices = (dist - rmin) * nbins / (rmax - rmin)
738+
bin_indices = bin_indices.astype(np.int64)
739+
counts = (bin_indices >= 0) & (bin_indices < nbins)
740+
counts = counts.astype(np.int64)
741+
idx1s = pairs[:, 0]
742+
idx2s = pairs[:, 1]
743+
self.results.count[i][idx1s, idx2s, bin_indices] += counts
737744

738745
if self.norm == "rdf":
739746
self.results.volume_cum += self._ts.volume

0 commit comments

Comments
 (0)