3333try :
3434 import numba as nb
3535 from numba import prange
36+
3637 HAS_NUMBA = True
3738except ImportError :
3839 HAS_NUMBA = False
39-
40- __all__ = [' optimized_histogram' , ' HAS_NUMBA' ]
40+
41+ __all__ = [" optimized_histogram" , " HAS_NUMBA" ]
4142
4243
4344if HAS_NUMBA :
45+
4446 @nb .jit (nopython = True , parallel = True , fastmath = True )
4547 def _histogram_distances_parallel (distances , bins , bin_edges ):
4648 """
4749 Parallel histogram calculation using Numba with efficient parallelization.
48-
50+
4951 Parameters
5052 ----------
5153 distances : numpy.ndarray
@@ -54,7 +56,7 @@ def _histogram_distances_parallel(distances, bins, bin_edges):
5456 Number of histogram bins
5557 bin_edges : numpy.ndarray
5658 Pre-computed bin edges
57-
59+
5860 Returns
5961 -------
6062 numpy.ndarray
@@ -64,19 +66,19 @@ def _histogram_distances_parallel(distances, bins, bin_edges):
6466 bin_width = (bin_edges [- 1 ] - bin_edges [0 ]) / bins
6567 min_val = bin_edges [0 ]
6668 max_val = bin_edges [- 1 ]
67-
69+
6870 # Use chunks to avoid false sharing and improve cache performance
6971 chunk_size = max (1024 , n // (nb .config .NUMBA_NUM_THREADS * 4 ))
7072 n_chunks = (n + chunk_size - 1 ) // chunk_size
71-
73+
7274 # Pre-allocate result array
7375 partial_hists = np .zeros ((n_chunks , bins ), dtype = np .int64 )
74-
76+
7577 # Process chunks in parallel
7678 for chunk_id in prange (n_chunks ):
7779 start = chunk_id * chunk_size
7880 end = min (start + chunk_size , n )
79-
81+
8082 # Local histogram for this chunk
8183 for i in range (start , end ):
8284 dist = distances [i ]
@@ -85,27 +87,26 @@ def _histogram_distances_parallel(distances, bins, bin_edges):
8587 if bin_idx >= bins :
8688 bin_idx = bins - 1
8789 partial_hists [chunk_id , bin_idx ] += 1
88-
90+
8991 # Sum up partial histograms
9092 hist = np .sum (partial_hists , axis = 0 )
91-
92- return hist
9393
94+ return hist
9495
9596 @nb .jit (nopython = True , cache = True , fastmath = True )
9697 def _histogram_distances_serial (distances , bins , bin_edges ):
9798 """
9899 Serial histogram calculation using Numba with optimizations.
99-
100+
100101 Parameters
101102 ----------
102103 distances : numpy.ndarray
103104 1D array of distances to histogram
104105 bins : int
105- Number of histogram bins
106+ Number of histogram bins
106107 bin_edges : numpy.ndarray
107108 Pre-computed bin edges
108-
109+
109110 Returns
110111 -------
111112 numpy.ndarray
@@ -115,26 +116,28 @@ def _histogram_distances_serial(distances, bins, bin_edges):
115116 hist = np .zeros (bins , dtype = np .int64 )
116117 bin_width = (bin_edges [- 1 ] - bin_edges [0 ]) / bins
117118 min_val = bin_edges [0 ]
118-
119+
119120 for i in range (n ):
120121 dist = distances [i ]
121122 if dist >= min_val and dist <= bin_edges [- 1 ]:
122123 bin_idx = int ((dist - min_val ) / bin_width )
123124 if bin_idx >= bins :
124125 bin_idx = bins - 1
125126 hist [bin_idx ] += 1
126-
127+
127128 return hist
128129
129130
def optimized_histogram(
    distances, bins=75, range=(0.0, 15.0), use_parallel=None
):
    """
    Optimized histogram function for distance calculations.

    Counts how many entries of `distances` fall into each of `bins`
    equal-width bins spanning `range`. When Numba is available the counting
    is done by a compiled kernel (serial or parallel); otherwise the call is
    forwarded to :func:`numpy.histogram`. The original author reports
    speed-ups of 10-15x over :func:`numpy.histogram` for large datasets,
    which makes this useful for RDF analysis.

    Parameters
    ----------
    distances : array_like
        Distances to histogram. Any array-like is accepted; it is converted
        to a C-contiguous array and flattened, mirroring how
        :func:`numpy.histogram` treats its input.
    bins : int, optional
        Number of equal-width bins (default 75). Must be at least 1.
    range : tuple of float, optional
        ``(lower, upper)`` limits of the histogram (default ``(0.0, 15.0)``).
        Both limits must be finite and ``lower <= upper``. If the two limits
        are equal, the range is widened by 0.5 on each side, which is what
        :func:`numpy.histogram` does for a degenerate range.
    use_parallel : bool, optional
        Whether to use the parallel kernel. If None (default), the parallel
        kernel is selected for inputs with more than 1000 elements. Ignored
        when Numba is not installed.

    Returns
    -------
    counts : numpy.ndarray
        The histogram counts as ``float64``, regardless of which code path
        produced them.
    edges : numpy.ndarray
        The ``bins + 1`` bin edges.

    Raises
    ------
    ValueError
        On the Numba path, if `bins` is smaller than 1, if a limit in
        `range` is not finite, or if ``range[0] > range[1]``. On the
        fallback path :func:`numpy.histogram` performs its own validation.

    Warns
    -----
    RuntimeWarning
        If Numba is not installed and :func:`numpy.histogram` is used
        instead.

    Notes
    -----
    This function requires Numba for acceleration. If Numba is not installed,
    it falls back to numpy.histogram with a warning.

    The parallel version provides best performance for large arrays and when
    multiple CPU cores are available. For small arrays, the serial version
    may be faster due to lower overhead.

    Examples
    --------
    >>> import numpy as np
    >>> from MDAnalysis.lib.histogram_opt import optimized_histogram
    >>> distances = np.random.random(10000) * 15.0
    >>> hist, edges = optimized_histogram(distances, bins=75, range=(0, 15))

    .. versionadded:: 2.10.0
    """
    if not HAS_NUMBA:
        import warnings

        warnings.warn(
            "Numba not available, falling back to numpy.histogram. "
            "Install numba for 10-15x performance improvement.",
            RuntimeWarning,
            stacklevel=2,
        )
        counts, edges = np.histogram(distances, bins=bins, range=range)
        # np.histogram yields integer counts; cast so the return dtype does
        # not depend on whether Numba happens to be installed.
        return counts.astype(np.float64), edges

    # The compiled kernels divide by the bin width and index the output
    # array without bounds checks, so reject inputs that would make the
    # width zero/invalid before they reach compiled code.
    if bins < 1:
        raise ValueError("`bins` must be a positive integer")

    lower = float(range[0])
    upper = float(range[1])
    if not (np.isfinite(lower) and np.isfinite(upper)):
        raise ValueError(
            "supplied range of [{}, {}] is not finite".format(lower, upper)
        )
    if lower > upper:
        raise ValueError("max must be larger than min in range parameter")
    if lower == upper:
        # Same widening np.histogram applies to a zero-width range.
        lower -= 0.5
        upper += 0.5

    # Create bin edges
    edges = np.linspace(lower, upper, bins + 1)

    # Accept lists and N-d input and guarantee the flat, C-contiguous layout
    # the kernels iterate over. No copy is made when `distances` is already
    # a contiguous array.
    distances = np.ascontiguousarray(distances).reshape(-1)

    # Auto-decide parallel vs serial if not specified
    if use_parallel is None:
        use_parallel = distances.size > 1000

    # Choose implementation based on size and parallelization setting
    if use_parallel:
        counts = _histogram_distances_parallel(distances, bins, edges)
    else:
        counts = _histogram_distances_serial(distances, bins, edges)

    return counts.astype(np.float64), edges
201208
202209
@@ -211,4 +218,4 @@ def optimized_histogram(distances, bins=75, range=(0.0, 15.0), use_parallel=None
211218 del _test_data , _test_edges
212219 except :
213220 # Silently fail if precompilation doesn't work
214- pass
221+ pass
0 commit comments