@@ -31,21 +31,34 @@ def _cluster_peaks(mzs, ppm, distype='euclidean', linkmode='centroid'):
31
31
if len (mzs ) == 1 :
32
32
return np .zeros_like (mzs , dtype = int ).reshape ((- 1 , 1 ))
33
33
34
+ outer_mzs = np .add .outer (mzs , mzs )
35
+ np .fill_diagonal (outer_mzs , 0 )
36
+
37
+ # avg_mz_pair = np.divide(outer_mzs, 2)
38
+ outer_mzs /= 2 # inplace operation to reduce memory usage
39
+
40
+ # mdist_mz_pair = squareform(avg_mz_pair)
41
+ mdist_mz_pair = squareform (outer_mzs )
42
+ del outer_mzs # reduce memory use
43
+
34
44
m = np .column_stack ([mzs ])
35
45
mdist = fc .pdist (m , metric = distype )
46
+ del m
36
47
37
- outer_mzs = np .add .outer (mzs , mzs )
38
- np .fill_diagonal (outer_mzs , 0 )
39
- avg_mz_pair = np .divide (outer_mzs , 2 )
40
- mdist_mz_pair = squareform (avg_mz_pair )
41
- relative_errors = np .multiply (mdist_mz_pair , 1e-6 )
48
+ # relative_errors = np.multiply(mdist_mz_pair, 1e-6)
49
+ mdist_mz_pair *= 1e-6 # inplace operation to reduce memory usage
42
50
43
51
with np .errstate (divide = 'ignore' , invalid = 'ignore' ): # using errstate context to avoid seterr side effects
44
- m_mass_tol = np .divide (mdist , relative_errors )
45
- m_mass_tol [np .isnan (m_mass_tol )] = 0.0
46
- z = fc .linkage (m_mass_tol , method = linkmode )
52
+ # m_mass_tol = np.divide(mdist, relative_errors)
53
+ mdist /= mdist_mz_pair # inplace operation to reduce memory usage
54
+ # m_mass_tol[np.isnan(m_mass_tol)] = 0.0
55
+ mdist [np .isnan (mdist )] = 0.0
56
+
57
+ # z = fc.linkage(m_mass_tol, method=linkmode)
58
+ z = fc .linkage (mdist , method = linkmode )
59
+ del mdist , mdist_mz_pair
47
60
48
- # cut tree at ppm threshold & order matches the order of mzs
61
+ # cut tree at ppm threshold
49
62
return cluster .hierarchy .cut_tree (z , height = ppm )
50
63
51
64
0 commit comments