Skip to content

Commit 634673a

Browse files
committed
Added some logging
Should be helpful for when the matrix is huge
1 parent b256100 commit 634673a

File tree

2 files changed

+21
-9
lines changed

2 files changed

+21
-9
lines changed

cluster/method/hierarchical.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1616
#
1717

18+
import logging
1819

1920
from cluster.cluster import Cluster
2021
from cluster.method.base import BaseClusterMethod
2122
from cluster.util import median, mean, genmatrix
2223

2324

25+
logger = logging.getLogger(__name__)
26+
27+
2428
class HierarchicalClustering(BaseClusterMethod):
2529
"""
2630
Implementation of the hierarchical clustering method as explained in
@@ -37,15 +41,17 @@ class HierarchicalClustering(BaseClusterMethod):
3741
Note that all of the returned clusters are more than 90 apart
3842
"""
3943

40-
def __init__(self, data, distance_function, linkage='single'):
44+
def __init__(self, data, distance_function, linkage=None):
4145
"""
4246
Constructor
4347
4448
See BaseClusterMethod.__init__ for more details.
4549
"""
50+
if not linkage:
51+
linkage = 'single'
52+
logger.info("Initializing HierarchicalClustering object with linkage method %s",
53+
linkage)
4654
BaseClusterMethod.__init__(self, data, distance_function)
47-
48-
# set the linkage type to single
4955
self.set_linkage_method(linkage)
5056
self.__cluster_created = False
5157

@@ -199,6 +205,7 @@ def cluster(self, matrix=None, level=None, sequence=None):
199205
level - The current level of clustering
200206
sequence - The sequence number of the clustering
201207
"""
208+
logger.info("Performing cluster()")
202209

203210
if matrix is None:
204211
# create level 0, first iteration (sequence)
@@ -247,6 +254,7 @@ def cluster(self, matrix=None, level=None, sequence=None):
247254

248255
# all the data is in one single cluster. We return that and stop
249256
self.__cluster_created = True
257+
logger.info("Call to cluster() is complete")
250258
return
251259

252260
def getlevel(self, threshold):

cluster/util.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1616
#
1717

18+
import logging
19+
20+
21+
logger = logging.getLogger(__name__)
22+
1823

1924
class ClusteringError(Exception):
2025
pass
@@ -100,12 +105,12 @@ def genmatrix(data, combinfunc, symmetric=False, diagonal=None):
100105
will be "0". If this value is set to None, then the
101106
diagonal will be calculated. Default: None
102107
"""
108+
logger.info("Generating matrix for %s items - O(n^2)", len(data))
103109
matrix = []
104-
row_index = 0
105-
for item in data:
110+
for row_index, item in enumerate(data):
111+
logger.debug("Generating row %s", row_index)
106112
row = []
107-
col_index = 0
108-
for item2 in data:
113+
for col_index, item2 in enumerate(data):
109114
if diagonal is not None and col_index == row_index:
110115
# if this is a cell on the diagonal
111116
row.append(diagonal)
@@ -116,9 +121,8 @@ def genmatrix(data, combinfunc, symmetric=False, diagonal=None):
116121
else:
117122
# if this cell is not on the diagonal
118123
row.append(combinfunc(item, item2))
119-
col_index += 1
120124
matrix.append(row)
121-
row_index += 1
125+
logger.info("Matrix generated")
122126
return matrix
123127

124128

0 commit comments

Comments
 (0)