Skip to content

Commit 1448a25

Browse files
committed
Merge branch 'tim-littlefair-master' into hotfix/1.4.1
Fixes #28
2 parents 6f8acd4 + 90032af commit 1448a25

File tree

3 files changed

+84
-0
lines changed

3 files changed

+84
-0
lines changed

cluster/matrix.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,42 @@
2222

2323
logger = logging.getLogger(__name__)
2424

25+
def _encapsulate_item_for_combinfunc(item):
26+
"""
27+
This function has been extracted in order to
28+
make Github issue #28 easier to investigate.
29+
It replaces the following two lines of code,
30+
which occur twice in method genmatrix, just
31+
before the invocation of combinfunc.
32+
if not hasattr(item, '__iter__') or isinstance(item, tuple):
33+
item = [item]
34+
Logging was added to the original two lines
35+
and shows that the outcome of this snippet
36+
has changed between Python2.7 and Python3.5.
37+
This logging showed that the difference in
38+
outcome consisted of the handling of the builtin
39+
str class, which was encapsulated into a list in
40+
Python2.7 but returned naked in Python3.5.
41+
Adding a test for this specific class to the
42+
set of conditions appears to give correct behaviour
43+
under both versions.
44+
"""
45+
encapsulated_item = None
46+
if (
47+
not hasattr(item, '__iter__') or
48+
isinstance(item, tuple) or
49+
isinstance(item, str)
50+
):
51+
encapsulated_item = [item]
52+
else:
53+
encapsulated_item = item
54+
logging.debug(
55+
"item class:%s encapsulated as:%s ",
56+
item.__class__.__name__,
57+
encapsulated_item.__class__.__name__
58+
)
59+
return encapsulated_item
60+
2561

2662
class Matrix(object):
2763
"""
@@ -123,10 +159,17 @@ def genmatrix(self, num_processes=1):
123159
num_tasks_completed += 1
124160
else:
125161
# Otherwise do it here, in line
162+
"""
126163
if not hasattr(item, '__iter__') or isinstance(item, tuple):
127164
item = [item]
128165
if not hasattr(item2, '__iter__') or isinstance(item2, tuple):
129166
item2 = [item2]
167+
"""
168+
# See the comment in function _encapsulate_item_for_combinfunc
169+
# for details of why the lines above have been replaced
170+
# by function invocations
171+
item = _encapsulate_item_for_combinfunc(item)
172+
item2 = _encapsulate_item_for_combinfunc(item2)
130173
row[col_index] = self.combinfunc(item, item2)
131174

132175
if self.symmetric:

cluster/test/test_hierarchical.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,42 @@ def euclidian_distance(a, b):
231231
result = cl.getlevel(40)
232232
self.assertIsNotNone(result)
233233

234+
class Issue28TestCase(Py23TestCase):
    '''
    Regression test for Github issue #28.

    The data being clustered are dictionary keys, while the distance
    function operates on the values those keys are associated with in
    the dictionary, rather than on the keys themselves.

    The issue was reported as a difference in behaviour between
    Python2.7 (where this scenario behaved as expected) and Python3.5.
    '''

    def testIssue28(self):
        "Issue28 (Hierarchical Clustering)"

        values_by_key = {
            'p4': 5, 'p2': 6, 'p7': 10,
            'p9': 120, 'p10': 121, 'p11': 119,
        }

        def distance_func(a, b):
            # Distance is measured between the mapped values, not the keys.
            return abs(values_by_key[a] - values_by_key[b])

        cl = HierarchicalClustering(list(values_by_key), distance_func)
        result = cl.getlevel(20)
        self.assertIsNotNone(result)
259+
235260
if __name__ == '__main__':

    import logging

    # unittest.makeSuite() was deprecated in Python 3.11 and removed in
    # Python 3.13; TestLoader.loadTestsFromTestCase() is the supported
    # equivalent and is also available on Python 2.7.
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite((
        load_tests_from(HClusterIntegerTestCase),
        load_tests_from(HClusterSmallListTestCase),
        load_tests_from(HClusterStringTestCase),
        load_tests_from(Issue28TestCase),
    ))

    # Show debug-level log output while the suite runs.
    logging.basicConfig(level=logging.DEBUG)
    unittest.TextTestRunner(verbosity=2).run(suite)

cluster/test/test_linkage.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,14 @@ def test_average_distance(self):
2929
result = average(self.set_a, self.set_b, self.dist)
3030
expected = 22.5
3131
self.assertEqual(result, expected)
32+
33+
if __name__ == '__main__':

    import logging

    # unittest.makeSuite() was deprecated in Python 3.11 and removed in
    # Python 3.13; TestLoader.loadTestsFromTestCase() is the supported
    # equivalent and is also available on Python 2.7.
    suite = unittest.TestSuite((
        unittest.defaultTestLoader.loadTestsFromTestCase(LinkageMethods),
    ))

    # Show debug-level log output while the suite runs.
    logging.basicConfig(level=logging.DEBUG)
    unittest.TextTestRunner(verbosity=2).run(suite)

0 commit comments

Comments
 (0)