@@ -214,18 +214,18 @@ def append(self, item):
214214 """
215215 self .__items .append (item )
216216
217- def items (self , newItems = None ):
217+ def items (self , new_items = None ):
218218 """
219219 Sets or gets the items of the cluster
220220
221221 PARAMETERS
222- newItems (optional) - if set, the items of the cluster will be
222+ new_items (optional) - if set, the items of the cluster will be
223223 replaced with that argument.
224224 """
225- if newItems is None :
225+ if new_items is None :
226226 return self .__items
227227 else :
228- self .__items = newItems
228+ self .__items = new_items
229229
230230 def fullyflatten (self , * args ):
231231 """
@@ -441,10 +441,10 @@ def __init__(self, data, distance_function, linkage='single'):
441441 BaseClusterMethod .__init__ (self , data , distance_function )
442442
443443 # set the linkage type (defaults to single)
444- self .setLinkageMethod (linkage )
445- self .__clusterCreated = False
444+ self .set_linkage_method (linkage )
445+ self .__cluster_created = False
446446
447- def setLinkageMethod (self , method ):
447+ def set_linkage_method (self , method ):
448448 """
449449 Sets the method to determine the distance between two clusters.
450450
@@ -453,18 +453,18 @@ def setLinkageMethod(self, method):
453453 'complete', 'average' or 'uclus'
454454 """
455455 if method == 'single' :
456- self .linkage = self .singleLinkageDistance
456+ self .linkage = self .single_linkage_distance
457457 elif method == 'complete' :
458- self .linkage = self .completeLinkageDistance
458+ self .linkage = self .complete_linkage_distance
459459 elif method == 'average' :
460- self .linkage = self .averageLinkageDistance
460+ self .linkage = self .average_linkage_distance
461461 elif method == 'uclus' :
462- self .linkage = self .uclusDistance
462+ self .linkage = self .uclus_distance
463463 else :
464464 raise ValueError ('distance method must be one of single, '
465465 'complete, average of uclus' )
466466
467- def uclusDistance (self , x , y ):
467+ def uclus_distance (self , x , y ):
468468 """
469469 The method to determine the distance between one cluster and another
470470 item/cluster. The distance equals to the *average* (median) distance
@@ -475,20 +475,24 @@ def uclusDistance(self, x, y):
475475 y - second cluster/item
476476 """
477477 # create a flat list of all the items in <x>
478- if not isinstance (x , Cluster ): x = [x ]
479- else : x = x .fullyflatten ()
478+ if not isinstance (x , Cluster ):
479+ x = [x ]
480+ else :
481+ x = x .fullyflatten ()
480482
481483 # create a flat list of all the items in <y>
482- if not isinstance (y , Cluster ): y = [y ]
483- else : y = y .fullyflatten ()
484+ if not isinstance (y , Cluster ):
485+ y = [y ]
486+ else :
487+ y = y .fullyflatten ()
484488
485489 distances = []
486490 for k in x :
487491 for l in y :
488- distances .append (self .distance (k ,l ))
492+ distances .append (self .distance (k , l ))
489493 return median (distances )
490494
491- def averageLinkageDistance (self , x , y ):
495+ def average_linkage_distance (self , x , y ):
492496 """
493497 The method to determine the distance between one cluster and another
494498 item/cluster. The distance equals to the *average* (mean) distance
@@ -499,20 +503,24 @@ def averageLinkageDistance(self, x, y):
499503 y - second cluster/item
500504 """
501505 # create a flat list of all the items in <x>
502- if not isinstance (x , Cluster ): x = [x ]
503- else : x = x .fullyflatten ()
506+ if not isinstance (x , Cluster ):
507+ x = [x ]
508+ else :
509+ x = x .fullyflatten ()
504510
505511 # create a flat list of all the items in <y>
506- if not isinstance (y , Cluster ): y = [y ]
507- else : y = y .fullyflatten ()
512+ if not isinstance (y , Cluster ):
513+ y = [y ]
514+ else :
515+ y = y .fullyflatten ()
508516
509517 distances = []
510518 for k in x :
511519 for l in y :
512- distances .append (self .distance (k ,l ))
520+ distances .append (self .distance (k , l ))
513521 return mean (distances )
514522
515- def completeLinkageDistance (self , x , y ):
523+ def complete_linkage_distance (self , x , y ):
516524 """
517525 The method to determine the distance between one cluster and another
518526 item/cluster. The distance equals to the *longest* distance from any
@@ -524,22 +532,26 @@ def completeLinkageDistance(self, x, y):
524532 """
525533
526534 # create a flat list of all the items in <x>
527- if not isinstance (x , Cluster ): x = [x ]
528- else : x = x .fullyflatten ()
535+ if not isinstance (x , Cluster ):
536+ x = [x ]
537+ else :
538+ x = x .fullyflatten ()
529539
530540 # create a flat list of all the items in <y>
531- if not isinstance (y , Cluster ): y = [y ]
532- else : y = y .fullyflatten ()
541+ if not isinstance (y , Cluster ):
542+ y = [y ]
543+ else :
544+ y = y .fullyflatten ()
533545
534546 # retrieve the maximum distance (complete-linkage)
535547 maxdist = self .distance (x [0 ], y [0 ])
536548 for k in x :
537549 for l in y :
538- maxdist = max (maxdist , self .distance (k ,l ))
550+ maxdist = max (maxdist , self .distance (k , l ))
539551
540552 return maxdist
541553
542- def singleLinkageDistance (self , x , y ):
554+ def single_linkage_distance (self , x , y ):
543555 """
544556 The method to determine the distance between one cluster and another
545557 item/cluster. The distance equals to the *shortest* distance from any
@@ -551,18 +563,22 @@ def singleLinkageDistance(self, x, y):
551563 """
552564
553565 # create a flat list of all the items in <x>
554- if not isinstance (x , Cluster ): x = [x ]
555- else : x = x .fullyflatten ()
566+ if not isinstance (x , Cluster ):
567+ x = [x ]
568+ else :
569+ x = x .fullyflatten ()
556570
557571 # create a flat list of all the items in <y>
558- if not isinstance (y , Cluster ): y = [y ]
559- else : y = y .fullyflatten ()
572+ if not isinstance (y , Cluster ):
573+ y = [y ]
574+ else :
575+ y = y .fullyflatten ()
560576
561577 # retrieve the minimum distance (single-linkage)
562578 mindist = self .distance (x [0 ], y [0 ])
563579 for k in x :
564580 for l in y :
565- mindist = min (mindist , self .distance (k ,l ))
581+ mindist = min (mindist , self .distance (k , l ))
566582
567583 return mindist
568584
@@ -581,9 +597,9 @@ def cluster(self, matrix=None, level=None, sequence=None):
581597
582598 if matrix is None :
583599 # create level 0, first iteration (sequence)
584- level = 0
600+ level = 0
585601 sequence = 0
586- matrix = []
602+ matrix = []
587603
588604 # if the matrix only has two rows left, we are done
589605 while len (matrix ) > 2 or matrix == []:
@@ -625,7 +641,7 @@ def cluster(self, matrix=None, level=None, sequence=None):
625641 self ._data .append (cluster ) # append item 1 and 2 combined
626642
627643 # all the data is in one single cluster. We return that and stop
628- self .__clusterCreated = True
644+ self .__cluster_created = True
629645 return
630646
631647 def getlevel (self , threshold ):
@@ -645,7 +661,7 @@ def getlevel(self, threshold):
645661 return self ._input
646662
647663 # initialize the cluster if not yet done
648- if not self .__clusterCreated :
664+ if not self .__cluster_created :
649665 self .cluster ()
650666
651667 return self ._data [0 ].getlevel (threshold )
@@ -675,6 +691,7 @@ def __init__(self, data, distance=None):
675691 and applies a generalised form of the
676692 Euclidean-distance algorithm on them.
677693 """
694+ self .__clusters = []
678695 self .__data = data
679696 self .distance = distance
680697 self .__initial_length = len (data )
@@ -684,9 +701,9 @@ def __init__(self, data, distance=None):
684701 control_length = len (data [0 ])
685702 for item in data [1 :]:
686703 if len (item ) != control_length :
687- raise ValueError ("Each item in the data list must have the
688- same amount of dimensions . Item ", item, " was out
689- of line !" )
704+ raise ValueError ("Each item in the data list must have "
705+ "the same amount of dimensions. Item %r was out "
706+ " of line!" % item )
690707 # now check if we need and have a distance function
691708 if (len (data ) > 1 and not isinstance (data [0 ], TupleType ) and
692709 distance is None ):
@@ -697,32 +714,32 @@ def __init__(self, data, distance=None):
697714 elif distance is None :
698715 self .distance = minkowski_distance
699716
700- def getclusters (self , n ):
717+ def getclusters (self , count ):
701718 """
702- Generates <n > clusters
719+ Generates <count > clusters
703720
704721 PARAMETERS
705- n - The amount of clusters that should be generated.
706- n must be greater than 1
722+ count - The amount of clusters that should be generated.
723+ count must be greater than 1
707724 """
708725
709726 # only proceed if we got sensible input
710- if n <= 1 :
727+ if count <= 1 :
711728 raise ClusteringError ("When clustering, you need to ask for at "
712- "least two clusters! You asked for %d" % n )
729+ "least two clusters! You asked for %d" % count )
713730
714731 # return the data straight away if there is nothing to cluster
715732 if (self .__data == [] or len (self .__data ) == 1 or
716- n == self .__initial_length ):
733+ count == self .__initial_length ):
717734 return self .__data
718735
719736 # It makes no sense to ask for more clusters than data-items available
720- if n > self .__initial_length :
737+ if count > self .__initial_length :
721738 raise ClusteringError ("Unable to generate more clusters than "
722739 "items available. You supplied %d items, and asked for "
723- "%d clusters." % (self .__initial_length , n ) )
740+ "%d clusters." % (self .__initial_length , count ) )
724741
725- self .initialiseClusters (self .__data , n )
742+ self .initialise_clusters (self .__data , count )
726743
727744 items_moved = True # tells us if any item moved between the clusters,
728745 # as we initialised the clusters, we assume that
@@ -769,22 +786,22 @@ def move_item(self, item, origin, destination):
769786 """
770787 destination .append (origin .pop (origin .index (item )))
771788
772- def initialiseClusters (self , input , clustercount ):
789+ def initialise_clusters (self , input_ , clustercount ):
773790 """
774791 Initialises the clusters by distributing the items from the data
775792 evenly across n clusters
776793
777794 PARAMETERS
778- input - the data set (a list of tuples)
795+ input_ - the data set (a list of tuples)
779796 clustercount - the amount of clusters (n)
780797 """
781798 # initialise the clusters with empty lists
782799 self .__clusters = []
783- for x in xrange (clustercount ):
800+ for _ in xrange (clustercount ):
784801 self .__clusters .append ([])
785802
786803 # distribute the items into the clusters
787804 count = 0
788- for item in input :
805+ for item in input_ :
789806 self .__clusters [count % clustercount ].append (item )
790807 count += 1
0 commit comments