Skip to content

Commit 2c85401

Browse files
committed
Documentation update.
1 parent 9ef790d commit 2c85401

17 files changed

+679
-175
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
*.pyc
2+
/*.egg-info
23
/MANIFEST
34
/build
45
/dist
6+
/docs/_build
57
/env
68
/env3

cluster/cluster.py

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,13 @@ def __init__(self, level, *args):
3636
"""
3737
Constructor
3838
39-
PARAMETERS
40-
level - The level of this cluster. This is used in hierarchical
41-
clustering to retrieve a specific set of clusters. The
42-
higher the level, the smaller the count of clusters
43-
returned. The level depends on the difference function
44-
used.
45-
*args - every additional argument passed following the level value
46-
will get added as item to the cluster. You could also pass
47-
a list as second parameter to initialise the cluster with
48-
that list as content
39+
:param level: The level of this cluster. This is used in hierarchical
40+
clustering to retrieve a specific set of clusters. The higher the
41+
level, the smaller the count of clusters returned. The level depends
42+
on the difference function used.
43+
:param *args: every additional argument passed following the level value
44+
will get added as item to the cluster. You could also pass a list as
45+
second parameter to initialise the cluster with that list as content
4946
"""
5047
self.__level = level
5148
if len(args) == 0:
@@ -57,18 +54,16 @@ def append(self, item):
5754
"""
5855
Appends a new item to the cluster
5956
60-
PARAMETERS
61-
item - The item that is to be appended
57+
:param item: The item that is to be appended.
6258
"""
6359
self.__items.append(item)
6460

6561
def items(self, new_items=None):
6662
"""
6763
Sets or gets the items of the cluster
6864
69-
PARAMETERS
70-
new_items (optional) - if set, the items of the cluster will be
71-
replaced with that argument.
65+
:param new_items: if set, the items of the cluster will be replaced with
66+
that argument.
7267
"""
7368
if new_items is None:
7469
return self.__items
@@ -82,8 +77,7 @@ def fullyflatten(self, *args):
8277
some items of the cluster are clusters in their own right and you only
8378
want the items.
8479
85-
PARAMETERS
86-
*args - only used for recursion.
80+
:param *args: only used for recursion.
8781
"""
8882
flattened_items = []
8983
if len(args) == 0:
@@ -101,13 +95,13 @@ def fullyflatten(self, *args):
10195

10296
def level(self):
10397
"""
104-
Returns the level associated with this cluster
98+
Returns the level associated with this cluster.
10599
"""
106100
return self.__level
107101

108102
def display(self, depth=0):
109103
"""
110-
Pretty-prints this cluster. Useful for debuging
104+
Pretty-prints this cluster. Useful for debugging.
111105
"""
112106
print(depth * " " + "[level %s]" % self.__level)
113107
for item in self.__items:
@@ -120,20 +114,22 @@ def topology(self):
120114
"""
121115
Returns the structure (topology) of the cluster as tuples.
122116
123-
Output from cl.data:
124-
125-
<[email protected](['34.xls',
126-
127-
<[email protected](['ChangeLog', 'ChangeLog.txt'])>])>,
128-
<[email protected](['20060730.py',
129-
<[email protected](['.cvsignore',
130-
<[email protected](['About.py', <[email protected](['.idlerc',
131-
'.pylint.d'])>])>])>])>])>])>])>]
117+
Output from cl.data::
132118
133-
Corresponding output from cl.topo():
134-
('CVS', ('34.xls', (('0.txt', ('ChangeLog', 'ChangeLog.txt')),
135-
('20060730.py', ('.cvsignore', ('About.py',
136-
('.idlerc', '.pylint.d')))))))
119+
120+
<[email protected](['34.xls',
121+
122+
<[email protected](['ChangeLog', 'ChangeLog.txt'])>])>,
123+
<[email protected](['20060730.py',
124+
<[email protected](['.cvsignore',
125+
<[email protected](['About.py', <[email protected](['.idlerc',
126+
'.pylint.d'])>])>])>])>])>])>])>]
127+
128+
Corresponding output from cl.topo()::
129+
130+
('CVS', ('34.xls', (('0.txt', ('ChangeLog', 'ChangeLog.txt')),
131+
('20060730.py', ('.cvsignore', ('About.py',
132+
('.idlerc', '.pylint.d')))))))
137133
"""
138134

139135
left = self.__items[0]
@@ -159,10 +155,9 @@ def getlevel(self, threshold):
159155
receive and the higher you set it, you will receive less but bigger
160156
clusters.
161157
162-
PARAMETERS
163-
threshold - The level threshold
158+
:param threshold: The level threshold.
164159
165-
NOTE
160+
.. note::
166161
It is debatable whether the value passed into this method should
167162
really be as strongly linked to the real cluster-levels as it is
168163
right now. The end-user will not know the range of this value

cluster/matrix.py

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
#
2+
# This is part of "python-cluster". A library to group similar items together.
3+
# Copyright (C) 2006 Michel Albert
4+
#
5+
# This library is free software; you can redistribute it and/or modify it
6+
# under the terms of the GNU Lesser General Public License as published by the
7+
# Free Software Foundation; either version 2.1 of the License, or (at your
8+
# option) any later version.
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12+
# for more details.
13+
# You should have received a copy of the GNU Lesser General Public License
14+
# along with this library; if not, write to the Free Software Foundation,
15+
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16+
#
17+
118

219
import logging
320
from multiprocessing import Process, Queue, current_process
@@ -7,35 +24,35 @@
724

825

926
class Matrix(object):
10-
"""Object representation of the item-item matrix
27+
"""
28+
Object representation of the item-item matrix.
1129
"""
1230

1331
def __init__(self, data, combinfunc, symmetric=False, diagonal=None):
14-
"""Takes a list of data and generates a 2D-matrix using the supplied
32+
"""
33+
Takes a list of data and generates a 2D-matrix using the supplied
1534
combination function to calculate the values.
1635
17-
PARAMETERS
18-
data - the list of items
19-
combinfunc - the function that is used to calculate teh value in a
20-
cell. It has to cope with two arguments.
21-
symmetric - Whether it will be a symmetric matrix along the diagonal.
22-
For example, if the list contains integers, and the
23-
combination function is abs(x-y), then the matrix will
24-
be symmetric.
25-
Default: False
26-
diagonal - The value to be put into the diagonal. For some
27-
functions, the diagonal will stay constant. An example
28-
could be the function "x-y". Then each diagonal cell
29-
will be "0". If this value is set to None, then the
30-
diagonal will be calculated. Default: None
36+
:param data: the list of items.
37+
:param combinfunc: the function that is used to calculate the value in a
38+
cell. It has to cope with two arguments.
39+
:param symmetric: Whether it will be a symmetric matrix along the
40+
diagonal. For example, if the list contains integers, and the
41+
combination function is ``abs(x-y)``, then the matrix will be
42+
symmetric.
43+
:param diagonal: The value to be put into the diagonal. For some
44+
functions, the diagonal will stay constant. An example could be the
45+
function ``x-y``. Then each diagonal cell will be ``0``. If this
46+
value is set to None, then the diagonal will be calculated.
3147
"""
3248
self.data = data
3349
self.combinfunc = combinfunc
3450
self.symmetric = symmetric
3551
self.diagonal = diagonal
3652

3753
def worker(self):
38-
"""Multiprocessing task function run by worker processes
54+
"""
55+
Multiprocessing task function run by worker processes
3956
"""
4057
tasks_completed = 0
4158
for task in iter(self.task_queue.get, 'STOP'):
@@ -50,14 +67,13 @@ def worker(self):
5067
tasks_completed)
5168

5269
def genmatrix(self, num_processes=1):
53-
"""Actually generate the matrix
54-
55-
PARAMETERS
56-
num_processes
57-
- If you want to use multiprocessing to split up the work
58-
and run combinfunc() in parallel, specify num_processes
59-
> 1 and this number of workers will be spun up, the work
60-
split up amongst them evenly. Default: 1
70+
"""
71+
Actually generate the matrix
72+
73+
:param num_processes: If you want to use multiprocessing to split up the
74+
work and run ``combinfunc()`` in parallel, specify
75+
``num_processes > 1`` and this number of workers will be spun up,
76+
with the work split up amongst them evenly.
6177
"""
6278
use_multiprocessing = num_processes > 1
6379
if use_multiprocessing:
@@ -136,11 +152,8 @@ def genmatrix(self, num_processes=1):
136152

137153
def __str__(self):
138154
"""
139-
Prints out a 2-dimensional list of data cleanly.
140-
This is useful for debugging.
141-
142-
PARAMETERS
143-
data - the 2D-list to display
155+
Returns a 2-dimensional list of data as text-string which can be
156+
displayed to the user.
144157
"""
145158
# determine maximum length
146159
maxlen = 0

cluster/method/base.py

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19,33 +19,27 @@
1919
class BaseClusterMethod(object):
2020
"""
2121
The base class of all clustering methods.
22-
"""
23-
24-
def __init__(self, input, distance_function):
25-
"""
26-
Constructs the object and starts clustering
2722
28-
PARAMETERS
29-
input - a list of objects
30-
distance_function - a function returning the distance - or
31-
opposite of similarity ( distance =
32-
-similarity ) - of two items from the input.
33-
In other words, the closer the two items are
34-
related, the smaller this value needs to be.
35-
With 0 meaning they are exactly the same.
23+
:param input: a list of objects
24+
:param distance_function: a function returning the distance - or opposite of
25+
similarity ``(distance = -similarity)`` - of two items from the input.
26+
In other words, the closer the two items are related, the smaller this
27+
value needs to be. With 0 meaning they are exactly the same.
3628
37-
NOTES
38-
The distance function should always return the absolute distance
39-
between two given items of the list. Say,
29+
.. note::
30+
The distance function should always return the absolute distance between
31+
two given items of the list. Say::
4032
4133
distance(input[1], input[4]) = distance(input[4], input[1])
4234
43-
This is very important for the clustering algorithm to work!
44-
Naturally, the data returned by the distance function MUST be a
45-
comparable datatype, so you can perform arithmetic comparisons on
46-
them (< or >)! The simplest examples would be floats or ints. But
47-
as long as they are comparable, it's ok.
48-
"""
35+
This is very important for the clustering algorithm to work! Naturally,
36+
the data returned by the distance function MUST be a comparable
37+
datatype, so you can perform arithmetic comparisons on them (``<`` or
38+
``>``)! The simplest examples would be floats or ints. But as long as
39+
they are comparable, it's ok.
40+
"""
41+
42+
def __init__(self, input, distance_function):
4943
self.distance = distance_function
5044
self._input = input # the original input
5145
self._data = input[:] # clone the input so we can work with it
@@ -55,7 +49,7 @@ def topo(self):
5549
"""
5650
Returns the structure (topology) of the cluster.
5751
58-
See Cluster.topology() for information.
52+
See :py:meth:`~cluster.cluster.Cluster.topology` for more information.
5953
"""
6054
return self.data[0].topology()
6155

0 commit comments

Comments
 (0)