Skip to content
This repository was archived by the owner on Jan 8, 2026. It is now read-only.

Commit de403e9

Browse files
committed
Update docs
1 parent 47a38cc commit de403e9

File tree

10 files changed

+343
-161
lines changed

10 files changed

+343
-161
lines changed

clusx/__main__.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
1-
"""Entry point for direct module execution.
1+
"""
2+
Entry point for direct module execution.
23
34
This module serves as the main entry point when the package is executed directly
4-
using ``python -m clusx``. It initializes the command-line interface and passes
5-
control to the main CLI function.
5+
using ``python -m clusx``.
6+
7+
It initializes the command-line interface and passes control to the main CLI function.
8+
9+
When executed with ``python -m clusx``, this module will initialize the CLI and
10+
handle command-line arguments through the main function in the cli module.
11+
12+
See Also
13+
--------
14+
clusx.cli : Contains the main CLI implementation
615
"""
716

817
import sys
@@ -11,11 +20,15 @@
1120

1221

1322
def init() -> None:
14-
"""Run clusx.cli.main() when current file is executed by an interpreter.
23+
"""
24+
Run clusx.cli.main() when current file is executed by an interpreter.
25+
26+
This function ensures that the CLI main function is only executed when this
27+
file is run directly, not when imported as a module.
1528
16-
If the file is used as a module, the :func:`clusx.cli.main` function will
17-
not automatically execute. The :func:`sys.exit` function is called with a
18-
return value of :func:`clusx.cli.main`, as all good UNIX programs do.
29+
The :func:`sys.exit` function is called with the return value of
30+
:func:`clusx.cli.main`, following standard UNIX program conventions for exit
31+
codes.
1932
"""
2033
if __name__ == "__main__":
2134
sys.exit(main())

clusx/clustering/models.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,19 @@
1010
1111
Classes
1212
-------
13-
DirichletProcess
13+
:class:`DirichletProcess`
1414
Implements clustering using the Dirichlet Process with concentration parameter
1515
alpha and precision parameter kappa.
16-
PitmanYorProcess
16+
:class:`PitmanYorProcess`
1717
Extends DirichletProcess with an additional discount parameter for more flexible
1818
power-law behavior in cluster size distributions.
1919
2020
Notes
2121
-----
22-
Both implementations follow a scikit-learn compatible API with fit(), predict(),
23-
and fit_predict() methods. The Pitman-Yor Process is generally better suited for
24-
text data as it can model the power-law distributions common in natural language.
22+
Both implementations follow a scikit-learn compatible API with ``fit()``,
23+
``predict()``, and ``fit_predict()`` methods. The Pitman-Yor Process is generally
24+
better suited for text data as it can model the power-law distributions common in
25+
natural language.
2526
"""
2627

2728
from __future__ import annotations
@@ -598,7 +599,7 @@ def fit_predict(self, documents, _y=None):
598599
599600
Notes
600601
-----
601-
This method is a convenience function that calls fit() followed by
602+
This method is a convenience function that calls :meth:`fit` followed by
602603
returning the cluster labels from the fitting process.
603604
"""
604605
self.fit(documents)

clusx/errors.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,41 @@ class EvaluationError(ClusxError):
1010

1111

1212
class ClusterIntegrityError(ClusxError):
13-
"""Error raised when a cluster assignments file has integrity issues.
13+
"""
14+
Error raised when a cluster assignments file has integrity issues.
1415
1516
This error indicates that the cluster assignments file is corrupted,
1617
was created with errors, or is missing critical information needed
1718
for further processing.
19+
20+
See Also
21+
--------
22+
:class:`MissingClusterColumnError`, :class:`MissingParametersError`
1823
"""
1924

2025

2126
class MissingClusterColumnError(ClusterIntegrityError):
22-
"""Error raised when a cluster assignments file is missing the cluster column.
27+
"""
28+
Error raised when a cluster assignments file is missing the cluster column.
2329
2430
This error indicates that the file does not contain a column that starts with
2531
``Cluster_`` (such as ``Cluster_PYP`` or ``Cluster_DP``), which is required for identifying
2632
cluster assignments.
33+
34+
See Also
35+
--------
36+
ClusterIntegrityError : Parent class for integrity errors
37+
MissingParametersError : Related error for missing parameters
2738
"""
2839

2940
def __init__(self, file_path: str):
30-
"""Initialize the error with the path to the problematic file.
41+
"""
42+
Initialize the error with the path to the problematic file.
3143
32-
Args:
33-
file_path: Path to the file missing the cluster column
44+
Parameters
45+
----------
46+
file_path : str
47+
Path to the file missing the cluster column
3448
"""
3549
self.file_path = file_path
3650
message = (
@@ -42,18 +56,35 @@ def __init__(self, file_path: str):
4256

4357

4458
class MissingParametersError(ClusterIntegrityError):
45-
"""Error raised when a cluster assignments file is missing required parameters.
59+
"""
60+
Error raised when a cluster assignments file is missing required parameters.
4661
4762
This error indicates that the file is missing one or more of the required
4863
parameters (alpha, sigma, variance) needed for further processing.
64+
65+
Parameters
66+
----------
67+
file_path : str
68+
Path to the file with missing parameters
69+
missing_params : list[str]
70+
List of parameter names that are missing
71+
72+
See Also
73+
--------
74+
ClusterIntegrityError : Parent class for integrity errors
75+
MissingClusterColumnError : Related error for missing cluster columns
4976
"""
5077

5178
def __init__(self, file_path: str, missing_params: list[str]):
52-
"""Initialize the error with the path to the problematic file and missing parameters.
53-
54-
Args:
55-
file_path: Path to the file with missing parameters
56-
missing_params: List of parameter names that are missing
79+
"""
80+
Initialize the error with the path to the problematic file and missing parameters.
81+
82+
Parameters
83+
----------
84+
file_path : str
85+
Path to the file with missing parameters
86+
missing_params : list[str]
87+
List of parameter names that are missing
5788
""" # noqa: E501
5889
self.file_path = file_path
5990
self.missing_params = missing_params

clusx/logging.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,14 @@ def get_logger(name: str) -> logging.Logger:
5555
"""
5656
Get a logger with the specified name.
5757
58-
This function returns a logger instance configured with the specified name,
59-
which is typically the module name (__name__). Using named loggers allows for
60-
hierarchical logging configuration and makes it easier to identify the source
61-
of log messages.
62-
63-
The returned logger inherits settings from the root logger configured by
64-
setup_logging(), but can be further customized if needed.
65-
66-
Args:
67-
name: The name for the logger (typically __name__)
68-
69-
Returns:
70-
logging.Logger: A configured logger instance ready for use
58+
Parameters
59+
----------
60+
name : str
61+
The name for the logger (typically ``__name__``).
62+
63+
Returns
64+
-------
65+
logging.Logger
66+
A configured logger instance ready for use.
7167
"""
7268
return logging.getLogger(name)

clusx/utils.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,21 @@
1919

2020
def to_numpy(embedding: EmbeddingTensor) -> NDArray[np.float32]:
2121
"""
22-
A helper function to convert a tensor to a numpy array.
22+
Convert a tensor to a numpy array.
2323
24-
If embedding is already a numpy array (or compatible), it is returned as is.
25-
Otherwise, it is converted to a numpy array.
24+
This function uses duck typing to detect PyTorch tensors by checking for
25+
the presence of the ``detach()`` method.
2626
27-
Args:
28-
embedding: The tensor to convert.
27+
Parameters
28+
----------
29+
embedding : EmbeddingTensor
30+
The tensor to convert. Can be a PyTorch tensor or a numpy array.
2931
30-
Returns: The numpy array.
32+
Returns
33+
-------
34+
numpy.ndarray
35+
The input converted to a numpy array. If the input is already a numpy array
36+
(or compatible), it is returned as is.
3137
"""
3238
# Use duck typing to check if this is a PyTorch tensor
3339
# PyTorch tensors have detach() method, numpy arrays don't

clusx/version.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""Version information.
22
3-
Provides package metadata through a cascading resolution strategy:
3+
This module provides package metadata through a cascading resolution strategy.
44
5-
1. Installed package metadata (via :mod:`importlib.metadata`)
6-
2. :file:`pyproject.toml` (for development environments)
5+
The metadata is resolved in the following order:
6+
1. Installed package metadata (via importlib.metadata)
7+
2. pyproject.toml (for development environments)
78
3. Fallback defaults
89
910
"""

0 commit comments

Comments
 (0)