Skip to content

Commit 55191d5

Browse files
authored
Refactor/examples for modules (#54)
1 parent 5a1e6b6 commit 55191d5

File tree

12 files changed

+397
-26
lines changed

12 files changed

+397
-26
lines changed

autointent/modules/prediction/_adaptive.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,29 @@ class AdaptivePredictor(PredictionModule):
4545
:ivar _r: Scaling factor for thresholds.
4646
:ivar tags: List of Tag objects for mutually exclusive classes.
4747
:ivar name: Name of the predictor, defaults to "adaptive".
48+
49+
Examples
50+
--------
51+
>>> from autointent.modules import AdaptivePredictor
52+
>>> import numpy as np
53+
>>> scores = np.array([[0.8, 0.1, 0.4], [0.2, 0.9, 0.5]])
54+
>>> labels = [[1, 0, 0], [0, 1, 0]]
55+
>>> search_space = [0.1, 0.2, 0.3, 0.5, 0.7]
56+
>>> predictor = AdaptivePredictor(search_space=search_space)
57+
>>> predictor.fit(scores, labels)
58+
>>> predictions = predictor.predict(scores)
59+
>>> print(predictions)
60+
[[1 0 0]
61+
[0 1 0]]
62+
63+
Save and load the predictor:
64+
>>> predictor.dump("outputs/")
65+
>>> predictor_loaded = AdaptivePredictor()
66+
>>> predictor_loaded.load("outputs/")
67+
>>> predictions = predictor_loaded.predict(scores)
68+
>>> print(predictions)
69+
[[1 0 0]
70+
[0 1 0]]
4871
"""
4972

5073
metadata_dict_name = "metadata.json"

autointent/modules/prediction/_argmax.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,35 @@ class ArgmaxPredictorDumpMetadata(BaseMetadataDict):
2323

2424

2525
class ArgmaxPredictor(PredictionModule):
26-
"""Argmax prediction module."""
26+
"""
27+
Argmax prediction module.
28+
29+
The ArgmaxPredictor is a simple predictor that selects the class with the highest
30+
score (argmax) for single-label classification tasks.
31+
32+
:ivar n_classes: Number of classes in the dataset.
33+
34+
Examples
35+
--------
36+
>>> from autointent.modules import ArgmaxPredictor
37+
>>> import numpy as np
38+
>>> predictor = ArgmaxPredictor()
39+
>>> train_scores = np.array([[0.2, 0.8, 0.0], [0.7, 0.1, 0.2]])
40+
>>> labels = [1, 0] # Single-label targets
41+
>>> predictor.fit(train_scores, labels)
42+
>>> test_scores = np.array([[0.1, 0.5, 0.4], [0.6, 0.3, 0.1]])
43+
>>> predictions = predictor.predict(test_scores)
44+
>>> print(predictions)
45+
[1 0]
46+
47+
Save and load the predictor's state:
48+
>>> predictor.dump("outputs/")
49+
>>> loaded_predictor = ArgmaxPredictor()
50+
>>> loaded_predictor.load("outputs/")
51+
>>> loaded_predictions = loaded_predictor.predict(test_scores)
52+
>>> print(loaded_predictions)
53+
[1 0]
54+
"""
2755

2856
name = "argmax"
2957
n_classes: int

autointent/modules/prediction/_jinoos.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,37 @@ class JinoosPredictorDumpMetadata(BaseMetadataDict):
2626

2727

2828
class JinoosPredictor(PredictionModule):
29-
"""Jinoos predictor module."""
29+
"""
30+
Jinoos predictor module.
31+
32+
JinoosPredictor predicts the best scores for single-label classification tasks
33+
and detects out-of-scope (OOS) samples based on a threshold.
34+
35+
:ivar thresh: The optimized threshold value for OOS detection.
36+
:ivar name: Name of the predictor, defaults to "jinoos".
37+
:ivar n_classes: Number of classes determined during fitting.
38+
39+
Examples
40+
--------
41+
>>> from autointent.modules import JinoosPredictor
42+
>>> import numpy as np
43+
>>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
44+
>>> labels = [1, 0, 1]
45+
>>> search_space = [0.3, 0.5, 0.7]
46+
>>> predictor = JinoosPredictor(search_space=search_space)
47+
>>> predictor.fit(scores, labels)
48+
>>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
49+
>>> predictions = predictor.predict(test_scores)
50+
>>> print(predictions)
51+
[1 0]
52+
53+
Save and load the predictor state:
54+
>>> predictor.dump("outputs/")
55+
>>> loaded_predictor = JinoosPredictor()
56+
>>> loaded_predictor.load("outputs/")
57+
>>> print(loaded_predictor.thresh)  # Example threshold from the search space
58+
0.5
59+
"""
3060

3161
thresh: float
3262
name = "jinoos"

autointent/modules/prediction/_threshold.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,49 @@ class ThresholdPredictorDumpMetadata(BaseMetadataDict):
2929

3030

3131
class ThresholdPredictor(PredictionModule):
32-
"""Threshold predictor module."""
32+
"""
33+
Threshold predictor module.
34+
35+
ThresholdPredictor uses a predefined threshold (or array of thresholds) to predict
36+
labels for single-label or multi-label classification tasks.
37+
38+
:ivar metadata_dict_name: Filename for saving metadata to disk.
39+
:ivar multilabel: If True, the model supports multi-label classification.
40+
:ivar n_classes: Number of classes in the dataset.
41+
:ivar tags: Tags for predictions (if any).
42+
:ivar name: Name of the predictor, defaults to "threshold".
43+
44+
Examples
45+
--------
46+
Single-label classification example:
47+
>>> from autointent.modules import ThresholdPredictor
48+
>>> import numpy as np
49+
>>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
50+
>>> labels = [1, 0, 1]
51+
>>> threshold = 0.5
52+
>>> predictor = ThresholdPredictor(thresh=threshold)
53+
>>> predictor.fit(scores, labels)
54+
>>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
55+
>>> predictions = predictor.predict(test_scores)
56+
>>> print(predictions)
57+
[1 0]
58+
59+
Multi-label classification example:
60+
>>> labels = [[1, 0], [0, 1], [1, 1]]
61+
>>> predictor = ThresholdPredictor(thresh=[0.5, 0.5])
62+
>>> predictor.fit(scores, labels)
63+
>>> test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
64+
>>> predictions = predictor.predict(test_scores)
65+
>>> print(predictions)
66+
[[0 1] [1 0]]
67+
68+
Save and load the model:
69+
>>> predictor.dump("outputs/")
70+
>>> loaded_predictor = ThresholdPredictor(thresh=0.5)
71+
>>> loaded_predictor.load("outputs/")
72+
>>> print(loaded_predictor.thresh)
73+
0.5
74+
"""
3375

3476
metadata: ThresholdPredictorDumpMetadata
3577
multilabel: bool
@@ -45,9 +87,6 @@ def __init__(
4587
Initialize threshold predictor.
4688
4789
:param thresh: Threshold for the scores, shape (n_classes,) or float
48-
:param multilabel: If multilabel classification, default False
49-
:param n_classes: Number of classes, default None
50-
:param tags: Tags for predictions, default None
5190
"""
5291
self.thresh = thresh
5392

autointent/modules/prediction/_tunable.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,48 @@ class TunablePredictorDumpMetadata(BaseMetadataDict):
3030

3131

3232
class TunablePredictor(PredictionModule):
33-
"""Tunable predictor module."""
33+
"""
34+
Tunable predictor module.
35+
36+
TunablePredictor uses an optimization process to find the best thresholds for predicting labels
37+
in single-label or multi-label classification tasks. It is designed for datasets with varying
38+
score distributions and supports out-of-scope (OOS) detection.
39+
40+
:ivar name: Name of the predictor, defaults to "tunable".
41+
:ivar multilabel: Whether the task is multi-label classification.
42+
:ivar n_classes: Number of classes determined during fitting.
43+
:ivar tags: Tags for predictions, if any.
44+
45+
Examples
46+
--------
47+
Single-label classification:
48+
>>> import numpy as np
49+
>>> from autointent.modules import TunablePredictor
50+
>>> scores = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])
51+
>>> labels = [1, 0, 1]
52+
>>> predictor = TunablePredictor(n_trials=100, seed=42)
53+
>>> predictor.fit(scores, labels)
54+
>>> test_scores = np.array([[0.3, 0.7], [0.5, 0.5]])
55+
>>> predictions = predictor.predict(test_scores)
56+
>>> print(predictions)
57+
[1 0]
58+
59+
Multi-label classification:
60+
>>> labels = [[1, 0], [0, 1], [1, 1]]
61+
>>> predictor = TunablePredictor(n_trials=100, seed=42)
62+
>>> predictor.fit(scores, labels)
63+
>>> test_scores = np.array([[0.3, 0.7], [0.6, 0.4]])
64+
>>> predictions = predictor.predict(test_scores)
65+
>>> print(predictions)
66+
[[0 1] [1 0]]
67+
68+
Saving and loading the model:
69+
>>> predictor.dump("outputs/")
70+
>>> loaded_predictor = TunablePredictor()
71+
>>> loaded_predictor.load("outputs/")
72+
>>> print(loaded_predictor.thresh)
73+
[0.5, 0.7]
74+
"""
3475

3576
name = "tunable"
3677
multilabel: bool

autointent/modules/retrieval/_vectordb.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,39 @@ class VectorDBMetadata(BaseMetadataDict):
2424

2525

2626
class VectorDBModule(RetrievalModule):
27-
"""
27+
r"""
2828
Module for managing retrieval operations using a vector database.
2929
30-
This class provides methods for indexing, querying, and managing a vector database for tasks
30+
VectorDBModule provides methods for indexing, querying, and managing a vector database for tasks
3131
such as nearest neighbor retrieval.
32+
33+
:ivar vector_index: The vector index used for nearest neighbor retrieval.
34+
:ivar name: Name of the module, defaults to "vector_db".
35+
36+
Examples
37+
--------
38+
Creating and fitting the VectorDBModule:
39+
>>> from autointent.modules import VectorDBModule
40+
>>> utterances = ["hello world", "how are you?", "good morning"]
41+
>>> labels = [1, 2, 3]
42+
>>> vector_db = VectorDBModule(k=2, embedder_name="some_embedder", db_dir="./db", device="cpu")
43+
>>> vector_db.fit(utterances, labels)
44+
>>> def retrieval_metric_fn(true_labels, predicted_labels):
45+
...     # Custom metric function (e.g., accuracy or F1 score)
46+
...     return sum([1 if true == pred else 0 for true, pred
47+
...                 in zip(true_labels, predicted_labels)]) / len(true_labels)
48+
>>> score = vector_db.score(context, retrieval_metric_fn)
49+
>>> print(score)
50+
51+
Performing predictions:
52+
>>> predictions = vector_db.predict(["how is the weather today?"])
53+
>>> print(predictions)
54+
55+
Saving and loading the model:
56+
>>> vector_db.dump("outputs/")
57+
>>> loaded_vector_db = VectorDBModule(k=2, embedder_name="some_embedder", db_dir="./db", device="cpu")
58+
>>> loaded_vector_db.load("outputs/")
59+
>>> print(loaded_vector_db.vector_index)
3260
"""
3361

3462
vector_index: VectorIndex

autointent/modules/scoring/_description/description.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,39 @@ class DescriptionScorerDumpMetadata(TypedDict):
2727

2828

2929
class DescriptionScorer(ScoringModule):
30-
"""Scoring module that scores utterances based on similarity to intent descriptions."""
30+
r"""
31+
Scoring module that scores utterances based on similarity to intent descriptions.
32+
33+
DescriptionScorer embeds both the utterances and the intent descriptions, then computes a similarity score
34+
between the two, using cosine similarity and softmax.
35+
36+
:ivar weights_file_name: Filename for saving the description vectors (`description_vectors.npy`).
37+
:ivar embedder: The embedder used to generate embeddings for utterances and descriptions.
38+
:ivar precomputed_embeddings: Flag indicating whether precomputed embeddings are used.
39+
:ivar embedding_model_subdir: Directory for storing the embedder's model files.
40+
:ivar _vector_index: Internal vector index used when embeddings are precomputed.
41+
:ivar db_dir: Directory path where the vector database is stored.
42+
:ivar name: Name of the scorer, defaults to "description".
43+
44+
Examples
45+
--------
46+
Creating and fitting the DescriptionScorer:
47+
>>> from autointent.modules import DescriptionScorer
48+
>>> utterances = ["what is your name?", "how old are you?"]
49+
>>> labels = [0, 1]
50+
>>> descriptions = ["greeting", "age-related question"]
51+
>>> scorer = DescriptionScorer(embedder_name="your_embedder", temperature=1.0)
52+
>>> scorer.fit(utterances, labels, descriptions)
53+
54+
Predicting scores:
55+
>>> scores = scorer.predict(["tell me about your age?"])
56+
>>> print(scores) # Outputs similarity scores for the utterance against all descriptions
57+
58+
Saving and loading the scorer:
59+
>>> scorer.dump("outputs/")
60+
>>> loaded_scorer = DescriptionScorer(embedder_name="your_embedder")
61+
>>> loaded_scorer.load("outputs/")
62+
"""
3163

3264
weights_file_name: str = "description_vectors.npy"
3365
embedder: Embedder

autointent/modules/scoring/_dnnc/dnnc.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class DNNCScorerDumpMetadata(BaseMetadataDict):
3131

3232

3333
class DNNCScorer(ScoringModule):
34-
"""
34+
r"""
3535
Scoring module for intent classification using a discriminative nearest neighbor classification (DNNC).
3636
3737
This module uses a CrossEncoder for scoring candidate intents and can optionally
@@ -50,6 +50,45 @@ class DNNCScorer(ScoringModule):
5050
url={https://arxiv.org/abs/2010.13009},
5151
}
5252
53+
:ivar crossencoder_subdir: Subdirectory for storing the cross-encoder model (`crossencoder`).
54+
:ivar model: The model used for scoring, which could be a `CrossEncoder` or a `CrossEncoderWithLogreg`.
55+
:ivar prebuilt_index: Flag indicating whether a prebuilt vector index is used.
56+
:ivar _db_dir: Path to the database directory where the vector index is stored.
57+
:ivar name: Name of the scorer, defaults to "dnnc".
58+
59+
Examples
60+
--------
61+
Creating and fitting the DNNCScorer:
62+
>>> from autointent.modules import DNNCScorer
63+
>>> utterances = ["what is your name?", "how are you?"]
64+
>>> labels = ["greeting", "greeting"]
65+
>>> scorer = DNNCScorer(
66+
...     cross_encoder_name="cross_encoder_model",
67+
...     embedder_name="embedder_model",
68+
...     k=5,
69+
...     db_dir="/path/to/database",
70+
...     device="cuda",
71+
...     train_head=True,
72+
...     batch_size=32,
73+
...     max_length=128
74+
... )
75+
>>> scorer.fit(utterances, labels)
76+
77+
Predicting scores:
78+
>>> test_utterances = ["Hello!", "What's up?"]
79+
>>> scores = scorer.predict(test_utterances)
80+
>>> print(scores) # Outputs similarity scores for the utterances
81+
82+
Saving and loading the scorer:
83+
>>> scorer.dump("outputs/")
84+
>>> loaded_scorer = DNNCScorer(
85+
...     cross_encoder_name="cross_encoder_model",
86+
...     embedder_name="embedder_model",
87+
...     k=5,
88+
...     db_dir="/path/to/database",
89+
...     device="cuda"
90+
... )
91+
>>> loaded_scorer.load("outputs/")
5392
"""
5493

5594
name = "dnnc"

autointent/modules/scoring/_dnnc/head_training.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,36 @@ def construct_samples(
5959

6060

6161
class CrossEncoderWithLogreg:
62-
"""
62+
r"""
6363
Cross-encoder with logistic regression for binary classification.
6464
6565
This class uses a SentenceTransformers CrossEncoder model to extract features
6666
and LogisticRegressionCV for classification.
67+
68+
:ivar cross_encoder: The CrossEncoder model used to extract features.
69+
:ivar batch_size: Batch size for processing text pairs.
70+
:ivar _clf: The trained LogisticRegressionCV classifier.
71+
:ivar model_subdir: Directory for storing the cross-encoder model files.
72+
73+
Examples
74+
--------
75+
Creating and fitting the CrossEncoderWithLogreg:
76+
>>> from autointent.modules import CrossEncoderWithLogreg
77+
>>> from sentence_transformers import CrossEncoder
78+
>>> model = CrossEncoder("cross-encoder-model")
79+
>>> scorer = CrossEncoderWithLogreg(model)
80+
>>> utterances = ["What is your name?", "How old are you?"]
81+
>>> labels = [1, 0]
82+
>>> scorer.fit(utterances, labels)
83+
84+
Predicting probabilities:
85+
>>> test_pairs = [["What is your name?", "Hello!"], ["How old are you?", "What is your age?"]]
86+
>>> probs = scorer.predict(test_pairs)
87+
>>> print(probs)
88+
89+
Saving and loading the model:
90+
>>> scorer.save("outputs/")
91+
>>> loaded_scorer = CrossEncoderWithLogreg.load("outputs/")
6792
"""
6893

6994
def __init__(self, model: CrossEncoder, batch_size: int = 326) -> None:

0 commit comments

Comments
 (0)