Skip to content

Commit a624749

Browse files
authored
Add ruff rules for docstrings (#576)
1 parent f491241 commit a624749

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+434
-374
lines changed

libs/colbert/ragstack_colbert/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""
1+
"""Ragstack Colbert: A ColBERT-based text retrieval system.
2+
23
This package provides a suite of tools for encoding and retrieving text using the
34
ColBERT model, integrated with a Cassandra database for scalable storage and retrieval
45
operations. It includes classes for token embeddings, managing the vector store, and
Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
"""
2-
This module defines abstract base classes for implementing storage mechanisms for text
3-
chunk embeddings, specifically designed to work with ColBERT or similar embedding
1+
"""Base Database module.
2+
3+
This module defines abstract base classes for implementing storage mechanisms for
4+
text chunk embeddings, specifically designed to work with ColBERT or similar embedding
45
models.
56
"""
67

@@ -11,7 +12,8 @@
1112

1213

1314
class BaseDatabase(ABC):
14-
"""
15+
"""Base Database abstract class for ColBERT.
16+
1517
Abstract base class (ABC) for a storage system designed to hold vector
1618
representations of text chunks, typically generated by a ColBERT model or similar
1719
embedding model.
@@ -23,10 +25,9 @@ class BaseDatabase(ABC):
2325

2426
@abstractmethod
2527
def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
26-
"""
27-
Stores a list of embedded text chunks in the vector store
28+
"""Stores a list of embedded text chunks in the vector store.
2829
29-
Parameters:
30+
Args:
3031
chunks (List[Chunk]): A list of `Chunk` instances to be stored.
3132
3233
Returns:
@@ -35,10 +36,9 @@ def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
3536

3637
@abstractmethod
3738
def delete_chunks(self, doc_ids: List[str]) -> bool:
38-
"""
39-
Deletes chunks from the vector store based on their document id.
39+
"""Deletes chunks from the vector store based on their document id.
4040
41-
Parameters:
41+
Args:
4242
doc_ids (List[str]): A list of document identifiers specifying the chunks
4343
to be deleted.
4444
@@ -50,10 +50,9 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:
5050
async def aadd_chunks(
5151
self, chunks: List[Chunk], concurrent_inserts: Optional[int] = 100
5252
) -> List[Tuple[str, int]]:
53-
"""
54-
Stores a list of embedded text chunks in the vector store
53+
"""Stores a list of embedded text chunks in the vector store.
5554
56-
Parameters:
55+
Args:
5756
chunks (List[Chunk]): A list of `Chunk` instances to be stored.
5857
concurrent_inserts (Optional[int]): How many concurrent inserts to make to
5958
the database. Defaults to 100.
@@ -66,10 +65,9 @@ async def aadd_chunks(
6665
async def adelete_chunks(
6766
self, doc_ids: List[str], concurrent_deletes: Optional[int] = 100
6867
) -> bool:
69-
"""
70-
Deletes chunks from the vector store based on their document id.
68+
"""Deletes chunks from the vector store based on their document id.
7169
72-
Parameters:
70+
Args:
7371
doc_ids (List[str]): A list of document identifiers specifying the chunks
7472
to be deleted.
7573
concurrent_deletes (Optional[int]): How many concurrent deletes to make
@@ -81,8 +79,7 @@ async def adelete_chunks(
8179

8280
@abstractmethod
8381
async def search_relevant_chunks(self, vector: Vector, n: int) -> List[Chunk]:
84-
"""
85-
Retrieves 'n' ANN results for an embedded token vector.
82+
"""Retrieves 'n' ANN results for an embedded token vector.
8683
8784
Returns:
8885
A list of Chunks with only `doc_id` and `chunk_id` set.
@@ -91,8 +88,7 @@ async def search_relevant_chunks(self, vector: Vector, n: int) -> List[Chunk]:
9188

9289
@abstractmethod
9390
async def get_chunk_embedding(self, doc_id: str, chunk_id: int) -> Chunk:
94-
"""
95-
Retrieve the embedding data for a chunk.
91+
"""Retrieve the embedding data for a chunk.
9692
9793
Returns:
9894
A chunk with `doc_id`, `chunk_id`, and `embedding` set.
@@ -102,8 +98,7 @@ async def get_chunk_embedding(self, doc_id: str, chunk_id: int) -> Chunk:
10298
async def get_chunk_data(
10399
self, doc_id: str, chunk_id: int, include_embedding: Optional[bool]
104100
) -> Chunk:
105-
"""
106-
Retrieve the text and metadata for a chunk.
101+
"""Retrieve the text and metadata for a chunk.
107102
108103
Returns:
109104
A chunk with `doc_id`, `chunk_id`, `text`, `metadata`, and optionally
@@ -112,6 +107,4 @@ async def get_chunk_data(
112107

113108
@abstractmethod
114109
def close(self) -> None:
115-
"""
116-
Cleans up any open resources.
117-
"""
110+
"""Cleans up any open resources."""

libs/colbert/ragstack_colbert/base_embedding_model.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
"""
2-
This module defines an abstract base class (ABC) for generating token-based embeddings
3-
for text.
1+
"""Base embedding for ColBERT.
2+
3+
This module defines an abstract base class (ABC) for generating token-based
4+
embeddings for text.
45
"""
56

67
from abc import ABC, abstractmethod
@@ -10,8 +11,7 @@
1011

1112

1213
class BaseEmbeddingModel(ABC):
13-
"""
14-
Abstract base class (ABC) for token-based embedding models.
14+
"""Abstract base class (ABC) for token-based embedding models.
1515
1616
This class defines the interface for models that generate embeddings for text
1717
chunks and queries.
@@ -22,11 +22,9 @@ class BaseEmbeddingModel(ABC):
2222

2323
@abstractmethod
2424
def embed_texts(self, texts: List[str]) -> List[Embedding]:
25-
"""
26-
Embeds a list of texts into their corresponding vector embedding
27-
representations.
25+
"""Embeds a list of texts into their vector embedding representations.
2826
29-
Parameters:
27+
Args:
3028
texts (List[str]): A list of string texts.
3129
3230
Returns:
@@ -40,13 +38,12 @@ def embed_query(
4038
full_length_search: Optional[bool] = False,
4139
query_maxlen: int = -1,
4240
) -> Embedding:
43-
"""
44-
Embeds a single query text into its vector representation.
41+
"""Embeds a single query text into its vector representation.
4542
4643
If the query has fewer than query_maxlen tokens it will be padded with BERT
4744
special [MASK] tokens.
4845
49-
Parameters:
46+
Args:
5047
query (str): The query text to encode.
5148
full_length_search (Optional[bool]): Indicates whether to encode the
5249
query for a full-length search. Defaults to False.

libs/colbert/ragstack_colbert/base_retriever.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""
1+
"""Base retriever module.
2+
23
This module defines abstract base classes for implementing retrieval mechanisms for
34
text chunk embeddings, specifically designed to work with ColBERT or similar embedding
45
models.
@@ -11,9 +12,10 @@
1112

1213

1314
class BaseRetriever(ABC):
14-
"""
15-
Abstract base class (ABC) for a retrieval system that operates on a ColBERT vector
16-
store, facilitating the search and retrieval of text chunks based on query
15+
"""Base Retriever abstract class for ColBERT.
16+
17+
Abstract base class (ABC) for a retrieval system that operates on a ColBERT
18+
vector store, facilitating the search and retrieval of text chunks based on query
1719
embeddings.
1820
"""
1921

@@ -26,11 +28,12 @@ def embedding_search(
2628
include_embedding: Optional[bool] = False,
2729
**kwargs: Any,
2830
) -> List[Tuple[Chunk, float]]:
29-
"""
31+
"""Search for relevant text chunks based on a query embedding.
32+
3033
Retrieves a list of text chunks relevant to a given query from the vector
3134
store, ranked by relevance or other metrics.
3235
33-
Parameters:
36+
Args:
3437
query_embedding (Embedding): The query embedding to search for relevant
3538
text chunks.
3639
k (Optional[int]): The number of top results to retrieve.
@@ -54,11 +57,12 @@ async def aembedding_search(
5457
include_embedding: Optional[bool] = False,
5558
**kwargs: Any,
5659
) -> List[Tuple[Chunk, float]]:
57-
"""
60+
"""Search for relevant text chunks based on a query embedding.
61+
5862
Retrieves a list of text chunks relevant to a given query from the vector
5963
store, ranked by relevance or other metrics.
6064
61-
Parameters:
65+
Args:
6266
query_embedding (Embedding): The query embedding to search for relevant
6367
text chunks.
6468
k (Optional[int]): The number of top results to retrieve.
@@ -83,11 +87,12 @@ def text_search(
8387
include_embedding: Optional[bool] = False,
8488
**kwargs: Any,
8589
) -> List[Tuple[Chunk, float]]:
86-
"""
90+
"""Search for relevant text chunks based on a query text.
91+
8792
Retrieves a list of text chunks relevant to a given query from the vector
8893
store, ranked by relevance or other metrics.
8994
90-
Parameters:
95+
Args:
9196
query_text (str): The query text to search for relevant text chunks.
9297
k (Optional[int]): The number of top results to retrieve.
9398
query_maxlen (Optional[int]): The maximum length of the query to consider.
@@ -113,11 +118,12 @@ async def atext_search(
113118
include_embedding: Optional[bool] = False,
114119
**kwargs: Any,
115120
) -> List[Tuple[Chunk, float]]:
116-
"""
121+
"""Search for relevant text chunks based on a query text.
122+
117123
Retrieves a list of text chunks relevant to a given query from the vector
118124
store, ranked by relevance or other metrics.
119125
120-
Parameters:
126+
Args:
121127
query_text (str): The query text to search for relevant text chunks.
122128
k (Optional[int]): The number of top results to retrieve.
123129
query_maxlen (Optional[int]): The maximum length of the query to consider.

libs/colbert/ragstack_colbert/base_vector_store.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""
1+
"""Base Vector Store module for ColBERT.
2+
23
This module defines the abstract base class for a standard vector store
34
specifically designed to work with ColBERT or similar dense embedding models,
45
and can be used to create a LangChain or LlamaIndex ColBERT vector store.
@@ -23,7 +24,8 @@
2324

2425

2526
class BaseVectorStore(ABC):
26-
"""
27+
"""Base Vector Store abstract class for ColBERT.
28+
2729
Abstract base class (ABC) for a storage system designed to hold vector
2830
representations of text chunks, typically generated by a ColBERT model or similar
2931
embedding model.
@@ -36,10 +38,9 @@ class BaseVectorStore(ABC):
3638
# handles LlamaIndex add
3739
@abstractmethod
3840
def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
39-
"""
40-
Stores a list of embedded text chunks in the vector store
41+
"""Stores a list of embedded text chunks in the vector store.
4142
42-
Parameters:
43+
Args:
4344
chunks (List[Chunk]): A list of `Chunk` instances to be stored.
4445
4546
Returns:
@@ -54,11 +55,12 @@ def add_texts(
5455
metadatas: Optional[List[Metadata]],
5556
doc_id: Optional[str] = None,
5657
) -> List[Tuple[str, int]]:
57-
"""
58+
"""Adds text chunks to the vector store.
59+
5860
Embeds and stores a list of text chunks and optional metadata into the vector
5961
store.
6062
61-
Parameters:
63+
Args:
6264
texts (List[str]): The list of text chunks to be embedded
6365
metadatas (Optional[List[Metadata]]): An optional list of Metadata to be
6466
stored. If provided, these are set 1 to 1 with the texts list.
@@ -72,10 +74,9 @@ def add_texts(
7274
# handles LangChain and LlamaIndex delete
7375
@abstractmethod
7476
def delete_chunks(self, doc_ids: List[str]) -> bool:
75-
"""
76-
Deletes chunks from the vector store based on their document id.
77+
"""Deletes chunks from the vector store based on their document id.
7778
78-
Parameters:
79+
Args:
7980
doc_ids (List[str]): A list of document identifiers specifying the chunks
8081
to be deleted.
8182
@@ -88,10 +89,9 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:
8889
async def aadd_chunks(
8990
self, chunks: List[Chunk], concurrent_inserts: Optional[int] = 100
9091
) -> List[Tuple[str, int]]:
91-
"""
92-
Stores a list of embedded text chunks in the vector store
92+
"""Stores a list of embedded text chunks in the vector store.
9393
94-
Parameters:
94+
Args:
9595
chunks (List[Chunk]): A list of `Chunk` instances to be stored.
9696
concurrent_inserts (Optional[int]): How many concurrent inserts to make to
9797
the database. Defaults to 100.
@@ -109,11 +109,12 @@ async def aadd_texts(
109109
doc_id: Optional[str] = None,
110110
concurrent_inserts: Optional[int] = 100,
111111
) -> List[Tuple[str, int]]:
112-
"""
112+
"""Adds text chunks to the vector store.
113+
113114
Embeds and stores a list of text chunks and optional metadata into the vector
114115
store.
115116
116-
Parameters:
117+
Args:
117118
texts (List[str]): The list of text chunks to be embedded
118119
metadatas (Optional[List[Metadata]]): An optional list of Metadata to be
119120
stored. If provided, these are set 1 to 1 with the texts list.
@@ -131,10 +132,9 @@ async def aadd_texts(
131132
async def adelete_chunks(
132133
self, doc_ids: List[str], concurrent_deletes: Optional[int] = 100
133134
) -> bool:
134-
"""
135-
Deletes chunks from the vector store based on their document id.
135+
"""Deletes chunks from the vector store based on their document id.
136136
137-
Parameters:
137+
Args:
138138
doc_ids (List[str]): A list of document identifiers specifying the chunks
139139
to be deleted.
140140
concurrent_deletes (Optional[int]): How many concurrent deletes to make to
@@ -147,6 +147,4 @@ async def adelete_chunks(
147147
# handles LangChain as_retriever
148148
@abstractmethod
149149
def as_retriever(self) -> BaseRetriever:
150-
"""
151-
Gets a retriever using the vector store.
152-
"""
150+
"""Gets a retriever using the vector store."""

0 commit comments

Comments
 (0)