Skip to content

Commit c85e8c2

Browse files
authored
remove unused function (#516)
1 parent 0df605f commit c85e8c2

File tree

1 file changed

+0
-19
lines changed

1 file changed

+0
-19
lines changed

fastembed/late_interaction/token_embeddings.py

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -70,12 +70,6 @@ def embed(
70 70
) -> Iterable[NumpyArray]:
71 71
yield from super().embed(documents, batch_size=batch_size, parallel=parallel, **kwargs)
72 72

73-
def tokenize_docs(self, documents: list[str]) -> list[NumpyArray]:
74-
if self.tokenizer is None:
75-
raise ValueError("Tokenizer not initialized")
76-
encoded = self.tokenizer.encode_batch(documents)
77-
return [np.array(e.ids, dtype=np.int32) for e in encoded]
78-
79 73

80 74
class TokensEmbeddingWorker(TextEmbeddingWorker[NumpyArray]):
81 75
def init_embedding(
@@ -87,16 +81,3 @@ def init_embedding(
87 81
threads=1,
88 82
**kwargs,
89 83
)
90-
91-
92-
if __name__ == "__main__":
93-
# Example usage
94-
print(TokenEmbeddingsModel.list_supported_models())
95-
model = TokenEmbeddingsModel(model_name="jinaai/jina-embeddings-v2-small-en-tokens")
96-
docs = ["Hello, world!", "hello", "hello hello"]
97-
98-
embeddings = model.embed(docs)
99-
for emb in embeddings:
100-
print(emb.shape)
101-
102-
print(model.tokenize_docs(docs))

0 commit comments

Comments (0)