File tree Expand file tree Collapse file tree 3 files changed +47
-0
lines changed Expand file tree Collapse file tree 3 files changed +47
-0
lines changed Original file line number Diff line number Diff line change 3
3
# Embedding backends are registered through LazyImport under the name the
# factory functions below refer to.
# NOTE(review): presumably LazyImport defers the real import until first use,
# so optional dependencies are only required when a backend is requested -
# confirm against the LazyImport implementation.
huggingface = LazyImport("huggingface", globals(), "modelcache.embedding.huggingface")
data2vec = LazyImport("data2vec", globals(), "modelcache.embedding.data2vec")
llmEmb = LazyImport("llmEmb", globals(), "modelcache.embedding.llmEmb")
# The fasttext backend is registered under the modelcache package like its
# siblings; the previous "gptcache.embedding.fasttext" target pointed at a
# different project and would fail (or load foreign code) at first use.
fasttext = LazyImport("fasttext", globals(), "modelcache.embedding.fasttext")
6
7
7
8
8
9
def Huggingface (model = "sentence-transformers/all-mpnet-base-v2" ):
@@ -15,3 +16,7 @@ def Data2VecAudio(model="facebook/data2vec-audio-base-960h"):
15
16
16
17
def LlmEmb2vecAudio():
    """Create the LLM-based embedding backend.

    Returns:
        The object produced by calling ``llmEmb.LlmEmb2Vec()`` with no
        arguments.
    """
    embedder = llmEmb.LlmEmb2Vec()
    return embedder
19
+
20
+
21
def FastText(model="en", dim=None):
    """Create the fastText embedding backend.

    Args:
        model: Model identifier forwarded unchanged to the backend
            constructor (default ``"en"``).
        dim: Optional dimension forwarded unchanged to the backend
            constructor (default ``None``).

    Returns:
        The object produced by ``fasttext.FastText(model, dim)``.
    """
    embedder = fasttext.FastText(model, dim)
    return embedder
Original file line number Diff line number Diff line change
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Alipay.com Inc.
4
+ Copyright (c) 2004-2023 All Rights Reserved.
5
+ ------------------------------------------------------
6
+ File Name : fasttext.py
7
+ Author : fuhui.phe
8
+ Create Time : 2023/12/3 15:40
9
+ Description : FastText-based sentence embedding (BaseEmbedding implementation)
10
+ Change Activity:
11
+ version0 : 2023/12/3 15:40 by fuhui.phe init
12
+ """
13
+ import numpy as np
14
+ import os
15
+ from modelcache .utils import import_fasttext
16
+ from modelcache .embedding .base import BaseEmbedding
17
+ import_fasttext ()
18
+ import fasttext .util
19
+
20
+
21
class FastText(BaseEmbedding):
    """Sentence embedding backed by a fastText model.

    Args:
        model: Identifier passed to ``fasttext.util.download_model``
            (default ``"en"``).
        dim: Optional target dimension. When truthy, the loaded model is
            reduced with ``fasttext.util.reduce_model``; ``None`` (or ``0``)
            keeps the model's own dimension.
    """

    def __init__(self, model: str = "en", dim: int = None):
        # NOTE(review): download_model presumably returns the local model
        # file name, fetching it first if needed - confirm against fastText.
        self.model_path = os.path.abspath(fasttext.util.download_model(model))
        self.ft = fasttext.load_model(self.model_path)

        if dim:
            fasttext.util.reduce_model(self.ft, dim)
        # Read the dimension after the optional reduction so it reflects the
        # vectors this instance actually produces.
        self.__dimension = self.ft.get_dimension()

    def to_embeddings(self, data, **_):
        """Embed a single string.

        Args:
            data: The text to embed; must be a ``str``.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            A ``float32`` NumPy array built from
            ``self.ft.get_sentence_vector(data)``.

        Raises:
            AssertionError: If ``data`` is not a ``str``.
        """
        # Raised explicitly (instead of using an ``assert`` statement) so the
        # check is not stripped under ``python -O``; the exception type and
        # message are kept for existing callers.
        if not isinstance(data, str):
            raise AssertionError("Only allow string as input.")
        emb = self.ft.get_sentence_vector(data)
        return np.array(emb, dtype="float32")

    @property
    def dimension(self):
        """Dimension reported by the loaded model at construction time."""
        return self.__dimension
38
+
Original file line number Diff line number Diff line change @@ -48,3 +48,7 @@ def import_faiss():
48
48
49
49
def import_torch():
    """Run ``_check_library`` for the ``torch`` package.

    NOTE(review): presumably this verifies (or installs) the dependency -
    confirm against ``_check_library``.
    """
    required_package = "torch"
    _check_library(required_package)
51
+
52
+
53
def import_fasttext():
    """Run ``_check_library`` for the ``fasttext`` package.

    NOTE(review): presumably this verifies (or installs) the dependency -
    confirm against ``_check_library``.
    """
    required_package = "fasttext"
    _check_library(required_package)
You can’t perform that action at this time.
0 commit comments