55)
66from embedders .classification .contextual import TransformerSentenceEmbedder
77from embedders .extraction .count_based import BagOfCharsTokenEmbedder
8- from embedders .extraction .contextual import (
9- SkipGramTokenEmbedder ,
10- TransformerTokenEmbedder ,
11- )
8+ from embedders .extraction .contextual import TransformerTokenEmbedder
129from embedders .classification .reduce import PCASentenceReducer
1310from embedders .extraction .reduce import PCATokenReducer
1411from embedders import Transformer
1512
1613from submodules .model .business_objects import record
1714
15+
1816def get_embedder (
19- project_id : str , embedding_type : str , config_string : str , language_code : str ,
17+ project_id : str ,
18+ embedding_type : str ,
19+ config_string : str ,
20+ language_code : str ,
2021) -> Transformer :
2122 if embedding_type == "classification" :
2223 batch_size = 128
@@ -26,18 +27,16 @@ def get_embedder(
2627 elif config_string == "bag-of-words" :
2728 embedder = BagOfWordsSentenceEmbedder (batch_size = batch_size )
2829 elif config_string == "tf-idf" :
29- embedder = TfidfSentenceEmbedder (batch_size = batch_size ),
30- elif config_string == "word2vec" :
31- return None
30+ embedder = TfidfSentenceEmbedder (batch_size = batch_size )
3231 else :
3332 embedder = TransformerSentenceEmbedder (
34- config_string = config_string , batch_size = batch_size
35- )
33+ config_string = config_string , batch_size = batch_size
34+ )
3635
3736 if record .count (project_id ) < n_components :
3837 return embedder
3938 else :
40- return PCASentenceReducer (embedder , n_components = n_components )
39+ return PCASentenceReducer (embedder , n_components = n_components )
4140
4241 else : # extraction
4342 batch_size = 32
@@ -52,12 +51,6 @@ def get_embedder(
5251 return None
5352 if config_string == "tf-idf" :
5453 return None
55- if config_string == "word2vec" :
56- return SkipGramTokenEmbedder (
57- language_code = language_code ,
58- precomputed_docs = True ,
59- batch_size = batch_size ,
60- )
6154 else :
6255 return PCATokenReducer (
6356 TransformerTokenEmbedder (
0 commit comments