|
29 | 29 | """ |
30 | 30 |
|
31 | 31 | # %% |
32 | | -from autointent.configs import EmbedderConfig |
| 32 | +from autointent.configs import get_default_embedder_config |
33 | 33 |
|
34 | 34 | # Using a dictionary for detailed configuration |
35 | 35 | advanced_embedder_config = { |
|
59 | 59 |
|
60 | 60 | from autointent.configs import TokenizerConfig |
61 | 61 |
|
62 | | -embedder_config = EmbedderConfig( |
| 62 | +embedder_config = get_default_embedder_config( |
63 | 63 | model_name="sentence-transformers/all-mpnet-base-v2", |
64 | 64 | batch_size=32, |
65 | 65 | # Device is auto-detected, but you can override if needed |
|
115 | 115 |
|
116 | 116 | # %% |
117 | 117 | # Example: Performance-optimized configuration |
118 | | -perf_config = EmbedderConfig( |
| 118 | +perf_config = get_default_embedder_config( |
119 | 119 | model_name="sentence-transformers/all-MiniLM-L6-v2", # Fast, lightweight model |
120 | 120 | batch_size=128, # Large batch for speed |
121 | 121 | # Device auto-detected by sentence-transformers |
|
133 | 133 |
|
134 | 134 | # %% |
135 | 135 | # Example: Quality-optimized configuration |
136 | | -quality_config = EmbedderConfig( |
| 136 | +quality_config = get_default_embedder_config( |
137 | 137 | model_name="sentence-transformers/all-mpnet-base-v2", # High-quality model |
138 | 138 | batch_size=16, # Smaller batch to handle longer sequences |
139 | 139 | tokenizer_config=TokenizerConfig(max_length=512), # Longer sequences for context |
|
151 | 151 |
|
152 | 152 | # %% |
153 | 153 | # Example: Multilingual setup |
154 | | -multilingual_config = EmbedderConfig( |
| 154 | +multilingual_config = get_default_embedder_config( |
155 | 155 | model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", |
156 | 156 | batch_size=32, |
157 | 157 | tokenizer_config=TokenizerConfig(max_length=256), |
|
0 commit comments