|
 "source": [
 "from graphrag.cache.factory import CacheFactory\n",
 "from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks\n",
-"from graphrag.config.get_vector_store_settings import get_vector_store_settings\n",
 "from graphrag.index.workflows.generate_text_embeddings import generate_text_embeddings\n",
+"from graphrag.language_model.manager import ModelManager\n",
+"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
 "\n",
 "# We only need to re-run the embeddings workflow, to ensure that embeddings for all required search fields are in place\n",
 "# We'll construct the context and run this function flow directly to avoid everything else\n",
 "\n",
-"\n",
-"vector_store_config = get_vector_store_settings(config)\n",
 "model_config = config.get_language_model_config(config.embed_text.model_id)\n",
 "callbacks = NoopWorkflowCallbacks()\n",
 "cache_config = config.cache.model_dump()  # type: ignore\n",
 "cache = CacheFactory().create_cache(\n",
 "    cache_type=cache_config[\"type\"],  # type: ignore\n",
 "    **cache_config,\n",
 ")\n",
+"model = ModelManager().get_or_create_embedding_model(\n",
+"    name=\"text_embedding\",\n",
+"    model_type=model_config.type,\n",
+"    config=model_config,\n",
+"    callbacks=callbacks,\n",
+"    cache=cache,\n",
+")\n",
+"\n",
+"tokenizer = get_tokenizer(model_config)\n",
 "\n",
 "await generate_text_embeddings(\n",
 "    documents=None,\n",

 "    entities=final_entities,\n",
 "    community_reports=final_community_reports,\n",
 "    callbacks=callbacks,\n",
-"    cache=cache,\n",
-"    model_config=model_config,\n",
+"    model=model,\n",
+"    tokenizer=tokenizer,\n",
 "    batch_size=config.embed_text.batch_size,\n",
 "    batch_max_tokens=config.embed_text.batch_max_tokens,\n",
-"    vector_store_config=vector_store_config,\n",
+"    num_threads=model_config.concurrent_requests,\n",
+"    vector_store_config=config.vector_store,\n",
 "    embedded_fields=config.embed_text.names,\n",
 ")"
 ]
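
For readability, here is a sketch of how the updated notebook cell reads as plain Python after this diff. It assumes `config`, `final_entities`, and `final_community_reports` are defined earlier in the notebook (as the diff's context lines imply); the arguments hidden in the collapsed diff region between `documents=None` and `entities=final_entities` are elided rather than guessed.

```python
from graphrag.cache.factory import CacheFactory
from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks
from graphrag.index.workflows.generate_text_embeddings import generate_text_embeddings
from graphrag.language_model.manager import ModelManager
from graphrag.tokenizer.get_tokenizer import get_tokenizer

# Re-run only the embeddings workflow so embeddings for all required
# search fields are in place, without running the rest of the pipeline.
model_config = config.get_language_model_config(config.embed_text.model_id)
callbacks = NoopWorkflowCallbacks()
cache_config = config.cache.model_dump()  # type: ignore
cache = CacheFactory().create_cache(
    cache_type=cache_config["type"],  # type: ignore
    **cache_config,
)

# The workflow now takes a constructed embedding model and tokenizer
# instead of a raw model_config plus cache, so build those up front.
model = ModelManager().get_or_create_embedding_model(
    name="text_embedding",
    model_type=model_config.type,
    config=model_config,
    callbacks=callbacks,
    cache=cache,
)
tokenizer = get_tokenizer(model_config)

await generate_text_embeddings(
    documents=None,
    # ... arguments from the collapsed diff context elided ...
    entities=final_entities,
    community_reports=final_community_reports,
    callbacks=callbacks,
    model=model,
    tokenizer=tokenizer,
    batch_size=config.embed_text.batch_size,
    batch_max_tokens=config.embed_text.batch_max_tokens,
    num_threads=model_config.concurrent_requests,
    vector_store_config=config.vector_store,
    embedded_fields=config.embed_text.names,
)
```

The net effect of the change: model construction moves out of `generate_text_embeddings` and into the caller via `ModelManager`, the tokenizer is passed explicitly, concurrency becomes an explicit `num_threads` argument derived from `model_config.concurrent_requests`, and the vector store settings come straight from `config.vector_store` rather than a separate `get_vector_store_settings` helper.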
|