|
28 | 28 | from graphrag.index.config.embeddings import ( |
29 | 29 | community_full_content_embedding, |
30 | 30 | entity_description_embedding, |
| 31 | + text_unit_text_embedding, |
31 | 32 | ) |
32 | 33 | from graphrag.logger.print_progress import PrintProgressLogger |
33 | 34 | from graphrag.query.factory import ( |
| 35 | + get_basic_search_engine, |
34 | 36 | get_drift_search_engine, |
35 | 37 | get_global_search_engine, |
36 | 38 | get_local_search_engine, |
@@ -423,6 +425,109 @@ async def drift_search( |
423 | 425 | return response, context_data |
424 | 426 |
|
425 | 427 |
|
@validate_call(config={"arbitrary_types_allowed": True})
async def basic_search(
    config: GraphRagConfig,
    text_units: pd.DataFrame,
    query: str,
) -> tuple[
    str | dict[str, Any] | list[dict[str, Any]],
    str | list[pd.DataFrame] | dict[str, pd.DataFrame],
]:
    """Perform a basic search and return the context data and response.

    Parameters
    ----------
    - config (GraphRagConfig): A graphrag configuration (from settings.yaml)
    - text_units (pd.DataFrame): A DataFrame containing the final text units (from create_final_text_units.parquet)
    - query (str): The user query to search for.

    Returns
    -------
    TODO: Document the search response type and format.

    Raises
    ------
    TODO: Document any exceptions to expect.
    """
    vector_store_args = config.embeddings.vector_store
    logger.info(f"Vector Store Args: {redact(vector_store_args)}")  # type: ignore # noqa

    # Resolve the vector store that holds the text-unit text embeddings;
    # basic search retrieves over raw text units rather than entities.
    description_embedding_store = _get_embedding_store(
        config_args=vector_store_args,  # type: ignore
        embedding_name=text_unit_text_embedding,
    )

    prompt = _load_search_prompt(config.root_dir, config.basic_search.prompt)

    search_engine = get_basic_search_engine(
        config=config,
        text_units=read_indexer_text_units(text_units),
        text_unit_embeddings=description_embedding_store,
        system_prompt=prompt,
    )

    result: SearchResult = await search_engine.asearch(query=query)
    response = result.response
    context_data = _reformat_context_data(result.context_data)  # type: ignore
    return response, context_data
| 475 | + |
| 476 | + |
@validate_call(config={"arbitrary_types_allowed": True})
async def basic_search_streaming(
    config: GraphRagConfig,
    text_units: pd.DataFrame,
    query: str,
) -> AsyncGenerator:
    """Perform a basic search and return the context data and response via a generator.

    Parameters
    ----------
    - config (GraphRagConfig): A graphrag configuration (from settings.yaml)
    - text_units (pd.DataFrame): A DataFrame containing the final text units (from create_final_text_units.parquet)
    - query (str): The user query to search for.

    Returns
    -------
    TODO: Document the search response type and format.

    Raises
    ------
    TODO: Document any exceptions to expect.
    """
    vector_store_args = config.embeddings.vector_store
    logger.info(f"Vector Store Args: {redact(vector_store_args)}")  # type: ignore # noqa

    # Resolve the vector store that holds the text-unit text embeddings;
    # basic search retrieves over raw text units rather than entities.
    description_embedding_store = _get_embedding_store(
        config_args=vector_store_args,  # type: ignore
        embedding_name=text_unit_text_embedding,
    )

    prompt = _load_search_prompt(config.root_dir, config.basic_search.prompt)

    search_engine = get_basic_search_engine(
        config=config,
        text_units=read_indexer_text_units(text_units),
        text_unit_embeddings=description_embedding_store,
        system_prompt=prompt,
    )

    # When streaming, the first item yielded by the engine is the context
    # data object; every subsequent item is a token of the query response.
    first_chunk = True
    async for stream_chunk in search_engine.astream_search(query=query):
        if first_chunk:
            yield _reformat_context_data(stream_chunk)  # type: ignore
            first_chunk = False
        else:
            yield stream_chunk
| 529 | + |
| 530 | + |
426 | 531 | def _get_embedding_store( |
427 | 532 | config_args: dict, |
428 | 533 | embedding_name: str, |
|
0 commit comments