This repository was archived by the owner on May 27, 2025. It is now read-only.

Commit ff5714a (parent: 699bfa5)

update code references to new locations in graphrag library

7 files changed (+92, -14 lines)


backend/src/api/query.py

Lines changed: 3 additions & 3 deletions
@@ -16,9 +16,9 @@
     APIRouter,
     HTTPException,
 )
+from graphrag.api.query import global_search, local_search
 from graphrag.config import create_graphrag_config
 from graphrag.model.types import TextEmbedder
-from graphrag.query.api import global_search, local_search
 from graphrag.vector_stores.base import (
     BaseVectorStore,
     VectorStoreDocument,
@@ -408,9 +408,9 @@ async def local_query(request: GraphRequest):
     parameters.embeddings.vector_store["index_names"] = sanitized_index_names
     # internally write over the get_embedding_description_store
     # method to use the multi-index collection.
-    import graphrag.query.api
+    import graphrag.api.query

-    graphrag.query.api._get_embedding_description_store = (
+    graphrag.api.query._get_embedding_description_store = (
         _get_embedding_description_store
     )
     # perform async search
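
The hunk above keeps the existing override of graphrag's internal _get_embedding_description_store, now applied to the relocated graphrag.api.query module. The pattern works because functions inside a module resolve names through the module's namespace at call time, so rebinding the attribute from outside changes which helper the library uses. A minimal, self-contained sketch of that mechanism (throwaway module and function names, not the graphrag source):

import types

# Throwaway module mimicking the shape of graphrag.api.query
# (hypothetical names -- this is not graphrag code).
fake = types.ModuleType("fake_query_api")
exec(
    "def _get_embedding_description_store():\n"
    "    return 'single-index store'\n"
    "\n"
    "def local_search():\n"
    "    # the helper is looked up in the module namespace at call time\n"
    "    return _get_embedding_description_store()\n",
    fake.__dict__,
)

def _multi_index_store():
    return "multi-index store"

# same rebinding pattern query.py applies to graphrag.api.query
fake._get_embedding_description_store = _multi_index_store
assert fake.local_search() == "multi-index store"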

backend/src/api/query_streaming.py

Lines changed: 5 additions & 3 deletions
@@ -13,11 +13,13 @@
     HTTPException,
 )
 from fastapi.responses import StreamingResponse
-from graphrag.config import create_graphrag_config
-from graphrag.query.api import (
+from graphrag.api.query import (
     global_search_streaming as global_search_streaming_internal,
 )
-from graphrag.query.api import local_search_streaming as local_search_streaming_internal
+from graphrag.api.query import (
+    local_search_streaming as local_search_streaming_internal,
+)
+from graphrag.config import create_graphrag_config

 from src.api.azure_clients import AzureClientManager
 from src.api.common import (

backend/src/logger/load_logger.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 from typing import List

 from datashaper import WorkflowCallbacks, WorkflowCallbacksManager
-from graphrag.index.reporting import FileWorkflowCallbacks
+from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks

 from src.api.azure_clients import AzureClientManager
 from src.logger.application_insights_workflow_callbacks import (
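
FileWorkflowCallbacks now lives under graphrag.callbacks rather than graphrag.index.reporting. A rough sketch of how a loader can wire it into a callback manager; the register() call and the output-directory argument are assumptions about the datashaper and graphrag APIs, not code taken from this file:

from datashaper import WorkflowCallbacks, WorkflowCallbacksManager
from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks  # new import path

def load_file_logger(reporting_dir: str) -> WorkflowCallbacks:
    # Sketch only: assumes FileWorkflowCallbacks accepts an output directory
    # and WorkflowCallbacksManager exposes register(), as in datashaper.
    manager = WorkflowCallbacksManager()
    manager.register(FileWorkflowCallbacks(reporting_dir))
    return manager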

backend/src/logger/typing.py

Lines changed: 3 additions & 3 deletions
@@ -5,9 +5,9 @@
 from enum import Enum
 from typing import Literal

-from graphrag.index.config import (
+from graphrag.index.config.reporting import (
     PipelineReportingConfig,
-    reporting,
+    PipelineReportingConfigTypes,
 )
 from pydantic import Field as pydantic_Field

@@ -46,5 +46,5 @@ class PipelineAppInsightsReportingConfig(

 # add the new type to the existing PipelineReportingConfigTypes
 PipelineReportingConfigTypes = (
-    reporting.PipelineReportingConfigTypes | PipelineAppInsightsReportingConfig
+    PipelineReportingConfigTypes | PipelineAppInsightsReportingConfig
 )
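
The second hunk drops the reporting. prefix because PipelineReportingConfigTypes is now imported directly from graphrag.index.config.reporting. The underlying pattern is a plain union-type extension: the library's union of reporting configs is widened with the backend's own pydantic model so it is accepted anywhere the library alias is. A small sketch with illustrative names (not graphrag's actual classes):

from pydantic import BaseModel

class LibraryConfigA(BaseModel):
    type: str = "a"

class LibraryConfigB(BaseModel):
    type: str = "b"

# what the library would export as its union alias
LibraryConfigTypes = LibraryConfigA | LibraryConfigB

class AppInsightsConfig(BaseModel):
    # the application's custom reporting config
    type: str = "app_insights"
    connection_string: str = ""

# widen the alias so the custom config is accepted wherever the union is used
LibraryConfigTypes = LibraryConfigTypes | AppInsightsConfig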

backend/src/typing/pipeline.py

Lines changed: 5 additions & 1 deletion
@@ -4,8 +4,12 @@
 from enum import Enum


-class PipelineJobState(Enum):
+class PipelineJobState(str, Enum):
     SCHEDULED = "scheduled"
     RUNNING = "running"
     FAILED = "failed"
     COMPLETE = "complete"
+
+    def __repr__(self):
+        """Get a string representation."""
+        return f'"{self.value}"'
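
Mixing str into the enum makes each member also an instance of str, so job states compare equal to their raw values and serialize cleanly into JSON or CosmosDB documents; the custom __repr__ simply prints the quoted value. The snippet below illustrates that behavior using standard Python semantics rather than repo-specific code:

import json
from enum import Enum

class PipelineJobState(str, Enum):
    SCHEDULED = "scheduled"
    RUNNING = "running"
    FAILED = "failed"
    COMPLETE = "complete"

    def __repr__(self):
        """Get a string representation."""
        return f'"{self.value}"'

# str mixin: members compare equal to their raw string values
assert PipelineJobState.COMPLETE == "complete"
# and serialize as plain strings without a custom encoder
assert json.dumps({"status": PipelineJobState.COMPLETE}) == '{"status": "complete"}'
# the custom __repr__ prints the quoted value
assert repr(PipelineJobState.FAILED) == '"failed"'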

backend/tests/conftest.py

Lines changed: 4 additions & 3 deletions
@@ -18,12 +18,13 @@
 def blob_with_data_container_name(blob_service_client: BlobServiceClient):
     # create a storage container and upload some data
     container_name = "container-with-data"
-    blob_service_client.create_container(container_name)
-    blob_client = blob_service_client.get_blob_client(container_name, "data.txt")
+    sanitized_name = sanitize_name(container_name)
+    blob_service_client.create_container(sanitized_name)
+    blob_client = blob_service_client.get_blob_client(sanitized_name, "data.txt")
     blob_client.upload_blob(data="Hello, World!", overwrite=True)
     yield container_name
     # cleanup
-    blob_service_client.delete_container(container_name)
+    blob_service_client.delete_container(sanitized_name)


 @pytest.fixture(scope="session")
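
The fixture now provisions and tears down the container under its sanitized name while still yielding the human-readable one, mirroring how the API maps user-facing names onto valid Azure container names. sanitize_name itself is imported elsewhere in conftest.py; one plausible shape, purely illustrative and not necessarily the backend's real implementation, is a deterministic hash:

import hashlib

def sanitize_name(name: str) -> str:
    # Illustrative sketch only -- the backend's actual sanitize_name may differ.
    # A hash gives a stable, lowercase alphanumeric name, truncated to fit
    # Azure's 3-63 character limit for blob container names.
    return hashlib.sha256(name.encode()).hexdigest()[:63]

print(sanitize_name("container-with-data"))  # stable hex string, safe as a container name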
New file: integration tests for the PipelineJob class

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""
+Integration tests for the PipelineJob class.
+"""
+
+from typing import Generator
+
+import pytest
+
+from src.typing.pipeline import PipelineJobState
+from src.utils.pipeline import PipelineJob
+
+
+@pytest.fixture()
+def cosmos_index_job_entry(cosmos_client) -> Generator[str, None, None]:
+    """Create an entry for an indexing job in the appropriate CosmosDB database and container
+    that graphrag expects when first scheduling an indexing job."""
+
+    db_client = cosmos_client.get_database_client("graphrag")
+    container_client = db_client.get_container_client("jobs")
+    synthetic_job_entry = {
+        "id": "testID",
+        "epoch_request_time": 0,
+        "human_readable_index_name": "test_human_readable_index_name",
+        "sanitized_index_name": "test_sanitized_index_name",
+        "human_readable_storage_name": "test_human_readable_storage_name",
+        "sanitized_storage_name": "test_sanitized_storage_name",
+        "all_workflows": ["workflow1", "workflow2"],
+        "completed_workflows": ["workflow1"],
+        "failed_workflows": ["workflow2"],
+        "status": PipelineJobState.COMPLETE,
+        "percent_complete": 50.0,
+        "progress": "some progress",
+    }
+    container_client.upsert_item(synthetic_job_entry)
+    yield synthetic_job_entry["id"]
+    # teardown
+    container_client.delete_item(
+        synthetic_job_entry["id"], partition_key=synthetic_job_entry["id"]
+    )
+
+
+def test_pipeline_job_interface(cosmos_index_job_entry):
+    pipeline_job = PipelineJob()
+    # test creating a new entry
+    pipeline_job.create_item(
+        id="synthetic_id",
+        human_readable_index_name="test_human_readable_index_name",
+        human_readable_storage_name="test_human_readable_storage_name",
+        entity_extraction_prompt="fake entity extraction prompt",
+        community_report_prompt="fake community report prompt",
+        summarize_descriptions_prompt="fake summarize descriptions prompt",
+    )
+    assert pipeline_job.item_exist("synthetic_id")
+
+    # test loading an existing entry
+    pipeline_job = pipeline_job.load_item(cosmos_index_job_entry)
+    assert pipeline_job.id == "testID"
+    assert pipeline_job.human_readable_index_name == "test_human_readable_index_name"
+    assert pipeline_job.sanitized_index_name == "test_sanitized_index_name"
+    assert (
+        pipeline_job.human_readable_storage_name == "test_human_readable_storage_name"
+    )
+    assert pipeline_job.sanitized_storage_name == "test_sanitized_storage_name"
+    assert pipeline_job.all_workflows == ["workflow1", "workflow2"]
+    assert pipeline_job.completed_workflows == ["workflow1"]
+    assert pipeline_job.failed_workflows == ["workflow2"]
+    assert pipeline_job.status == PipelineJobState.COMPLETE
+    assert pipeline_job.percent_complete == 50.0
+    assert pipeline_job.progress == "some progress"
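
These tests assume a session-scoped cosmos_client fixture (defined elsewhere in conftest.py) plus an existing "graphrag" database containing a "jobs" container. A rough sketch of what such a fixture could look like against the local Cosmos DB emulator; the endpoint and key are the emulator's published defaults, and the repository's real fixture may be configured differently:

import pytest
from azure.cosmos import CosmosClient

@pytest.fixture(scope="session")
def cosmos_client() -> CosmosClient:
    # Sketch only: assumes a locally running Cosmos DB emulator with its
    # well-known development key; not the repository's actual fixture.
    return CosmosClient(
        url="https://localhost:8081",
        credential="C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==",
    )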
