Skip to content
This repository was archived by the owner on May 27, 2025. It is now read-only.

Commit a1ee316

Browse files
committed
initial cleanup of code to support integration and unit test via pytest
1 parent ac726ef commit a1ee316

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1019
-710
lines changed

.devcontainer/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Fmore information about the base image visit:
1+
# For more information about the base image visit:
22
# https://mcr.microsoft.com/en-us/artifact/mar/devcontainers/python/about
33
FROM mcr.microsoft.com/devcontainers/python:3.10-bookworm
44

backend/.coveragerc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[run]
2+
omit =
3+
**/__init__.py
4+
src/api/query*
5+
src/utils/query.py
6+
src/models.py

backend/manage-indexing-jobs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
from src.api.azure_clients import AzureClientManager
2121
from src.api.common import sanitize_name
22+
from src.logger.logger_singleton import LoggerSingleton
2223
from src.models import PipelineJob
23-
from src.reporting.reporter_singleton import ReporterSingleton
2424
from src.typing.pipeline import PipelineJobState
2525

2626

@@ -47,7 +47,7 @@ def schedule_indexing_job(index_name: str):
4747
body=job_manifest, namespace=os.environ["AKS_NAMESPACE"]
4848
)
4949
except Exception:
50-
reporter = ReporterSingleton().get_instance()
50+
reporter = LoggerSingleton().get_instance()
5151
reporter.on_error(
5252
"Index job manager encountered error scheduling indexing job",
5353
)

backend/poetry.lock

Lines changed: 280 additions & 279 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ asyncio_default_fixture_loop_scope="function"
44
asyncio_mode=auto
55
; use well known credentials for Cosmos DB emulator and Azure Storage emulator
66
env =
7-
COSMOS_CONNECTION_STRING=AccountEndpoint=http://localhost:8081/;AccountKey=C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==
7+
COSMOS_CONNECTION_STRING=AccountEndpoint=http://127.0.0.1:8081/;AccountKey=C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==
88
STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;
99
TESTING=1

backend/src/api/azure_clients.py

Lines changed: 43 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,11 @@
1111
from azure.identity import DefaultAzureCredential
1212
from azure.storage.blob import BlobServiceClient
1313
from azure.storage.blob.aio import BlobServiceClient as BlobServiceClientAsync
14-
from environs import Env
1514

1615
ENDPOINT_ERROR_MSG = "Could not find connection string in environment variables"
1716

1817

19-
class CosmosClientSingleton:
18+
class _CosmosClientSingleton:
2019
"""
2120
Singleton class for a CosmosClient instance.
2221
@@ -29,19 +28,19 @@ class CosmosClientSingleton:
2928
@classmethod
3029
def get_instance(cls):
3130
if not cls._instance:
32-
endpoint = os.getenv("COSMOS_URI_ENDPOINT")
3331
conn_string = os.getenv("COSMOS_CONNECTION_STRING")
3432
if conn_string:
3533
cls._instance = CosmosClient.from_connection_string(conn_string)
3634
else:
35+
endpoint = os.getenv("COSMOS_URI_ENDPOINT")
3736
credential = DefaultAzureCredential()
3837
cls._instance = CosmosClient(endpoint, credential)
3938
return cls._instance
4039

4140

42-
class BlobServiceClientSingleton:
41+
class _BlobServiceClientSingleton:
4342
"""
44-
Singleton class for BlobServiceClient.
43+
Singleton class for a BlobServiceClient instance.
4544
4645
If a connection string is available, it will be used to create the BlobServiceClient instance.
4746
Otherwise assume managed identity is used.
@@ -61,28 +60,16 @@ def get_instance(cls) -> BlobServiceClient:
6160
cls._instance = BlobServiceClient(account_url, credential=credential)
6261
return cls._instance
6362

64-
@classmethod
65-
def get_storage_account_name(cls) -> str:
66-
conn_string = os.getenv("STORAGE_CONNECTION_STRING")
67-
if conn_string:
68-
# parse account name from the connection string
69-
meta_info = {}
70-
for meta_data in conn_string.split(";"):
71-
if not meta_data:
72-
continue
73-
m = meta_data.split("=", 1)
74-
if len(m) != 2:
75-
continue
76-
meta_info[m[0]] = m[1]
77-
return meta_info["AccountName"]
78-
else:
79-
account_url = os.getenv("STORAGE_ACCOUNT_BLOB_URL")
80-
return account_url.split("//")[1].split(".")[0]
8163

64+
class _BlobServiceClientSingletonAsync:
65+
"""
66+
Singleton class for a BlobServiceClientAsync instance.
67+
68+
If a connection string is available, it will be used to create the BlobServiceClientAsync instance.
69+
Otherwise assume managed identity is used.
70+
"""
8271

83-
class BlobServiceClientSingletonAsync:
8472
_instance = None
85-
_env = Env()
8673

8774
@classmethod
8875
def get_instance(cls) -> BlobServiceClientAsync:
@@ -100,43 +87,15 @@ def get_instance(cls) -> BlobServiceClientAsync:
10087
)
10188
return cls._instance
10289

103-
@classmethod
104-
def get_storage_account_name(cls) -> str:
105-
conn_string = os.getenv("STORAGE_CONNECTION_STRING")
106-
if conn_string:
107-
# parse account name from the connection string
108-
meta_info = {}
109-
for meta_data in conn_string.split(";"):
110-
if not meta_data:
111-
continue
112-
m = meta_data.split("=", 1)
113-
if len(m) != 2:
114-
continue
115-
meta_info[m[0]] = m[1]
116-
return meta_info["AccountName"]
117-
else:
118-
account_url = os.environ["STORAGE_ACCOUNT_BLOB_URL"]
119-
return account_url.split("//")[1].split(".")[0]
120-
121-
122-
def get_database_client(database_name: str) -> DatabaseProxy:
123-
client = CosmosClientSingleton.get_instance()
124-
return client.get_database_client(database_name)
125-
126-
127-
def get_database_container_client(
128-
database_name: str, container_name: str
129-
) -> ContainerProxy:
130-
db_client = get_database_client(database_name)
131-
return db_client.get_container_client(container_name)
132-
13390

13491
class AzureClientManager:
13592
"""
13693
Manages the clients for Azure blob storage and Cosmos DB.
13794
13895
Attributes:
139-
azure_storage_blob_url (str): The blob endpoint for azure storage.
96+
storage_blob_url (str): The blob endpoint for azure storage.
97+
storage_account_name (str): The name of the azure storage account.
98+
storage_account_hostname (str): The hostname of the azure blob storage account.
14099
cosmos_uri_endpoint (str): The uri endpoint for the Cosmos DB.
141100
_blob_service_client (BlobServiceClient): The blob service client.
142101
_blob_service_client_async (BlobServiceClientAsync): The asynchronous blob service client.
@@ -146,37 +105,34 @@ class AzureClientManager:
146105
"""
147106

148107
def __init__(self) -> None:
149-
self.azure_storage_blob_url = os.getenv("STORAGE_ACCOUNT_BLOB_URL")
150-
self.azure_storage_connection_string = os.getenv("STORAGE_CONNECTION_STRING")
108+
self.storage_blob_url = os.getenv("STORAGE_ACCOUNT_BLOB_URL")
109+
self.storage_connection_string = os.getenv("STORAGE_CONNECTION_STRING")
151110
self.cosmos_uri_endpoint = os.getenv("COSMOS_URI_ENDPOINT")
152111
self.cosmos_connection_string = os.getenv("COSMOS_CONNECTION_STRING")
153-
154-
if self.cosmos_connection_string:
155-
self._cosmos_client = CosmosClient.from_connection_string(
156-
self.cosmos_connection_string
157-
)
158-
else:
159-
self._cosmos_client = CosmosClient(
160-
self.cosmos_uri_endpoint, credential=DefaultAzureCredential()
161-
)
162-
if self.azure_storage_connection_string:
163-
self._blob_service_client = BlobServiceClient.from_connection_string(
164-
self.azure_storage_connection_string
165-
)
166-
self._blob_service_client_async = (
167-
BlobServiceClientAsync.from_connection_string(
168-
self.azure_storage_connection_string
169-
)
170-
)
112+
self._cosmos_client = _CosmosClientSingleton.get_instance()
113+
self._blob_service_client = _BlobServiceClientSingleton.get_instance()
114+
self._blob_service_client_async = (
115+
_BlobServiceClientSingletonAsync.get_instance()
116+
)
117+
118+
# parse account name from the azure storage connection string or blob url
119+
if self.storage_connection_string:
120+
meta_info = {}
121+
for meta_data in self.storage_connection_string.split(";"):
122+
if not meta_data:
123+
continue
124+
m = meta_data.split("=", 1)
125+
if len(m) != 2:
126+
continue
127+
meta_info[m[0]] = m[1]
128+
self.storage_account_name = meta_info["AccountName"]
171129
else:
172-
self._blob_service_client = BlobServiceClient(
173-
account_url=self.azure_storage_blob_url,
174-
credential=DefaultAzureCredential(),
175-
)
176-
self._blob_service_client_async = BlobServiceClientAsync(
177-
account_url=self.azure_storage_blob_url,
178-
credential=DefaultAzureCredential(),
179-
)
130+
self.storage_account_name = self.storage_blob_url.split("//")[1].split(".")[
131+
0
132+
]
133+
134+
# derive the account hostname from the blob service client url (everything after the "//" scheme separator)
135+
self.storage_account_hostname = self._blob_service_client.url.split("//")[1]
180136

181137
def get_blob_service_client(self) -> BlobServiceClient:
182138
"""
@@ -215,11 +171,7 @@ def get_cosmos_database_client(self, database_name: str) -> DatabaseProxy:
215171
Returns:
216172
DatabaseProxy: The Cosmos database client.
217173
"""
218-
if not hasattr(self, "_cosmos_database_client"):
219-
self._cosmos_database_client = self._cosmos_client.get_database_client(
220-
database=database_name
221-
)
222-
return self._cosmos_database_client
174+
return self._cosmos_client.get_database_client(database=database_name)
223175

224176
def get_cosmos_container_client(
225177
self, database_name: str, container_name: str
@@ -234,8 +186,6 @@ def get_cosmos_container_client(
234186
Returns:
235187
ContainerProxy: The Cosmos DB container client.
236188
"""
237-
if not hasattr(self, "_cosmos_container_client"):
238-
self._cosmos_container_client = self.get_cosmos_database_client(
239-
database_name=database_name
240-
).get_container_client(container=container_name)
241-
return self._cosmos_container_client
189+
return self._cosmos_client.get_database_client(
190+
database=database_name
191+
).get_container_client(container=container_name)

backend/src/api/common.py

Lines changed: 29 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,37 @@
22
# Licensed under the MIT License.
33

44
import hashlib
5+
import os
56
import re
67

8+
from azure.identity import DefaultAzureCredential
79
from fastapi import HTTPException
810

9-
from src.api.azure_clients import (
10-
AzureClientManager,
11-
BlobServiceClientSingleton,
12-
)
11+
from src.api.azure_clients import AzureClientManager
1312

14-
blob_service_client = BlobServiceClientSingleton.get_instance()
15-
azure_storage_client_manager = AzureClientManager()
13+
14+
def get_pandas_storage_options() -> dict:
15+
"""Generate the storage options required by pandas to read parquet files from Storage."""
16+
# For more information on the options available, see: https://github.com/fsspec/adlfs?tab=readme-ov-file#setting-credentials
17+
azure_client_manager = AzureClientManager()
18+
options = {
19+
"account_name": azure_client_manager.storage_account_name,
20+
"account_host": azure_client_manager.storage_account_hostname,
21+
}
22+
if os.getenv("STORAGE_CONNECTION_STRING"):
23+
options["connection_string"] = os.getenv("STORAGE_CONNECTION_STRING")
24+
else:
25+
options["credential"] = DefaultAzureCredential()
26+
return options
1627

1728

1829
def delete_blob_container(container_name: str):
1930
"""
2031
Delete a blob container. If it does not exist, do nothing.
2132
If exception is raised, the calling function should catch it.
2233
"""
34+
azure_client_manager = AzureClientManager()
35+
blob_service_client = azure_client_manager.get_blob_service_client()
2336
if blob_service_client.get_container_client(container_name).exists():
2437
blob_service_client.delete_container(container_name)
2538

@@ -40,18 +53,18 @@ def validate_index_file_exist(index_name: str, file_name: str):
4053
4154
Raises: ValueError
4255
"""
43-
# verify index_name is a valid index by checking container-store in cosmos db
56+
azure_client_manager = AzureClientManager()
4457
try:
45-
container_store_client = (
46-
azure_storage_client_manager.get_cosmos_container_client(
47-
database_name="graphrag", container_name="container-store"
48-
)
58+
cosmos_container_client = azure_client_manager.get_cosmos_container_client(
59+
database_name="graphrag", container_name="container-store"
4960
)
50-
container_store_client.read_item(index_name, index_name)
61+
cosmos_container_client.read_item(index_name, index_name)
5162
except Exception:
5263
raise ValueError(f"Container {index_name} is not a valid index.")
5364
# check for file existence
54-
index_container_client = blob_service_client.get_container_client(index_name)
65+
index_container_client = (
66+
azure_client_manager.get_blob_service_client().get_container_client(index_name)
67+
)
5568
if not index_container_client.exists():
5669
raise ValueError(f"Container {index_name} not found.")
5770
if not index_container_client.get_blob_client(file_name).exists():
@@ -142,11 +155,10 @@ def retrieve_original_blob_container_name(sanitized_name: str) -> str | None:
142155
Returns: str
143156
The original human-readable name.
144157
"""
158+
azure_client_manager = AzureClientManager()
145159
try:
146-
container_store_client = (
147-
azure_storage_client_manager.get_cosmos_container_client(
148-
database_name="graphrag", container_name="container-store"
149-
)
160+
container_store_client = azure_client_manager.get_cosmos_container_client(
161+
database_name="graphrag", container_name="container-store"
150162
)
151163
for item in container_store_client.read_all_items():
152164
if item["id"] == sanitized_name:
@@ -156,31 +168,3 @@ def retrieve_original_blob_container_name(sanitized_name: str) -> str | None:
156168
status_code=500, detail="Error retrieving original blob name."
157169
)
158170
return None
159-
160-
161-
def retrieve_original_entity_config_name(sanitized_name: str) -> str | None:
162-
"""
163-
Retrieve the original human-readable entity config name of a sanitized entity config name.
164-
165-
Args:
166-
-----
167-
sanitized_name (str)
168-
The sanitized name to be converted back to the original name.
169-
170-
Returns: str
171-
The original human-readable name.
172-
"""
173-
try:
174-
container_store_client = (
175-
azure_storage_client_manager.get_cosmos_container_client(
176-
database_name="graphrag", container_name="entities"
177-
)
178-
)
179-
for item in container_store_client.read_all_items():
180-
if item["id"] == sanitized_name:
181-
return item["human_readable_name"]
182-
except Exception:
183-
raise HTTPException(
184-
status_code=500, detail="Error retrieving original entity config name."
185-
)
186-
return None

0 commit comments

Comments
 (0)