Skip to content

Commit 7a974bf

Browse files
committed
move vector_store from embeddings to top level of config and delete resolve_paths
1 parent 59bc596 commit 7a974bf

File tree

13 files changed

+135
-302
lines changed

13 files changed

+135
-302
lines changed

graphrag/api/query.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ async def local_search(
244244
------
245245
TODO: Document any exceptions to expect.
246246
"""
247-
vector_store_args = config.embeddings.vector_store
247+
vector_store_args = config.vector_store
248248
logger.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore # noqa
249249

250250
description_embedding_store = _get_embedding_store(
@@ -310,7 +310,7 @@ async def local_search_streaming(
310310
------
311311
TODO: Document any exceptions to expect.
312312
"""
313-
vector_store_args = config.embeddings.vector_store
313+
vector_store_args = config.vector_store
314314
logger.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore # noqa
315315

316316
description_embedding_store = _get_embedding_store(
@@ -381,7 +381,7 @@ async def drift_search_streaming(
381381
------
382382
TODO: Document any exceptions to expect.
383383
"""
384-
vector_store_args = config.embeddings.vector_store
384+
vector_store_args = config.vector_store
385385
logger.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore # noqa
386386

387387
description_embedding_store = _get_embedding_store(
@@ -465,7 +465,7 @@ async def drift_search(
465465
------
466466
TODO: Document any exceptions to expect.
467467
"""
468-
vector_store_args = config.embeddings.vector_store
468+
vector_store_args = config.vector_store
469469
logger.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore # noqa
470470

471471
description_embedding_store = _get_embedding_store(
@@ -531,7 +531,7 @@ async def basic_search(
531531
------
532532
TODO: Document any exceptions to expect.
533533
"""
534-
vector_store_args = config.embeddings.vector_store
534+
vector_store_args = config.vector_store
535535
logger.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore # noqa
536536

537537
description_embedding_store = _get_embedding_store(
@@ -576,7 +576,7 @@ async def basic_search_streaming(
576576
------
577577
TODO: Document any exceptions to expect.
578578
"""
579-
vector_store_args = config.embeddings.vector_store
579+
vector_store_args = config.vector_store
580580
logger.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore # noqa
581581

582582
description_embedding_store = _get_embedding_store(

graphrag/cli/index.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from graphrag.config.enums import CacheType
1515
from graphrag.config.load_config import load_config
1616
from graphrag.config.logging import enable_logging_with_config
17-
from graphrag.config.resolve_path import resolve_paths
1817
from graphrag.index.validate_config import validate_config_names
1918
from graphrag.logger.base import ProgressLogger
2019
from graphrag.logger.factory import LoggerFactory, LoggerType
@@ -146,7 +145,6 @@ def _run_index(
146145
config.reporting.base_dir = (
147146
str(output_dir) if output_dir else config.reporting.base_dir
148147
)
149-
resolve_paths(config, run_id)
150148

151149
if not cache:
152150
config.cache.type = CacheType.none

graphrag/cli/query.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import graphrag.api as api
1313
from graphrag.config.load_config import load_config
1414
from graphrag.config.models.graph_rag_config import GraphRagConfig
15-
from graphrag.config.resolve_path import resolve_paths
1615
from graphrag.logger.print_progress import PrintProgressLogger
1716
from graphrag.storage.factory import StorageFactory
1817
from graphrag.utils.storage import load_table_from_storage, storage_has_table
@@ -37,7 +36,6 @@ def run_global_search(
3736
root = root_dir.resolve()
3837
config = load_config(root, config_filepath)
3938
config.storage.base_dir = str(data_dir) if data_dir else config.storage.base_dir
40-
resolve_paths(config)
4139

4240
dataframe_dict = _resolve_output_files(
4341
config=config,
@@ -121,7 +119,6 @@ def run_local_search(
121119
root = root_dir.resolve()
122120
config = load_config(root, config_filepath)
123121
config.storage.base_dir = str(data_dir) if data_dir else config.storage.base_dir
124-
resolve_paths(config)
125122

126123
dataframe_dict = _resolve_output_files(
127124
config=config,
@@ -212,7 +209,6 @@ def run_drift_search(
212209
root = root_dir.resolve()
213210
config = load_config(root, config_filepath)
214211
config.storage.base_dir = str(data_dir) if data_dir else config.storage.base_dir
215-
resolve_paths(config)
216212

217213
dataframe_dict = _resolve_output_files(
218214
config=config,
@@ -297,7 +293,6 @@ def run_basic_search(
297293
root = root_dir.resolve()
298294
config = load_config(root, config_filepath)
299295
config.storage.base_dir = str(data_dir) if data_dir else config.storage.base_dir
300-
resolve_paths(config)
301296

302297
dataframe_dict = _resolve_output_files(
303298
config=config,

graphrag/config/defaults.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,11 @@
101101
UMAP_ENABLED = False
102102
UPDATE_STORAGE_BASE_DIR = "update_output"
103103

104-
VECTOR_STORE = f"""
105-
type: {VectorStoreType.LanceDB.value} # one of [lancedb, azure_ai_search, cosmosdb]
106-
db_uri: '{(Path(STORAGE_BASE_DIR) / "lancedb")!s}'
107-
collection_name: default
108-
overwrite: true\
109-
"""
110-
111-
VECTOR_STORE_DICT = {
112-
"type": VectorStoreType.LanceDB.value,
113-
"db_uri": str(Path(STORAGE_BASE_DIR) / "lancedb"),
114-
"collection_name": "default",
115-
"overwrite": True,
116-
}
104+
105+
VECTOR_STORE_TYPE = VectorStoreType.LanceDB
106+
VECTOR_STORE_DB_URI = str(Path(STORAGE_BASE_DIR) / "lancedb")
107+
VECTOR_STORE_COLLECTION_NAME = "default"
108+
VECTOR_STORE_OVERWRITE = True
117109

118110
# Local Search
119111
LOCAL_SEARCH_TEXT_UNIT_PROP = 0.5

graphrag/config/embeddings.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def get_embedding_settings(
5757
embeddings_llm_settings = settings.get_language_model_config(
5858
settings.embeddings.model_id
5959
)
60-
vector_store_settings = settings.embeddings.vector_store
60+
vector_store_settings = settings.vector_store
6161
if vector_store_settings is None:
6262
return {
6363
"strategy": settings.embeddings.resolved_strategy(embeddings_llm_settings)
@@ -71,7 +71,10 @@ def get_embedding_settings(
7171
embeddings_llm_settings
7272
) # get the default strategy
7373
strategy.update({
74-
"vector_store": {**(vector_store_params or {}), **vector_store_settings}
74+
"vector_store": {
75+
**(vector_store_params or {}),
76+
**(vector_store_settings.model_dump()),
77+
}
7578
}) # update the default strategy with the vector store settings
7679
# This ensures the vector store config is part of the strategy and not the global config
7780
return {

graphrag/config/init_content.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,13 @@
3939
# organization: <organization_id>
4040
# deployment_name: <azure_model_deployment_name>
4141
42+
vector_store:
43+
type: {defs.VECTOR_STORE_TYPE.value}
44+
db_uri: {defs.VECTOR_STORE_DB_URI}
45+
collection_name: {defs.VECTOR_STORE_COLLECTION_NAME}
46+
overwrite: {defs.VECTOR_STORE_OVERWRITE}
47+
4248
embeddings:
43-
async_mode: {defs.ASYNC_MODE.value} # or asyncio
44-
vector_store: {defs.VECTOR_STORE}
4549
model_id: {defs.DEFAULT_EMBEDDING_MODEL_ID}
4650
4751
### Input settings ###
@@ -83,28 +87,28 @@
8387
### Workflow settings ###
8488
8589
entity_extraction:
90+
model_id: {defs.ENTITY_EXTRACTION_MODEL_ID}
8691
prompt: "prompts/entity_extraction.txt"
8792
entity_types: [{",".join(defs.ENTITY_EXTRACTION_ENTITY_TYPES)}]
8893
max_gleanings: {defs.ENTITY_EXTRACTION_MAX_GLEANINGS}
89-
model_id: {defs.ENTITY_EXTRACTION_MODEL_ID}
9094
9195
summarize_descriptions:
96+
model_id: {defs.SUMMARIZE_MODEL_ID}
9297
prompt: "prompts/summarize_descriptions.txt"
9398
max_length: {defs.SUMMARIZE_DESCRIPTIONS_MAX_LENGTH}
94-
model_id: {defs.SUMMARIZE_MODEL_ID}
9599
96100
claim_extraction:
97101
enabled: false
102+
model_id: {defs.CLAIM_EXTRACTION_MODEL_ID}
98103
prompt: "prompts/claim_extraction.txt"
99104
description: "{defs.CLAIM_DESCRIPTION}"
100105
max_gleanings: {defs.CLAIM_MAX_GLEANINGS}
101-
model_id: {defs.CLAIM_EXTRACTION_MODEL_ID}
102106
103107
community_reports:
108+
model_id: {defs.COMMUNITY_REPORT_MODEL_ID}
104109
prompt: "prompts/community_report.txt"
105110
max_length: {defs.COMMUNITY_REPORT_MAX_LENGTH}
106111
max_input_length: {defs.COMMUNITY_REPORT_MAX_INPUT_LENGTH}
107-
model_id: {defs.COMMUNITY_REPORT_MODEL_ID}
108112
109113
cluster_graph:
110114
max_cluster_size: {defs.MAX_CLUSTER_SIZE}

graphrag/config/models/graph_rag_config.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
)
3232
from graphrag.config.models.text_embedding_config import TextEmbeddingConfig
3333
from graphrag.config.models.umap_config import UmapConfig
34+
from graphrag.config.models.vector_store_config import VectorStoreConfig
35+
from graphrag.vector_stores.factory import VectorStoreType
3436

3537

3638
class GraphRagConfig(BaseModel):
@@ -51,11 +53,13 @@ def __str__(self):
5153
def _validate_root_dir(self) -> None:
5254
"""Validate the root directory."""
5355
if self.root_dir.strip() == "":
54-
self.root_dir = str(Path.cwd().resolve())
56+
self.root_dir = str(Path.cwd())
5557

56-
if not Path(self.root_dir).is_dir():
58+
root_dir = Path(self.root_dir).resolve()
59+
if not root_dir.is_dir():
5760
msg = f"Invalid root directory: {self.root_dir} is not a directory."
5861
raise FileNotFoundError(msg)
62+
self.root_dir = str(root_dir)
5963

6064
models: dict[str, LanguageModelConfig] = Field(
6165
description="Available language model configurations.",
@@ -85,17 +89,50 @@ def _validate_models(self) -> None:
8589
)
8690
"""The reporting configuration."""
8791

92+
def _validate_reporting_base_dir(self) -> None:
    """Check the file-reporting base directory and make it absolute.

    Does nothing unless the reporting type is ``file``. Otherwise the
    configured ``base_dir`` is joined onto ``root_dir``, resolved, and
    written back to the reporting config as a string.

    Raises
    ------
    ValueError
        If file reporting is selected and ``base_dir`` is blank.
    """
    # Non-file reporting has no local directory to validate.
    if self.reporting.type != defs.ReportingType.file:
        return

    if self.reporting.base_dir.strip() == "":
        msg = "Reporting base directory is required for file reporting. Please rerun `graphrag init` and set the reporting configuration."
        raise ValueError(msg)

    resolved_dir = (Path(self.root_dir) / self.reporting.base_dir).resolve()
    self.reporting.base_dir = str(resolved_dir)
101+
88102
storage: StorageConfig = Field(
89103
description="The storage configuration.", default=StorageConfig()
90104
)
91105
"""The storage configuration."""
92106

107+
def _validate_storage_base_dir(self) -> None:
    """Check the file-storage base directory and make it absolute.

    Does nothing unless the storage type is ``file``. Otherwise the
    configured ``base_dir`` is joined onto ``root_dir``, resolved, and
    written back to the storage config as a string.

    Raises
    ------
    ValueError
        If file storage is selected and ``base_dir`` is blank.
    """
    # Non-file storage has no local directory to validate.
    if self.storage.type != defs.StorageType.file:
        return

    if self.storage.base_dir.strip() == "":
        msg = "Storage base directory is required for file storage. Please rerun `graphrag init` and set the storage configuration."
        raise ValueError(msg)

    resolved_dir = (Path(self.root_dir) / self.storage.base_dir).resolve()
    self.storage.base_dir = str(resolved_dir)
116+
93117
update_index_storage: StorageConfig | None = Field(
94118
description="The storage configuration for the updated index.",
95119
default=None,
96120
)
97121
"""The storage configuration for the updated index."""
98122

123+
def _validate_update_index_storage_base_dir(self) -> None:
    """Check the update-index storage base directory and make it absolute.

    Does nothing when no update-index storage is configured or when its
    type is not ``file``. Otherwise the configured ``base_dir`` is joined
    onto ``root_dir``, resolved, and written back as a string.

    Raises
    ------
    ValueError
        If file storage is selected and ``base_dir`` is blank.
    """
    # The section is optional; skip it when absent or not file-backed.
    if (
        not self.update_index_storage
        or self.update_index_storage.type != defs.StorageType.file
    ):
        return

    if self.update_index_storage.base_dir.strip() == "":
        msg = "Update index storage base directory is required for file storage. Please rerun `graphrag init` and set the update index storage configuration."
        raise ValueError(msg)

    resolved_dir = (
        Path(self.root_dir) / self.update_index_storage.base_dir
    ).resolve()
    self.update_index_storage.base_dir = str(resolved_dir)
135+
99136
cache: CacheConfig = Field(
100137
description="The cache configuration.", default=CacheConfig()
101138
)
@@ -187,6 +224,21 @@ def _validate_models(self) -> None:
187224
)
188225
"""The basic search configuration."""
189226

227+
vector_store: VectorStoreConfig = Field(
228+
description="The vector store configuration.", default=VectorStoreConfig()
229+
)
230+
"""The vector store configuration."""
231+
232+
def _validate_vector_store_db_uri(self) -> None:
    """Validate the vector store configuration.

    Only applies when the vector store type is LanceDB. The configured
    ``db_uri`` is joined onto ``root_dir``, resolved, and written back to
    the vector store config as a string.

    Raises
    ------
    ValueError
        If the vector store type is LanceDB and ``db_uri`` is blank.
    """
    if self.vector_store.type == VectorStoreType.LanceDB.value:
        # ``strip`` must be *called*: comparing the bound method itself to
        # "" is always False, which silently disabled this check.
        if self.vector_store.db_uri.strip() == "":
            msg = "Vector store URI is required for LanceDB. Please rerun `graphrag init` and set the vector store configuration."
            raise ValueError(msg)
        self.vector_store.db_uri = str(
            (Path(self.root_dir) / self.vector_store.db_uri).resolve()
        )
241+
190242
def get_language_model_config(self, model_id: str) -> LanguageModelConfig:
191243
"""Get a model configuration by ID.
192244
@@ -216,4 +268,8 @@ def _validate_model(self):
216268
"""Validate the model configuration."""
217269
self._validate_root_dir()
218270
self._validate_models()
271+
self._validate_reporting_base_dir()
272+
self._validate_storage_base_dir()
273+
self._validate_update_index_storage_base_dir()
274+
self._validate_vector_store_db_uri()
219275
return self

graphrag/config/models/text_embedding_config.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@ class TextEmbeddingConfig(BaseModel):
2727
names: list[str] = Field(
2828
description="The specific embeddings to perform.", default=[]
2929
)
30-
vector_store: dict = Field(
31-
description="The vector storage configuration", default=defs.VECTOR_STORE_DICT
32-
)
3330
strategy: dict | None = Field(
3431
description="The override strategy to use.", default=None
3532
)

graphrag/config/models/vector_store_config.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2024 Microsoft Corporation.
2+
# Licensed under the MIT License
3+
4+
"""Parameterization settings for the default configuration."""
5+
6+
from pydantic import BaseModel, Field
7+
8+
import graphrag.config.defaults as defs
9+
10+
11+
class VectorStoreConfig(BaseModel):
    """The default configuration section for Vector Store."""

    # Stored as a plain string. The default uses the enum's ``.value`` so it
    # matches the declared ``str`` type and serializes as a string.
    type: str = Field(
        description="The vector store type to use.",
        default=defs.VECTOR_STORE_TYPE.value,
    )

    # Location of the vector database.
    db_uri: str = Field(
        description="The database URI to use.", default=defs.VECTOR_STORE_DB_URI
    )

    # Name of the collection inside the vector store.
    collection_name: str = Field(
        description="The collection name to use.",
        default=defs.VECTOR_STORE_COLLECTION_NAME,
    )

    # Whether existing data in the collection is overwritten.
    overwrite: bool = Field(
        description="Overwrite the existing data.", default=defs.VECTOR_STORE_OVERWRITE
    )

0 commit comments

Comments
 (0)