Skip to content

Commit 51475bb

Browse files
committed
rename storage to output
1 parent d355c02 commit 51475bb

File tree

15 files changed

+96
-98
lines changed

15 files changed

+96
-98
lines changed

graphrag/api/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ async def build_index(
5252
list[PipelineRunResult]
5353
The list of pipeline run results
5454
"""
55-
is_update_run = bool(config.update_index_storage)
55+
is_update_run = bool(config.update_index_output)
5656

5757
if is_resume_run and is_update_run:
5858
msg = "Cannot resume and update a run at the same time."

graphrag/cli/index.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def index_cli(
7777
"""Run the pipeline with the given config."""
7878
cli_overrides = {}
7979
if output_dir:
80-
cli_overrides["storage.base_dir"] = str(output_dir)
80+
cli_overrides["output.base_dir"] = str(output_dir)
8181
cli_overrides["reporting.base_dir"] = str(output_dir)
8282
config = load_config(root_dir, config_filepath, cli_overrides)
8383

@@ -106,18 +106,18 @@ def update_cli(
106106
"""Run the pipeline with the given config."""
107107
cli_overrides = {}
108108
if output_dir:
109-
cli_overrides["storage.base_dir"] = str(output_dir)
109+
cli_overrides["output.base_dir"] = str(output_dir)
110110
cli_overrides["reporting.base_dir"] = str(output_dir)
111111
config = load_config(root_dir, config_filepath, cli_overrides)
112112

113-
# Check if update storage exist, if not configure it with default values
114-
if not config.update_index_storage:
115-
from graphrag.config.defaults import STORAGE_TYPE, UPDATE_STORAGE_BASE_DIR
116-
from graphrag.config.models.storage_config import StorageConfig
113+
# Check if the update output exists; if not, configure it with default values
114+
if not config.update_index_output:
115+
from graphrag.config.defaults import OUTPUT_TYPE, UPDATE_OUTPUT_BASE_DIR
116+
from graphrag.config.models.output_config import OutputConfig
117117

118-
config.update_index_storage = StorageConfig(
119-
type=STORAGE_TYPE,
120-
base_dir=UPDATE_STORAGE_BASE_DIR,
118+
config.update_index_output = OutputConfig(
119+
type=OUTPUT_TYPE,
120+
base_dir=UPDATE_OUTPUT_BASE_DIR,
121121
)
122122

123123
_run_index(

graphrag/cli/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def _index_cli(
165165
output: Annotated[
166166
Path | None,
167167
typer.Option(
168-
help="Indexing pipeline output directory. Overrides storage.base_dir in the configuration file.",
168+
help="Indexing pipeline output directory. Overrides output.base_dir in the configuration file.",
169169
dir_okay=True,
170170
writable=True,
171171
resolve_path=True,
@@ -226,7 +226,7 @@ def _update_cli(
226226
output: Annotated[
227227
Path | None,
228228
typer.Option(
229-
help="Indexing pipeline output directory. Overrides storage.base_dir in the configuration file.",
229+
help="Indexing pipeline output directory. Overrides output.base_dir in the configuration file.",
230230
dir_okay=True,
231231
writable=True,
232232
resolve_path=True,
@@ -236,7 +236,7 @@ def _update_cli(
236236
"""
237237
Update an existing knowledge graph index.
238238
239-
Applies a default storage configuration (if not provided by config), saving the new index to the local file system in the `update_output` folder.
239+
Applies a default output configuration (if not provided by config), saving the new index to the local file system in the `update_output` folder.
240240
"""
241241
from graphrag.cli.index import update_cli
242242

graphrag/cli/query.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def run_global_search(
3636
root = root_dir.resolve()
3737
cli_overrides = {}
3838
if data_dir:
39-
cli_overrides["storage.base_dir"] = str(data_dir)
39+
cli_overrides["output.base_dir"] = str(data_dir)
4040
config = load_config(root, config_filepath, cli_overrides)
4141

4242
dataframe_dict = _resolve_output_files(
@@ -121,7 +121,7 @@ def run_local_search(
121121
root = root_dir.resolve()
122122
cli_overrides = {}
123123
if data_dir:
124-
cli_overrides["storage.base_dir"] = str(data_dir)
124+
cli_overrides["output.base_dir"] = str(data_dir)
125125
config = load_config(root, config_filepath, cli_overrides)
126126

127127
dataframe_dict = _resolve_output_files(
@@ -213,7 +213,7 @@ def run_drift_search(
213213
root = root_dir.resolve()
214214
cli_overrides = {}
215215
if data_dir:
216-
cli_overrides["storage.base_dir"] = str(data_dir)
216+
cli_overrides["output.base_dir"] = str(data_dir)
217217
config = load_config(root, config_filepath, cli_overrides)
218218

219219
dataframe_dict = _resolve_output_files(
@@ -299,7 +299,7 @@ def run_basic_search(
299299
root = root_dir.resolve()
300300
cli_overrides = {}
301301
if data_dir:
302-
cli_overrides["storage.base_dir"] = str(data_dir)
302+
cli_overrides["output.base_dir"] = str(data_dir)
303303
config = load_config(root, config_filepath, cli_overrides)
304304

305305
dataframe_dict = _resolve_output_files(
@@ -354,9 +354,9 @@ def _resolve_output_files(
354354
) -> dict[str, pd.DataFrame]:
355355
"""Read indexing output files to a dataframe dict."""
356356
dataframe_dict = {}
357-
storage_config = config.storage.model_dump() # type: ignore
357+
output_config = config.output.model_dump() # type: ignore
358358
storage_obj = StorageFactory().create_storage(
359-
storage_type=storage_config["type"], kwargs=storage_config
359+
storage_type=output_config["type"], kwargs=output_config
360360
)
361361
for name in output_list:
362362
df_value = asyncio.run(load_table_from_storage(name=name, storage=storage_obj))

graphrag/config/defaults.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
InputFileType,
1313
InputType,
1414
LLMType,
15+
OutputType,
1516
ReportingType,
16-
StorageType,
1717
TextEmbeddingTarget,
1818
)
1919
from graphrag.vector_stores.factory import VectorStoreType
@@ -94,16 +94,16 @@
9494
SNAPSHOTS_GRAPHML = False
9595
SNAPSHOTS_EMBEDDINGS = False
9696
SNAPSHOTS_TRANSIENT = False
97-
STORAGE_BASE_DIR = "output"
98-
STORAGE_TYPE = StorageType.file
97+
OUTPUT_BASE_DIR = "output"
98+
OUTPUT_TYPE = OutputType.file
9999
SUMMARIZE_DESCRIPTIONS_MAX_LENGTH = 500
100100
SUMMARIZE_MODEL_ID = DEFAULT_CHAT_MODEL_ID
101101
UMAP_ENABLED = False
102-
UPDATE_STORAGE_BASE_DIR = "update_output"
102+
UPDATE_OUTPUT_BASE_DIR = "update_output"
103103

104104

105105
VECTOR_STORE_TYPE = VectorStoreType.LanceDB.value
106-
VECTOR_STORE_DB_URI = str(Path(STORAGE_BASE_DIR) / "lancedb")
106+
VECTOR_STORE_DB_URI = str(Path(OUTPUT_BASE_DIR) / "lancedb")
107107
VECTOR_STORE_CONTAINER_NAME = "default"
108108
VECTOR_STORE_OVERWRITE = True
109109

graphrag/config/enums.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,17 @@ def __repr__(self):
5353
return f'"{self.value}"'
5454

5555

56-
class StorageType(str, Enum):
57-
"""The storage type for the pipeline."""
56+
class OutputType(str, Enum):
57+
"""The output type for the pipeline."""
5858

5959
file = "file"
60-
"""The file storage type."""
60+
"""The file output type."""
6161
memory = "memory"
62-
"""The memory storage type."""
62+
"""The memory output type."""
6363
blob = "blob"
64-
"""The blob storage type."""
64+
"""The blob output type."""
6565
cosmosdb = "cosmosdb"
66-
"""The cosmosdb storage type"""
66+
"""The cosmosdb output type"""
6767

6868
def __repr__(self):
6969
"""Get a string representation."""

graphrag/config/init_content.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
overlap: {defs.CHUNK_OVERLAP}
6363
group_by_columns: [{",".join(defs.CHUNK_GROUP_BY_COLUMNS)}]
6464
65-
### Storage settings ###
65+
### Output settings ###
6666
## If blob storage is specified in the following four sections,
6767
## connection_string and container_name must be provided
6868
@@ -74,15 +74,15 @@
7474
type: {defs.REPORTING_TYPE.value} # or console, blob
7575
base_dir: "{defs.REPORTING_BASE_DIR}"
7676
77-
storage:
78-
type: {defs.STORAGE_TYPE.value} # one of [blob, cosmosdb, file]
79-
base_dir: "{defs.STORAGE_BASE_DIR}"
77+
output:
78+
type: {defs.OUTPUT_TYPE.value} # one of [blob, cosmosdb, file]
79+
base_dir: "{defs.OUTPUT_BASE_DIR}"
8080
8181
## only turn this on if running `graphrag index` with custom settings
8282
## we normally use `graphrag update` with the defaults
83-
update_index_storage:
84-
# type: {defs.STORAGE_TYPE.value} # or blob
85-
# base_dir: "{defs.UPDATE_STORAGE_BASE_DIR}"
83+
update_index_output:
84+
# type: {defs.OUTPUT_TYPE.value} # or blob
85+
# base_dir: "{defs.UPDATE_OUTPUT_BASE_DIR}"
8686
8787
### Workflow settings ###
8888

graphrag/config/load_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def load_config(
159159
If None, searches for config file in root.
160160
cli_overrides : dict[str, Any] | None
161161
A flat dictionary of cli overrides.
162-
Example: {'storage.base_dir': 'override_value'}
162+
Example: {'output.base_dir': 'override_value'}
163163
164164
Returns
165165
-------

graphrag/config/models/graph_rag_config.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
from graphrag.config.models.input_config import InputConfig
2424
from graphrag.config.models.language_model_config import LanguageModelConfig
2525
from graphrag.config.models.local_search_config import LocalSearchConfig
26+
from graphrag.config.models.output_config import OutputConfig
2627
from graphrag.config.models.reporting_config import ReportingConfig
2728
from graphrag.config.models.snapshots_config import SnapshotsConfig
28-
from graphrag.config.models.storage_config import StorageConfig
2929
from graphrag.config.models.summarize_descriptions_config import (
3030
SummarizeDescriptionsConfig,
3131
)
@@ -99,38 +99,38 @@ def _validate_reporting_base_dir(self) -> None:
9999
(Path(self.root_dir) / self.reporting.base_dir).resolve()
100100
)
101101

102-
storage: StorageConfig = Field(
103-
description="The storage configuration.", default=StorageConfig()
102+
output: OutputConfig = Field(
103+
description="The output configuration.", default=OutputConfig()
104104
)
105-
"""The storage configuration."""
105+
"""The output configuration."""
106106

107-
def _validate_storage_base_dir(self) -> None:
108-
"""Validate the storage base directory."""
109-
if self.storage.type == defs.StorageType.file:
110-
if self.storage.base_dir.strip() == "":
111-
msg = "Storage base directory is required for file storage. Please rerun `graphrag init` and set the storage configuration."
107+
def _validate_output_base_dir(self) -> None:
108+
"""Validate the output base directory."""
109+
if self.output.type == defs.OutputType.file:
110+
if self.output.base_dir.strip() == "":
111+
msg = "output base directory is required for file output. Please rerun `graphrag init` and set the output configuration."
112112
raise ValueError(msg)
113-
self.storage.base_dir = str(
114-
(Path(self.root_dir) / self.storage.base_dir).resolve()
113+
self.output.base_dir = str(
114+
(Path(self.root_dir) / self.output.base_dir).resolve()
115115
)
116116

117-
update_index_storage: StorageConfig | None = Field(
118-
description="The storage configuration for the updated index.",
117+
update_index_output: OutputConfig | None = Field(
118+
description="The output configuration for the updated index.",
119119
default=None,
120120
)
121-
"""The storage configuration for the updated index."""
121+
"""The output configuration for the updated index."""
122122

123-
def _validate_update_index_storage_base_dir(self) -> None:
124-
"""Validate the update index storage base directory."""
123+
def _validate_update_index_output_base_dir(self) -> None:
124+
"""Validate the update index output base directory."""
125125
if (
126-
self.update_index_storage
127-
and self.update_index_storage.type == defs.StorageType.file
126+
self.update_index_output
127+
and self.update_index_output.type == defs.OutputType.file
128128
):
129-
if self.update_index_storage.base_dir.strip() == "":
130-
msg = "Update index storage base directory is required for file storage. Please rerun `graphrag init` and set the update index storage configuration."
129+
if self.update_index_output.base_dir.strip() == "":
130+
msg = "Update index output base directory is required for file output. Please rerun `graphrag init` and set the update index output configuration."
131131
raise ValueError(msg)
132-
self.update_index_storage.base_dir = str(
133-
(Path(self.root_dir) / self.update_index_storage.base_dir).resolve()
132+
self.update_index_output.base_dir = str(
133+
(Path(self.root_dir) / self.update_index_output.base_dir).resolve()
134134
)
135135

136136
cache: CacheConfig = Field(
@@ -269,7 +269,7 @@ def _validate_model(self):
269269
self._validate_root_dir()
270270
self._validate_models()
271271
self._validate_reporting_base_dir()
272-
self._validate_storage_base_dir()
273-
self._validate_update_index_storage_base_dir()
272+
self._validate_output_base_dir()
273+
self._validate_update_index_output_base_dir()
274274
self._validate_vector_store_db_uri()
275275
return self
graphrag/config/models/storage_config.py → graphrag/config/models/output_config.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,18 @@
66
from pydantic import BaseModel, Field
77

88
import graphrag.config.defaults as defs
9-
from graphrag.config.enums import StorageType
9+
from graphrag.config.enums import OutputType
1010

1111

12-
class StorageConfig(BaseModel):
13-
"""The default configuration section for Storage."""
12+
class OutputConfig(BaseModel):
13+
"""The default configuration section for Output."""
1414

15-
type: StorageType = Field(
16-
description="The storage type to use.", default=defs.STORAGE_TYPE
15+
type: OutputType = Field(
16+
description="The output type to use.", default=defs.OUTPUT_TYPE
1717
)
1818
base_dir: str = Field(
19-
description="The base directory for the storage.",
20-
default=defs.STORAGE_BASE_DIR,
19+
description="The base directory for the output.",
20+
default=defs.OUTPUT_BASE_DIR,
2121
)
2222
connection_string: str | None = Field(
2323
description="The storage connection string to use.", default=None

0 commit comments

Comments
 (0)