Skip to content

Commit 3a47233

Browse files
committed
updates
1 parent 0529dfe commit 3a47233

File tree

14 files changed

+63
-40
lines changed

14 files changed

+63
-40
lines changed

docs/config/yaml.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ Our pipeline can ingest .csv, .txt, or .json data from an input location. See th
8181
#### Fields
8282

8383
- `storage` **StorageConfig**
84-
- `type` **File|AzureBlob|AzureCosmos** - The storage type to use. Default=`file`
84+
- `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file`
8585
- `base_dir` **str** - The base directory to write output artifacts to, relative to the root.
8686
- `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string.
8787
- `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name.
@@ -115,7 +115,7 @@ This section controls the storage mechanism used by the pipeline used for export
115115

116116
#### Fields
117117

118-
- `type` **File|AzureBlob|AzureCosmos** - The storage type to use. Default=`file`
118+
- `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file`
119119
- `base_dir` **str** - The base directory to write output artifacts to, relative to the root.
120120
- `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string.
121121
- `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name.
@@ -128,7 +128,7 @@ The section defines a secondary storage location for running incremental indexin
128128

129129
#### Fields
130130

131-
- `type` **File|AzureBlob|AzureCosmos** - The storage type to use. Default=`file`
131+
- `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file`
132132
- `base_dir` **str** - The base directory to write output artifacts to, relative to the root.
133133
- `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string.
134134
- `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name.

packages/graphrag-storage/README.md

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ from graphrag_storage import StorageConfig, create_storage, StorageType
99
async def run():
1010
storage = create_storage(
1111
StorageConfig(
12-
type=StorageType.FILE
12+
type=StorageType.File,
1313
base_dir="output"
1414
)
1515
)
@@ -29,7 +29,7 @@ from typing import Any
2929
from graphrag_storage import Storage, StorageConfig, create_storage, register_storage
3030

3131
class MyStorage(Storage):
32-
def __init__(self, some_setting: str, optional_setting: str = "default setting"):
32+
def __init__(self, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
3333
# Validate settings and initialize
3434
...
3535

@@ -55,3 +55,28 @@ async def run():
5555

5656
if __name__ == "__main__":
5757
asyncio.run(run())
58+
```
59+
60+
### Information
61+
62+
By default, `create_storage` comes with the following storage providers registered, corresponding to the entries in the `StorageType` enum.
63+
64+
- `FileStorage`
65+
- `AzureBlobStorage`
66+
- `AzureCosmosStorage`
67+
- `MemoryStorage`
68+
69+
You can directly import `storage_factory` if you want a clean factory with no preregistered storage providers.
70+
71+
```python
72+
from graphrag_storage.storage_factory import storage_factory
73+
from graphrag_storage.file_storage import FileStorage
74+
75+
# Or register a custom implementation; see above for an example.
76+
storage_factory.register("my_storage_key", FileStorage)
77+
78+
storage = storage_factory.create(strategy="my_storage_key", init_args={"base_dir": "...", "other_settings": "...",})
79+
80+
...
81+
82+
```

packages/graphrag-storage/graphrag_storage/azure_blob_storage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def __init__(
3838
azure_connection_string: str | None = None,
3939
base_dir: str | None = None,
4040
encoding: str = "utf-8",
41+
**kwargs: Any,
4142
) -> None:
4243
"""Create a new BlobStorage instance."""
4344
if azure_connection_string is not None and azure_account_url is not None:

packages/graphrag-storage/graphrag_storage/azure_cosmos_storage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def __init__(
4646
azure_connection_string: str | None = None,
4747
azure_account_url: str | None = None,
4848
encoding: str = "utf-8",
49+
**kwargs: Any,
4950
) -> None:
5051
"""Create a CosmosDB storage instance."""
5152
logger.info("Creating cosmosdb storage")

packages/graphrag-storage/graphrag_storage/file_storage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class FileStorage(Storage):
3030
_base_dir: Path
3131
_encoding: str
3232

33-
def __init__(self, base_dir: str, encoding: str = "utf-8") -> None:
33+
def __init__(self, base_dir: str, encoding: str = "utf-8", **kwargs: Any) -> None:
3434
"""Create a file based storage."""
3535
self._base_dir = Path(base_dir).resolve()
3636
self._encoding = encoding

packages/graphrag-storage/graphrag_storage/storage.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
class Storage(ABC):
1414
"""Provide a storage interface."""
1515

16+
@abstractmethod
17+
def __init__(self, **kwargs: Any) -> None:
18+
"""Create a storage instance."""
19+
1620
@abstractmethod
1721
def find(
1822
self,

packages/graphrag-storage/graphrag_storage/storage_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class StorageConfig(BaseModel):
1616

1717
type: str = Field(
1818
description="The storage type to use. Builtin types include 'File', 'AzureBlob', and 'AzureCosmos'.",
19-
default=StorageType.FILE,
19+
default=StorageType.File,
2020
)
2121

2222
encoding: str | None = Field(

packages/graphrag-storage/graphrag_storage/storage_factory.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
from graphrag_storage.storage_type import StorageType
1414

1515

16-
class _StorageFactory(Factory[Storage]):
16+
class StorageFactory(Factory[Storage]):
1717
"""A factory class for storage implementations."""
1818

1919

20-
storage_factory = _StorageFactory()
20+
storage_factory = StorageFactory()
2121

2222

2323
def register_storage(
@@ -49,31 +49,26 @@ def create_storage(config: StorageConfig) -> Storage:
4949
The created storage implementation.
5050
"""
5151
config_model = config.model_dump()
52-
storage_strategy = config_model.pop("type")
53-
54-
# Check storage_strategy is a string
55-
if not isinstance(storage_strategy, str):
56-
msg = f"StorageConfig.type must be a string, got {type(storage_strategy)}"
57-
raise TypeError(msg)
52+
storage_strategy = config.type
5853

5954
if storage_strategy not in storage_factory:
6055
match storage_strategy:
61-
case StorageType.FILE:
56+
case StorageType.File:
6257
from graphrag_storage.file_storage import FileStorage
6358

64-
register_storage(StorageType.FILE, FileStorage)
65-
case StorageType.MEMORY:
59+
register_storage(StorageType.File, FileStorage)
60+
case StorageType.Memory:
6661
from graphrag_storage.memory_storage import MemoryStorage
6762

68-
register_storage(StorageType.MEMORY, MemoryStorage)
69-
case StorageType.AZURE_BLOB:
63+
register_storage(StorageType.Memory, MemoryStorage)
64+
case StorageType.AzureBlob:
7065
from graphrag_storage.azure_blob_storage import AzureBlobStorage
7166

72-
register_storage(StorageType.AZURE_BLOB, AzureBlobStorage)
73-
case StorageType.AZURE_COSMOS:
67+
register_storage(StorageType.AzureBlob, AzureBlobStorage)
68+
case StorageType.AzureCosmos:
7469
from graphrag_storage.azure_cosmos_storage import AzureCosmosStorage
7570

76-
register_storage(StorageType.AZURE_COSMOS, AzureCosmosStorage)
71+
register_storage(StorageType.AzureCosmos, AzureCosmosStorage)
7772
case _:
7873
msg = f"StorageConfig.type '{storage_strategy}' is not registered in the StorageFactory. Registered types: {', '.join(storage_factory.keys())}."
7974
raise ValueError(msg)

packages/graphrag-storage/graphrag_storage/storage_type.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
class StorageType(StrEnum):
1111
"""Enum for storage types."""
1212

13-
FILE = "File"
14-
MEMORY = "Memory"
15-
AZURE_BLOB = "AzureBlob"
16-
AZURE_COSMOS = "AzureCosmos"
13+
File = "file"
14+
Memory = "memory"
15+
AzureBlob = "blob"
16+
AzureCosmos = "cosmosdb"

packages/graphrag/graphrag/cache/factory.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ def create_file_cache(**kwargs) -> PipelineCache:
2929
"""Create a file-based cache implementation."""
3030
from graphrag_storage.file_storage import FileStorage
3131

32-
kwargs.pop("type", None)
3332
storage = FileStorage(**kwargs)
3433
return JsonPipelineCache(storage)
3534

@@ -38,7 +37,6 @@ def create_blob_cache(**kwargs) -> PipelineCache:
3837
"""Create a blob storage-based cache implementation."""
3938
from graphrag_storage.azure_blob_storage import AzureBlobStorage
4039

41-
kwargs.pop("type", None)
4240
storage = AzureBlobStorage(**kwargs)
4341
return JsonPipelineCache(storage)
4442

@@ -47,7 +45,6 @@ def create_cosmosdb_cache(**kwargs) -> PipelineCache:
4745
"""Create a CosmosDB-based cache implementation."""
4846
from graphrag_storage.azure_cosmos_storage import AzureCosmosStorage
4947

50-
kwargs.pop("type", None)
5148
storage = AzureCosmosStorage(**kwargs)
5249
return JsonPipelineCache(storage)
5350

0 commit comments

Comments
 (0)