Skip to content

Commit 9fed90c

Browse files
committed
Update cache config to support storage.
1 parent 71f9c09 commit 9fed90c

File tree

12 files changed

+63
-91
lines changed

12 files changed

+63
-91
lines changed

packages/graphrag-cache/README.md

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -8,19 +8,14 @@ from graphrag_storage import StorageConfig, create_storage, StorageType
88
from graphrag_cache import CacheConfig, create_cache, CacheType
99

1010
async def run():
11-
# Json cache requires a storage implementation.
12-
storage = create_storage(
13-
StorageConfig(
14-
type=StorageType.File
15-
base_dir="output"
16-
)
17-
)
18-
1911
cache = create_cache(
2012
CacheConfig(
2113
type=CacheType.Json
14+
storage=StorageConfig(
15+
type=StorageType.File
16+
base_dir="cache"
17+
)
2218
),
23-
storage=storage
2419
)
2520

2621
await cache.set("my_key", {"some": "object to cache"})
@@ -39,8 +34,9 @@ from graphrag_storage import Storage
3934
from graphrag_cache import Cache, CacheConfig, create_cache, register_cache
4035

4136
class MyCache(Cache):
42-
def __init__(self, storage: Storage, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
37+
def __init__(self, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
4338
# Validate settings and initialize
39+
# View the JsonCache implementation to see how to create a cache that relies on a Storage provider.
4440
...
4541

4642
# Implement the rest of the interface
@@ -54,13 +50,12 @@ async def run():
5450
type="MyCache"
5551
some_setting="My Setting"
5652
)
57-
# if your cache relies on a storage implementation you can pass that here
58-
# storage=some_storage
5953
)
54+
6055
# Or use the factory directly to instantiate with a dict instead of using
6156
# CacheConfig + create_cache
6257
# from graphrag_cache.cache_factory import cache_factory
63-
# cache = cache_factory.create(strategy="MyCache", init_args={"storage": storage_implementation, "some_setting": "My Setting"})
58+
# cache = cache_factory.create(strategy="MyCache", init_args={"some_setting": "My Setting"})
6459

6560
await cache.set("my_key", {"some": "object to cache"})
6661
print(await cache.get("my_key"))

packages/graphrag-cache/graphrag_cache/cache_config.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
"""Cache configuration model."""
55

6+
from graphrag_storage import StorageConfig
67
from pydantic import BaseModel, ConfigDict, Field
78

89
from graphrag_cache.cache_type import CacheType
@@ -19,12 +20,7 @@ class CacheConfig(BaseModel):
1920
default=CacheType.Json,
2021
)
2122

22-
encoding: str | None = Field(
23-
description="The encoding to use for file-based caching.",
24-
default=None,
25-
)
26-
27-
name: str | None = Field(
28-
description="The name to use for the cache instance.",
23+
storage: StorageConfig | None = Field(
24+
description="The storage configuration to use for file-based caches such as 'Json'.",
2925
default=None,
3026
)

packages/graphrag-cache/graphrag_cache/cache_factory.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ def create_cache(config: CacheConfig, storage: Storage | None = None) -> Cache:
7777
msg = f"CacheConfig.type '{cache_strategy}' is not registered in the CacheFactory. Registered types: {', '.join(cache_factory.keys())}."
7878
raise ValueError(msg)
7979

80-
return cache_factory.create(
81-
strategy=cache_strategy, init_args={"storage": storage, **config_model}
82-
)
80+
if storage:
81+
config_model["storage"] = storage
82+
83+
return cache_factory.create(strategy=cache_strategy, init_args=config_model)

packages/graphrag-cache/graphrag_cache/cache_type.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,4 +12,4 @@ class CacheType(StrEnum):
1212

1313
Json = "json"
1414
Memory = "memory"
15-
Noop = "noop"
15+
Noop = "none"

packages/graphrag-cache/graphrag_cache/json_cache.py

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import json
77
from typing import Any
88

9-
from graphrag_storage import Storage
9+
from graphrag_storage import Storage, StorageConfig, create_storage
1010

1111
from graphrag_cache.cache import Cache
1212

@@ -15,18 +15,26 @@ class JsonCache(Cache):
1515
"""File pipeline cache class definition."""
1616

1717
_storage: Storage
18-
_encoding: str
1918

20-
def __init__(self, storage: Storage, encoding="utf-8", **kwargs: Any) -> None:
19+
def __init__(
20+
self,
21+
storage: Storage | dict[str, Any] | None = None,
22+
**kwargs: Any,
23+
) -> None:
2124
"""Init method definition."""
22-
self._storage = storage
23-
self._encoding = encoding
25+
if storage is None:
26+
msg = "JsonCache requires either a Storage instance to be provided or a StorageConfig to create one."
27+
raise ValueError(msg)
28+
if isinstance(storage, Storage):
29+
self._storage = storage
30+
else:
31+
self._storage = create_storage(StorageConfig(**storage))
2432

2533
async def get(self, key: str) -> Any | None:
2634
"""Get method definition."""
2735
if await self.has(key):
2836
try:
29-
data = await self._storage.get(key, encoding=self._encoding)
37+
data = await self._storage.get(key)
3038
data = json.loads(data)
3139
except UnicodeDecodeError:
3240
await self._storage.delete(key)
@@ -44,9 +52,7 @@ async def set(self, key: str, value: Any, debug_data: dict | None = None) -> Non
4452
if value is None:
4553
return
4654
data = {"result": value, **(debug_data or {})}
47-
await self._storage.set(
48-
key, json.dumps(data, ensure_ascii=False), encoding=self._encoding
49-
)
55+
await self._storage.set(key, json.dumps(data, ensure_ascii=False))
5056

5157
async def has(self, key: str) -> bool:
5258
"""Has method definition."""
@@ -63,4 +69,4 @@ async def clear(self) -> None:
6369

6470
def child(self, name: str) -> "Cache":
6571
"""Child method definition."""
66-
return JsonCache(self._storage.child(name), encoding=self._encoding)
72+
return JsonCache(storage=self._storage.child(name))

packages/graphrag-cache/graphrag_cache/memory_cache.py

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,9 @@ class MemoryCache(Cache):
1414
_cache: dict[str, Any]
1515
_name: str
1616

17-
def __init__(self, name: str | None = None, **kwargs: Any) -> None:
17+
def __init__(self, **kwargs: Any) -> None:
1818
"""Init method definition."""
1919
self._cache = {}
20-
self._name = name or ""
2120

2221
async def get(self, key: str) -> Any:
2322
"""Get the value for the given key.
@@ -30,7 +29,6 @@ async def get(self, key: str) -> Any:
3029
-------
3130
- output - The value for the given key.
3231
"""
33-
key = self._create_cache_key(key)
3432
return self._cache.get(key)
3533

3634
async def set(self, key: str, value: Any, debug_data: dict | None = None) -> None:
@@ -40,7 +38,6 @@ async def set(self, key: str, value: Any, debug_data: dict | None = None) -> Non
4038
- key - The key to set the value for.
4139
- value - The value to set.
4240
"""
43-
key = self._create_cache_key(key)
4441
self._cache[key] = value
4542

4643
async def has(self, key: str) -> bool:
@@ -53,7 +50,6 @@ async def has(self, key: str) -> bool:
5350
-------
5451
- output - True if the key exists in the storage, False otherwise.
5552
"""
56-
key = self._create_cache_key(key)
5753
return key in self._cache
5854

5955
async def delete(self, key: str) -> None:
@@ -62,7 +58,6 @@ async def delete(self, key: str) -> None:
6258
Args:
6359
- key - The key to delete.
6460
"""
65-
key = self._create_cache_key(key)
6661
del self._cache[key]
6762

6863
async def clear(self) -> None:
@@ -71,8 +66,4 @@ async def clear(self) -> None:
7166

7267
def child(self, name: str) -> "Cache":
7368
"""Create a sub cache with the given name."""
74-
return MemoryCache(name)
75-
76-
def _create_cache_key(self, key: str) -> str:
77-
"""Create a cache key for the given key."""
78-
return f"{self._name}{key}"
69+
return MemoryCache()

packages/graphrag/graphrag/config/defaults.py

Lines changed: 15 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -56,15 +56,6 @@ class BasicSearchDefaults:
5656
embedding_model_id: str = DEFAULT_EMBEDDING_MODEL_ID
5757

5858

59-
@dataclass
60-
class CacheDefaults:
61-
"""Default values for cache."""
62-
63-
type: CacheType = CacheType.Json
64-
encoding: str | None = None
65-
name: str | None = None
66-
67-
6859
@dataclass
6960
class ChunksDefaults:
7061
"""Default values for chunks."""
@@ -238,13 +229,6 @@ class StorageDefaults:
238229
azure_cosmosdb_account_url: None = None
239230

240231

241-
@dataclass
242-
class CacheStorageDefaults(StorageDefaults):
243-
"""Default values for cache storage."""
244-
245-
base_dir: str | None = DEFAULT_CACHE_BASE_DIR
246-
247-
248232
@dataclass
249233
class InputStorageDefaults(StorageDefaults):
250234
"""Default values for input storage."""
@@ -265,6 +249,21 @@ class InputDefaults:
265249
metadata: None = None
266250

267251

252+
@dataclass
253+
class CacheStorageDefaults(StorageDefaults):
254+
"""Default values for cache storage."""
255+
256+
base_dir: str | None = DEFAULT_CACHE_BASE_DIR
257+
258+
259+
@dataclass
260+
class CacheDefaults:
261+
"""Default values for cache."""
262+
263+
type: CacheType = CacheType.Json
264+
storage: CacheStorageDefaults = field(default_factory=CacheStorageDefaults)
265+
266+
268267
@dataclass
269268
class LanguageModelDefaults:
270269
"""Default values for language model."""
@@ -401,7 +400,6 @@ class GraphRagConfigDefaults:
401400
default_factory=UpdateIndexOutputDefaults
402401
)
403402
cache: CacheDefaults = field(default_factory=CacheDefaults)
404-
cache_storage: CacheStorageDefaults = field(default_factory=CacheStorageDefaults)
405403
input: InputDefaults = field(default_factory=InputDefaults)
406404
embed_text: EmbedTextDefaults = field(default_factory=EmbedTextDefaults)
407405
chunks: ChunksDefaults = field(default_factory=ChunksDefaults)

packages/graphrag/graphrag/config/init_content.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
5151
input:
5252
storage:
53-
type: {graphrag_config_defaults.input.storage.type} # or blob, cosmosdb
53+
type: {graphrag_config_defaults.input.storage.type} # [file, blob, cosmosdb]
5454
base_dir: "{graphrag_config_defaults.input.storage.base_dir}"
5555
file_type: {graphrag_config_defaults.input.file_type.value} # [csv, text, json]
5656
@@ -63,15 +63,14 @@
6363
## connection_string and container_name must be provided
6464
6565
output:
66-
type: {graphrag_config_defaults.output.type} # or blob, cosmosdb
66+
type: {graphrag_config_defaults.output.type} # [file, blob, cosmosdb]
6767
base_dir: "{graphrag_config_defaults.output.base_dir}"
68-
69-
cache_storage:
70-
type: {graphrag_config_defaults.cache_storage.type} # [file, blob, cosmosdb]
71-
base_dir: "{graphrag_config_defaults.cache_storage.base_dir}"
7268
7369
cache:
74-
type: {graphrag_config_defaults.cache.type} # [json, memory, noop]
70+
type: {graphrag_config_defaults.cache.type} # [json, memory, none]
71+
storage:
72+
type: {graphrag_config_defaults.cache.storage.type} # [file, blob, cosmosdb]
73+
base_dir: "{graphrag_config_defaults.cache.storage.base_dir}"
7574
7675
reporting:
7776
type: {graphrag_config_defaults.reporting.type.value} # [file, blob]

packages/graphrag/graphrag/config/models/graph_rag_config.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -165,14 +165,6 @@ def _validate_update_index_output_base_dir(self) -> None:
165165
Path(self.update_index_output.base_dir).resolve()
166166
)
167167

168-
cache_storage: StorageConfig | None = Field(
169-
description="The cache storage configuration.",
170-
default=StorageConfig(
171-
**asdict(graphrag_config_defaults.cache_storage),
172-
),
173-
)
174-
"""The cache storage configuration."""
175-
176168
cache: CacheConfig = Field(
177169
description="The cache configuration.",
178170
default=CacheConfig(**asdict(graphrag_config_defaults.cache)),

packages/graphrag/graphrag/index/run/run_pipeline.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,7 @@ async def run_pipeline(
3737
"""Run all workflows using a simplified pipeline."""
3838
input_storage = create_storage(config.input.storage)
3939
output_storage = create_storage(config.output)
40-
cache_storage: Storage | None = None
41-
if config.cache_storage:
42-
cache_storage = create_storage(config.cache_storage)
43-
cache = create_cache(config.cache, storage=cache_storage)
40+
cache = create_cache(config.cache)
4441

4542
# load existing state in case any workflows are stateful
4643
state_json = await output_storage.get("context.json")

0 commit comments

Comments
 (0)