Skip to content

Commit 71f9c09

Browse files
committed
Add GraphRAG Cache package.
1 parent 4404668 commit 71f9c09

File tree

36 files changed

+489
-262
lines changed

36 files changed

+489
-262
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.12

packages/graphrag-cache/README.md

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# GraphRAG Cache
2+
3+
## Basic
4+
5+
```python
6+
import asyncio
7+
from graphrag_storage import StorageConfig, create_storage, StorageType
8+
from graphrag_cache import CacheConfig, create_cache, CacheType
9+
10+
async def run():
11+
# Json cache requires a storage implementation.
12+
storage = create_storage(
13+
StorageConfig(
14+
type=StorageType.File,
15+
base_dir="output"
16+
)
17+
)
18+
19+
cache = create_cache(
20+
CacheConfig(
21+
type=CacheType.Json
22+
),
23+
storage=storage
24+
)
25+
26+
await cache.set("my_key", {"some": "object to cache"})
27+
print(await cache.get("my_key"))
28+
29+
if __name__ == "__main__":
30+
asyncio.run(run())
31+
```
32+
33+
## Custom Cache
34+
35+
```python
36+
import asyncio
37+
from typing import Any
38+
from graphrag_storage import Storage
39+
from graphrag_cache import Cache, CacheConfig, create_cache, register_cache
40+
41+
class MyCache(Cache):
42+
def __init__(self, storage: Storage, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
43+
# Validate settings and initialize
44+
...
45+
46+
# Implement the rest of the Cache interface
47+
...
48+
49+
register_cache("MyCache", MyCache)
50+
51+
async def run():
52+
cache = create_cache(
53+
CacheConfig(
54+
type="MyCache",
55+
some_setting="My Setting"
56+
),
57+
# if your cache relies on a storage implementation you can pass that here
58+
# storage=some_storage
59+
)
60+
# Or use the factory directly to instantiate with a dict instead of using
61+
# CacheConfig + create_cache
62+
# from graphrag_cache.cache_factory import cache_factory
63+
# cache = cache_factory.create(strategy="MyCache", init_args={"storage": storage_implementation, "some_setting": "My Setting"})
64+
65+
await cache.set("my_key", {"some": "object to cache"})
66+
print(await cache.get("my_key"))
67+
68+
if __name__ == "__main__":
69+
asyncio.run(run())
70+
```
71+
72+
### Details
73+
74+
By default, `create_cache` comes with the following cache providers available; they correspond to the entries in the `CacheType` enum.
75+
76+
- `JsonCache`
77+
- `MemoryCache`
78+
- `NoopCache`
79+
80+
The preregistration happens dynamically, e.g., `JsonCache` is only imported and registered the first time you request it with `create_cache(CacheConfig(type=CacheType.Json), ...)`. There is no need to manually import and register builtin cache providers when using `create_cache`.
81+
82+
If you want a clean factory with no preregistered cache providers then directly import `cache_factory` and bypass using `create_cache`. The downside is that `cache_factory.create` uses a dict for init args instead of the strongly typed `CacheConfig` used with `create_cache`.
83+
84+
```python
85+
from graphrag_cache.cache_factory import cache_factory
86+
from graphrag_cache.json_cache import JsonCache
87+
88+
# cache_factory has no preregistered providers so you must register any
89+
# providers you plan on using.
90+
# May also register a custom implementation, see above for example.
91+
cache_factory.register("my_cache_impl", JsonCache)
92+
93+
cache = cache_factory.create(strategy="my_cache_impl", init_args={"some_setting": "..."})
94+
95+
...
96+
97+
```
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright (c) 2024 Microsoft Corporation.
2+
# Licensed under the MIT License
3+
4+
"""The GraphRAG Cache package."""
5+
6+
from graphrag_cache.cache import Cache
7+
from graphrag_cache.cache_config import CacheConfig
8+
from graphrag_cache.cache_factory import create_cache, register_cache
9+
from graphrag_cache.cache_type import CacheType
10+
11+
__all__ = [
12+
"Cache",
13+
"CacheConfig",
14+
"CacheType",
15+
"create_cache",
16+
"register_cache",
17+
]

packages/graphrag/graphrag/cache/pipeline_cache.py renamed to packages/graphrag-cache/graphrag_cache/cache.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
# Copyright (c) 2024 Microsoft Corporation.
22
# Licensed under the MIT License
33

4-
"""A module containing 'PipelineCache' model."""
4+
"""Abstract base class for cache."""
55

66
from __future__ import annotations
77

8-
from abc import ABCMeta, abstractmethod
8+
from abc import ABC, abstractmethod
99
from typing import Any
1010

1111

12-
class PipelineCache(metaclass=ABCMeta):
12+
class Cache(ABC):
1313
"""Provide a cache interface for the pipeline."""
1414

15+
@abstractmethod
16+
def __init__(self, **kwargs: Any) -> None:
17+
"""Create a cache instance."""
18+
1519
@abstractmethod
1620
async def get(self, key: str) -> Any:
1721
"""Get the value for the given key.
@@ -59,7 +63,7 @@ async def clear(self) -> None:
5963
"""Clear the cache."""
6064

6165
@abstractmethod
62-
def child(self, name: str) -> PipelineCache:
66+
def child(self, name: str) -> Cache:
6367
"""Create a child cache with the given name.
6468
6569
Args:
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright (c) 2024 Microsoft Corporation.
2+
# Licensed under the MIT License
3+
4+
"""Cache configuration model."""
5+
6+
from pydantic import BaseModel, ConfigDict, Field
7+
8+
from graphrag_cache.cache_type import CacheType
9+
10+
11+
class CacheConfig(BaseModel):
    """The configuration section for cache.

    Unknown fields are accepted (``extra="allow"``) so that custom cache
    implementations can receive their own settings through this model.
    """

    # Allow extra fields to support custom cache implementations.
    model_config = ConfigDict(extra="allow")

    # The description lists the literal values accepted for the builtin
    # caches, i.e. the lowercase values of the CacheType enum.
    type: str = Field(
        description="The cache type to use. Builtin types include 'json', 'memory', and 'noop'.",
        default=CacheType.Json,
    )

    encoding: str | None = Field(
        description="The encoding to use for file-based caching.",
        default=None,
    )

    name: str | None = Field(
        description="The name to use for the cache instance.",
        default=None,
    )
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright (c) 2024 Microsoft Corporation.
2+
# Licensed under the MIT License
3+
4+
5+
"""Cache factory implementation."""
6+
7+
from collections.abc import Callable
8+
9+
from graphrag_common.factory import Factory, ServiceScope
10+
from graphrag_storage import Storage
11+
12+
from graphrag_cache.cache import Cache
13+
from graphrag_cache.cache_config import CacheConfig
14+
from graphrag_cache.cache_type import CacheType
15+
16+
17+
class CacheFactory(Factory[Cache]):
    """Factory specialised for ``Cache`` implementations.

    Adds nothing on top of ``Factory``; it only fixes the produced type.
    """


# Module-level factory instance shared by the helpers in this module.
cache_factory = CacheFactory()
22+
23+
24+
def register_cache(
25+
cache_type: str,
26+
cache_initializer: Callable[..., Cache],
27+
scope: ServiceScope = "transient",
28+
) -> None:
29+
"""Register a custom storage implementation.
30+
31+
Args
32+
----
33+
- storage_type: str
34+
The storage id to register.
35+
- storage_initializer: Callable[..., Storage]
36+
The storage initializer to register.
37+
"""
38+
cache_factory.register(cache_type, cache_initializer, scope)
39+
40+
41+
def create_cache(config: CacheConfig, storage: Storage | None = None) -> Cache:
42+
"""Create a cache implementation based on the given configuration.
43+
44+
Args
45+
----
46+
- config: CacheConfig
47+
The cache configuration to use.
48+
- storage: Storage | None
49+
The storage implementation to use for file-based caches such as 'Json'.
50+
51+
Returns
52+
-------
53+
Cache
54+
The created cache implementation.
55+
"""
56+
config_model = config.model_dump()
57+
cache_strategy = config.type
58+
59+
if cache_strategy not in cache_factory:
60+
match cache_strategy:
61+
case "json":
62+
from graphrag_cache.json_cache import JsonCache
63+
64+
register_cache(CacheType.Json, JsonCache)
65+
66+
case "memory":
67+
from graphrag_cache.memory_cache import MemoryCache
68+
69+
register_cache(CacheType.Memory, MemoryCache)
70+
71+
case "noop":
72+
from graphrag_cache.noop_cache import NoopCache
73+
74+
register_cache(CacheType.Noop, NoopCache)
75+
76+
case _:
77+
msg = f"CacheConfig.type '{cache_strategy}' is not registered in the CacheFactory. Registered types: {', '.join(cache_factory.keys())}."
78+
raise ValueError(msg)
79+
80+
return cache_factory.create(
81+
strategy=cache_strategy, init_args={"storage": storage, **config_model}
82+
)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright (c) 2024 Microsoft Corporation.
2+
# Licensed under the MIT License
3+
4+
5+
"""Builtin cache implementation types."""
6+
7+
from enum import StrEnum
8+
9+
10+
class CacheType(StrEnum):
11+
"""Enum for cache types."""
12+
13+
Json = "json"
14+
Memory = "memory"
15+
Noop = "noop"

packages/graphrag/graphrag/cache/json_pipeline_cache.py renamed to packages/graphrag-cache/graphrag_cache/json_cache.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,21 @@
88

99
from graphrag_storage import Storage
1010

11-
from graphrag.cache.pipeline_cache import PipelineCache
11+
from graphrag_cache.cache import Cache
1212

1313

14-
class JsonPipelineCache(PipelineCache):
14+
class JsonCache(Cache):
1515
"""File pipeline cache class definition."""
1616

1717
_storage: Storage
1818
_encoding: str
1919

20-
def __init__(self, storage: Storage, encoding="utf-8"):
20+
def __init__(self, storage: Storage, encoding="utf-8", **kwargs: Any) -> None:
2121
"""Init method definition."""
2222
self._storage = storage
2323
self._encoding = encoding
2424

25-
async def get(self, key: str) -> str | None:
25+
async def get(self, key: str) -> Any | None:
2626
"""Get method definition."""
2727
if await self.has(key):
2828
try:
@@ -61,6 +61,6 @@ async def clear(self) -> None:
6161
"""Clear method definition."""
6262
await self._storage.clear()
6363

64-
def child(self, name: str) -> "JsonPipelineCache":
64+
def child(self, name: str) -> "Cache":
6565
"""Child method definition."""
66-
return JsonPipelineCache(self._storage.child(name), encoding=self._encoding)
66+
return JsonCache(self._storage.child(name), encoding=self._encoding)

packages/graphrag/graphrag/cache/memory_pipeline_cache.py renamed to packages/graphrag-cache/graphrag_cache/memory_cache.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
# Copyright (c) 2024 Microsoft Corporation.
22
# Licensed under the MIT License
33

4-
"""A module containing 'InMemoryCache' model."""
4+
"""MemoryCache implementation."""
55

66
from typing import Any
77

8-
from graphrag.cache.pipeline_cache import PipelineCache
8+
from graphrag_cache.cache import Cache
99

1010

11-
class InMemoryCache(PipelineCache):
11+
class MemoryCache(Cache):
1212
"""In memory cache class definition."""
1313

1414
_cache: dict[str, Any]
1515
_name: str
1616

17-
def __init__(self, name: str | None = None):
17+
def __init__(self, name: str | None = None, **kwargs: Any) -> None:
1818
"""Init method definition."""
1919
self._cache = {}
2020
self._name = name or ""
@@ -69,9 +69,9 @@ async def clear(self) -> None:
6969
"""Clear the storage."""
7070
self._cache.clear()
7171

72-
def child(self, name: str) -> PipelineCache:
72+
def child(self, name: str) -> "Cache":
7373
"""Create a sub cache with the given name."""
74-
return InMemoryCache(name)
74+
return MemoryCache(name)
7575

7676
def _create_cache_key(self, key: str) -> str:
7777
"""Create a cache key for the given key."""

packages/graphrag/graphrag/cache/noop_pipeline_cache.py renamed to packages/graphrag-cache/graphrag_cache/noop_cache.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
# Copyright (c) 2024 Microsoft Corporation.
22
# Licensed under the MIT License
33

4-
"""Module containing the NoopPipelineCache implementation."""
4+
"""NoopCache implementation."""
55

66
from typing import Any
77

8-
from graphrag.cache.pipeline_cache import PipelineCache
8+
from graphrag_cache.cache import Cache
99

1010

11-
class NoopPipelineCache(PipelineCache):
12-
"""A no-op implementation of the pipeline cache, usually useful for testing."""
11+
class NoopCache(Cache):
12+
"""A no-op implementation of Cache, usually useful for testing."""
13+
14+
def __init__(self, **kwargs: Any) -> None:
15+
"""Init method definition."""
1316

1417
async def get(self, key: str) -> Any:
1518
"""Get the value for the given key.
@@ -56,7 +59,7 @@ async def delete(self, key: str) -> None:
5659
async def clear(self) -> None:
5760
"""Clear the cache."""
5861

59-
def child(self, name: str) -> PipelineCache:
62+
def child(self, name: str) -> "Cache":
6063
"""Create a child cache with the given name.
6164
6265
Args:

0 commit comments

Comments
 (0)