Skip to content

Commit c09412e

Browse files
committed
working file caching
1 parent f58aa56 commit c09412e

File tree

8 files changed

+309
-69
lines changed

8 files changed

+309
-69
lines changed

src/firebolt/async_db/connection.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ async def connect(
237237
if not auth:
238238
raise ConfigurationError("auth is required to connect.")
239239

240+
if account_name:
241+
auth._account_name = account_name
242+
240243
api_endpoint = fix_url_schema(api_endpoint)
241244
# Type checks
242245
assert auth is not None

src/firebolt/client/auth/base.py

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@
88
from httpx import Auth as HttpxAuth
99
from httpx import Request, Response, codes
1010

11-
from firebolt.utils.token_storage import TokenSecureStorage
12-
from firebolt.utils.util import Timer, cached_property, get_internal_error_code
11+
from firebolt.utils.cache import (
12+
ConnectionInfo,
13+
SecureCacheKey,
14+
_firebolt_cache,
15+
)
16+
from firebolt.utils.util import Timer, get_internal_error_code
1317

1418
logger = logging.getLogger(__name__)
1519

@@ -38,6 +42,7 @@ class Auth(HttpxAuth):
3842

3943
__slots__ = (
4044
"_token",
45+
"_account_name",
4146
"_expires",
4247
"_use_token_cache",
4348
)
@@ -47,7 +52,8 @@ class Auth(HttpxAuth):
4752

4853
def __init__(self, use_token_cache: bool = True):
4954
self._use_token_cache = use_token_cache
50-
self._token: Optional[str] = self._get_cached_token()
55+
self._account_name: Optional[str] = None
56+
self._token: Optional[str] = None
5157
self._expires: Optional[int] = None
5258
self._lock = Lock()
5359

@@ -103,36 +109,49 @@ def expired(self) -> bool:
103109
"""
104110
return self._expires is not None and self._expires <= int(time())
105111

106-
@cached_property
107-
def _token_storage(self) -> Optional[TokenSecureStorage]:
108-
"""Token filesystem cache storage.
109-
110-
This is evaluated lazily, only if caching is enabled.
111-
112-
Returns:
113-
Optional[TokenSecureStorage]: Token filesystem cache storage if any
114-
"""
115-
return None
116-
117112
def _get_cached_token(self) -> Optional[str]:
    """Look up a previously cached token for this auth object.

    The cache is not consulted at all when token caching is disabled.

    Returns:
        Optional[str]: The cached token when caching is enabled and the
        cache holds a non-empty token for this principal / secret /
        account name combination; None otherwise
    """
    if self._use_token_cache:
        # The secret is both part of the key material and the encryption key
        lookup_key = SecureCacheKey(
            [self.principal, self.secret, self._account_name], self.secret
        )
        cached_info = _firebolt_cache.get(lookup_key)
        if cached_info and cached_info.token:
            return cached_info.token

    return None
128132

129133
def _cache_token(self) -> None:
    """Store the current token in the cache, if caching is enabled.

    Nothing is written when caching is disabled or when no token has been
    retrieved yet.
    """
    if not (self._use_token_cache and self._token):
        return

    entry_key = SecureCacheKey(
        [self.principal, self.secret, self._account_name], self.secret
    )

    # Reuse the existing entry so that anything else stored on it is kept;
    # only the token field is overwritten below.
    entry = _firebolt_cache.get(entry_key)
    if entry is None:
        # Token caching is triggered first, so no real id is known yet and
        # a placeholder is stored instead.
        entry = ConnectionInfo(id="NONE")

    entry.token = self._token
    _firebolt_cache.set(entry_key, entry)
136155

137156
@abstractmethod
138157
def get_new_token_generator(self) -> Generator[Request, Response, None]:

src/firebolt/client/auth/client_credentials.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
from typing import Optional
2-
31
from firebolt.client.auth.base import AuthRequest, FireboltAuthVersion
42
from firebolt.client.auth.request_auth_base import _RequestBasedAuth
5-
from firebolt.utils.token_storage import TokenSecureStorage
63
from firebolt.utils.urls import AUTH_SERVICE_ACCOUNT_URL
7-
from firebolt.utils.util import cached_property
84

95

106
class ClientCredentials(_RequestBasedAuth):
@@ -79,17 +75,6 @@ def get_firebolt_version(self) -> FireboltAuthVersion:
7975
"""
8076
return FireboltAuthVersion.V2
8177

82-
@cached_property
83-
def _token_storage(self) -> Optional[TokenSecureStorage]:
84-
"""Token filesystem cache storage.
85-
86-
This is evaluated lazily, only if caching is enabled
87-
88-
Returns:
89-
TokenSecureStorage: Token filesystem cache storage
90-
"""
91-
return TokenSecureStorage(username=self.client_id, password=self.client_secret)
92-
9378
def _make_auth_request(self) -> AuthRequest:
9479
"""Get new token using username and password.
9580

src/firebolt/client/auth/service_account.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
from typing import Optional
2-
31
from firebolt.client.auth.base import AuthRequest, FireboltAuthVersion
42
from firebolt.client.auth.request_auth_base import _RequestBasedAuth
5-
from firebolt.utils.token_storage import TokenSecureStorage
63
from firebolt.utils.urls import AUTH_SERVICE_ACCOUNT_URL
7-
from firebolt.utils.util import cached_property
84

95

106
class ServiceAccount(_RequestBasedAuth):
@@ -77,17 +73,6 @@ def copy(self) -> "ServiceAccount":
7773
"""
7874
return ServiceAccount(self.client_id, self.client_secret, self._use_token_cache)
7975

80-
@cached_property
81-
def _token_storage(self) -> Optional[TokenSecureStorage]:
82-
"""Token filesystem cache storage.
83-
84-
This is evaluated lazily, only if caching is enabled
85-
86-
Returns:
87-
TokenSecureStorage: Token filesystem cache storage
88-
"""
89-
return TokenSecureStorage(username=self.client_id, password=self.client_secret)
90-
9176
def _make_auth_request(self) -> AuthRequest:
9277
"""Get new token using username and password.
9378

src/firebolt/client/auth/username_password.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
from typing import Optional
2-
31
from firebolt.client.auth.base import AuthRequest, FireboltAuthVersion
42
from firebolt.client.auth.request_auth_base import _RequestBasedAuth
5-
from firebolt.utils.token_storage import TokenSecureStorage
63
from firebolt.utils.urls import AUTH_URL
7-
from firebolt.utils.util import cached_property
84

95

106
class UsernamePassword(_RequestBasedAuth):
@@ -77,17 +73,6 @@ def copy(self) -> "UsernamePassword":
7773
"""
7874
return UsernamePassword(self.username, self.password, self._use_token_cache)
7975

80-
@cached_property
81-
def _token_storage(self) -> Optional[TokenSecureStorage]:
82-
"""Token filesystem cache storage.
83-
84-
This is evaluated lazily, only if caching is enabled
85-
86-
Returns:
87-
TokenSecureStorage: Token filesystem cache storage
88-
"""
89-
return TokenSecureStorage(username=self.username, password=self.password)
90-
9176
def _make_auth_request(self) -> AuthRequest:
9277
"""Get new token using username and password.
9378

src/firebolt/db/connection.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ def connect(
6666
if not auth:
6767
raise ConfigurationError("auth is required to connect.")
6868

69+
if account_name:
70+
auth._account_name = account_name
71+
6972
api_endpoint = fix_url_schema(api_endpoint)
7073
# Type checks
7174
assert auth is not None

src/firebolt/utils/cache.py

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
import logging
12
import os
23
import time
3-
from dataclasses import dataclass, field
4+
from dataclasses import asdict, dataclass, field
5+
from json import JSONDecodeError
6+
from json import dumps as json_dumps
7+
from json import loads as json_loads
8+
from os import makedirs, path
49
from typing import (
510
Any,
611
Callable,
@@ -12,10 +17,21 @@
1217
TypeVar,
1318
)
1419

20+
from appdirs import user_data_dir
21+
22+
from firebolt.utils.file_operations import (
23+
FernetEncrypter,
24+
generate_encrypted_file_name,
25+
generate_salt,
26+
)
27+
1528
T = TypeVar("T")
1629

1730
# Cache expiry configuration
1831
CACHE_EXPIRY_SECONDS = 3600 # 1 hour
32+
APPNAME = "firebolt"
33+
34+
logger = logging.getLogger(__name__)
1935

2036

2137
class ReprCacheable(Protocol):
@@ -47,6 +63,22 @@ class ConnectionInfo:
4763
system_engine: Optional[EngineInfo] = None
4864
databases: Dict[str, DatabaseInfo] = field(default_factory=dict)
4965
engines: Dict[str, EngineInfo] = field(default_factory=dict)
66+
token: Optional[str] = None
67+
68+
def __post_init__(self) -> None:
    """
    Post-initialization processing to convert dicts to dataclasses.

    Nested values given as plain dicts (for example after a JSON
    round-trip) are turned into ``EngineInfo`` / ``DatabaseInfo``
    instances. Values that are not dicts are kept unchanged, so a
    ConnectionInfo constructed directly from dataclass instances does not
    lose its databases or engines.
    """
    if self.system_engine and isinstance(self.system_engine, dict):
        self.system_engine = EngineInfo(**self.system_engine)
    # Convert only dict values; filtering on isinstance() here would
    # silently drop entries that are already dataclass instances.
    self.databases = {
        name: DatabaseInfo(**info) if isinstance(info, dict) else info
        for name, info in self.databases.items()
    }
    self.engines = {
        name: EngineInfo(**info) if isinstance(info, dict) else info
        for name, info in self.engines.items()
    }
5082

5183

5284
def noop_if_disabled(func: Callable) -> Callable:
@@ -150,4 +182,118 @@ def __hash__(self) -> int:
150182
return hash(self.key)
151183

152184

153-
_firebolt_cache = UtilCache[ConnectionInfo](cache_name="connection_info")
185+
class FileBasedCache(UtilCache[ConnectionInfo]):
    """
    File-based cache that persists to disk with encryption.
    Extends UtilCache to provide persistent storage using encrypted files.

    Disk persistence is best-effort: any failure to create the directory,
    read, decode, write or delete a cache file is logged at debug level and
    treated as a cache miss / no-op, so the in-memory cache keeps working.
    """

    def __init__(self, cache_name: str = ""):
        """Create the cache and make sure its data directory exists.

        Args:
            cache_name (str): Name passed through to UtilCache
        """
        super().__init__(cache_name)
        self._data_dir = user_data_dir(appname=APPNAME)  # TODO: change to new dir
        try:
            makedirs(self._data_dir, exist_ok=True)
        except OSError as e:
            # An instance is created at module import time; an unwritable
            # home directory must not make the import fail. Later reads and
            # writes fail softly in their own handlers.
            logger.debug(
                "Failed to create cache directory %s error: %s", self._data_dir, e
            )

    def _get_file_path(self, key: SecureCacheKey) -> str:
        """Get the file path for a cache key."""
        cache_key = self.create_key(key)
        encrypted_filename = generate_encrypted_file_name(cache_key, key.encryption_key)
        return path.join(self._data_dir, encrypted_filename)

    def _read_data_json(self, file_path: str, encrypter: FernetEncrypter) -> dict:
        """Read and decrypt JSON data from file.

        Args:
            file_path (str): Path of the encrypted cache file
            encrypter (FernetEncrypter): Encrypter used to decrypt the content

        Returns:
            dict: Decoded JSON object; an empty dict if the file is missing,
            unreadable, cannot be decrypted, or does not hold a JSON object
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                encrypted_data = f.read()
        except FileNotFoundError:
            # A missing file is the normal "not cached yet" case
            return {}
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError for a corrupted file
            logger.debug(
                "Failed to read or decode data from %s error: %s", file_path, e
            )
            return {}

        decrypted_data = encrypter.decrypt(encrypted_data)
        if decrypted_data is None:
            logger.debug("Decryption failed for %s", file_path)
            return {}
        if not decrypted_data:
            return {}

        try:
            data = json_loads(decrypted_data)
        except ValueError as e:  # JSONDecodeError is a ValueError subclass
            logger.debug(
                "Failed to read or decode data from %s error: %s", file_path, e
            )
            return {}

        # Only a JSON object can be expanded into ConnectionInfo fields
        return data if isinstance(data, dict) else {}

    def _write_data_json(
        self, file_path: str, data: dict, encrypter: FernetEncrypter
    ) -> None:
        """Encrypt and write JSON data to file.

        Args:
            file_path (str): Path of the encrypted cache file
            data (dict): JSON-serializable data to store
            encrypter (FernetEncrypter): Encrypter used to encrypt the content
        """
        try:
            json_str = json_dumps(data)
            logger.debug("Writing data to %s", file_path)
            encrypted_data = encrypter.encrypt(json_str)
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(encrypted_data)
        except (OSError, TypeError, ValueError) as e:
            # Silently proceed if we can't serialize or write to disk
            logger.debug("Failed to write data to %s error: %s", file_path, e)

    def get(self, key: SecureCacheKey) -> Optional[ConnectionInfo]:
        """Get value from cache, checking both memory and disk.

        Args:
            key (SecureCacheKey): Cache key, also providing the encryption key

        Returns:
            Optional[ConnectionInfo]: Cached value; None if the cache is
            disabled or holds no usable entry for the key
        """
        if self.disabled:
            return None

        # First try memory cache
        memory_result = super().get(key)
        if memory_result is not None:
            logger.debug("Cache hit in memory")
            return memory_result

        # If not in memory, try to load from disk
        file_path = self._get_file_path(key)
        # NOTE(review): a fresh salt is generated for both reading and
        # writing; this only round-trips if the derived key does not depend
        # on a random salt - confirm against generate_salt/FernetEncrypter.
        encrypter = FernetEncrypter(generate_salt(), key.encryption_key)
        raw_data = self._read_data_json(file_path, encrypter)
        if not raw_data:
            return None

        try:
            data = ConnectionInfo(**raw_data)
        except TypeError as e:
            # File written with a different schema (unknown or missing
            # fields): treat it as a miss instead of failing the caller
            logger.debug("Ignoring incompatible cache file %s error: %s", file_path, e)
            return None
        logger.debug("Cache hit on disk")

        # Add to memory cache and return
        super().set(key, data)
        return data

    def set(self, key: SecureCacheKey, value: ConnectionInfo) -> None:
        """Set value in both memory and disk cache.

        Args:
            key (SecureCacheKey): Cache key, also providing the encryption key
            value (ConnectionInfo): Value to store
        """
        if self.disabled:
            return

        logger.debug("Setting value in cache")
        # First set in memory
        super().set(key, value)

        file_path = self._get_file_path(key)
        encrypter = FernetEncrypter(generate_salt(), key.encryption_key)
        data = asdict(value)

        self._write_data_json(file_path, data, encrypter)

    def delete(self, key: SecureCacheKey) -> None:
        """Delete value from both memory and disk cache.

        Args:
            key (SecureCacheKey): Cache key of the entry to remove
        """
        if self.disabled:
            return

        # Delete from memory
        super().delete(key)

        # Delete from disk
        file_path = self._get_file_path(key)
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Nothing persisted for this key
            pass
        except OSError:
            # Silently proceed if we can't delete the file
            logger.debug("Failed to delete file %s", file_path)

    def clear(self) -> None:
        """Clear the in-memory cache; files on disk are left untouched."""
        # Clear memory only, as deleting every file is not safe
        logger.debug("Clearing memory cache")
        super().clear()
299+
_firebolt_cache = FileBasedCache(cache_name="connection_info")

0 commit comments

Comments
 (0)