Skip to content

Commit 290bcc1

Browse files
authored
feat(cache): use configurable hash algorithm for flask-caching (apache#37361)
1 parent 26ac832 commit 290bcc1

File tree

3 files changed

+307
-8
lines changed

3 files changed

+307
-8
lines changed

superset/utils/cache.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from superset.constants import CACHE_DISABLED_TIMEOUT
3232
from superset.extensions import cache_manager
3333
from superset.models.cache import CacheKey
34+
from superset.utils.cache_manager import configurable_hash_method
3435
from superset.utils.hashing import hash_from_dict
3536
from superset.utils.json import json_int_dttm_ser
3637

@@ -273,7 +274,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Response: # noqa: C901
273274
wrapper.uncached = f # type: ignore
274275
wrapper.cache_timeout = timeout # type: ignore
275276
wrapper.make_cache_key = cache._memoize_make_cache_key( # type: ignore # pylint: disable=protected-access
276-
make_name=None, timeout=timeout
277+
make_name=None, timeout=timeout, hash_method=configurable_hash_method
277278
)
278279

279280
return wrapper

superset/utils/cache_manager.py

Lines changed: 134 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
import hashlib
1718
import logging
18-
from typing import Any, Optional, Union
19+
from typing import Any, Callable, Optional, Union
1920

20-
from flask import Flask
21+
from flask import current_app, Flask
2122
from flask_caching import Cache
2223
from markupsafe import Markup
2324

@@ -27,8 +28,134 @@
2728

2829
CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache"
2930

31+
# Hash constructors selectable through the HASH_ALGORITHM setting, keyed by
# algorithm name. Intended to match the algorithms offered by
# superset.utils.hashing (NOTE(review): that module is not visible here -
# confirm the two stay in sync).
_HASH_METHODS: dict[str, Callable[..., Any]] = {
    name: getattr(hashlib, name) for name in ("sha256", "md5")
}
36+
37+
38+
class ConfigurableHashMethod:
    """
    Hash constructor that picks its algorithm each time it is called.

    Flask-caching's memoize decorator evaluates hash_method at decoration time
    (module import), but HASH_ALGORITHM has to be read at function call time,
    when the app context is available. An instance of this class can be handed
    over wherever a hashlib constructor is expected: calling it reads the
    setting from ``current_app.config`` and delegates to the matching entry
    of ``_HASH_METHODS``.
    """

    def __call__(self, data: bytes = b"") -> Any:
        """
        Build a hash object using the currently configured algorithm.

        Args:
            data: Initial bytes passed to the hash constructor (empty by
                default).

        Returns:
            The object produced by the selected constructor, e.g. a sha256
            or md5 hash object.

        Raises:
            ValueError: If HASH_ALGORITHM names an algorithm that is not in
                the lookup table.
        """
        algorithm = current_app.config["HASH_ALGORITHM"]
        if (constructor := _HASH_METHODS.get(algorithm)) is not None:
            return constructor(data)
        raise ValueError(f"Unsupported hash algorithm: {algorithm}")
68+
69+
70+
# Singleton instance to use as default hash_method
71+
# The class defines no instance attributes, so this single object can be
# shared by every caller that needs a `hash_method`.
configurable_hash_method = ConfigurableHashMethod()
72+
73+
74+
class SupersetCache(Cache):
    """
    Flask-Caching ``Cache`` that hashes cache keys with HASH_ALGORITHM.

    Flask-caching uses MD5 by default for cache key generation, which fails
    in FIPS mode where MD5 is disabled. Each override below defaults
    ``hash_method`` to ``configurable_hash_method`` and forwards every
    argument, by keyword, to the parent implementation. Callers may still
    pass an explicit ``hash_method``.

    Note: Switching hash algorithms will invalidate existing cache keys,
    causing a one-time cache miss on upgrade.
    """

    def memoize(
        self,
        timeout: int | None = None,
        make_name: Callable[..., Any] | None = None,
        unless: Callable[..., bool] | None = None,
        forced_update: Callable[..., bool] | None = None,
        response_filter: Callable[..., Any] | None = None,
        hash_method: Callable[..., Any] = configurable_hash_method,
        cache_none: bool = False,
        source_check: bool | None = None,
        args_to_ignore: Any | None = None,
    ) -> Callable[..., Any]:
        """Forward to the parent ``memoize`` with the configurable hash default."""
        forwarded: dict[str, Any] = {
            "timeout": timeout,
            "make_name": make_name,
            "unless": unless,
            "forced_update": forced_update,
            "response_filter": response_filter,
            "hash_method": hash_method,
            "cache_none": cache_none,
            "source_check": source_check,
            "args_to_ignore": args_to_ignore,
        }
        return super().memoize(**forwarded)

    def cached(
        self,
        timeout: int | None = None,
        key_prefix: str = "view/%s",
        unless: Callable[..., bool] | None = None,
        forced_update: Callable[..., bool] | None = None,
        response_filter: Callable[..., Any] | None = None,
        query_string: bool = False,
        hash_method: Callable[..., Any] = configurable_hash_method,
        cache_none: bool = False,
        make_cache_key: Callable[..., Any] | None = None,
        source_check: bool | None = None,
        response_hit_indication: bool | None = False,
    ) -> Callable[..., Any]:
        """Forward to the parent ``cached`` with the configurable hash default."""
        forwarded: dict[str, Any] = {
            "timeout": timeout,
            "key_prefix": key_prefix,
            "unless": unless,
            "forced_update": forced_update,
            "response_filter": response_filter,
            "query_string": query_string,
            "hash_method": hash_method,
            "cache_none": cache_none,
            "make_cache_key": make_cache_key,
            "source_check": source_check,
            "response_hit_indication": response_hit_indication,
        }
        return super().cached(**forwarded)

    # pylint: disable=protected-access
    def _memoize_make_cache_key(
        self,
        make_name: Callable[..., Any] | None = None,
        timeout: Callable[..., Any] | None = None,
        forced_update: bool = False,
        hash_method: Callable[..., Any] = configurable_hash_method,
        source_check: bool | None = False,
        args_to_ignore: Any | None = None,
    ) -> Callable[..., Any]:
        """Forward to the parent key builder with the configurable hash default."""
        forwarded: dict[str, Any] = {
            "make_name": make_name,
            "timeout": timeout,
            "forced_update": forced_update,
            "hash_method": hash_method,
            "source_check": source_check,
            "args_to_ignore": args_to_ignore,
        }
        return super()._memoize_make_cache_key(**forwarded)
156+
30157

31-
class ExploreFormDataCache(Cache):
158+
class ExploreFormDataCache(SupersetCache):
32159
def get(self, *args: Any, **kwargs: Any) -> Optional[Union[str, Markup]]:
33160
cache = self.cache.get(*args, **kwargs)
34161

@@ -53,10 +180,10 @@ class CacheManager:
53180
def __init__(self) -> None:
54181
super().__init__()
55182

56-
self._cache = Cache()
57-
self._data_cache = Cache()
58-
self._thumbnail_cache = Cache()
59-
self._filter_state_cache = Cache()
183+
self._cache = SupersetCache()
184+
self._data_cache = SupersetCache()
185+
self._thumbnail_cache = SupersetCache()
186+
self._filter_state_cache = SupersetCache()
60187
self._explore_form_data_cache = ExploreFormDataCache()
61188

62189
@staticmethod
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
import hashlib
18+
from unittest.mock import MagicMock, patch
19+
20+
import pytest
21+
22+
from superset.utils.cache_manager import (
23+
configurable_hash_method,
24+
ConfigurableHashMethod,
25+
SupersetCache,
26+
)
27+
28+
29+
def test_configurable_hash_method_uses_sha256():
    """HASH_ALGORITHM="sha256" must yield the same digest as hashlib.sha256."""
    fake_app = MagicMock(config={"HASH_ALGORITHM": "sha256"})
    expected = hashlib.sha256(b"test").hexdigest()

    with patch("superset.utils.cache_manager.current_app", fake_app):
        # The config lookup happens inside the call, so it must run patched.
        digest = configurable_hash_method(b"test").hexdigest()

    assert digest == expected
38+
39+
40+
def test_configurable_hash_method_uses_md5():
    """HASH_ALGORITHM="md5" must yield the same digest as hashlib.md5."""
    fake_app = MagicMock(config={"HASH_ALGORITHM": "md5"})
    expected = hashlib.md5(b"test").hexdigest()  # noqa: S324

    with patch("superset.utils.cache_manager.current_app", fake_app):
        # The config lookup happens inside the call, so it must run patched.
        digest = configurable_hash_method(b"test").hexdigest()

    assert digest == expected
49+
50+
51+
def test_configurable_hash_method_empty_data():
    """Calling with no argument must hash the empty byte string."""
    fake_app = MagicMock(config={"HASH_ALGORITHM": "sha256"})
    expected = hashlib.sha256(b"").hexdigest()

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method().hexdigest()

    assert digest == expected
59+
60+
61+
def test_configurable_hash_method_is_callable():
    """A freshly constructed ConfigurableHashMethod must pass ``callable()``."""
    assert callable(ConfigurableHashMethod())
65+
66+
67+
def test_superset_cache_memoize_uses_configurable_hash():
    """SupersetCache.memoize must pass configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the implementation it delegates to.
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "memoize", return_value=lambda f: f
    ) as parent_memoize:
        cache.memoize(timeout=300)

    parent_memoize.assert_called_once()
    forwarded = parent_memoize.call_args.kwargs
    assert forwarded["hash_method"] is configurable_hash_method
79+
80+
81+
def test_superset_cache_memoize_allows_explicit_hash_method():
    """An explicit hash_method given to memoize must reach the parent as-is."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the implementation it delegates to.
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "memoize", return_value=lambda f: f
    ) as parent_memoize:
        cache.memoize(timeout=300, hash_method=hashlib.md5)

    parent_memoize.assert_called_once()
    forwarded = parent_memoize.call_args.kwargs
    assert forwarded["hash_method"] == hashlib.md5
93+
94+
95+
def test_superset_cache_cached_uses_configurable_hash():
    """SupersetCache.cached must pass configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the implementation it delegates to.
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "cached", return_value=lambda f: f
    ) as parent_cached:
        cache.cached(timeout=300)

    parent_cached.assert_called_once()
    forwarded = parent_cached.call_args.kwargs
    assert forwarded["hash_method"] is configurable_hash_method
107+
108+
109+
def test_superset_cache_cached_allows_explicit_hash_method():
    """An explicit hash_method given to cached must reach the parent as-is."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the implementation it delegates to.
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "cached", return_value=lambda f: f
    ) as parent_cached:
        cache.cached(timeout=300, hash_method=hashlib.md5)

    parent_cached.assert_called_once()
    forwarded = parent_cached.call_args.kwargs
    assert forwarded["hash_method"] == hashlib.md5
121+
122+
123+
def test_superset_cache_memoize_make_cache_key_uses_configurable_hash():
    """_memoize_make_cache_key must pass configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the implementation it delegates to.
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls,
        "_memoize_make_cache_key",
        return_value=lambda *args, **kwargs: "cache_key",
    ) as parent_make_key:
        cache._memoize_make_cache_key(make_name=None, timeout=300)

    parent_make_key.assert_called_once()
    forwarded = parent_make_key.call_args.kwargs
    assert forwarded["hash_method"] is configurable_hash_method
137+
138+
139+
def test_superset_cache_memoize_make_cache_key_allows_explicit_hash():
    """An explicit hash_method must reach the parent key builder as-is."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the implementation it delegates to.
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls,
        "_memoize_make_cache_key",
        return_value=lambda *args, **kwargs: "cache_key",
    ) as parent_make_key:
        cache._memoize_make_cache_key(
            make_name=None, timeout=300, hash_method=hashlib.md5
        )

    parent_make_key.assert_called_once()
    forwarded = parent_make_key.call_args.kwargs
    assert forwarded["hash_method"] == hashlib.md5
155+
156+
157+
@pytest.mark.parametrize(
    ("algorithm", "expected_digest"),
    [
        ("sha256", hashlib.sha256(b"test_data").hexdigest()),
        ("md5", hashlib.md5(b"test_data").hexdigest()),  # noqa: S324
    ],
)
def test_configurable_hash_method_parametrized(algorithm, expected_digest):
    """Each supported HASH_ALGORITHM value must produce hashlib's digest."""
    fake_app = MagicMock(config={"HASH_ALGORITHM": algorithm})

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method(b"test_data").hexdigest()

    assert digest == expected_digest

0 commit comments

Comments
 (0)