mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
feat(cache): use configurable hash algorithm for flask-caching (#37361)
This commit is contained in:
committed by
GitHub
parent
26ac832138
commit
290bcc1dbb
@@ -31,6 +31,7 @@ from superset import db
|
||||
from superset.constants import CACHE_DISABLED_TIMEOUT
|
||||
from superset.extensions import cache_manager
|
||||
from superset.models.cache import CacheKey
|
||||
from superset.utils.cache_manager import configurable_hash_method
|
||||
from superset.utils.hashing import hash_from_dict
|
||||
from superset.utils.json import json_int_dttm_ser
|
||||
|
||||
@@ -273,7 +274,7 @@ def etag_cache( # noqa: C901
|
||||
wrapper.uncached = f # type: ignore
|
||||
wrapper.cache_timeout = timeout # type: ignore
|
||||
wrapper.make_cache_key = cache._memoize_make_cache_key( # type: ignore # pylint: disable=protected-access
|
||||
make_name=None, timeout=timeout
|
||||
make_name=None, timeout=timeout, hash_method=configurable_hash_method
|
||||
)
|
||||
|
||||
return wrapper
|
||||
|
||||
@@ -14,10 +14,11 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import hashlib
|
||||
import logging
|
||||
from typing import Any, Optional, Union
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
from flask import Flask
|
||||
from flask import current_app, Flask
|
||||
from flask_caching import Cache
|
||||
from markupsafe import Markup
|
||||
|
||||
@@ -27,8 +28,134 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache"
|
||||
|
||||
# Lookup table from algorithm name to hashlib constructor, matching
# superset.utils.hashing
_HASH_METHODS: dict[str, Callable[..., Any]] = {
    name: getattr(hashlib, name) for name in ("sha256", "md5")
}
|
||||
|
||||
class ExploreFormDataCache(Cache):
|
||||
|
||||
class ConfigurableHashMethod:
    """
    Hash constructor that picks its algorithm each time it is called.

    Flask-caching's memoize decorator evaluates hash_method at decoration
    time (module import), whereas the HASH_ALGORITHM config has to be read at
    function call time, when the app context is available. An instance of
    this class can be passed wherever a hashlib function is expected; the
    configured algorithm is looked up on every call.
    """

    def __call__(self, data: bytes = b"") -> Any:
        """
        Build a hash object with the algorithm named by HASH_ALGORITHM.

        Args:
            data: Optional initial data to hash

        Returns:
            A hashlib hash object (e.g., sha256 or md5)

        Raises:
            ValueError: If HASH_ALGORITHM is set to an unsupported value
        """
        algorithm = current_app.config["HASH_ALGORITHM"]
        # Guard first: anything outside the lookup table is rejected.
        if algorithm not in _HASH_METHODS:
            raise ValueError(f"Unsupported hash algorithm: {algorithm}")
        constructor = _HASH_METHODS[algorithm]
        return constructor(data)


# Shared module-level instance, used as the default hash_method
configurable_hash_method = ConfigurableHashMethod()
|
||||
|
||||
|
||||
class SupersetCache(Cache):
    """
    Cache subclass whose key hashing follows the configured HASH_ALGORITHM.

    Flask-caching uses MD5 by default for cache key generation, which fails
    in FIPS mode where MD5 is disabled. The methods overridden here supply
    ``configurable_hash_method`` as the ``hash_method`` default and forward
    every argument by keyword to the parent implementation.

    Note: Switching hash algorithms will invalidate existing cache keys,
    causing a one-time cache miss on upgrade.
    """

    def memoize(
        self,
        timeout: int | None = None,
        make_name: Callable[..., Any] | None = None,
        unless: Callable[..., bool] | None = None,
        forced_update: Callable[..., bool] | None = None,
        response_filter: Callable[..., Any] | None = None,
        hash_method: Callable[..., Any] = configurable_hash_method,
        cache_none: bool = False,
        source_check: bool | None = None,
        args_to_ignore: Any | None = None,
    ) -> Callable[..., Any]:
        """Call the parent ``memoize``; hash_method defaults to the configurable hash."""
        forwarded: dict[str, Any] = {
            "timeout": timeout,
            "make_name": make_name,
            "unless": unless,
            "forced_update": forced_update,
            "response_filter": response_filter,
            "hash_method": hash_method,
            "cache_none": cache_none,
            "source_check": source_check,
            "args_to_ignore": args_to_ignore,
        }
        return super().memoize(**forwarded)

    def cached(
        self,
        timeout: int | None = None,
        key_prefix: str = "view/%s",
        unless: Callable[..., bool] | None = None,
        forced_update: Callable[..., bool] | None = None,
        response_filter: Callable[..., Any] | None = None,
        query_string: bool = False,
        hash_method: Callable[..., Any] = configurable_hash_method,
        cache_none: bool = False,
        make_cache_key: Callable[..., Any] | None = None,
        source_check: bool | None = None,
        response_hit_indication: bool | None = False,
    ) -> Callable[..., Any]:
        """Call the parent ``cached``; hash_method defaults to the configurable hash."""
        forwarded: dict[str, Any] = {
            "timeout": timeout,
            "key_prefix": key_prefix,
            "unless": unless,
            "forced_update": forced_update,
            "response_filter": response_filter,
            "query_string": query_string,
            "hash_method": hash_method,
            "cache_none": cache_none,
            "make_cache_key": make_cache_key,
            "source_check": source_check,
            "response_hit_indication": response_hit_indication,
        }
        return super().cached(**forwarded)

    # pylint: disable=protected-access
    def _memoize_make_cache_key(
        self,
        make_name: Callable[..., Any] | None = None,
        timeout: Callable[..., Any] | None = None,
        forced_update: bool = False,
        hash_method: Callable[..., Any] = configurable_hash_method,
        source_check: bool | None = False,
        args_to_ignore: Any | None = None,
    ) -> Callable[..., Any]:
        """Call the parent key builder; hash_method defaults to the configurable hash."""
        forwarded: dict[str, Any] = {
            "make_name": make_name,
            "timeout": timeout,
            "forced_update": forced_update,
            "hash_method": hash_method,
            "source_check": source_check,
            "args_to_ignore": args_to_ignore,
        }
        return super()._memoize_make_cache_key(**forwarded)
|
||||
|
||||
|
||||
class ExploreFormDataCache(SupersetCache):
|
||||
def get(self, *args: Any, **kwargs: Any) -> Optional[Union[str, Markup]]:
|
||||
cache = self.cache.get(*args, **kwargs)
|
||||
|
||||
@@ -53,10 +180,10 @@ class CacheManager:
|
||||
def __init__(self) -> None:
    """Instantiate one cache object per cache category held by the manager."""
    super().__init__()

    # Each attribute is created exactly once, as a SupersetCache (or its
    # ExploreFormDataCache subclass). The earlier plain ``Cache()``
    # assignments were overwritten immediately and have been dropped.
    self._cache = SupersetCache()
    self._data_cache = SupersetCache()
    self._thumbnail_cache = SupersetCache()
    self._filter_state_cache = SupersetCache()
    self._explore_form_data_cache = ExploreFormDataCache()
|
||||
|
||||
@staticmethod
|
||||
|
||||
171
tests/unit_tests/utils/test_cache_manager.py
Normal file
171
tests/unit_tests/utils/test_cache_manager.py
Normal file
@@ -0,0 +1,171 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import hashlib
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from superset.utils.cache_manager import (
|
||||
configurable_hash_method,
|
||||
ConfigurableHashMethod,
|
||||
SupersetCache,
|
||||
)
|
||||
|
||||
|
||||
def test_configurable_hash_method_uses_sha256():
    """With HASH_ALGORITHM set to sha256, the callable produces a sha256 hash."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": "sha256"}
    expected = hashlib.sha256(b"test").hexdigest()

    with patch("superset.utils.cache_manager.current_app", fake_app):
        # The digest must match what hashlib.sha256 gives for the same input
        digest = configurable_hash_method(b"test").hexdigest()
        assert digest == expected
|
||||
|
||||
|
||||
def test_configurable_hash_method_uses_md5():
    """With HASH_ALGORITHM set to md5, the callable produces an md5 hash."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": "md5"}
    expected = hashlib.md5(b"test").hexdigest()  # noqa: S324

    with patch("superset.utils.cache_manager.current_app", fake_app):
        # The digest must match what hashlib.md5 gives for the same input
        digest = configurable_hash_method(b"test").hexdigest()
        assert digest == expected
|
||||
|
||||
|
||||
def test_configurable_hash_method_empty_data():
    """Calling without arguments hashes the empty byte string."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": "sha256"}
    expected = hashlib.sha256(b"").hexdigest()

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method().hexdigest()
        assert digest == expected
|
||||
|
||||
|
||||
def test_configurable_hash_method_is_callable():
    """A freshly built ConfigurableHashMethod instance is callable."""
    assert callable(ConfigurableHashMethod())
|
||||
|
||||
|
||||
def test_superset_cache_memoize_uses_configurable_hash():
    """SupersetCache.memoize forwards configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache: the implementation being delegated to
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "memoize", return_value=lambda f: f
    ) as mock_memoize:
        cache.memoize(timeout=300)

        mock_memoize.assert_called_once()
        forwarded = mock_memoize.call_args.kwargs
        assert forwarded["hash_method"] is configurable_hash_method
|
||||
|
||||
|
||||
def test_superset_cache_memoize_allows_explicit_hash_method():
    """An explicit hash_method passed to SupersetCache.memoize is forwarded."""
    cache = SupersetCache()
    # First base class of SupersetCache: the implementation being delegated to
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "memoize", return_value=lambda f: f
    ) as mock_memoize:
        cache.memoize(timeout=300, hash_method=hashlib.md5)

        mock_memoize.assert_called_once()
        forwarded = mock_memoize.call_args.kwargs
        assert forwarded["hash_method"] == hashlib.md5
|
||||
|
||||
|
||||
def test_superset_cache_cached_uses_configurable_hash():
    """SupersetCache.cached forwards configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache: the implementation being delegated to
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "cached", return_value=lambda f: f
    ) as mock_cached:
        cache.cached(timeout=300)

        mock_cached.assert_called_once()
        forwarded = mock_cached.call_args.kwargs
        assert forwarded["hash_method"] is configurable_hash_method
|
||||
|
||||
|
||||
def test_superset_cache_cached_allows_explicit_hash_method():
    """An explicit hash_method passed to SupersetCache.cached is forwarded."""
    cache = SupersetCache()
    # First base class of SupersetCache: the implementation being delegated to
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "cached", return_value=lambda f: f
    ) as mock_cached:
        cache.cached(timeout=300, hash_method=hashlib.md5)

        mock_cached.assert_called_once()
        forwarded = mock_cached.call_args.kwargs
        assert forwarded["hash_method"] == hashlib.md5
|
||||
|
||||
|
||||
def test_superset_cache_memoize_make_cache_key_uses_configurable_hash():
    """_memoize_make_cache_key forwards configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache: the implementation being delegated to
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls,
        "_memoize_make_cache_key",
        return_value=lambda *args, **kwargs: "cache_key",
    ) as mock_make_key:
        cache._memoize_make_cache_key(make_name=None, timeout=300)

        mock_make_key.assert_called_once()
        forwarded = mock_make_key.call_args.kwargs
        assert forwarded["hash_method"] is configurable_hash_method
|
||||
|
||||
|
||||
def test_superset_cache_memoize_make_cache_key_allows_explicit_hash():
    """An explicit hash_method given to _memoize_make_cache_key is forwarded."""
    cache = SupersetCache()
    # First base class of SupersetCache: the implementation being delegated to
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls,
        "_memoize_make_cache_key",
        return_value=lambda *args, **kwargs: "cache_key",
    ) as mock_make_key:
        cache._memoize_make_cache_key(
            make_name=None, timeout=300, hash_method=hashlib.md5
        )

        mock_make_key.assert_called_once()
        forwarded = mock_make_key.call_args.kwargs
        assert forwarded["hash_method"] == hashlib.md5
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "algorithm,expected_digest",
    [
        ("sha256", hashlib.sha256(b"test_data").hexdigest()),
        ("md5", hashlib.md5(b"test_data").hexdigest()),  # noqa: S324
    ],
)
def test_configurable_hash_method_parametrized(algorithm, expected_digest):
    """For each configured algorithm, the digest equals hashlib's own digest."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": algorithm}

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method(b"test_data").hexdigest()
        assert digest == expected_digest
|
||||
Reference in New Issue
Block a user