feat(cache): use configurable hash algorithm for flask-caching (#37361)

This commit is contained in:
Daniel Vaz Gaspar
2026-01-26 18:19:51 +00:00
committed by GitHub
parent 26ac832138
commit 290bcc1dbb
3 changed files with 307 additions and 8 deletions

View File

@@ -31,6 +31,7 @@ from superset import db
from superset.constants import CACHE_DISABLED_TIMEOUT
from superset.extensions import cache_manager
from superset.models.cache import CacheKey
from superset.utils.cache_manager import configurable_hash_method
from superset.utils.hashing import hash_from_dict
from superset.utils.json import json_int_dttm_ser
@@ -273,7 +274,7 @@ def etag_cache( # noqa: C901
wrapper.uncached = f # type: ignore
wrapper.cache_timeout = timeout # type: ignore
wrapper.make_cache_key = cache._memoize_make_cache_key( # type: ignore # pylint: disable=protected-access
make_name=None, timeout=timeout
make_name=None, timeout=timeout, hash_method=configurable_hash_method
)
return wrapper

View File

@@ -14,10 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import hashlib
import logging
from typing import Any, Optional, Union
from typing import Any, Callable, Optional, Union
from flask import Flask
from flask import current_app, Flask
from flask_caching import Cache
from markupsafe import Markup
@@ -27,8 +28,134 @@ logger = logging.getLogger(__name__)
CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache"
# Hash constructors selectable through the HASH_ALGORITHM config value, keyed
# by algorithm name (kept in line with superset.utils.hashing).
_HASH_METHODS: dict[str, Callable[..., Any]] = {
    name: getattr(hashlib, name) for name in ("sha256", "md5")
}
class ExploreFormDataCache(Cache):
class ConfigurableHashMethod:
    """
    Hash factory that picks its algorithm on every call.

    Flask-caching's memoize decorator evaluates ``hash_method`` at decoration
    time (module import), before an app context is available to read the
    ``HASH_ALGORITHM`` setting from. An instance of this class can be handed
    over at that point instead: it is called like a ``hashlib`` constructor,
    but only consults ``current_app.config`` when it is actually invoked.
    """

    def __call__(self, data: bytes = b"") -> Any:
        """
        Build a hash object with the currently configured algorithm.

        Args:
            data: Initial bytes passed to the hash constructor; empty by
                default.

        Returns:
            Whatever the matching ``_HASH_METHODS`` entry returns, i.e. a
            ``hashlib`` hash object (sha256 or md5).

        Raises:
            ValueError: If ``HASH_ALGORITHM`` holds a name that has no entry
                in ``_HASH_METHODS``.
        """
        algorithm = current_app.config["HASH_ALGORITHM"]
        # Reject unknown names up front so the caller gets a clear message
        # instead of a lookup failure.
        if algorithm not in _HASH_METHODS:
            raise ValueError(f"Unsupported hash algorithm: {algorithm}")
        return _HASH_METHODS[algorithm](data)
# Shared module-level instance, used as the default ``hash_method`` by the
# SupersetCache overrides; the algorithm is looked up each time it is called.
configurable_hash_method = ConfigurableHashMethod()
class SupersetCache(Cache):
    """
    Flask-caching ``Cache`` whose key hashing follows ``HASH_ALGORITHM``.

    Flask-caching hashes cache keys with MD5 by default, which fails in FIPS
    mode where MD5 is disabled. Each override below keeps the parent's
    parameters and only swaps the default ``hash_method`` for
    ``configurable_hash_method``; every argument is forwarded to the parent
    implementation by keyword.

    Note: Switching hash algorithms will invalidate existing cache keys,
    causing a one-time cache miss on upgrade.
    """

    def memoize(
        self,
        timeout: int | None = None,
        make_name: Callable[..., Any] | None = None,
        unless: Callable[..., bool] | None = None,
        forced_update: Callable[..., bool] | None = None,
        response_filter: Callable[..., Any] | None = None,
        hash_method: Callable[..., Any] = configurable_hash_method,
        cache_none: bool = False,
        source_check: bool | None = None,
        args_to_ignore: Any | None = None,
    ) -> Callable[..., Any]:
        """Delegate to ``Cache.memoize`` with the configurable hash default."""
        forwarded: dict[str, Any] = {
            "timeout": timeout,
            "make_name": make_name,
            "unless": unless,
            "forced_update": forced_update,
            "response_filter": response_filter,
            "hash_method": hash_method,
            "cache_none": cache_none,
            "source_check": source_check,
            "args_to_ignore": args_to_ignore,
        }
        return super().memoize(**forwarded)

    def cached(
        self,
        timeout: int | None = None,
        key_prefix: str = "view/%s",
        unless: Callable[..., bool] | None = None,
        forced_update: Callable[..., bool] | None = None,
        response_filter: Callable[..., Any] | None = None,
        query_string: bool = False,
        hash_method: Callable[..., Any] = configurable_hash_method,
        cache_none: bool = False,
        make_cache_key: Callable[..., Any] | None = None,
        source_check: bool | None = None,
        response_hit_indication: bool | None = False,
    ) -> Callable[..., Any]:
        """Delegate to ``Cache.cached`` with the configurable hash default."""
        forwarded: dict[str, Any] = {
            "timeout": timeout,
            "key_prefix": key_prefix,
            "unless": unless,
            "forced_update": forced_update,
            "response_filter": response_filter,
            "query_string": query_string,
            "hash_method": hash_method,
            "cache_none": cache_none,
            "make_cache_key": make_cache_key,
            "source_check": source_check,
            "response_hit_indication": response_hit_indication,
        }
        return super().cached(**forwarded)

    # pylint: disable=protected-access
    def _memoize_make_cache_key(
        self,
        make_name: Callable[..., Any] | None = None,
        timeout: Callable[..., Any] | None = None,
        forced_update: bool = False,
        hash_method: Callable[..., Any] = configurable_hash_method,
        source_check: bool | None = False,
        args_to_ignore: Any | None = None,
    ) -> Callable[..., Any]:
        """
        Delegate to ``Cache._memoize_make_cache_key`` with the configurable
        hash default.
        """
        forwarded: dict[str, Any] = {
            "make_name": make_name,
            "timeout": timeout,
            "forced_update": forced_update,
            "hash_method": hash_method,
            "source_check": source_check,
            "args_to_ignore": args_to_ignore,
        }
        return super()._memoize_make_cache_key(**forwarded)
class ExploreFormDataCache(SupersetCache):
def get(self, *args: Any, **kwargs: Any) -> Optional[Union[str, Markup]]:
cache = self.cache.get(*args, **kwargs)
@@ -53,10 +180,10 @@ class CacheManager:
def __init__(self) -> None:
super().__init__()
self._cache = Cache()
self._data_cache = Cache()
self._thumbnail_cache = Cache()
self._filter_state_cache = Cache()
self._cache = SupersetCache()
self._data_cache = SupersetCache()
self._thumbnail_cache = SupersetCache()
self._filter_state_cache = SupersetCache()
self._explore_form_data_cache = ExploreFormDataCache()
@staticmethod

View File

@@ -0,0 +1,171 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import hashlib
from unittest.mock import MagicMock, patch
import pytest
from superset.utils.cache_manager import (
configurable_hash_method,
ConfigurableHashMethod,
SupersetCache,
)
def test_configurable_hash_method_uses_sha256() -> None:
    """With HASH_ALGORITHM set to "sha256" the digest matches hashlib.sha256."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": "sha256"}
    expected = hashlib.sha256(b"test").hexdigest()

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method(b"test").hexdigest()

    # Same digest as a plain sha256 over the same bytes
    assert digest == expected
def test_configurable_hash_method_uses_md5() -> None:
    """With HASH_ALGORITHM set to "md5" the digest matches hashlib.md5."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": "md5"}
    expected = hashlib.md5(b"test").hexdigest()  # noqa: S324

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method(b"test").hexdigest()

    # Same digest as a plain md5 over the same bytes
    assert digest == expected
def test_configurable_hash_method_empty_data() -> None:
    """Calling without arguments hashes the empty byte string."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": "sha256"}
    expected = hashlib.sha256(b"").hexdigest()

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method().hexdigest()

    assert digest == expected
def test_configurable_hash_method_is_callable() -> None:
    """A fresh ConfigurableHashMethod instance can be called like a function."""
    assert callable(ConfigurableHashMethod())
def test_superset_cache_memoize_uses_configurable_hash() -> None:
    """SupersetCache.memoize forwards configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the flask-caching Cache
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "memoize", return_value=lambda f: f
    ) as parent_memoize:
        cache.memoize(timeout=300)

    parent_memoize.assert_called_once()
    forwarded = parent_memoize.call_args.kwargs
    assert forwarded["hash_method"] is configurable_hash_method
def test_superset_cache_memoize_allows_explicit_hash_method() -> None:
    """An explicit hash_method passed to SupersetCache.memoize is forwarded."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the flask-caching Cache
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "memoize", return_value=lambda f: f
    ) as parent_memoize:
        cache.memoize(timeout=300, hash_method=hashlib.md5)

    parent_memoize.assert_called_once()
    forwarded = parent_memoize.call_args.kwargs
    assert forwarded["hash_method"] == hashlib.md5
def test_superset_cache_cached_uses_configurable_hash() -> None:
    """SupersetCache.cached forwards configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the flask-caching Cache
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "cached", return_value=lambda f: f
    ) as parent_cached:
        cache.cached(timeout=300)

    parent_cached.assert_called_once()
    forwarded = parent_cached.call_args.kwargs
    assert forwarded["hash_method"] is configurable_hash_method
def test_superset_cache_cached_allows_explicit_hash_method() -> None:
    """An explicit hash_method passed to SupersetCache.cached is forwarded."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the flask-caching Cache
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls, "cached", return_value=lambda f: f
    ) as parent_cached:
        cache.cached(timeout=300, hash_method=hashlib.md5)

    parent_cached.assert_called_once()
    forwarded = parent_cached.call_args.kwargs
    assert forwarded["hash_method"] == hashlib.md5
def test_superset_cache_memoize_make_cache_key_uses_configurable_hash() -> None:
    """_memoize_make_cache_key forwards configurable_hash_method by default."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the flask-caching Cache
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls,
        "_memoize_make_cache_key",
        return_value=lambda *args, **kwargs: "cache_key",
    ) as parent_make_key:
        cache._memoize_make_cache_key(make_name=None, timeout=300)

    parent_make_key.assert_called_once()
    forwarded = parent_make_key.call_args.kwargs
    assert forwarded["hash_method"] is configurable_hash_method
def test_superset_cache_memoize_make_cache_key_allows_explicit_hash() -> None:
    """An explicit hash_method given to _memoize_make_cache_key is forwarded."""
    cache = SupersetCache()
    # First base class of SupersetCache, i.e. the flask-caching Cache
    parent_cls = type(cache).__bases__[0]

    with patch.object(
        parent_cls,
        "_memoize_make_cache_key",
        return_value=lambda *args, **kwargs: "cache_key",
    ) as parent_make_key:
        cache._memoize_make_cache_key(
            make_name=None, timeout=300, hash_method=hashlib.md5
        )

    parent_make_key.assert_called_once()
    forwarded = parent_make_key.call_args.kwargs
    assert forwarded["hash_method"] == hashlib.md5
@pytest.mark.parametrize(
    "algorithm,expected_digest",
    [
        ("sha256", hashlib.sha256(b"test_data").hexdigest()),
        ("md5", hashlib.md5(b"test_data").hexdigest()),  # noqa: S324
    ],
)
def test_configurable_hash_method_parametrized(
    algorithm: str, expected_digest: str
) -> None:
    """Each parametrized HASH_ALGORITHM value yields that algorithm's digest."""
    fake_app = MagicMock()
    fake_app.config = {"HASH_ALGORITHM": algorithm}

    with patch("superset.utils.cache_manager.current_app", fake_app):
        digest = configurable_hash_method(b"test_data").hexdigest()

    assert digest == expected_digest