diff --git a/superset/utils/cache.py b/superset/utils/cache.py
index 76294696e43..706a74dbde4 100644
--- a/superset/utils/cache.py
+++ b/superset/utils/cache.py
@@ -31,6 +31,7 @@ from superset import db
 from superset.constants import CACHE_DISABLED_TIMEOUT
 from superset.extensions import cache_manager
 from superset.models.cache import CacheKey
+from superset.utils.cache_manager import configurable_hash_method
 from superset.utils.hashing import hash_from_dict
 from superset.utils.json import json_int_dttm_ser
 
@@ -273,7 +274,7 @@ def etag_cache(  # noqa: C901
         wrapper.uncached = f  # type: ignore
         wrapper.cache_timeout = timeout  # type: ignore
         wrapper.make_cache_key = cache._memoize_make_cache_key(  # type: ignore # pylint: disable=protected-access
-            make_name=None, timeout=timeout
+            make_name=None, timeout=timeout, hash_method=configurable_hash_method
         )
 
         return wrapper
diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py
index d3b2dbdb00d..0804e0d4b5d 100644
--- a/superset/utils/cache_manager.py
+++ b/superset/utils/cache_manager.py
@@ -14,10 +14,11 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import hashlib
 import logging
-from typing import Any, Optional, Union
+from typing import Any, Callable, Optional, Union
 
-from flask import Flask
+from flask import current_app, Flask
 from flask_caching import Cache
 from markupsafe import Markup
 
@@ -27,8 +28,134 @@ logger = logging.getLogger(__name__)
 
 CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache"
 
+# Hash function lookup table matching superset.utils.hashing
+_HASH_METHODS: dict[str, Callable[..., Any]] = {
+    "sha256": hashlib.sha256,
+    "md5": hashlib.md5,
+}
 
-class ExploreFormDataCache(Cache):
+
+class ConfigurableHashMethod:
+    """
+    A callable that defers hash algorithm selection to runtime.
+
+    Flask-caching's memoize decorator evaluates hash_method at decoration time
+    (module import), but we need to read HASH_ALGORITHM config at function call
+    time when the app context is available.
+
+    This class acts like a hashlib function but looks up the configured
+    algorithm when called.
+    """
+
+    def __call__(self, data: bytes = b"") -> Any:
+        """
+        Create a hash object using the configured algorithm.
+
+        Args:
+            data: Optional initial data to hash
+
+        Returns:
+            A hashlib hash object (e.g., sha256 or md5)
+
+        Raises:
+            ValueError: If HASH_ALGORITHM is set to an unsupported value
+        """
+        algorithm = current_app.config["HASH_ALGORITHM"]
+        hash_func = _HASH_METHODS.get(algorithm)
+        if hash_func is None:
+            raise ValueError(f"Unsupported hash algorithm: {algorithm}")
+        return hash_func(data)
+
+
+# Singleton instance to use as default hash_method
+configurable_hash_method = ConfigurableHashMethod()
+
+
+class SupersetCache(Cache):
+    """
+    Cache subclass that uses the configured HASH_ALGORITHM instead of MD5.
+
+    Flask-caching uses MD5 by default for cache key generation, which fails
+    in FIPS mode where MD5 is disabled. This class overrides the default
+    hash method to use the algorithm specified by HASH_ALGORITHM config.
+
+    Note: Switching hash algorithms will invalidate existing cache keys,
+    causing a one-time cache miss on upgrade.
+    """
+
+    def memoize(
+        self,
+        timeout: int | None = None,
+        make_name: Callable[..., Any] | None = None,
+        unless: Callable[..., bool] | None = None,
+        forced_update: Callable[..., bool] | None = None,
+        response_filter: Callable[..., Any] | None = None,
+        hash_method: Callable[..., Any] = configurable_hash_method,
+        cache_none: bool = False,
+        source_check: bool | None = None,
+        args_to_ignore: Any | None = None,
+    ) -> Callable[..., Any]:
+        return super().memoize(
+            timeout=timeout,
+            make_name=make_name,
+            unless=unless,
+            forced_update=forced_update,
+            response_filter=response_filter,
+            hash_method=hash_method,
+            cache_none=cache_none,
+            source_check=source_check,
+            args_to_ignore=args_to_ignore,
+        )
+
+    def cached(
+        self,
+        timeout: int | None = None,
+        key_prefix: str = "view/%s",
+        unless: Callable[..., bool] | None = None,
+        forced_update: Callable[..., bool] | None = None,
+        response_filter: Callable[..., Any] | None = None,
+        query_string: bool = False,
+        hash_method: Callable[..., Any] = configurable_hash_method,
+        cache_none: bool = False,
+        make_cache_key: Callable[..., Any] | None = None,
+        source_check: bool | None = None,
+        response_hit_indication: bool | None = False,
+    ) -> Callable[..., Any]:
+        return super().cached(
+            timeout=timeout,
+            key_prefix=key_prefix,
+            unless=unless,
+            forced_update=forced_update,
+            response_filter=response_filter,
+            query_string=query_string,
+            hash_method=hash_method,
+            cache_none=cache_none,
+            make_cache_key=make_cache_key,
+            source_check=source_check,
+            response_hit_indication=response_hit_indication,
+        )
+
+    # pylint: disable=protected-access
+    def _memoize_make_cache_key(
+        self,
+        make_name: Callable[..., Any] | None = None,
+        timeout: Callable[..., Any] | None = None,
+        forced_update: bool = False,
+        hash_method: Callable[..., Any] = configurable_hash_method,
+        source_check: bool | None = False,
+        args_to_ignore: Any | None = None,
+    ) -> Callable[..., Any]:
+        return super()._memoize_make_cache_key(
+            make_name=make_name,
+            timeout=timeout,
+            forced_update=forced_update,
+            hash_method=hash_method,
+            source_check=source_check,
+            args_to_ignore=args_to_ignore,
+        )
+
+
+class ExploreFormDataCache(SupersetCache):
     def get(self, *args: Any, **kwargs: Any) -> Optional[Union[str, Markup]]:
         cache = self.cache.get(*args, **kwargs)
 
@@ -53,10 +180,10 @@ class CacheManager:
     def __init__(self) -> None:
         super().__init__()
 
-        self._cache = Cache()
-        self._data_cache = Cache()
-        self._thumbnail_cache = Cache()
-        self._filter_state_cache = Cache()
+        self._cache = SupersetCache()
+        self._data_cache = SupersetCache()
+        self._thumbnail_cache = SupersetCache()
+        self._filter_state_cache = SupersetCache()
         self._explore_form_data_cache = ExploreFormDataCache()
 
     @staticmethod
diff --git a/tests/unit_tests/utils/test_cache_manager.py b/tests/unit_tests/utils/test_cache_manager.py
new file mode 100644
index 00000000000..b7b10e4506e
--- /dev/null
+++ b/tests/unit_tests/utils/test_cache_manager.py
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import hashlib
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from superset.utils.cache_manager import (
+    configurable_hash_method,
+    ConfigurableHashMethod,
+    SupersetCache,
+)
+
+
+def test_configurable_hash_method_uses_sha256():
+    """Test ConfigurableHashMethod uses sha256 when configured."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "sha256"}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method(b"test")
+        # Verify it returns a sha256 hash object
+        assert hash_obj.hexdigest() == hashlib.sha256(b"test").hexdigest()
+
+
+def test_configurable_hash_method_uses_md5():
+    """Test ConfigurableHashMethod uses md5 when configured."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "md5"}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method(b"test")
+        # Verify it returns an md5 hash object
+        assert hash_obj.hexdigest() == hashlib.md5(b"test").hexdigest()  # noqa: S324
+
+
+def test_configurable_hash_method_empty_data():
+    """Test ConfigurableHashMethod with empty data."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "sha256"}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method()
+        assert hash_obj.hexdigest() == hashlib.sha256(b"").hexdigest()
+
+
+def test_configurable_hash_method_is_callable():
+    """Test that ConfigurableHashMethod instance is callable."""
+    method = ConfigurableHashMethod()
+    assert callable(method)
+
+
+def test_superset_cache_memoize_uses_configurable_hash():
+    """Test that SupersetCache.memoize uses configurable_hash_method by default."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "memoize", return_value=lambda f: f
+    ) as mock_memoize:
+        cache.memoize(timeout=300)
+
+    mock_memoize.assert_called_once()
+    call_kwargs = mock_memoize.call_args[1]
+    assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_memoize_allows_explicit_hash_method():
+    """Test that SupersetCache.memoize allows explicit hash_method override."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "memoize", return_value=lambda f: f
+    ) as mock_memoize:
+        cache.memoize(timeout=300, hash_method=hashlib.md5)
+
+    mock_memoize.assert_called_once()
+    call_kwargs = mock_memoize.call_args[1]
+    assert call_kwargs["hash_method"] == hashlib.md5
+
+
+def test_superset_cache_cached_uses_configurable_hash():
+    """Test that SupersetCache.cached uses configurable_hash_method by default."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "cached", return_value=lambda f: f
+    ) as mock_cached:
+        cache.cached(timeout=300)
+
+    mock_cached.assert_called_once()
+    call_kwargs = mock_cached.call_args[1]
+    assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_cached_allows_explicit_hash_method():
+    """Test that SupersetCache.cached allows explicit hash_method override."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "cached", return_value=lambda f: f
+    ) as mock_cached:
+        cache.cached(timeout=300, hash_method=hashlib.md5)
+
+    mock_cached.assert_called_once()
+    call_kwargs = mock_cached.call_args[1]
+    assert call_kwargs["hash_method"] == hashlib.md5
+
+
+def test_superset_cache_memoize_make_cache_key_uses_configurable_hash():
+    """Test _memoize_make_cache_key uses configurable_hash_method by default."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0],
+        "_memoize_make_cache_key",
+        return_value=lambda *args, **kwargs: "cache_key",
+    ) as mock_make_key:
+        cache._memoize_make_cache_key(make_name=None, timeout=300)
+
+    mock_make_key.assert_called_once()
+    call_kwargs = mock_make_key.call_args[1]
+    assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_memoize_make_cache_key_allows_explicit_hash():
+    """Test _memoize_make_cache_key allows explicit hash_method override."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0],
+        "_memoize_make_cache_key",
+        return_value=lambda *args, **kwargs: "cache_key",
+    ) as mock_make_key:
+        cache._memoize_make_cache_key(
+            make_name=None, timeout=300, hash_method=hashlib.md5
+        )
+
+    mock_make_key.assert_called_once()
+    call_kwargs = mock_make_key.call_args[1]
+    assert call_kwargs["hash_method"] == hashlib.md5
+
+
+@pytest.mark.parametrize(
+    "algorithm,expected_digest",
+    [
+        ("sha256", hashlib.sha256(b"test_data").hexdigest()),
+        ("md5", hashlib.md5(b"test_data").hexdigest()),  # noqa: S324
+    ],
+)
+def test_configurable_hash_method_parametrized(algorithm, expected_digest):
+    """Parametrized test for ConfigurableHashMethod with different algorithms."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": algorithm}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method(b"test_data")
+        assert hash_obj.hexdigest() == expected_digest