diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 577fc66de1e..2e1e00526a3 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -22,7 +22,6 @@ from datetime import datetime from pprint import pformat from typing import Any, NamedTuple, TYPE_CHECKING -from flask import g from flask_babel import gettext as _ from jinja2.exceptions import TemplateError from pandas import DataFrame @@ -38,6 +37,7 @@ from superset.extensions import event_logger from superset.sql.parse import sanitize_clause, transpile_to_dialect from superset.superset_typing import Column, Metric, OrderBy, QueryObjectDict from superset.utils import json, pandas_postprocessing +from superset.utils.cache_keys import add_impersonation_cache_key_if_needed from superset.utils.core import ( DTTM_ALIAS, find_duplicates, @@ -479,24 +479,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes # or if the CACHE_QUERY_BY_USER flag is on or per_user_caching is enabled on # the database try: - database = self.datasource.database # type: ignore - extra = json.loads(database.extra or "{}") - if ( - ( - feature_flag_manager.is_feature_enabled("CACHE_IMPERSONATION") - and database.impersonate_user - ) - or feature_flag_manager.is_feature_enabled("CACHE_QUERY_BY_USER") - or extra.get("per_user_caching", False) - ): - if key := database.db_engine_spec.get_impersonation_key( - getattr(g, "user", None) - ): - logger.debug( - "Adding impersonation key to QueryObject cache dict: %s", key - ) - - cache_dict["impersonation_key"] = key + add_impersonation_cache_key_if_needed(self.datasource.database, cache_dict) # type: ignore except AttributeError: # datasource or database do not exist pass diff --git a/superset/utils/cache_keys.py b/superset/utils/cache_keys.py new file mode 100644 index 00000000000..2140e5df5de --- /dev/null +++ b/superset/utils/cache_keys.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import Any, TYPE_CHECKING + +from flask import g + +from superset import feature_flag_manager +from superset.utils.json import loads as json_loads + +if TYPE_CHECKING: + from superset.models.core import Database + +logger = logging.getLogger(__name__) + + +def add_impersonation_cache_key_if_needed( + database: Database, + cache_dict: dict[str, Any], +) -> None: + """ + Add a per-user cache-key when the DB connection is configured for + per-user caching, no-op otherwise. + """ + extra = json_loads(database.extra or "{}") + if ( + ( + feature_flag_manager.is_feature_enabled("CACHE_IMPERSONATION") + and database.impersonate_user + ) + or feature_flag_manager.is_feature_enabled("CACHE_QUERY_BY_USER") + or extra.get("per_user_caching", False) + ): + if key := database.db_engine_spec.get_impersonation_key( + getattr(g, "user", None) + ): + logger.debug("Adding impersonation key to cache dict: %s", key) + cache_dict["impersonation_key"] = key diff --git a/superset/viz.py b/superset/viz.py index ab5f3f03458..0706aec104a 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -65,6 +65,7 @@ from superset.superset_typing import ( ) from superset.utils import core as utils, csv, json from superset.utils.cache import set_and_log_cache +from superset.utils.cache_keys import add_impersonation_cache_key_if_needed from superset.utils.core import ( apply_max_row_limit, DateColumn, @@ -472,6 +473,16 @@ class BaseViz: # pylint: disable=too-many-public-methods cache_dict["extra_cache_keys"] = self.datasource.get_extra_cache_keys(query_obj) cache_dict["rls"] = security_manager.get_rls_cache_key(self.datasource) cache_dict["changed_on"] = self.datasource.changed_on + + # Add an impersonation key to cache if impersonation is enabled on the db + # or if the CACHE_QUERY_BY_USER flag is on or per_user_caching is enabled on + # the database + try: + add_impersonation_cache_key_if_needed(self.datasource.database, cache_dict) + except AttributeError: + # datasource or database do not exist + pass + json_data = self.json_dumps(cache_dict, sort_keys=True) return hash_from_str(json_data) diff --git a/tests/unit_tests/queries/query_object_test.py b/tests/unit_tests/queries/query_object_test.py index cc2aa7bf0c3..08b926e3e1a 100644 --- a/tests/unit_tests/queries/query_object_test.py +++ b/tests/unit_tests/queries/query_object_test.py @@ -95,7 +95,7 @@ def test_cache_key_changes_for_new_query_object_same_params(): assert query_object2.cache_key() == cache_key1 -@patch("superset.common.query_object.feature_flag_manager") +@patch("superset.utils.cache_keys.feature_flag_manager") def test_cache_key_cache_query_by_user_on_no_datasource(feature_flag_mock): """ When CACHE_QUERY_BY_USER flag is on and there is no datasource, @@ -112,7 +112,7 @@ def test_cache_key_cache_query_by_user_on_no_datasource(feature_flag_mock): assert query_object.cache_key() == cache_key -@patch("superset.common.query_object.feature_flag_manager") +@patch("superset.utils.cache_keys.feature_flag_manager") @patch("superset.common.query_object.logger") def test_cache_key_cache_query_by_user_on_no_user(logger_mock, feature_flag_mock): """ @@ -140,16 +140,13 @@ def test_cache_key_cache_query_by_user_on_no_user(logger_mock, feature_flag_mock logger_mock.debug.assert_called() -@patch("superset.common.query_object.feature_flag_manager") -@patch("superset.common.query_object.logger") +@patch("superset.utils.cache_keys.feature_flag_manager") +@patch("superset.utils.cache_keys.logger") def test_cache_key_cache_query_by_user_on_with_user(logger_mock, feature_flag_mock): """ When the same user is requesting a cache key with CACHE_QUERY_BY_USER flag on, the key will be the same """ - # Configure logger to enable DEBUG level for isEnabledFor check - logger_mock.isEnabledFor.return_value = True - datasource = SqlaTable( table_name="test_table", columns=[], @@ -167,17 +164,17 @@ def test_cache_key_cache_query_by_user_on_with_user(logger_mock, feature_flag_mo cache_key1 = query_object.cache_key() assert query_object.cache_key() == cache_key1 - # Should have both impersonation and cache key generation logs + # Should have impersonation log emitted by the cache_keys helper logger_mock.debug.assert_has_calls( [ - call("Adding impersonation key to QueryObject cache dict: %s", "test_user"), + call("Adding impersonation key to cache dict: %s", "test_user"), ], any_order=True, ) -@patch("superset.common.query_object.feature_flag_manager") -@patch("superset.common.query_object.logger") +@patch("superset.utils.cache_keys.feature_flag_manager") +@patch("superset.utils.cache_keys.logger") def test_cache_key_cache_query_by_user_on_with_different_user( logger_mock, feature_flag_mock ): @@ -185,9 +182,6 @@ def test_cache_key_cache_query_by_user_on_with_different_user( When two different users are requesting a cache key with CACHE_QUERY_BY_USER flag on, the key will be different """ - # Configure logger to enable DEBUG level for isEnabledFor check - logger_mock.isEnabledFor.return_value = True - datasource = SqlaTable( table_name="test_table", columns=[], @@ -209,21 +203,17 @@ def test_cache_key_cache_query_by_user_on_with_different_user( assert cache_key1 != cache_key2 - # Should have both impersonation and cache key generation logs (any order) + # Should have impersonation logs emitted by the cache_keys helper logger_mock.debug.assert_has_calls( [ - call( - "Adding impersonation key to QueryObject cache dict: %s", "test_user1" - ), - call( - "Adding impersonation key to QueryObject cache dict: %s", "test_user2" - ), + call("Adding impersonation key to cache dict: %s", "test_user1"), + call("Adding impersonation key to cache dict: %s", "test_user2"), ], any_order=True, ) -@patch("superset.common.query_object.feature_flag_manager") +@patch("superset.utils.cache_keys.feature_flag_manager") @patch("superset.common.query_object.logger") def test_cache_key_cache_impersonation_on_no_user(logger_mock, feature_flag_mock): """ @@ -251,7 +241,7 @@ def test_cache_key_cache_impersonation_on_no_user(logger_mock, feature_flag_mock logger_mock.debug.assert_called() -@patch("superset.common.query_object.feature_flag_manager") +@patch("superset.utils.cache_keys.feature_flag_manager") @patch("superset.common.query_object.logger") def test_cache_key_cache_impersonation_on_with_user(logger_mock, feature_flag_mock): """ @@ -290,7 +280,7 @@ def test_cache_key_cache_impersonation_on_with_user(logger_mock, feature_flag_mo assert len(impersonation_calls) == 0 -@patch("superset.common.query_object.feature_flag_manager") +@patch("superset.utils.cache_keys.feature_flag_manager") @patch("superset.common.query_object.logger") def test_cache_key_cache_impersonation_on_with_different_user( logger_mock, feature_flag_mock @@ -335,8 +325,8 @@ def test_cache_key_cache_impersonation_on_with_different_user( assert len(impersonation_calls) == 0 -@patch("superset.common.query_object.feature_flag_manager") -@patch("superset.common.query_object.logger") +@patch("superset.utils.cache_keys.feature_flag_manager") +@patch("superset.utils.cache_keys.logger") def test_cache_key_cache_impersonation_on_with_different_user_and_db_impersonation( logger_mock, feature_flag_mock, @@ -346,9 +336,6 @@ def test_cache_key_cache_impersonation_on_with_different_user_and_db_impersonati flag on, and cache_impersonation is enabled on the database, the keys will be different """ - # Configure logger to enable DEBUG level for isEnabledFor check - logger_mock.isEnabledFor.return_value = True - datasource = SqlaTable( table_name="test_table", columns=[], @@ -374,15 +361,11 @@ def test_cache_key_cache_impersonation_on_with_different_user_and_db_impersonati assert cache_key1 != cache_key2 - # Should have both impersonation and cache key generation logs (any order) + # Should have impersonation logs emitted by the cache_keys helper logger_mock.debug.assert_has_calls( [ - call( - "Adding impersonation key to QueryObject cache dict: %s", "test_user1" - ), - call( - "Adding impersonation key to QueryObject cache dict: %s", "test_user2" - ), + call("Adding impersonation key to cache dict: %s", "test_user1"), + call("Adding impersonation key to cache dict: %s", "test_user2"), ], any_order=True, ) diff --git a/tests/unit_tests/test_viz_cache_key.py b/tests/unit_tests/test_viz_cache_key.py new file mode 100644 index 00000000000..d6eb614265c --- /dev/null +++ b/tests/unit_tests/test_viz_cache_key.py @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Behavioral tests for ``viz.BaseViz.cache_key`` covering per-user cache-key +inclusion. +""" + +from typing import Any +from unittest.mock import patch + +from flask_appbuilder.security.sqla.models import User + +from superset import viz +from superset.connectors.sqla.models import SqlaTable +from superset.models.core import Database +from superset.utils.core import override_user + +QUERY_OBJ: dict[str, Any] = {"row_limit": 100, "from_dttm": None, "to_dttm": None} + + +def _viz_for(database: Database) -> viz.BaseViz: + datasource = SqlaTable( + table_name="t", + columns=[], + metrics=[], + main_dttm_col=None, + database=database, + ) + return viz.BaseViz(datasource=datasource, form_data={"viz_type": "table"}) + + +def test_no_per_user_opt_in_keys_match_across_users(): + """ + Without any per-user caching opt-in, two different users on the same + database/query must produce the *same* cache key (regression guard — we + must not accidentally make every cache key per-user). + """ + database = Database(database_name="d", sqlalchemy_uri="sqlite://") + obj = _viz_for(database) + + with override_user(User(username="alice")): + key_a = obj.cache_key(QUERY_OBJ) + with override_user(User(username="bob")): + key_b = obj.cache_key(QUERY_OBJ) + + assert key_a == key_b + + +def test_per_user_caching_in_extra_yields_distinct_keys_per_user(): + """ + With ``per_user_caching: true`` set on the database, two different users + must produce *different* cache keys for the same query. + """ + database = Database( + database_name="d", + sqlalchemy_uri="sqlite://", + extra='{"per_user_caching": true}', + ) + obj = _viz_for(database) + + with override_user(User(username="alice")): + key_a = obj.cache_key(QUERY_OBJ) + with override_user(User(username="bob")): + key_b = obj.cache_key(QUERY_OBJ) + + assert key_a != key_b + + +def test_same_user_same_query_idempotent(): + database = Database( + database_name="d", + sqlalchemy_uri="sqlite://", + extra='{"per_user_caching": true}', + ) + obj = _viz_for(database) + + with override_user(User(username="alice")): + assert obj.cache_key(QUERY_OBJ) == obj.cache_key(QUERY_OBJ) + + +@patch("superset.utils.cache_keys.feature_flag_manager") +def test_cache_query_by_user_flag_yields_distinct_keys(feature_flag_mock): + """ + Global ``CACHE_QUERY_BY_USER`` flag also reaches the legacy viz path. + """ + feature_flag_mock.is_feature_enabled.side_effect = ( + lambda feature=None: feature == "CACHE_QUERY_BY_USER" + ) + database = Database(database_name="d", sqlalchemy_uri="sqlite://") + obj = _viz_for(database) + + with override_user(User(username="alice")): + key_a = obj.cache_key(QUERY_OBJ) + with override_user(User(username="bob")): + key_b = obj.cache_key(QUERY_OBJ) + + assert key_a != key_b diff --git a/tests/unit_tests/utils/test_impersonation_cache_key.py b/tests/unit_tests/utils/test_impersonation_cache_key.py new file mode 100644 index 00000000000..6203dd37d48 --- /dev/null +++ b/tests/unit_tests/utils/test_impersonation_cache_key.py @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Any +from unittest.mock import patch + +from flask_appbuilder.security.sqla.models import User + +from superset.models.core import Database +from superset.utils.cache_keys import add_impersonation_cache_key_if_needed +from superset.utils.core import override_user + + +def _flag(name: str): + """Build a feature-flag side_effect that returns True only for ``name``.""" + + def side_effect(feature=None): + return feature == name + + return side_effect + + +def _run(database: Database) -> dict[str, Any]: + """Run the helper against a fresh dict and return that dict.""" + cache_dict: dict[str, Any] = {} + add_impersonation_cache_key_if_needed(database, cache_dict) + return cache_dict + + +def test_no_per_user_caching_yields_no_key(): + database = Database(database_name="d", sqlalchemy_uri="sqlite://") + with override_user(User(username="u")): + assert "impersonation_key" not in _run(database) + + +@patch("superset.utils.cache_keys.feature_flag_manager") +def test_cache_query_by_user_adds_username(feature_flag_mock): + feature_flag_mock.is_feature_enabled.side_effect = _flag("CACHE_QUERY_BY_USER") + database = Database(database_name="d", sqlalchemy_uri="sqlite://") + with override_user(User(username="alice")): + assert _run(database)["impersonation_key"] == "alice" + + +@patch("superset.utils.cache_keys.feature_flag_manager") +def test_cache_query_by_user_distinct_per_user(feature_flag_mock): + feature_flag_mock.is_feature_enabled.side_effect = _flag("CACHE_QUERY_BY_USER") + database = Database(database_name="d", sqlalchemy_uri="sqlite://") + with override_user(User(username="alice")): + key_a = _run(database)["impersonation_key"] + with override_user(User(username="bob")): + key_b = _run(database)["impersonation_key"] + assert key_a != key_b + + +@patch("superset.utils.cache_keys.feature_flag_manager") +def test_cache_impersonation_requires_database_flag(feature_flag_mock): + """ + CACHE_IMPERSONATION alone is not enough; ``database.impersonate_user`` must + also be set on the database for the per-user key to apply. + """ + feature_flag_mock.is_feature_enabled.side_effect = _flag("CACHE_IMPERSONATION") + + db_no_impersonation = Database(database_name="d", sqlalchemy_uri="sqlite://") + db_with_impersonation = Database( + database_name="d", sqlalchemy_uri="sqlite://", impersonate_user=True + ) + + with override_user(User(username="alice")): + assert "impersonation_key" not in _run(db_no_impersonation) + assert _run(db_with_impersonation)["impersonation_key"] == "alice" + + +def test_per_user_caching_in_extra_json_enables_key(): + database = Database( + database_name="d", + sqlalchemy_uri="sqlite://", + extra='{"per_user_caching": true}', + ) + with override_user(User(username="alice")): + assert _run(database)["impersonation_key"] == "alice" + + +def test_no_user_yields_no_key(app_context): # noqa: ARG001 + """ + With no logged-in user, the engine spec returns None even when per-user + caching is enabled — there's no identity to key on. + """ + database = Database( + database_name="d", + sqlalchemy_uri="sqlite://", + extra='{"per_user_caching": true}', + ) + # No override_user — g.user is unset + assert "impersonation_key" not in _run(database)