diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index cd24cfbbcd4..35a25b22068 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -299,20 +299,24 @@ class QueryContextProcessor: ) -> dict[str, Any]: """Returns the query results with both metadata and data""" - self.ensure_totals_available() + # Skip ensure_totals_available when force_cached=True + # This prevents recalculating contribution_totals from cached results + if not force_cached: + self.ensure_totals_available() - # Update cache_values to reflect modifications made by ensure_totals_available() - # This ensures cache keys are generated from the actual query state - # We merge the original query dict with the updated query dict to preserve - # any fields that might not be in to_dict() but were in the original request - self._query_context.cache_values["queries"] = [ - {**cached_query, **query.to_dict()} - for cached_query, query in zip( - self._query_context.cache_values["queries"], - self._query_context.queries, - strict=True, - ) - ] + # Update cache_values to reflect modifications made by + # ensure_totals_available() + # This ensures cache keys are generated from the actual query state + # We merge the original query dict with the updated query dict to preserve + # any fields that might not be in to_dict() but were in the original request + self._query_context.cache_values["queries"] = [ + {**cached_query, **query.to_dict()} + for cached_query, query in zip( + self._query_context.cache_values["queries"], + self._query_context.queries, + strict=True, + ) + ] query_results = [ get_query_results( diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 1a215df8fda..c691ad30e53 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -436,7 +436,18 @@ class QueryObject: # pylint: disable=too-many-instance-attributes if self.time_range: cache_dict["time_range"] = self.time_range if self.post_processing: - cache_dict["post_processing"] = self.post_processing + # Exclude contribution_totals from post_processing as it's computed at + # runtime and varies per request, which would cause cache key mismatches + post_processing_for_cache = [] + for pp in self.post_processing: + pp_copy = dict(pp) + if pp_copy.get("operation") == "contribution" and "options" in pp_copy: + options = dict(pp_copy["options"]) + # Remove contribution_totals as it's dynamically calculated + options.pop("contribution_totals", None) + pp_copy["options"] = options + post_processing_for_cache.append(pp_copy) + cache_dict["post_processing"] = post_processing_for_cache if self.time_offsets: cache_dict["time_offsets"] = self.time_offsets diff --git a/superset/utils/cache.py b/superset/utils/cache.py index f5c38e8d7c6..03e0b57dbd0 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -105,8 +105,6 @@ def set_and_log_cache( # to specify a "far future" date. ONE_YEAR = 365 * 24 * 60 * 60 # 1 year in seconds -logger = logging.getLogger(__name__) - def memoized_func(key: str, cache: Cache = cache_manager.cache) -> Callable[..., Any]: """ diff --git a/tests/unit_tests/common/test_query_context_processor.py b/tests/unit_tests/common/test_query_context_processor.py index 4647bd65e58..0dfc47c4614 100644 --- a/tests/unit_tests/common/test_query_context_processor.py +++ b/tests/unit_tests/common/test_query_context_processor.py @@ -1066,3 +1066,154 @@ def test_cache_values_sync_after_ensure_totals_available(): # Verify that the main query row_limit is still 1000 (only totals query # should be modified) assert updated_cache_queries[0]["row_limit"] == 1000 + + +def test_cache_key_excludes_contribution_totals(): + """ + Test that cache_key() excludes contribution_totals from post_processing. + + contribution_totals is computed at runtime by ensure_totals_available() and + varies per request. Including it in the cache key would cause mismatches + between workers that compute different totals for the same query. + """ + from superset.common.query_object import QueryObject + + mock_datasource = MagicMock() + mock_datasource.uid = "test_datasource" + mock_datasource.database.extra = "{}" + mock_datasource.get_extra_cache_keys.return_value = [] + + # Create query with contribution post-processing that includes contribution_totals + query_with_totals = QueryObject( + datasource=mock_datasource, + columns=["region"], + metrics=["sales", "profit"], + post_processing=[ + { + "operation": "contribution", + "options": { + "columns": ["sales", "profit"], + "rename_columns": ["%sales", "%profit"], + "contribution_totals": {"sales": 1000.0, "profit": 200.0}, + }, + } + ], + ) + + # Create identical query without contribution_totals + query_without_totals = QueryObject( + datasource=mock_datasource, + columns=["region"], + metrics=["sales", "profit"], + post_processing=[ + { + "operation": "contribution", + "options": { + "columns": ["sales", "profit"], + "rename_columns": ["%sales", "%profit"], + }, + } + ], + ) + + # Cache keys should be identical since contribution_totals is excluded + cache_key_with = query_with_totals.cache_key() + cache_key_without = query_without_totals.cache_key() + + assert cache_key_with == cache_key_without, ( + "Cache keys should match regardless of contribution_totals. " + f"With totals: {cache_key_with}, Without totals: {cache_key_without}" + ) + + +def test_cache_key_preserves_other_post_processing_options(): + """ + Test that cache_key() only excludes contribution_totals, not other options. + """ + from superset.common.query_object import QueryObject + + mock_datasource = MagicMock() + mock_datasource.uid = "test_datasource" + mock_datasource.database.extra = "{}" + mock_datasource.get_extra_cache_keys.return_value = [] + + # Create query with contribution post-processing + query1 = QueryObject( + datasource=mock_datasource, + columns=["region"], + metrics=["sales"], + post_processing=[ + { + "operation": "contribution", + "options": { + "columns": ["sales"], + "rename_columns": ["%sales"], + "contribution_totals": {"sales": 1000.0}, + }, + } + ], + ) + + # Create query with different rename_columns + query2 = QueryObject( + datasource=mock_datasource, + columns=["region"], + metrics=["sales"], + post_processing=[ + { + "operation": "contribution", + "options": { + "columns": ["sales"], + "rename_columns": ["%sales_pct"], # Different! + "contribution_totals": {"sales": 1000.0}, + }, + } + ], + ) + + # Cache keys should differ because rename_columns is different + assert query1.cache_key() != query2.cache_key(), ( + "Cache keys should differ when other post_processing options differ" + ) + + +def test_cache_key_non_contribution_post_processing_unchanged(): + """ + Test that non-contribution post_processing operations are unchanged in cache key. + """ + from superset.common.query_object import QueryObject + + mock_datasource = MagicMock() + mock_datasource.uid = "test_datasource" + mock_datasource.database.extra = "{}" + mock_datasource.get_extra_cache_keys.return_value = [] + + # Create query with non-contribution post-processing + query1 = QueryObject( + datasource=mock_datasource, + columns=["region"], + metrics=["sales"], + post_processing=[ + { + "operation": "pivot", + "options": {"columns": ["region"], "aggregates": {"sales": "sum"}}, + } + ], + ) + + query2 = QueryObject( + datasource=mock_datasource, + columns=["region"], + metrics=["sales"], + post_processing=[ + { + "operation": "pivot", + "options": {"columns": ["region"], "aggregates": {"sales": "mean"}}, + } + ], + ) + + # Cache keys should differ because aggregates option is different + assert query1.cache_key() != query2.cache_key(), ( + "Cache keys should differ for different non-contribution post_processing" + )