diff --git a/superset/semantic_layers/cache.py b/superset/semantic_layers/cache.py index ef5cdc68eb3..a57db368c3f 100644 --- a/superset/semantic_layers/cache.py +++ b/superset/semantic_layers/cache.py @@ -376,6 +376,8 @@ def _projection_allowed( return False if entry.group_limit_key: return False + if query.group_limit is not None: + return False # Cached HAVING dropped sub-aggregate rows; the rolled-up totals would be # off. Conservative: skip the projection path when cached has any HAVING. if any(f.type == PredicateType.HAVING for f in entry.filters): @@ -623,17 +625,15 @@ def _apply_post_processing( aggregates = { m.name: { "column": m.name, - "operator": _AGGREGATION_TO_PANDAS[ - # Guarded by ``_projection_allowed`` — non-None and additive. - m.aggregation # type: ignore[index] - ], + "operator": _AGGREGATION_TO_PANDAS[m.aggregation], } for m in query.metrics } df = aggregate(df, groupby=groupby, aggregates=aggregates) - df = _apply_order(df, query.order) note_def = "Served from semantic view smart cache (re-aggregated locally)" + df = _apply_order(df, query.order) + if query.limit is not None: df = df.head(query.limit) diff --git a/tests/unit_tests/semantic_layers/cache_test.py b/tests/unit_tests/semantic_layers/cache_test.py index c3634511cd5..6a43168d8d6 100644 --- a/tests/unit_tests/semantic_layers/cache_test.py +++ b/tests/unit_tests/semantic_layers/cache_test.py @@ -27,6 +27,7 @@ from superset_core.semantic_layers.types import ( AggregationType, Dimension, Filter, + GroupLimit, Metric, Operator, OrderDirection, @@ -445,6 +446,7 @@ def _projection_query( new_filters: set[Filter] | None = None, new_limit: int | None = None, new_order: Any = None, + new_group_limit: GroupLimit | None = None, ) -> tuple[CachedEntry, SemanticQuery]: cached_q = SemanticQuery( metrics=metrics, @@ -458,6 +460,7 @@ def _projection_query( filters=new_filters, limit=new_limit, order=new_order, + group_limit=new_group_limit, ) return entry_from(cached_q), new_q @@ -594,6 +597,24 @@ def test_projection_with_order_and_limit() -> None: assert df["sum_x"].tolist() == [100] +def test_apply_post_processing_sorts_before_limit_for_non_projection() -> None: + cached_df = pd.DataFrame({"a": ["x", "y", "z"], "x": [1.0, 100.0, 50.0]}) + cached = SemanticResult( + requests=[], + results=pa.Table.from_pandas(cached_df, preserve_index=False), + ) + new_q = SemanticQuery( + metrics=[M_X], + dimensions=[COL_A], + order=[(M_X, OrderDirection.DESC)], + limit=2, + ) + + out = _apply_post_processing(cached, new_q, set(), False) + df = out.results.to_pandas() + assert df["x"].tolist() == [100.0, 50.0] + + def test_projection_rejected_when_metric_aggregation_unknown() -> None: entry, new_q = _projection_query( metrics=[M_UNKNOWN], @@ -637,6 +658,23 @@ def test_projection_rejected_when_cached_has_having() -> None: assert ok is False +def test_projection_rejected_when_new_query_has_group_limit() -> None: + group_limit = GroupLimit( + dimensions=[COL_A], + top=2, + metric=M_SUM, + direction=OrderDirection.DESC, + ) + entry, new_q = _projection_query( + metrics=[M_SUM], + new_dimensions=[COL_A], + cached_dimensions=[COL_A, COL_B], + new_group_limit=group_limit, + ) + ok, _, _ = can_satisfy(entry, new_q) + assert ok is False + + def test_projection_rejected_when_order_references_dropped_dim() -> None: entry, new_q = _projection_query( metrics=[M_SUM],