Improvements

This commit is contained in:
Beto Dealmeida
2026-05-13 09:15:51 -04:00
parent 209b44522d
commit dca18116ae
2 changed files with 43 additions and 5 deletions

View File

@@ -376,6 +376,8 @@ def _projection_allowed(
return False
if entry.group_limit_key:
return False
if query.group_limit is not None:
return False
# Cached HAVING dropped sub-aggregate rows; the rolled-up totals would be
# off. Conservative: skip the projection path when cached has any HAVING.
if any(f.type == PredicateType.HAVING for f in entry.filters):
@@ -623,17 +625,15 @@ def _apply_post_processing(
aggregates = {
m.name: {
"column": m.name,
"operator": _AGGREGATION_TO_PANDAS[
# Guarded by ``_projection_allowed`` — non-None and additive.
m.aggregation # type: ignore[index]
],
"operator": _AGGREGATION_TO_PANDAS[m.aggregation],
}
for m in query.metrics
}
df = aggregate(df, groupby=groupby, aggregates=aggregates)
df = _apply_order(df, query.order)
note_def = "Served from semantic view smart cache (re-aggregated locally)"
df = _apply_order(df, query.order)
if query.limit is not None:
df = df.head(query.limit)

View File

@@ -27,6 +27,7 @@ from superset_core.semantic_layers.types import (
AggregationType,
Dimension,
Filter,
GroupLimit,
Metric,
Operator,
OrderDirection,
@@ -445,6 +446,7 @@ def _projection_query(
new_filters: set[Filter] | None = None,
new_limit: int | None = None,
new_order: Any = None,
new_group_limit: GroupLimit | None = None,
) -> tuple[CachedEntry, SemanticQuery]:
cached_q = SemanticQuery(
metrics=metrics,
@@ -458,6 +460,7 @@ def _projection_query(
filters=new_filters,
limit=new_limit,
order=new_order,
group_limit=new_group_limit,
)
return entry_from(cached_q), new_q
@@ -594,6 +597,24 @@ def test_projection_with_order_and_limit() -> None:
assert df["sum_x"].tolist() == [100]
def test_apply_post_processing_sorts_before_limit_for_non_projection() -> None:
    """Check that ordering is applied before the row limit.

    The cached rows are stored unsorted (1.0, 100.0, 50.0).  With a
    descending order on the metric and ``limit=2`` the expected output is
    the two largest values; truncating first would keep 1.0 and 100.0.
    """
    frame = pd.DataFrame({"a": ["x", "y", "z"], "x": [1.0, 100.0, 50.0]})
    table = pa.Table.from_pandas(frame, preserve_index=False)
    cached = SemanticResult(requests=[], results=table)
    new_q = SemanticQuery(
        metrics=[M_X],
        dimensions=[COL_A],
        order=[(M_X, OrderDirection.DESC)],
        limit=2,
    )

    # NOTE(review): the trailing ``False`` presumably selects the
    # non-projection path named in the test title -- confirm against
    # ``_apply_post_processing``'s signature.
    served = _apply_post_processing(cached, new_q, set(), False)

    assert served.results.to_pandas()["x"].tolist() == [100.0, 50.0]
def test_projection_rejected_when_metric_aggregation_unknown() -> None:
entry, new_q = _projection_query(
metrics=[M_UNKNOWN],
@@ -637,6 +658,23 @@ def test_projection_rejected_when_cached_has_having() -> None:
assert ok is False
def test_projection_rejected_when_new_query_has_group_limit() -> None:
    """Check that a new query carrying a group limit is not satisfiable.

    The cached entry covers dimensions A and B while the new query asks for
    A only, plus a top-2 group limit on ``M_SUM``; ``can_satisfy`` is
    expected to report ``False`` as its first element.
    """
    entry, new_q = _projection_query(
        metrics=[M_SUM],
        new_dimensions=[COL_A],
        cached_dimensions=[COL_A, COL_B],
        new_group_limit=GroupLimit(
            dimensions=[COL_A],
            top=2,
            metric=M_SUM,
            direction=OrderDirection.DESC,
        ),
    )

    satisfiable, _, _ = can_satisfy(entry, new_q)

    assert satisfiable is False
def test_projection_rejected_when_order_references_dropped_dim() -> None:
entry, new_q = _projection_query(
metrics=[M_SUM],