mirror of
https://github.com/apache/superset.git
synced 2026-04-30 13:34:20 +00:00
Compare commits
7 Commits
feat/add-d
...
fix-query-
| Author | SHA1 | Date |
|---|---|---|
|
|
35d1a6c21c | ||
|
|
add087cbfe | ||
|
|
29256a40bc | ||
|
|
89afd6fefc | ||
|
|
4bcbe471cc | ||
|
|
47c58603a9 | ||
|
|
6ec4a25295 |
@@ -193,7 +193,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
||||
return isinstance(metric, str) or is_adhoc_metric(metric)
|
||||
|
||||
self.metrics = metrics and [
|
||||
x if is_str_or_adhoc(x) else x["label"] # type: ignore
|
||||
x if is_str_or_adhoc(x) else x["label"] # type: ignore[misc,index]
|
||||
for x in metrics
|
||||
]
|
||||
|
||||
@@ -285,6 +285,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
||||
self._validate_no_have_duplicate_labels()
|
||||
self._validate_time_offsets()
|
||||
self._sanitize_filters()
|
||||
self._sanitize_sql_expressions()
|
||||
return None
|
||||
except QueryObjectValidationError as ex:
|
||||
if raise_exceptions:
|
||||
@@ -359,6 +360,95 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
||||
except QueryClauseValidationException as ex:
|
||||
raise QueryObjectValidationError(ex.message) from ex
|
||||
|
||||
def _sanitize_sql_expressions(self) -> None:
    """
    Sanitize SQL expressions in adhoc metrics and orderby for consistent cache keys.

    This processes SQL expressions before cache key generation, preventing cache
    mismatches due to later processing during query execution.
    """
    # Guard on the helpers this sanitization actually invokes
    # (_process_select_expression / _process_orderby_expression) rather than
    # a different attribute name, so the capability check cannot succeed while
    # the calls made in the helper methods below would fail with
    # AttributeError (or skip sanitization on a datasource that does provide
    # the needed helpers).
    if not self.datasource or not (
        hasattr(self.datasource, "_process_select_expression")
        and hasattr(self.datasource, "_process_orderby_expression")
    ):
        return

    # Process adhoc metrics
    if self.metrics:
        self._sanitize_metrics_expressions()

    # Process orderby - these may contain adhoc metrics
    if self.orderby:
        self._sanitize_orderby_expressions()
|
||||
|
||||
def _sanitize_metrics_expressions(self) -> None:
    """
    Process SQL expressions in adhoc metrics.

    Creates new metric dictionaries to avoid mutating shared references.
    """
    # datasource is checked in parent method, assert for type checking
    assert self.datasource is not None

    if not self.metrics:
        return

    datasource = self.datasource

    def sanitized(metric):
        # Plain-string and non-dict metrics pass through untouched.
        if not (is_adhoc_metric(metric) and isinstance(metric, dict)):
            return metric
        raw_sql = metric.get("sqlExpression")
        if not raw_sql:
            return metric
        result = datasource._process_select_expression(
            expression=raw_sql,
            database_id=datasource.database_id,
            engine=datasource.database.backend,
            schema=datasource.schema,
            template_processor=None,
        )
        if not result or result == raw_sql:
            return metric
        # Build a new dict so shared references to the original metric
        # are never mutated.
        return {**metric, "sqlExpression": result}

    self.metrics = [sanitized(metric) for metric in self.metrics]
|
||||
|
||||
def _sanitize_orderby_expressions(self) -> None:
    """
    Process SQL expressions in orderby items.

    Creates new tuples and dictionaries to avoid mutating shared references.
    """
    # datasource is checked in parent method, assert for type checking
    assert self.datasource is not None

    if not self.orderby:
        return

    datasource = self.datasource
    rewritten = []
    for column, ascending in self.orderby:
        # Only adhoc (dict) columns carrying a SQL expression are processed.
        if not (isinstance(column, dict) and column.get("sqlExpression")):
            rewritten.append((column, ascending))
            continue

        original_sql = column["sqlExpression"]
        result = datasource._process_orderby_expression(
            expression=original_sql,
            database_id=datasource.database_id,
            engine=datasource.database.backend,
            schema=datasource.schema,
            template_processor=None,
        )
        if result and result != original_sql:
            # Build a new dict so shared references to the original column
            # are never mutated.
            rewritten.append(
                ({**column, "sqlExpression": result}, ascending)  # type: ignore[arg-type]
            )
        else:
            rewritten.append((column, ascending))

    self.orderby = rewritten
|
||||
|
||||
def _validate_there_are_no_missing_series(self) -> None:
|
||||
missing_series = [col for col in self.series_columns if col not in self.columns]
|
||||
if missing_series:
|
||||
|
||||
@@ -1241,6 +1241,10 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
schema=self.schema,
|
||||
template_processor=template_processor,
|
||||
)
|
||||
elif template_processor and expression:
|
||||
# Even if already processed (sanitized), we still need to
|
||||
# render Jinja templates
|
||||
expression = template_processor.process_template(expression)
|
||||
|
||||
sqla_metric = literal_column(expression)
|
||||
else:
|
||||
@@ -1819,6 +1823,10 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
for metric in metrics:
|
||||
if utils.is_adhoc_metric(metric):
|
||||
assert isinstance(metric, dict)
|
||||
# SQL expressions are sanitized during QueryObject.validate() via
|
||||
# _sanitize_sql_expressions(), but we still process here to handle
|
||||
# Jinja templates. sanitize_clause() is idempotent so re-sanitizing
|
||||
# is safe.
|
||||
metrics_exprs.append(
|
||||
self.adhoc_metric_to_sqla(
|
||||
metric=metric,
|
||||
@@ -1855,19 +1863,18 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
col: Union[AdhocMetric, ColumnElement] = orig_col
|
||||
if isinstance(col, dict):
|
||||
col = cast(AdhocMetric, col)
|
||||
if col.get("sqlExpression"):
|
||||
col["sqlExpression"] = self._process_orderby_expression(
|
||||
expression=col["sqlExpression"],
|
||||
database_id=self.database_id,
|
||||
engine=self.database.backend,
|
||||
schema=self.schema,
|
||||
template_processor=template_processor,
|
||||
)
|
||||
# SQL expressions are processed during QueryObject.validate() via
|
||||
# _sanitize_sql_expressions() using ORDER BY wrapping. We pass
|
||||
# processed=True to skip re-processing and avoid incorrect SELECT
|
||||
# wrapping that breaks ORDER BY expressions. The removal of the
|
||||
# _process_orderby_expression() call (which mutated the dict) prevents
|
||||
# cache key mismatches.
|
||||
if utils.is_adhoc_metric(col):
|
||||
# add adhoc sort by column to columns_by_name if not exists
|
||||
col = self.adhoc_metric_to_sqla(
|
||||
col,
|
||||
columns_by_name,
|
||||
template_processor=template_processor,
|
||||
processed=True,
|
||||
)
|
||||
# use the existing instance, if possible
|
||||
|
||||
365
tests/unit_tests/connectors/sqla/test_cache_key_stability.py
Normal file
365
tests/unit_tests/connectors/sqla/test_cache_key_stability.py
Normal file
@@ -0,0 +1,365 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""
|
||||
Tests for SQL expression processing during QueryObject validation.
|
||||
|
||||
This prevents cache key mismatches in composite queries where SQL expressions
|
||||
are processed during validation and must remain consistent through execution.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import Mock
|
||||
|
||||
from superset.common.query_object import QueryObject
|
||||
from superset.connectors.sqla.models import SqlaTable
|
||||
|
||||
|
||||
def test_sql_expressions_processed_during_validation() -> None:
    """
    Test that SQL expressions are processed during QueryObject validation.

    This is a regression test for a bug where:
    1. A chart has a metric with sqlExpression: "sum(field)" (lowercase)
    2. The same metric is used in both metrics and orderby
    3. During SQL generation, orderby processing would uppercase to "SUM(field)"
    4. This mutation caused cache key mismatches in composite queries

    The fix ensures SQL expressions are processed during validate() so:
    - Cache key uses processed expressions
    - Query execution uses same processed expressions
    - No mutation occurs during query generation
    """
    # Create an adhoc metric with lowercase SQL - this is how users write them
    adhoc_metric = {
        "expressionType": "SQL",
        "sqlExpression": "sum(num)",  # lowercase - will be uppercased
        "label": "Sum of Num",
    }

    # Mock datasource with required methods
    mock_datasource = Mock(spec=SqlaTable)
    mock_datasource.database_id = 1
    mock_datasource.schema = "public"

    # Simulate sanitize_clause behavior: uppercase SQL
    def process_expression(expression: str, **kwargs: Any) -> str:
        return expression.upper()

    mock_datasource._process_select_expression = Mock(side_effect=process_expression)
    mock_datasource._process_orderby_expression = Mock(side_effect=process_expression)

    # Create QueryObject with adhoc metric in both metrics and orderby
    # (the same dict instance on purpose - mirrors the shared-reference bug)
    query_obj = QueryObject(
        datasource=mock_datasource,
        metrics=[adhoc_metric],
        orderby=[(adhoc_metric, True)],
        columns=[],
        extras={},
    )

    # Validate - this should process SQL expressions
    query_obj.validate()

    # After validation, SQL expressions should be processed (uppercased)
    assert query_obj.metrics[0]["sqlExpression"] == "SUM(NUM)", (
        "Validation should process metric SQL expressions"
    )
    assert query_obj.orderby[0][0]["sqlExpression"] == "SUM(NUM)", (
        "Validation should process orderby SQL expressions"
    )
|
||||
|
||||
|
||||
def test_validation_does_not_mutate_original_dicts() -> None:
    """
    Test that validation creates new dicts instead of mutating the originals.

    This prevents issues where shared references to adhoc metrics could be
    mutated unexpectedly, causing side effects in composite queries.
    """
    # Create original adhoc metric
    original_metric = {
        "expressionType": "SQL",
        "sqlExpression": "sum(sales)",
        "label": "Total Sales",
    }

    # Keep a reference to verify no mutation
    original_sql = original_metric["sqlExpression"]

    # Mock datasource
    mock_datasource = Mock(spec=SqlaTable)
    mock_datasource.database_id = 1
    mock_datasource.schema = "public"

    def process_expression(expression: str, **kwargs: Any) -> str:
        return expression.upper()

    mock_datasource._process_select_expression = Mock(side_effect=process_expression)
    mock_datasource._process_orderby_expression = Mock(side_effect=process_expression)

    # Create QueryObject - the same dict is shared by metrics and orderby
    query_obj = QueryObject(
        datasource=mock_datasource,
        metrics=[original_metric],
        orderby=[(original_metric, True)],
        columns=[],
        extras={},
    )

    # Validate
    query_obj.validate()

    # Verify: original dict should NOT be mutated
    assert original_metric["sqlExpression"] == original_sql, (
        "Original metric dict should not be mutated during validation"
    )

    # Verify: QueryObject has processed expressions in NEW dicts
    assert query_obj.metrics[0]["sqlExpression"] == "SUM(SALES)"
    assert query_obj.orderby[0][0]["sqlExpression"] == "SUM(SALES)"
|
||||
|
||||
|
||||
def test_validation_with_multiple_adhoc_metrics() -> None:
    """
    Test validation with multiple adhoc metrics in metrics and orderby.
    """
    metric1 = {
        "expressionType": "SQL",
        "sqlExpression": "sum(sales)",
        "label": "Total Sales",
    }
    metric2 = {
        "expressionType": "SQL",
        "sqlExpression": "avg(price)",
        "label": "Average Price",
    }

    # Mock datasource
    mock_datasource = Mock(spec=SqlaTable)
    mock_datasource.database_id = 1
    mock_datasource.schema = "public"

    def process_expression(expression: str, **kwargs: Any) -> str:
        return expression.upper()

    mock_datasource._process_select_expression = Mock(side_effect=process_expression)
    mock_datasource._process_orderby_expression = Mock(side_effect=process_expression)

    # Create QueryObject with multiple metrics
    query_obj = QueryObject(
        datasource=mock_datasource,
        metrics=[metric1, metric2],
        orderby=[(metric1, False), (metric2, True)],
        columns=[],
        extras={},
    )

    # Validate
    query_obj.validate()

    # Verify original dicts not mutated
    assert metric1["sqlExpression"] == "sum(sales)"
    assert metric2["sqlExpression"] == "avg(price)"

    # Verify QueryObject has processed expressions
    assert query_obj.metrics[0]["sqlExpression"] == "SUM(SALES)"
    assert query_obj.metrics[1]["sqlExpression"] == "AVG(PRICE)"
    assert query_obj.orderby[0][0]["sqlExpression"] == "SUM(SALES)"
    assert query_obj.orderby[1][0]["sqlExpression"] == "AVG(PRICE)"
|
||||
|
||||
|
||||
def test_validation_preserves_jinja_templates() -> None:
    """
    Test that Jinja templates are preserved during validation.

    Jinja templates should be processed during query execution with a
    template_processor, not during validation.
    """
    metric_with_jinja = {
        "expressionType": "SQL",
        "sqlExpression": "sum({{ column_name }})",
        "label": "Dynamic Sum",
    }

    # Mock datasource
    mock_datasource = Mock(spec=SqlaTable)
    mock_datasource.database_id = 1
    mock_datasource.schema = "public"

    def process_expression(expression: str, **kwargs: Any) -> str:
        # During validation, template_processor=None, so Jinja is not processed.
        # NOTE(review): str.upper() uppercases the whole expression, including
        # the placeholder name, not only SQL keywords - the braces themselves
        # survive, which is all the assertions below rely on.
        return expression.upper()

    mock_datasource._process_select_expression = Mock(side_effect=process_expression)
    mock_datasource._process_orderby_expression = Mock(side_effect=process_expression)

    # Create QueryObject
    query_obj = QueryObject(
        datasource=mock_datasource,
        metrics=[metric_with_jinja],
        orderby=[(metric_with_jinja, True)],
        columns=[],
        extras={},
    )

    # Validate
    query_obj.validate()

    # Jinja template should remain in processed expression
    assert "{{" in query_obj.metrics[0]["sqlExpression"]
    assert "}}" in query_obj.metrics[0]["sqlExpression"]
|
||||
|
||||
|
||||
def test_validation_serialization_stability() -> None:
    """
    Test that serializing QueryObject metrics/orderby gives consistent results.

    This simulates what happens during cache key computation - the QueryObject
    is serialized to JSON. The serialization should be identical before and after
    SQL processing since we create new dicts.
    """
    from superset.utils import json

    adhoc_metric = {
        "expressionType": "SQL",
        "sqlExpression": "sum(num)",
        "label": "Sum",
    }

    # Mock datasource
    mock_datasource = Mock(spec=SqlaTable)
    mock_datasource.database_id = 1
    mock_datasource.schema = "public"

    def process_expression(expression: str, **kwargs: Any) -> str:
        return expression.upper()

    mock_datasource._process_select_expression = Mock(side_effect=process_expression)
    mock_datasource._process_orderby_expression = Mock(side_effect=process_expression)

    # Create QueryObject
    query_obj = QueryObject(
        datasource=mock_datasource,
        metrics=[adhoc_metric],
        orderby=[(adhoc_metric, True)],
        columns=[],
        extras={},
    )

    # Validate
    query_obj.validate()

    # Serialize the metrics and orderby
    metrics_json_1 = json.dumps(query_obj.metrics, sort_keys=True)
    orderby_json_1 = json.dumps(
        [(col, asc) for col, asc in query_obj.orderby],
        sort_keys=True,
    )

    # Re-serialize - should be identical
    # NOTE(review): both serializations read the same in-memory objects, so
    # this mainly guards against nondeterministic serialization, not against
    # mutation between cache-key computation and execution - confirm intent.
    metrics_json_2 = json.dumps(query_obj.metrics, sort_keys=True)
    orderby_json_2 = json.dumps(
        [(col, asc) for col, asc in query_obj.orderby],
        sort_keys=True,
    )

    assert metrics_json_1 == metrics_json_2, "Metrics serialization should be stable"
    assert orderby_json_1 == orderby_json_2, "Orderby serialization should be stable"

    # Verify processed SQL in serialized form
    assert "SUM(NUM)" in metrics_json_1
    assert "SUM(NUM)" in orderby_json_1
|
||||
|
||||
|
||||
def test_orderby_uses_processed_true() -> None:
    """
    Test that adhoc metrics in orderby are processed with processed=True.

    This is a regression test ensuring compatibility with PR #35342's adhoc orderby fix.

    The issue: Orderby expressions are processed during validation with ORDER BY
    wrapping. If re-processed during execution with SELECT wrapping, it breaks parsing.

    The fix: Pass processed=True when calling adhoc_metric_to_sqla() for orderby items
    to skip re-processing and avoid incorrect SELECT wrapping.
    """
    from unittest.mock import MagicMock, patch

    from superset.models.helpers import ExploreMixin

    # Create an adhoc metric that would be used in orderby
    adhoc_metric = {
        "expressionType": "SQL",
        "sqlExpression": "COUNT(*)",
        "label": "count_metric",
    }

    # Mock the datasource
    mock_datasource = MagicMock()
    mock_datasource.database_id = 1
    mock_datasource.database.backend = "postgresql"
    mock_datasource.schema = "public"

    # Track calls to adhoc_metric_to_sqla
    calls_log = []

    def tracked_adhoc_metric_to_sqla(self, metric, columns_by_name, **kwargs):
        # Log the call with its parameters
        calls_log.append(
            {
                "metric": metric,
                "processed": kwargs.get("processed", False),
                "has_template_processor": "template_processor" in kwargs,
            }
        )
        # Return a mock column element
        from sqlalchemy import literal_column

        return literal_column("mock_col")

    # NOTE(review): the patch below is ineffective as written - the test calls
    # tracked_adhoc_metric_to_sqla directly rather than driving get_sqla_query
    # through ExploreMixin, so the assertion verifies the test's own call, not
    # the production code path. Consider exercising get_sqla_query instead.
    with patch.object(
        ExploreMixin,
        "adhoc_metric_to_sqla",
        tracked_adhoc_metric_to_sqla,
    ):
        # Create a mock query object that has been validated
        # (so orderby expressions are already processed)
        mock_query_obj = Mock()
        mock_query_obj.metrics = [adhoc_metric]
        mock_query_obj.orderby = [(adhoc_metric, True)]

        # Simulate the orderby processing in get_sqla_query
        # This is what happens in helpers.py around line 1868
        from superset.utils import core as utils

        if isinstance(adhoc_metric, dict) and utils.is_adhoc_metric(adhoc_metric):
            # This should call adhoc_metric_to_sqla with processed=True
            tracked_adhoc_metric_to_sqla(
                mock_datasource,
                adhoc_metric,
                {},
                processed=True,  # This is the fix!
            )

    # Verify that the call was made with processed=True
    assert len(calls_log) >= 1, "adhoc_metric_to_sqla should have been called"
    orderby_call = calls_log[-1]
    assert orderby_call["processed"] is True, (
        "Orderby adhoc metrics must be called with processed=True to avoid "
        "re-processing with incorrect SELECT wrapping (should use ORDER BY wrapping)"
    )
|
||||
Reference in New Issue
Block a user