mirror of
https://github.com/apache/superset.git
synced 2026-06-27 10:29:21 +00:00
Compare commits
6 Commits
chore/ci-c
...
fix/saniti
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
91a56e0a25 | ||
|
|
0228ff4f33 | ||
|
|
401d218992 | ||
|
|
1a998e61a8 | ||
|
|
f5a841b320 | ||
|
|
ee0a93771f |
@@ -1843,16 +1843,39 @@ def process_jinja_sql(
|
||||
|
||||
def sanitize_clause(clause: str, engine: str) -> str:
|
||||
"""
|
||||
Make sure the SQL clause is valid.
|
||||
Validate a SQL clause and return it unchanged.
|
||||
|
||||
The clause is parsed to ensure it is a single, well-formed statement. We
|
||||
intentionally return the *original* text rather than a re-rendered version:
|
||||
round-tripping user SQL through SQLGlot's dialect generator can silently
|
||||
alter semantics. For example, the Postgres dialect (borrowed by several
|
||||
engines) rewrites ``ROUND(AVG(x), n)`` to ``ROUND(CAST(AVG(x) AS DECIMAL),
|
||||
n)``, which rounds the value to an integer before the explicit ``ROUND`` on
|
||||
engines whose unqualified ``DECIMAL`` defaults to scale 0 (see #36113).
|
||||
|
||||
Comments are the one exception: a trailing line comment can comment out
|
||||
surrounding SQL once the clause is embedded into a larger query (e.g.
|
||||
wrapped in parentheses), so any clause that contains comments is re-rendered
|
||||
to normalize them into a safe form. That re-rendering uses the *base* dialect
|
||||
rather than the engine dialect, so it normalizes comments without re-applying
|
||||
the engine-specific rewrites (e.g. the Postgres ``ROUND``/``CAST`` rewrite
|
||||
from #36113) that we deliberately avoid above. A trailing statement
|
||||
terminator is likewise stripped, since callers embed the clause inside a
|
||||
larger fragment (``WHERE (...)``) where a stray ``;`` would produce invalid
|
||||
SQL.
|
||||
"""
|
||||
try:
|
||||
statement = SQLStatement(clause, engine)
|
||||
parsed = statement._parsed # pylint: disable=protected-access
|
||||
if not any(node.comments for node in parsed.walk()):
|
||||
return clause.rstrip().rstrip(";").rstrip()
|
||||
|
||||
return _normalized_generator(
|
||||
SQLGLOT_DIALECTS.get(engine),
|
||||
None,
|
||||
pretty=False,
|
||||
comments=True,
|
||||
).generate(
|
||||
statement._parsed, # pylint: disable=protected-access
|
||||
parsed,
|
||||
copy=True,
|
||||
)
|
||||
except SupersetParseError as ex:
|
||||
|
||||
@@ -804,7 +804,7 @@ def test_get_samples_with_multiple_filters(
|
||||
assert "2000-01-02" in rv.json["result"]["query"]
|
||||
assert "2000-01-04" in rv.json["result"]["query"]
|
||||
assert "col3 = 1.2" in rv.json["result"]["query"]
|
||||
assert "col4 IS NULL" in rv.json["result"]["query"]
|
||||
assert "col4 is null" in rv.json["result"]["query"]
|
||||
assert "col2 = 'c'" in rv.json["result"]["query"]
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ import re
|
||||
from datetime import datetime
|
||||
from typing import Any, cast, Literal, NamedTuple, Optional, Union
|
||||
from re import Pattern
|
||||
from unittest.mock import Mock, patch
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
@@ -138,8 +138,8 @@ class TestDatabaseModel(SupersetTestCase):
|
||||
assert col.is_temporal
|
||||
|
||||
@patch("superset.jinja_context.get_username", return_value="abc")
|
||||
def test_jinja_metrics_and_calc_columns(self, mock_username):
|
||||
base_query_obj = {
|
||||
def test_jinja_metrics_and_calc_columns(self, mock_username: MagicMock) -> None:
|
||||
base_query_obj: dict[str, Any] = {
|
||||
"granularity": None,
|
||||
"from_dttm": None,
|
||||
"to_dttm": None,
|
||||
@@ -199,8 +199,8 @@ class TestDatabaseModel(SupersetTestCase):
|
||||
assert "'foo_P1D'" in query
|
||||
# assert dataset saved metric
|
||||
assert "count('bar_P1D')" in query
|
||||
# assert adhoc metric
|
||||
assert "SUM(CASE WHEN user = 'user_abc' THEN 1 ELSE 0 END)" in query
|
||||
# assert adhoc metric (sanitize_clause preserves the user's SQL verbatim)
|
||||
assert "SUM(case when user = 'user_abc' then 1 else 0 end)" in query
|
||||
# Cleanup
|
||||
db.session.delete(table)
|
||||
db.session.commit()
|
||||
|
||||
@@ -3123,7 +3123,8 @@ def test_is_valid_cvas(sql: str, engine: str, expected: bool) -> None:
|
||||
"sql, expected, engine",
|
||||
[
|
||||
("col = 1", "col = 1", "base"),
|
||||
("1=\t\n1", "1 = 1", "base"),
|
||||
# Comment-free clauses are returned verbatim (no semantic round-trip).
|
||||
("1=\t\n1", "1=\t\n1", "base"),
|
||||
("(col = 1)", "(col = 1)", "base"), # Compact format without newlines
|
||||
(
|
||||
"(col1 = 1) AND (col2 = 2)",
|
||||
@@ -3147,6 +3148,10 @@ def test_is_valid_cvas(sql: str, engine: str, expected: bool) -> None:
|
||||
), # Block comments preserved
|
||||
("col = 'col1 = 1) AND (col2 = 2'", "col = 'col1 = 1) AND (col2 = 2'", "base"),
|
||||
("col = 'select 1; select 2'", "col = 'select 1; select 2'", "base"),
|
||||
# Trailing statement terminators are stripped so the clause stays valid
|
||||
# once embedded inside a larger fragment (e.g. ``WHERE (...)``).
|
||||
("col = 1;", "col = 1", "base"),
|
||||
("col = 1 ; ", "col = 1", "base"),
|
||||
("col = 'abc -- comment'", "col = 'abc -- comment'", "base"),
|
||||
("col1 = 1) AND (col2 = 2)", QueryClauseValidationException, "base"),
|
||||
("(col1 = 1) AND (col2 = 2", QueryClauseValidationException, "base"),
|
||||
@@ -3208,6 +3213,63 @@ def test_sanitize_clause(sql: str, expected: str | Exception, engine: str) -> No
|
||||
sanitize_clause(sql, engine)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"engine",
|
||||
["postgresql", "redshift", "cockroachdb", "netezza", "hana", "base", "mysql"],
|
||||
)
|
||||
def test_sanitize_clause_preserves_aggregation_semantics(engine: str) -> None:
|
||||
"""
|
||||
Regression test for https://github.com/apache/superset/issues/36113.
|
||||
|
||||
`sanitize_clause` must not silently rewrite a user-authored expression. The
|
||||
Postgres SQLGlot dialect (which several engines borrow) rewrites
|
||||
``ROUND(AVG(x), n)`` to ``ROUND(CAST(AVG(x) AS DECIMAL), n)`` at generation
|
||||
time. On engines whose unqualified ``DECIMAL`` defaults to scale 0 (e.g.
|
||||
Redshift, Netezza) the injected cast rounds the aggregate to an integer
|
||||
*before* the explicit ``ROUND``, producing wrong results.
|
||||
|
||||
The clause must be returned unchanged regardless of the engine dialect.
|
||||
"""
|
||||
clause = "ROUND(AVG(col), 4)"
|
||||
sanitized = sanitize_clause(clause, engine)
|
||||
assert "CAST" not in sanitized.upper(), (
|
||||
f"sanitize_clause injected a cast for engine {engine!r}: {sanitized!r}"
|
||||
)
|
||||
assert sanitized == clause
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"engine",
|
||||
["postgresql", "redshift", "cockroachdb", "netezza", "hana", "base", "mysql"],
|
||||
)
|
||||
def test_sanitize_clause_preserves_aggregation_semantics_with_comment(
|
||||
engine: str,
|
||||
) -> None:
|
||||
"""
|
||||
Regression test for https://github.com/apache/superset/issues/36113.
|
||||
|
||||
A clause that contains a comment takes the re-rendering branch of
|
||||
``sanitize_clause``. That branch must normalize comments using the *base*
|
||||
dialect rather than the engine dialect, so it must not re-apply the Postgres
|
||||
``ROUND(AVG(x), n)`` -> ``ROUND(CAST(AVG(x) AS DECIMAL), n)`` rewrite that
|
||||
truncates results on engines where ``DECIMAL`` defaults to scale 0.
|
||||
"""
|
||||
clause = "ROUND(AVG(col), 4) /* precise_count_distinct=true */"
|
||||
sanitized = sanitize_clause(clause, engine)
|
||||
assert "CAST" not in sanitized.upper(), (
|
||||
f"sanitize_clause injected a cast for engine {engine!r}: {sanitized!r}"
|
||||
)
|
||||
# The comment-handling branch must preserve the user-authored expression and
|
||||
# comment payload, not just avoid the cast (otherwise dropping the comment or
|
||||
# rewriting the clause entirely would still pass the assertion above).
|
||||
assert "ROUND(AVG(col), 4)" in sanitized, (
|
||||
f"sanitize_clause rewrote the clause for engine {engine!r}: {sanitized!r}"
|
||||
)
|
||||
assert "precise_count_distinct=true" in sanitized, (
|
||||
f"sanitize_clause dropped the comment for engine {engine!r}: {sanitized!r}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"engine",
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user