fix(sqla): parenthesize calculated column expressions in WHERE clause (#39793)

Co-authored-by: Brian Donovan <briand@netflix.com>
Co-authored-by: Vitor Avila <96086495+Vitor-Avila@users.noreply.github.com>
This commit is contained in:
bdonovan1
2026-05-06 15:45:27 -07:00
committed by GitHub
parent 4aa4415d8f
commit 5b5dd01028
2 changed files with 237 additions and 0 deletions

View File

@@ -3084,6 +3084,14 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
sqla_col = self.convert_tbl_column_to_sqla_col(
tbl_column=col_obj, template_processor=template_processor
)
# Parenthesize expression-based columns to prevent operator
# precedence issues (e.g. OR in a calculated column breaking
# surrounding AND filters). Same pattern as extras.where
# wrapping added in PR #38183.
if sqla_col is not None and (
(col_obj and col_obj.expression) or is_adhoc_column(flt_col)
):
sqla_col = Grouping(sqla_col)
col_type = col_obj.type if col_obj else None
col_spec = db_engine_spec.get_column_spec(native_type=col_type)
is_list_target = op in (

View File

@@ -1937,6 +1937,235 @@ def test_extras_having_is_parenthesized(
)
def test_calculated_column_filter_is_parenthesized(
database: Database,
) -> None:
"""
Test that calculated column expressions containing OR are wrapped in
parentheses when used in WHERE filters.
Without parentheses, a calculated column expression like
``status = 'active' OR status = 'pending'`` combined with other filters
via AND would produce unexpected evaluation order due to SQL operator
precedence (AND binds tighter than OR), potentially dropping time range
and other filters. Same class of bug as fixed in PR #38183 for
extras.where/having, but on the calculated column filter path.
"""
from superset.connectors.sqla.models import SqlaTable, TableColumn
table = SqlaTable(
database=database,
schema=None,
table_name="t",
columns=[
TableColumn(column_name="a", type="INTEGER"),
TableColumn(
column_name="is_active",
expression="status = 'active' OR status = 'pending'",
type="BOOLEAN",
),
],
)
sqla_query = table.get_sqla_query(
columns=["a"],
filter=[
{
"col": "is_active",
"op": "IS TRUE",
"val": None,
},
],
extras={},
is_timeseries=False,
metrics=[],
)
with database.get_sqla_engine() as engine:
sql = str(
sqla_query.sqla_query.compile(
dialect=engine.dialect,
compile_kwargs={"literal_binds": True},
)
)
assert "(status = 'active' OR status = 'pending')" in sql, (
f"Calculated column expression should be wrapped in parentheses. "
f"Generated SQL: {sql}"
)
def test_calculated_column_nested_or_and_is_parenthesized(
database: Database,
) -> None:
"""
Test that calculated column expressions with nested OR/AND combinations
are correctly parenthesized as a single unit in WHERE filters.
"""
from superset.connectors.sqla.models import SqlaTable, TableColumn
table = SqlaTable(
database=database,
schema=None,
table_name="t",
columns=[
TableColumn(column_name="a", type="INTEGER"),
TableColumn(
column_name="is_target",
expression=(
"(status = 'active' AND region = 'US') "
"OR (status = 'pending' AND region = 'EU')"
),
type="BOOLEAN",
),
],
)
sqla_query = table.get_sqla_query(
columns=["a"],
filter=[
{
"col": "is_target",
"op": "IS TRUE",
"val": None,
},
],
extras={},
is_timeseries=False,
metrics=[],
)
with database.get_sqla_engine() as engine:
sql = str(
sqla_query.sqla_query.compile(
dialect=engine.dialect,
compile_kwargs={"literal_binds": True},
)
)
assert (
"((status = 'active' AND region = 'US') "
"OR (status = 'pending' AND region = 'EU'))"
) in sql, (
f"Nested OR/AND expression should be wrapped in parentheses. "
f"Generated SQL: {sql}"
)
def test_calculated_column_non_boolean_filter_is_parenthesized(
database: Database,
) -> None:
"""
Test that non-boolean calculated column expressions are parenthesized
when used with IN filters.
"""
from superset.connectors.sqla.models import SqlaTable, TableColumn
table = SqlaTable(
database=database,
schema=None,
table_name="t",
columns=[
TableColumn(column_name="a", type="INTEGER"),
TableColumn(
column_name="full_name",
expression="first_name || ' ' || last_name",
type="TEXT",
),
],
)
sqla_query = table.get_sqla_query(
columns=["a"],
filter=[
{
"col": "full_name",
"op": "IN",
"val": ["John Doe", "Jane Doe"],
},
],
extras={},
is_timeseries=False,
metrics=[],
)
with database.get_sqla_engine() as engine:
sql = str(
sqla_query.sqla_query.compile(
dialect=engine.dialect,
compile_kwargs={"literal_binds": True},
)
)
assert "(first_name || ' ' || last_name)" in sql, (
f"Non-boolean calculated column should be wrapped in parentheses. "
f"Generated SQL: {sql}"
)
def test_multiple_calculated_columns_each_parenthesized(
database: Database,
) -> None:
"""
Test that multiple calculated columns used as filters are each
independently wrapped in parentheses.
"""
from superset.connectors.sqla.models import SqlaTable, TableColumn
table = SqlaTable(
database=database,
schema=None,
table_name="t",
columns=[
TableColumn(column_name="a", type="INTEGER"),
TableColumn(
column_name="is_active",
expression="status = 'active' OR status = 'pending'",
type="BOOLEAN",
),
TableColumn(
column_name="is_premium",
expression="tier = 'gold' OR tier = 'platinum'",
type="BOOLEAN",
),
],
)
sqla_query = table.get_sqla_query(
columns=["a"],
filter=[
{
"col": "is_active",
"op": "IS TRUE",
"val": None,
},
{
"col": "is_premium",
"op": "IS TRUE",
"val": None,
},
],
extras={},
is_timeseries=False,
metrics=[],
)
with database.get_sqla_engine() as engine:
sql = str(
sqla_query.sqla_query.compile(
dialect=engine.dialect,
compile_kwargs={"literal_binds": True},
)
)
assert "(status = 'active' OR status = 'pending')" in sql, (
f"First calculated column should be parenthesized. Generated SQL: {sql}"
)
assert "(tier = 'gold' OR tier = 'platinum')" in sql, (
f"Second calculated column should be parenthesized. Generated SQL: {sql}"
)
def _run_probe(
database: Database,
type_probe_needs_row: bool = False,