fix(charts): Fix unquoted 'Others' literal in series limit GROUP BY clause (#34390)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Maxime Beauchemin
2025-07-29 17:36:10 -07:00
committed by GitHub
parent 131ae5aa9d
commit bf967d6ba4
2 changed files with 173 additions and 7 deletions

View File

@@ -1290,9 +1290,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
condition = condition_factory(expr.name, expr)
# Create CASE expression: condition true -> original, else "Others"
case_expr = sa.case(
[(condition, expr)], else_=sa.literal_column("Others")
)
case_expr = sa.case([(condition, expr)], else_=sa.literal("Others"))
case_expr = self.make_sqla_column_compatible(case_expr, expr.name)
modified_select_exprs.append(case_expr)
else:
@@ -1308,9 +1306,13 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
# Create CASE expression for groupby
case_expr = sa.case(
[(condition, gby_expr)],
else_=sa.literal_column("Others"),
else_=sa.literal("Others"),
)
case_expr = self.make_sqla_column_compatible(case_expr, col_name)
# Don't apply make_sqla_column_compatible to GROUP BY expressions.
# When make_sqla_column_compatible adds a label to the expression,
# it can cause SQLAlchemy to incorrectly render string literals
# without quotes in the GROUP BY clause (e.g., "ELSE Others"
# instead of "ELSE 'Others'")
modified_groupby_all_columns[col_name] = case_expr
else:
modified_groupby_all_columns[col_name] = gby_expr

View File

@@ -296,7 +296,10 @@ def test_apply_series_others_grouping(database: Database) -> None:
# Category (series column) should be replaced with CASE expression
assert "category" in result_groupby_columns
category_groupby_result = result_groupby_columns["category"]
assert category_groupby_result.name == "category" # Should be made compatible
# After our fix, GROUP BY expressions are NOT wrapped with
# make_sqla_column_compatible, so it should be a raw CASE expression,
# not a Mock with .name attribute. Verify it's different from the original
assert category_groupby_result != category_expr
# Other (non-series column) should remain unchanged
assert result_groupby_columns["other_col"] == other_expr
@@ -359,4 +362,165 @@ def test_apply_series_others_grouping_with_false_condition(database: Database) -
assert len(result_groupby_columns) == 1
assert "category" in result_groupby_columns
assert result_groupby_columns["category"].name == "category"
# GROUP BY expression should be a CASE expression, not the original
assert result_groupby_columns["category"] != category_expr
def test_apply_series_others_grouping_sql_compilation(database: Database) -> None:
"""
Test that the `_apply_series_others_grouping` method properly quotes
the 'Others' literal in both SELECT and GROUP BY clauses.
This test verifies the fix for the bug where 'Others' was not quoted
in the GROUP BY clause, causing SQL syntax errors.
"""
import sqlalchemy as sa
from superset.connectors.sqla.models import SqlaTable, TableColumn
# Create a real table instance
table = SqlaTable(
database=database,
schema=None,
table_name="test_table",
columns=[
TableColumn(column_name="name", type="TEXT"),
TableColumn(column_name="value", type="INTEGER"),
],
)
# Create real SQLAlchemy expressions
name_col = sa.column("name")
value_col = sa.column("value")
select_exprs = [name_col, value_col]
groupby_all_columns = {"name": name_col}
groupby_series_columns = {"name": name_col}
# Condition factory that checks if a subquery column is not null
def condition_factory(col_name: str, expr):
return sa.column("series_limit.name__").is_not(None)
# Call the method
result_select_exprs, result_groupby_columns = table._apply_series_others_grouping(
select_exprs,
groupby_all_columns,
groupby_series_columns,
condition_factory,
)
# Get the database dialect from the actual database
with database.get_sqla_engine() as engine:
dialect = engine.dialect
# Test SELECT expression compilation
select_case_expr = result_select_exprs[0]
select_sql = str(
select_case_expr.compile(
dialect=dialect, compile_kwargs={"literal_binds": True}
)
)
# Test GROUP BY expression compilation
groupby_case_expr = result_groupby_columns["name"]
groupby_sql = str(
groupby_case_expr.compile(
dialect=dialect, compile_kwargs={"literal_binds": True}
)
)
# Different databases may use different quote characters
# PostgreSQL/MySQL use single quotes, some might use double quotes
# The key is that Others should be quoted, not bare
# Check that 'Others' appears with some form of quotes
# and not as a bare identifier
assert " Others " not in select_sql, "Found unquoted 'Others' in SELECT"
assert " Others " not in groupby_sql, "Found unquoted 'Others' in GROUP BY"
# Check for common quoting patterns
has_single_quotes = "'Others'" in select_sql and "'Others'" in groupby_sql
has_double_quotes = '"Others"' in select_sql and '"Others"' in groupby_sql
assert has_single_quotes or has_double_quotes, (
"Others literal should be quoted with either single or double quotes"
)
# Verify the structure of the generated SQL
assert "CASE WHEN" in select_sql
assert "CASE WHEN" in groupby_sql
# Check that ELSE is followed by a quoted value
assert "ELSE " in select_sql
assert "ELSE " in groupby_sql
# The key test is that GROUP BY expression doesn't have a label
# while SELECT might or might not have one depending on the database
# What matters is that GROUP BY should NOT have label
assert " AS " not in groupby_sql # GROUP BY should NOT have label
# Also verify that if SELECT has a label, it's different from GROUP BY
if " AS " in select_sql:
# If labeled, SELECT and GROUP BY should be different
assert select_sql != groupby_sql
def test_apply_series_others_grouping_no_label_in_groupby(database: Database) -> None:
"""
Test that GROUP BY expressions don't get wrapped with make_sqla_column_compatible.
This is a specific test for the bug fix where make_sqla_column_compatible
was causing issues with literal quoting in GROUP BY clauses.
"""
from unittest.mock import ANY, call, Mock, patch
from superset.connectors.sqla.models import SqlaTable, TableColumn
# Create a table instance
table = SqlaTable(
database=database,
schema=None,
table_name="test_table",
columns=[TableColumn(column_name="category", type="TEXT")],
)
# Mock expressions
category_expr = Mock()
category_expr.name = "category"
select_exprs = [category_expr]
groupby_all_columns = {"category": category_expr}
groupby_series_columns = {"category": category_expr}
def condition_factory(col_name: str, expr):
return True
# Track calls to make_sqla_column_compatible
with patch.object(
table, "make_sqla_column_compatible", side_effect=lambda expr, name: expr
) as mock_make_compatible:
result_select_exprs, result_groupby_columns = (
table._apply_series_others_grouping(
select_exprs,
groupby_all_columns,
groupby_series_columns,
condition_factory,
)
)
# Verify make_sqla_column_compatible was called for SELECT expressions
# but NOT for GROUP BY expressions
calls = mock_make_compatible.call_args_list
# Should have exactly one call (for the SELECT expression)
assert len(calls) == 1
# The call should be for the SELECT expression with the column name
# Using unittest.mock.ANY to match any CASE expression
assert calls[0] == call(ANY, "category")
# Verify the GROUP BY expression was NOT passed through
# make_sqla_column_compatible - it should be the raw CASE expression
assert "category" in result_groupby_columns
# The GROUP BY expression should be different from the SELECT expression
# because only SELECT gets make_sqla_column_compatible applied