mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
fix: adhoc column quoting (#36215)
This commit is contained in:
@@ -1509,33 +1509,48 @@ class SqlaTable(
|
||||
:rtype: sqlalchemy.sql.column
|
||||
"""
|
||||
label = utils.get_column_name(col)
|
||||
try:
|
||||
sql_expression = col["sqlExpression"]
|
||||
|
||||
# For column references, conditionally quote identifiers that need it
|
||||
if col.get("isColumnReference"):
|
||||
sql_expression = self.database.quote_identifier(sql_expression)
|
||||
|
||||
expression = self._process_select_expression(
|
||||
expression=sql_expression,
|
||||
database_id=self.database_id,
|
||||
engine=self.database.backend,
|
||||
schema=self.schema,
|
||||
template_processor=template_processor,
|
||||
)
|
||||
except SupersetSecurityException as ex:
|
||||
raise QueryObjectValidationError(ex.message) from ex
|
||||
sql_expression = col["sqlExpression"]
|
||||
time_grain = col.get("timeGrain")
|
||||
has_timegrain = col.get("columnType") == "BASE_AXIS" and time_grain
|
||||
is_dttm = False
|
||||
pdf = None
|
||||
if col_in_metadata := self.get_column(expression):
|
||||
is_column_reference = col.get("isColumnReference")
|
||||
|
||||
# First, check if this is a column reference that exists in metadata
|
||||
col_in_metadata = None
|
||||
if is_column_reference:
|
||||
col_in_metadata = self.get_column(sql_expression)
|
||||
|
||||
if col_in_metadata:
|
||||
# Column exists in metadata - use it directly
|
||||
sqla_column = col_in_metadata.get_sqla_col(
|
||||
template_processor=template_processor
|
||||
)
|
||||
is_dttm = col_in_metadata.is_temporal
|
||||
pdf = col_in_metadata.python_date_format
|
||||
else:
|
||||
# Column doesn't exist in metadata or is not a reference - treat as ad-hoc
|
||||
# expression Note: If isColumnReference=true but column not found, we still
|
||||
# quote it as a fallback for backwards compatibility, though this indicates
|
||||
# the frontend sent incorrect metadata
|
||||
try:
|
||||
# For column references, conditionally quote identifiers that need it
|
||||
expression_to_process = sql_expression
|
||||
if is_column_reference:
|
||||
expression_to_process = self.database.quote_identifier(
|
||||
sql_expression
|
||||
)
|
||||
|
||||
expression = self._process_select_expression(
|
||||
expression=expression_to_process,
|
||||
database_id=self.database_id,
|
||||
engine=self.database.backend,
|
||||
schema=self.schema,
|
||||
template_processor=template_processor,
|
||||
)
|
||||
except SupersetSecurityException as ex:
|
||||
raise QueryObjectValidationError(ex.message) from ex
|
||||
|
||||
sqla_column = literal_column(expression)
|
||||
if has_timegrain or force_type_check:
|
||||
try:
|
||||
|
||||
@@ -408,8 +408,19 @@ class DBEventLogger(AbstractEventLogger):
|
||||
db.session.bulk_save_objects(logs)
|
||||
db.session.commit() # pylint: disable=consider-using-transaction
|
||||
except SQLAlchemyError as ex:
|
||||
# Log errors but don't raise - logging failures should not break the
|
||||
# application. Common in tests where the session may be in prepared state or
|
||||
# db is locked
|
||||
logging.error("DBEventLogger failed to log event(s)")
|
||||
logging.exception(ex)
|
||||
# Rollback to clean up the session state
|
||||
try:
|
||||
db.session.rollback()
|
||||
except Exception: # pylint: disable=broad-except
|
||||
# If rollback also fails, just continue - don't let issues crash the app
|
||||
logging.error(
|
||||
"DBEventLogger failed to rollback the session after failure"
|
||||
)
|
||||
|
||||
|
||||
class StdOutEventLogger(AbstractEventLogger):
|
||||
|
||||
@@ -864,12 +864,14 @@ def test_time_column_with_time_grain(app_context, physical_dataset):
|
||||
"label": "I_AM_AN_ORIGINAL_COLUMN",
|
||||
"sqlExpression": "col5",
|
||||
"timeGrain": "P1Y",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
adhoc_column: AdhocColumn = {
|
||||
"label": "I_AM_A_TRUNC_COLUMN",
|
||||
"sqlExpression": "col6",
|
||||
"columnType": "BASE_AXIS",
|
||||
"timeGrain": "P1Y",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
qc = QueryContextFactory().create(
|
||||
datasource={
|
||||
@@ -1039,6 +1041,7 @@ def test_time_grain_and_time_offset_with_base_axis(app_context, physical_dataset
|
||||
"sqlExpression": "col6",
|
||||
"columnType": "BASE_AXIS",
|
||||
"timeGrain": "P3M",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
qc = QueryContextFactory().create(
|
||||
datasource={
|
||||
@@ -1152,6 +1155,7 @@ def test_time_offset_with_temporal_range_filter(app_context, physical_dataset):
|
||||
"sqlExpression": "col6",
|
||||
"columnType": "BASE_AXIS",
|
||||
"timeGrain": "P3M",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
],
|
||||
"metrics": [
|
||||
|
||||
@@ -1395,20 +1395,24 @@ def test_reapply_query_filters_with_empty_filters(database: Database) -> None:
|
||||
|
||||
def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None:
|
||||
"""
|
||||
Test that adhoc_column_to_sqla
|
||||
properly quotes column identifiers when isColumnReference is true.
|
||||
Test that adhoc_column_to_sqla properly handles column references
|
||||
by looking up the column in metadata instead of quoting and processing through
|
||||
SQLGlot.
|
||||
|
||||
This tests the fix for column names with spaces being properly quoted
|
||||
before being processed by SQLGlot to prevent "column AS alias" misinterpretation.
|
||||
This tests the fix for column names with spaces being properly handled
|
||||
without going through SQLGlot which could misinterpret "column AS alias" patterns.
|
||||
"""
|
||||
from superset.connectors.sqla.models import SqlaTable
|
||||
from superset.connectors.sqla.models import SqlaTable, TableColumn
|
||||
|
||||
table = SqlaTable(
|
||||
table_name="test_table",
|
||||
database=database,
|
||||
columns=[
|
||||
TableColumn(column_name="Customer Name", type="TEXT"),
|
||||
],
|
||||
)
|
||||
|
||||
# Test 1: Column reference with spaces should be quoted
|
||||
# Test: Column reference with spaces should be found in metadata
|
||||
col_with_spaces: AdhocColumn = {
|
||||
"sqlExpression": "Customer Name",
|
||||
"label": "Customer Name",
|
||||
@@ -1417,8 +1421,223 @@ def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None:
|
||||
|
||||
result = table.adhoc_column_to_sqla(col_with_spaces)
|
||||
|
||||
# Should contain the quoted column name
|
||||
# Should return a valid SQLAlchemy column
|
||||
assert result is not None
|
||||
result_str = str(result)
|
||||
|
||||
assert '"Customer Name"' in result_str
|
||||
# The column name should be present (may or may not be quoted depending on dialect)
|
||||
assert "Customer Name" in result_str or '"Customer Name"' in result_str
|
||||
|
||||
|
||||
def test_adhoc_column_to_sqla_preserves_column_type_for_time_grain(
|
||||
database: Database,
|
||||
) -> None:
|
||||
"""
|
||||
Test that adhoc_column_to_sqla preserves column type info in column references.
|
||||
|
||||
This tests the fix where column references now look up metadata first, preserving
|
||||
type information needed for time grain operations. Previously, quoting the column
|
||||
name before metadata lookup would cause the column to not be found, resulting in
|
||||
NULL type and failing to apply time grain transformations properly.
|
||||
|
||||
The test verifies that:
|
||||
1. Column metadata is found by looking up the unquoted column name
|
||||
2. The column type (DATE) is preserved when creating the SQLAlchemy column
|
||||
3. The get_timestamp_expr method is properly called with the column type info
|
||||
"""
|
||||
from superset.connectors.sqla.models import SqlaTable, TableColumn
|
||||
|
||||
# Create a table with a temporal column
|
||||
table = SqlaTable(
|
||||
table_name="test_table",
|
||||
database=database,
|
||||
columns=[
|
||||
TableColumn(
|
||||
column_name="local_date",
|
||||
type="DATE",
|
||||
is_dttm=True,
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
# Test with a DATE column reference with time grain
|
||||
date_col: AdhocColumn = {
|
||||
"sqlExpression": "local_date",
|
||||
"label": "local_date",
|
||||
"isColumnReference": True,
|
||||
"timeGrain": "P1D", # Daily time grain
|
||||
"columnType": "BASE_AXIS",
|
||||
}
|
||||
|
||||
# Should not raise ColumnNotFoundException
|
||||
result = table.adhoc_column_to_sqla(date_col)
|
||||
|
||||
assert result is not None
|
||||
result_str = str(result)
|
||||
|
||||
# Verify the column name is present (may be quoted depending on dialect)
|
||||
assert "local_date" in result_str
|
||||
|
||||
|
||||
def test_adhoc_column_to_sqla_with_temporal_column_types(database: Database) -> None:
|
||||
"""
|
||||
Test that adhoc_column_to_sqla correctly handles different temporal column types.
|
||||
|
||||
This verifies that for different temporal types (DATE, DATETIME, TIMESTAMP),
|
||||
the column metadata is properly found and the column type is preserved,
|
||||
allowing time grain operations to work correctly.
|
||||
"""
|
||||
from superset.connectors.sqla.models import SqlaTable, TableColumn
|
||||
|
||||
# Test different temporal types
|
||||
temporal_types = ["DATE", "DATETIME", "TIMESTAMP"]
|
||||
|
||||
for type_name in temporal_types:
|
||||
table = SqlaTable(
|
||||
table_name="test_table",
|
||||
database=database,
|
||||
columns=[
|
||||
TableColumn(
|
||||
column_name="time_col",
|
||||
type=type_name,
|
||||
is_dttm=True,
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
time_col: AdhocColumn = {
|
||||
"sqlExpression": "time_col",
|
||||
"label": "time_col",
|
||||
"isColumnReference": True,
|
||||
"timeGrain": "P1D",
|
||||
"columnType": "BASE_AXIS",
|
||||
}
|
||||
|
||||
result = table.adhoc_column_to_sqla(time_col)
|
||||
|
||||
assert result is not None
|
||||
result_str = str(result)
|
||||
|
||||
# Verify the column name is present
|
||||
assert "time_col" in result_str
|
||||
|
||||
|
||||
def test_adhoc_column_with_spaces_generates_quoted_sql(database: Database) -> None:
|
||||
"""
|
||||
Test that column names with spaces are properly quoted in the generated SQL.
|
||||
|
||||
This verifies that even though we look up columns using unquoted names,
|
||||
the final SQL still properly quotes column names that need quoting (like those with
|
||||
spaces).
|
||||
"""
|
||||
|
||||
from superset.connectors.sqla.models import SqlaTable, TableColumn
|
||||
|
||||
table = SqlaTable(
|
||||
table_name="test_table",
|
||||
database=database,
|
||||
columns=[
|
||||
TableColumn(column_name="Customer Name", type="TEXT"),
|
||||
TableColumn(column_name="Order Total", type="NUMERIC"),
|
||||
],
|
||||
)
|
||||
|
||||
# Test column reference with spaces
|
||||
col_with_spaces: AdhocColumn = {
|
||||
"sqlExpression": "Customer Name",
|
||||
"label": "Customer Name",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
|
||||
result = table.adhoc_column_to_sqla(col_with_spaces)
|
||||
|
||||
# Compile the column to SQL to see how it's rendered
|
||||
with database.get_sqla_engine() as engine:
|
||||
sql = str(
|
||||
result.compile(
|
||||
dialect=engine.dialect, compile_kwargs={"literal_binds": True}
|
||||
)
|
||||
)
|
||||
|
||||
# The SQL should quote the column name (SQLite uses double quotes)
|
||||
# Column names with spaces MUST be quoted in SQL
|
||||
assert '"Customer Name"' in sql, f"Expected quoted column name in SQL: {sql}"
|
||||
|
||||
# Also test that it works in a query context
|
||||
col_numeric: AdhocColumn = {
|
||||
"sqlExpression": "Order Total",
|
||||
"label": "Order Total",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
|
||||
result_numeric = table.adhoc_column_to_sqla(col_numeric)
|
||||
|
||||
with database.get_sqla_engine() as engine:
|
||||
sql_numeric = str(
|
||||
result_numeric.compile(
|
||||
dialect=engine.dialect, compile_kwargs={"literal_binds": True}
|
||||
)
|
||||
)
|
||||
|
||||
assert '"Order Total"' in sql_numeric, (
|
||||
f"Expected quoted column name in SQL: {sql_numeric}"
|
||||
)
|
||||
|
||||
|
||||
def test_adhoc_column_with_spaces_in_full_query(database: Database) -> None:
|
||||
"""
|
||||
Test that column names with spaces work correctly in a full SELECT query.
|
||||
|
||||
This demonstrates that the fix properly handles column names with spaces
|
||||
throughout the entire query generation process, with proper quoting in the final
|
||||
SQL.
|
||||
"""
|
||||
import sqlalchemy as sa
|
||||
|
||||
from superset.connectors.sqla.models import SqlaTable, TableColumn
|
||||
|
||||
table = SqlaTable(
|
||||
table_name="test_table",
|
||||
database=database,
|
||||
columns=[
|
||||
TableColumn(column_name="Customer Name", type="TEXT"),
|
||||
TableColumn(column_name="Order Total", type="NUMERIC"),
|
||||
],
|
||||
)
|
||||
|
||||
# Create adhoc columns for both columns with spaces
|
||||
customer_col: AdhocColumn = {
|
||||
"sqlExpression": "Customer Name",
|
||||
"label": "Customer Name",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
|
||||
order_col: AdhocColumn = {
|
||||
"sqlExpression": "Order Total",
|
||||
"label": "Order Total",
|
||||
"isColumnReference": True,
|
||||
}
|
||||
|
||||
# Get SQLAlchemy columns
|
||||
customer_sqla = table.adhoc_column_to_sqla(customer_col)
|
||||
order_sqla = table.adhoc_column_to_sqla(order_col)
|
||||
|
||||
# Build a full query
|
||||
tbl = table.get_sqla_table()
|
||||
query = sa.select(customer_sqla, order_sqla).select_from(tbl)
|
||||
|
||||
# Compile to SQL
|
||||
with database.get_sqla_engine() as engine:
|
||||
sql = str(
|
||||
query.compile(
|
||||
dialect=engine.dialect, compile_kwargs={"literal_binds": True}
|
||||
)
|
||||
)
|
||||
|
||||
# Verify both column names are quoted in the final SQL
|
||||
assert '"Customer Name"' in sql, f"Customer Name not properly quoted in SQL: {sql}"
|
||||
assert '"Order Total"' in sql, f"Order Total not properly quoted in SQL: {sql}"
|
||||
|
||||
# Verify SELECT and FROM clauses are present
|
||||
assert "SELECT" in sql
|
||||
assert "FROM" in sql
|
||||
|
||||
Reference in New Issue
Block a user