diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index b49e9ba4d33..c48a222e382 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1652,8 +1652,20 @@ class SqlaTable( if has_timegrain or force_type_check: try: # probe adhoc column type + # Most databases populate cursor.description from query-plan + # metadata, so WHERE FALSE (zero rows, no table scan) is + # preferred — it avoids hitting row-read limits enforced by + # engines like ClickHouse (max_rows_to_read). + # A small number of drivers (Druid, Pinot) instead build + # cursor.description by inspecting the first returned row; + # for those we fall back to LIMIT 1. tbl, _ = self.get_from_clause(template_processor) - qry = sa.select([sqla_column]).limit(1).select_from(tbl) + if self.db_engine_spec.type_probe_needs_row: + qry = sa.select([sqla_column]).limit(1).select_from(tbl) + else: + qry = ( + sa.select([sqla_column]).where(sa.false()).select_from(tbl) + ) sql = self.database.compile_sqla_query( qry, catalog=self.catalog, diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 965eec46b12..82d26c2bd2b 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -526,6 +526,14 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods force_column_alias_quotes = False arraysize = 0 max_column_name_length: int | None = None + + # Some databases (e.g. Druid, Pinot) build cursor.description by inspecting + # the values in the first returned row rather than from query-plan metadata. + # For those engines WHERE FALSE returns no rows and therefore leaves + # cursor.description as None, which breaks the adhoc column type probe. + # Set this to True on any engine spec where at least one row must be + # fetched for cursor.description to be populated. + type_probe_needs_row: bool = False try_remove_schema_from_table_name = True # pylint: disable=invalid-name run_multiple_statements_as_one = False custom_errors: dict[ diff --git a/superset/db_engine_specs/druid.py b/superset/db_engine_specs/druid.py index 453231c3910..f80c1e1d353 100644 --- a/superset/db_engine_specs/druid.py +++ b/superset/db_engine_specs/druid.py @@ -46,6 +46,10 @@ class DruidEngineSpec(BaseEngineSpec): allows_joins = is_feature_enabled("DRUID_JOINS") allows_subqueries = True + # pydruid builds cursor.description from the first returned row, so a + # WHERE FALSE query (zero rows) leaves description as None. + type_probe_needs_row = True + metadata = { "description": ( "Apache Druid is a high performance real-time analytics database." diff --git a/superset/db_engine_specs/pinot.py b/superset/db_engine_specs/pinot.py index 9e0b8a888da..7942df79efe 100644 --- a/superset/db_engine_specs/pinot.py +++ b/superset/db_engine_specs/pinot.py @@ -31,6 +31,10 @@ class PinotEngineSpec(BaseEngineSpec): allows_alias_in_select = False allows_alias_in_orderby = False + # pinotdb only sets cursor.description when the response contains + # columnDataTypes, which Pinot omits for zero-row results. + type_probe_needs_row = True + metadata = { "description": "Apache Pinot is a real-time distributed OLAP datastore.", "logo": "apache-pinot.svg", diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index c0354844737..6a423d5f828 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -1856,3 +1856,112 @@ def test_extras_having_is_parenthesized( assert "(COUNT(*) > 0 OR 1 = 1)" in sql, ( f"extras.having should be wrapped in parentheses. Generated SQL: {sql}" ) + + +def _run_probe( + database: Database, + type_probe_needs_row: bool = False, +) -> str: + """ + Run adhoc_column_to_sqla with a mocked get_columns_description and + return the SQL that was passed to it. + + ``type_probe_needs_row`` is patched onto the database's real engine spec + class so every other method keeps working normally. + """ + from unittest.mock import patch + + from superset.connectors.sqla.models import SqlaTable, TableColumn + + table = SqlaTable( + database=database, + schema=None, + table_name="t", + columns=[TableColumn(column_name="a", type="INTEGER")], + ) + + adhoc_col: AdhocColumn = { + "sqlExpression": "round(a / 50) * 50 / 1000", + "label": "Duration", + "columnType": "BASE_AXIS", + "timeGrain": "P1D", + } + captured: list[str] = [] + + def fake_get_columns_description( + db: object, + catalog: object, + schema: object, + sql: str, + ) -> list[dict[str, object]]: + captured.append(sql) + return [ + { + "column_name": "Duration", + "name": "Duration", + "type": "FLOAT", + "is_dttm": False, + } + ] + + spec_cls = database.db_engine_spec + with ( + patch( + "superset.connectors.sqla.models.get_columns_description", + side_effect=fake_get_columns_description, + ), + patch.object(spec_cls, "type_probe_needs_row", type_probe_needs_row), + ): + result = table.adhoc_column_to_sqla(adhoc_col) + + assert result is not None + assert len(captured) == 1, "Expected exactly one type-probe query" + return captured[0] + + +def test_adhoc_column_type_probe_uses_where_false(database: Database) -> None: + """ + Most engines populate cursor.description from query-plan metadata, so the + probe uses WHERE FALSE — zero rows read, no table scan. + + SQLAlchemy renders sa.false() differently per dialect: + - Most databases: WHERE false + - SQLite: WHERE 0 = 1 + """ + probe_sql = _run_probe(database, type_probe_needs_row=False).lower() + + always_false_patterns = ("where false", "where 0 = 1") + assert any(p in probe_sql for p in always_false_patterns), ( + f"Probe query should use a WHERE false condition to avoid scanning the " + f"table, but got: {probe_sql}" + ) + assert "limit" not in probe_sql, ( + f"WHERE false probe must not contain LIMIT, but got: {probe_sql}" + ) + + +def test_adhoc_column_type_probe_uses_limit_1_for_row_dependent_engines( + database: Database, +) -> None: + """ + Druid and Pinot build cursor.description by inspecting the first returned + row. WHERE FALSE yields no rows, so description stays None. These engines + must fall back to LIMIT 1. + """ + from superset.db_engine_specs.druid import DruidEngineSpec + from superset.db_engine_specs.pinot import PinotEngineSpec + + for spec_cls in (DruidEngineSpec, PinotEngineSpec): + assert spec_cls.type_probe_needs_row is True, ( + f"{spec_cls.__name__} must declare type_probe_needs_row = True" + ) + + probe_sql = _run_probe(database, type_probe_needs_row=True).lower() + + assert "limit 1" in probe_sql, ( + f"Row-dependent engines: probe should use LIMIT 1, got: {probe_sql}" + ) + always_false_patterns = ("where false", "where 0 = 1") + assert not any(p in probe_sql for p in always_false_patterns), ( + f"Row-dependent engines: probe must NOT use WHERE false, got: {probe_sql}" + )