fix(SQL Lab): handle columns without names (#38986)

2026-04-19 16:14:52 +00:00 · 2026-04-06 10:09:16 -04:00
parent d796543f5a
commit 12eb40db01
2 changed files with 99 additions and 3 deletions
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -99,6 +99,38 @@ def convert_to_string(value: Any) -> str:
    return str(value)


+def normalize_cursor_description_names(
+    cursor_description: DbapiDescription,
+) -> list[str]:
+    """
+    Return one column name per ``cursor_description`` entry, in order.
+
+    Non-empty names are kept as-is, duplicates included. Each empty name is
+    replaced by the next unused ``_col_<n>`` name (``n`` counts up from 0
+    and is never reused), so a synthetic name never matches an explicit
+    name or an earlier synthetic one.
+    """
+    # Convert each name once; the result feeds both passes below.
+    raw_names = [convert_to_string(col[0]) for col in cursor_description]
+    taken = {name for name in raw_names if name}
+    normalized_names: list[str] = []
+    next_index = 0
+
+    for raw_name in raw_names:
+        if raw_name:
+            normalized_names.append(raw_name)
+            continue
+        synthetic_name = f"_col_{next_index}"
+        next_index += 1
+        while synthetic_name in taken:
+            synthetic_name = f"_col_{next_index}"
+            next_index += 1
+        taken.add(synthetic_name)
+        normalized_names.append(synthetic_name)
+
+    return normalized_names
+
+
 class SupersetResultSet:
    def __init__(  # pylint: disable=too-many-locals  # noqa: C901
        self,
@@ -116,9 +148,14 @@ class SupersetResultSet:

        if cursor_description:
            # get deduped list of column names
-            column_names = dedup(
-                [convert_to_string(col[0]) for col in cursor_description]
-            )
+            # Some databases (e.g. SQL Server) return an empty string as the
+            # column name for un-aliased expressions like SELECT COUNT(*).
+            # An empty field name is illegal in NumPy structured arrays and in
+            # PyArrow tables, so we substitute a synthetic name when needed.
+            # Synthetic names are chosen to avoid colliding with any explicit
+            # column names before deduplication runs.
+            # See https://github.com/apache/superset/issues/23848
+            column_names = dedup(normalize_cursor_description_names(cursor_description))

            # fix cursor descriptor with the deduped names
            deduped_cursor_desc = [
--- a/tests/unit_tests/result_set_test.py
+++ b/tests/unit_tests/result_set_test.py
@@ -185,3 +185,62 @@ def test_get_column_description_from_empty_data_using_cursor_description(
    )
    assert any(col.get("column_name") == "__time" for col in result_set.columns)
    logger.exception.assert_not_called()
+
+
+def test_empty_column_names_get_synthetic_names() -> None:
+    """
+    Regression test for https://github.com/apache/superset/issues/23848
+
+    SQL Server reports an empty string as the column name in
+    cursor.description for any un-aliased expression (e.g.
+    ``SELECT COUNT(*) FROM t``), and an empty field name is illegal in NumPy
+    structured arrays and PyArrow tables. SupersetResultSet must therefore
+    swap in a synthetic name so such queries succeed on MSSQL.
+    """
+    expected_name = "_col_0"
+    rows = [(42,)]
+    description = [("", 3, None, None, None, None, None)]
+    result_set = SupersetResultSet(rows, description, BaseEngineSpec)  # type: ignore
+
+    assert result_set.columns[0]["column_name"] == expected_name
+    df = result_set.to_pandas_df()
+    assert list(df.columns) == [expected_name]
+    assert df[expected_name].iloc[0] == 42
+
+
+def test_multiple_empty_column_names_get_unique_synthetic_names() -> None:
+    """
+    When several columns have empty names (e.g. ``SELECT COUNT(*), SUM(x)``
+    on MSSQL), each must receive a distinct synthetic name.
+    """
+    data = [(10, 20)]
+    description = [
+        ("", 3, None, None, None, None, None),
+        ("", 3, None, None, None, None, None),
+    ]
+    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore
+
+    col_names = [c["column_name"] for c in result_set.columns]
+    assert col_names == ["_col_0", "_col_1"]
+    df = result_set.to_pandas_df()
+    assert list(df.columns) == ["_col_0", "_col_1"]
+    assert df.iloc[0].tolist() == [10, 20]
+
+
+def test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
+    """
+    An explicit column that is already called ``_col_0`` keeps its name; the
+    empty column next to it gets the next free synthetic name instead.
+    """
+    unnamed = ("", 3, None, None, None, None, None)
+    explicit = ("_col_0", 3, None, None, None, None, None)
+    description = [unnamed, explicit]
+    expected_names = ["_col_1", "_col_0"]
+    rows = [(10, 20)]
+    result_set = SupersetResultSet(rows, description, BaseEngineSpec)  # type: ignore
+
+    col_names = [col["column_name"] for col in result_set.columns]
+    assert col_names == expected_names
+    df = result_set.to_pandas_df()
+    assert list(df.columns) == expected_names
+    assert df.iloc[0].tolist() == [10, 20]