mirror of
https://github.com/apache/superset.git
synced 2026-04-19 16:14:52 +00:00
fix(SQL Lab): handle columns without names (#38986)
This commit is contained in:
@@ -99,6 +99,38 @@ def convert_to_string(value: Any) -> str:
|
||||
return str(value)
|
||||
|
||||
|
||||
def normalize_cursor_description_names(
    cursor_description: DbapiDescription,
) -> list[str]:
    """
    Replace empty cursor.description names with synthetic names that do not
    collide with any explicit column names.

    The first element of each entry is passed through ``convert_to_string``.
    A non-empty result is kept unchanged; an empty one is replaced by the
    first unused ``_col_<n>`` name. The counter is shared across the whole
    description, so each later empty column continues where the previous one
    stopped.

    :param cursor_description: cursor description entries; only element 0 of
        each entry is read
    :returns: one name per entry, in the original order
    """
    # Convert every name exactly once and reuse the result below.
    column_names = [convert_to_string(col[0]) for col in cursor_description]

    # Explicit names are reserved up front so that a synthetic name can never
    # shadow a real column, even one that appears later in the description.
    unavailable_names = {name for name in column_names if name}
    synthetic_index = 0
    normalized_names: list[str] = []

    for column_name in column_names:
        if not column_name:
            # Advance the shared counter until an unused name is found.
            while True:
                column_name = f"_col_{synthetic_index}"
                synthetic_index += 1
                if column_name not in unavailable_names:
                    break
            # Reserve it so the next empty column cannot receive the same name.
            unavailable_names.add(column_name)
        normalized_names.append(column_name)

    return normalized_names
|
||||
|
||||
|
||||
class SupersetResultSet:
|
||||
def __init__( # pylint: disable=too-many-locals # noqa: C901
|
||||
self,
|
||||
@@ -116,9 +148,14 @@ class SupersetResultSet:
|
||||
|
||||
if cursor_description:
|
||||
# get deduped list of column names
|
||||
column_names = dedup(
|
||||
[convert_to_string(col[0]) for col in cursor_description]
|
||||
)
|
||||
# Some databases (e.g. SQL Server) return an empty string as the
|
||||
# column name for un-aliased expressions like SELECT COUNT(*).
|
||||
# An empty field name is illegal in NumPy structured arrays and in
|
||||
# PyArrow tables, so we substitute a synthetic name when needed.
|
||||
# Synthetic names are chosen to avoid colliding with any explicit
|
||||
# column names before deduplication runs.
|
||||
# See https://github.com/apache/superset/issues/23848
|
||||
column_names = dedup(normalize_cursor_description_names(cursor_description))
|
||||
|
||||
# fix cursor descriptor with the deduped names
|
||||
deduped_cursor_desc = [
|
||||
|
||||
@@ -185,3 +185,62 @@ def test_get_column_description_from_empty_data_using_cursor_description(
|
||||
)
|
||||
assert any(col.get("column_name") == "__time" for col in result_set.columns)
|
||||
logger.exception.assert_not_called()
|
||||
|
||||
|
||||
def test_empty_column_names_get_synthetic_names() -> None:
    """
    An empty-string column name must be replaced by a synthetic name.

    SQL Server returns an empty-string column name in cursor.description for
    any un-aliased expression (e.g. ``SELECT COUNT(*) FROM t``), and an empty
    field name is illegal in NumPy structured arrays and PyArrow tables.
    SupersetResultSet therefore has to substitute a synthetic name so that
    such queries succeed on MSSQL.

    Regression test for https://github.com/apache/superset/issues/23848
    """
    synthetic_name = "_col_0"
    rows = [(42,)]
    cursor_desc = [("", 3, None, None, None, None, None)]

    results = SupersetResultSet(rows, cursor_desc, BaseEngineSpec)  # type: ignore

    # The column metadata exposes the synthetic name ...
    first_column = results.columns[0]
    assert first_column["column_name"] == synthetic_name

    # ... and so does the resulting DataFrame, with the value intact.
    frame = results.to_pandas_df()
    assert list(frame.columns) == [synthetic_name]
    assert frame[synthetic_name].iloc[0] == 42
|
||||
|
||||
|
||||
def test_multiple_empty_column_names_get_unique_synthetic_names() -> None:
    """
    Every empty column name must be given its own distinct synthetic name.

    This covers result sets with several unnamed columns, e.g.
    ``SELECT COUNT(*), SUM(x)`` on MSSQL.
    """
    rows = [(10, 20)]
    cursor_desc = [
        ("", 3, None, None, None, None, None),
        ("", 3, None, None, None, None, None),
    ]

    results = SupersetResultSet(rows, cursor_desc, BaseEngineSpec)  # type: ignore

    names = [column["column_name"] for column in results.columns]
    assert len(names) == 2
    assert len(set(names)) == 2  # all unique

    # Row values keep their original positions.
    frame = results.to_pandas_df()
    assert frame.iloc[0].tolist() == [10, 20]
|
||||
|
||||
|
||||
def test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
    """
    A synthetic name must not collide with an explicit, user-selected column
    name that already looks like a Superset fallback name.

    Here the explicit ``_col_0`` column keeps its name and the empty column is
    assigned ``_col_1`` instead.
    """
    expected_names = ["_col_1", "_col_0"]
    rows = [(10, 20)]
    cursor_desc = [
        ("", 3, None, None, None, None, None),
        ("_col_0", 3, None, None, None, None, None),
    ]

    results = SupersetResultSet(rows, cursor_desc, BaseEngineSpec)  # type: ignore

    names = [column["column_name"] for column in results.columns]
    assert names == expected_names

    # The DataFrame carries the same names, with values in their original order.
    frame = results.to_pandas_df()
    assert list(frame.columns) == expected_names
    assert frame.iloc[0].tolist() == [10, 20]
|
||||
|
||||
Reference in New Issue
Block a user