feat: Use DuckDB for examples data in Docker development environment (#34831)

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
This commit is contained in:
Maxime Beauchemin
2025-08-25 12:37:28 -07:00
committed by GitHub
parent 9c9588cce6
commit 47414e18d4
15 changed files with 142 additions and 44 deletions

View File

@@ -33,7 +33,7 @@ from sqlalchemy.engine.url import URL
from superset.constants import TimeGrain
from superset.databases.utils import make_url_safe
from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.utils.core import GenericDataType, get_user_agent, QuerySource
@@ -261,6 +261,39 @@ class DuckDBEngineSpec(DuckDBParametersMixin, BaseEngineSpec):
return f"""'{dttm.isoformat(sep=" ", timespec="microseconds")}'"""
return None
@classmethod
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]:
"""
Override fetch_data to work around duckdb-engine cursor.description bug.
The duckdb-engine SQLAlchemy driver has a bug where cursor.description
becomes None after calling fetchall(), even though the native DuckDB cursor
preserves this information correctly.
See: https://github.com/Mause/duckdb_engine/issues/1322
This method captures the cursor description before fetchall() and restores
it afterward to prevent downstream processing failures.
"""
# Capture description BEFORE fetchall() invalidates it
description = cursor.description
# Execute fetchall() (which will clear cursor.description in duckdb-engine)
if cls.arraysize:
cursor.arraysize = cls.arraysize
try:
if cls.limit_method == LimitMethod.FETCH_MANY and limit:
data = cursor.fetchmany(limit)
else:
data = cursor.fetchall()
except Exception as ex:
raise cls.get_dbapi_mapped_exception(ex) from ex
# Restore the captured description for downstream processing
cursor.description = description
return data
@classmethod
def get_table_names(
cls, database: Database, inspector: Inspector, schema: str | None