# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Command to export a dashboard as an example bundle.

This creates an example-ready structure that can be committed to
superset/examples/ and loaded via the example loading system.
"""

from __future__ import annotations

import logging
from collections.abc import Iterator
from io import BytesIO
from typing import Any, Callable, TYPE_CHECKING

import yaml

from superset.commands.base import BaseCommand
from superset.commands.dashboard.exceptions import DashboardNotFoundError
from superset.daos.dashboard import DashboardDAO

if TYPE_CHECKING:
    from superset.connectors.sqla.models import SqlaTable
    from superset.models.dashboard import Dashboard
    from superset.models.slice import Slice

from superset.sql.parse import SQLStatement, Table

logger = logging.getLogger(__name__)

# Canonical UUID for the examples database
EXAMPLES_DATABASE_UUID = "a2dc77af-e654-49bb-b321-40f6b559a1ee"

# ASF license header for generated YAML files
YAML_LICENSE_HEADER = """\
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""


def sanitize_filename(name: str) -> str:
    """Convert a name to a safe filename."""
    safe = "".join(c if c.isalnum() or c in "._-" else "_" for c in name)
    while "__" in safe:
        safe = safe.replace("__", "_")
    return safe.strip("_")


def get_referenced_tables(sql: str, engine: str = "base") -> set[Table]:
    """Extract table references from SQL using Superset's SQL parser.

    Args:
        sql: The SQL query to parse
        engine: The database engine/dialect (e.g., "postgresql", "mysql")

    Returns:
        Set of Table objects referenced in the SQL
    """
    try:
        statement = SQLStatement(sql, engine=engine)
        return statement.tables
    except Exception as e:
        logger.warning("Could not parse SQL to extract tables: %s", e)
        return set()


def is_virtual_dataset(dataset: SqlaTable) -> bool:
    """Check if a dataset is virtual (SQL-based) vs physical (table-based)."""
    return bool(dataset.sql)


def can_preserve_virtual_dataset(
    dataset: SqlaTable,
    physical_tables: set[str],
    engine: str = "base",
) -> bool:
    """Check if a virtual dataset can be preserved (all dependencies are in export).

    A virtual dataset can be preserved if all tables it references are
    physical tables that will be exported as Parquet files.

    Args:
        dataset: The virtual dataset to check
        physical_tables: Set of physical table names being exported
        engine: The database engine/dialect for SQL parsing

    Returns:
        True if the virtual dataset can be preserved with its SQL intact
    """
    if not dataset.sql:
        return False  # Not a virtual dataset

    referenced = get_referenced_tables(dataset.sql, engine)
    if not referenced:
        # Couldn't parse SQL or no tables found - safer to materialize
        logger.info(
            "Could not determine dependencies for %s, will materialize",
            dataset.table_name,
        )
        return False

    # Check if all referenced tables are in our physical tables set
    for table in referenced:
        # Match by table name (ignore schema since we normalize to default schema)
        if table.table not in physical_tables:
            logger.info(
                "Virtual dataset %s references external table %s, will materialize",
                dataset.table_name,
                table.table,
            )
            return False

    logger.info(
        "Virtual dataset %s can be preserved (references: %s)",
        dataset.table_name,
        ", ".join(t.table for t in referenced),
    )
    return True


def export_dataset_yaml(
    dataset: SqlaTable,
    data_file: str | None = None,
    preserve_virtual: bool = False,
) -> dict[str, Any]:
    """Export a dataset to YAML format.

    Args:
        dataset: The dataset to export
        data_file: Optional explicit parquet filename (for physical datasets)
        preserve_virtual: If True and dataset is virtual, preserve the SQL query
            instead of converting to physical with data_file
    """
    # Determine if this is a preserved virtual dataset
    is_preserved_virtual = preserve_virtual and dataset.sql

    dataset_config: dict[str, Any] = {
        "table_name": dataset.table_name,
        # Virtual datasets don't have data files - they query other tables
        "data_file": None if is_preserved_virtual else data_file,
        "main_dttm_col": dataset.main_dttm_col,
        "description": dataset.description,
        "default_endpoint": dataset.default_endpoint,
        "offset": dataset.offset,
        "cache_timeout": dataset.cache_timeout,
        "catalog": dataset.catalog,
        "schema": None,  # Don't export - use target database's default schema
        # Preserve SQL for virtual datasets, None for physical (data is in parquet)
        "sql": dataset.sql if is_preserved_virtual else None,
        # Track source database engine for SQL transpilation during import
        "source_db_engine": (
            dataset.database.db_engine_spec.engine if is_preserved_virtual else None
        ),
        "params": None,  # Don't export - contains stale import metadata
        "template_params": dataset.template_params,
        "filter_select_enabled": dataset.filter_select_enabled,
        "fetch_values_predicate": dataset.fetch_values_predicate,
        "extra": dataset.extra,
        "normalize_columns": dataset.normalize_columns,
        "always_filter_main_dttm": dataset.always_filter_main_dttm,
        "folders": None,
        "uuid": str(dataset.uuid),
        "metrics": [],
        "columns": [],
        "version": "1.0.0",
        "database_uuid": EXAMPLES_DATABASE_UUID,
    }

    for metric in dataset.metrics:
        dataset_config["metrics"].append(
            {
                "metric_name": metric.metric_name,
                "verbose_name": metric.verbose_name,
                "metric_type": metric.metric_type,
                "expression": metric.expression,
                "description": metric.description,
                "d3format": metric.d3format,
                "currency": metric.currency,
                "extra": metric.extra,
                "warning_text": metric.warning_text,
            }
        )

    for column in dataset.columns:
        dataset_config["columns"].append(
            {
                "column_name": column.column_name,
                "verbose_name": column.verbose_name,
                "is_dttm": column.is_dttm,
                "is_active": column.is_active,
                "type": column.type,
                "advanced_data_type": column.advanced_data_type,
                "groupby": column.groupby,
                "filterable": column.filterable,
                "expression": column.expression,
                "description": column.description,
                "python_date_format": column.python_date_format,
                "extra": column.extra,
            }
        )

    return dataset_config


def export_dataset_data(
    dataset: SqlaTable,
    sample_rows: int | None = None,
) -> bytes | None:
    """Export dataset data to Parquet format. Returns bytes or None on failure."""
    import pandas as pd  # pylint: disable=import-outside-toplevel

    from superset import db  # pylint: disable=import-outside-toplevel

    # Ensure dataset is attached to session and relationships are loaded
    if dataset not in db.session:
        dataset = db.session.merge(dataset)

    # Force load the database and columns relationships by accessing them
    _ = dataset.database
    _ = dataset.columns

    if not dataset.database:
        logger.warning("Dataset %s has no database", dataset.table_name)
        return None

    try:
        logger.info("Exporting data for %s to Parquet...", dataset.table_name)

        # Check if this is a virtual dataset (SQL-based)
        if dataset.sql:
            sql = dataset.sql
        else:
            # For physical tables, build SELECT query from columns
            columns = [col.column_name for col in dataset.columns if not col.expression]

            if not columns:
                logger.warning("No columns to export for %s", dataset.table_name)
                return None

            # Build simple SELECT query (quote identifiers to handle spaces/keywords)
            column_list = ", ".join(f'"{c}"' for c in columns)
            quoted_table = f'"{dataset.table_name}"'
            if dataset.schema:
                table_ref = f'"{dataset.schema}".{quoted_table}'
            else:
                table_ref = quoted_table
            sql = f"SELECT {column_list} FROM {table_ref}"  # noqa: S608

        with dataset.database.get_sqla_engine() as engine:
            df = pd.read_sql(sql, engine)

        if sample_rows and len(df) > sample_rows:
            df = df.head(sample_rows)
            logger.info("Sampled to %d rows", sample_rows)

        # Write to bytes buffer
        buf = BytesIO()
        df.to_parquet(buf, index=False)
        buf.seek(0)
        logger.info("Exported %d rows for %s", len(df), dataset.table_name)
        return buf.getvalue()

    except Exception as e:
        logger.exception("Could not export data for %s: %s", dataset.table_name, e)
        return None


def export_chart(chart: Slice, dataset_uuid: str) -> dict[str, Any]:
    """Export a chart to YAML format."""
    params = chart.params_dict if hasattr(chart, "params_dict") else {}

    return {
        "slice_name": chart.slice_name,
        "description": chart.description,
        "certified_by": chart.certified_by,
        "certification_details": chart.certification_details,
        "viz_type": chart.viz_type,
        "params": params,
        "query_context": None,  # Don't include - contains stale IDs
        "cache_timeout": chart.cache_timeout,
        "uuid": str(chart.uuid),
        "version": "1.0.0",
        "dataset_uuid": dataset_uuid,
    }


def remap_native_filters(
    filters: list[dict[str, Any]],
    chart_id_to_uuid: dict[int, str],
    dataset_id_to_uuid: dict[int, str],
) -> list[dict[str, Any]]:
    """Remap IDs to UUIDs in native filter configuration."""
    remapped = []
    for f in filters:
        new_filter = f.copy()

        # Remap chartsInScope from IDs to UUIDs
        if "chartsInScope" in new_filter:
            new_filter["chartsInScope"] = [
                chart_id_to_uuid.get(cid, cid) for cid in new_filter["chartsInScope"]
            ]

        # Remap targets to use datasetUuid
        if "targets" in new_filter:
            new_targets = []
            for target in new_filter["targets"]:
                new_target = target.copy()
                if "datasetId" in new_target:
                    dataset_id = new_target.pop("datasetId")
                    if dataset_id in dataset_id_to_uuid:
                        new_target["datasetUuid"] = dataset_id_to_uuid[dataset_id]
                new_targets.append(new_target)
            new_filter["targets"] = new_targets

        remapped.append(new_filter)
    return remapped


def remap_chart_configuration(
    chart_config: dict[str, Any],
    chart_id_to_uuid: dict[int, str],
) -> dict[str, Any]:
    """Remap chart IDs to UUIDs in chart_configuration (cross-filters)."""
    remapped: dict[str, Any] = {}
    for chart_id_str, config in chart_config.items():
        chart_id = int(chart_id_str)
        if chart_id not in chart_id_to_uuid:
            continue

        new_config = config.copy()
        chart_uuid = chart_id_to_uuid[chart_id]

        # Update the id field
        new_config["id"] = chart_uuid

        # Remap chartsInScope
        cross_filters = new_config.get("crossFilters", {})
        if "chartsInScope" in cross_filters:
            new_config["crossFilters"] = new_config["crossFilters"].copy()
            new_config["crossFilters"]["chartsInScope"] = [
                chart_id_to_uuid.get(cid, cid)
                for cid in new_config["crossFilters"]["chartsInScope"]
            ]

        remapped[chart_uuid] = new_config

    return remapped


def remap_global_chart_configuration(
    global_config: dict[str, Any],
    chart_id_to_uuid: dict[int, str],
) -> dict[str, Any]:
    """Remap chart IDs in global_chart_configuration."""
    new_config = global_config.copy()
    if "chartsInScope" in new_config:
        new_config["chartsInScope"] = [
            chart_id_to_uuid.get(cid, cid) for cid in new_config["chartsInScope"]
        ]
    return new_config


def export_dashboard_yaml(
    dashboard: Dashboard,
    chart_id_to_uuid: dict[int, str],
    dataset_id_to_uuid: dict[int, str],
) -> dict[str, Any]:
    """Export dashboard to YAML format with proper ID remapping."""
    from superset.utils import (
        json as superset_json,  # pylint: disable=import-outside-toplevel
    )

    position = dashboard.position or {}

    # Update position to use UUIDs
    updated_position = {}
    for key, value in position.items():
        if isinstance(value, dict):
            updated_value = value.copy()
            if "meta" in updated_value and "chartId" in updated_value.get("meta", {}):
                chart_id = updated_value["meta"]["chartId"]
                if chart_id in chart_id_to_uuid:
                    updated_value["meta"]["uuid"] = chart_id_to_uuid[chart_id]
            updated_position[key] = updated_value
        else:
            updated_position[key] = value

    # Parse json_metadata
    json_metadata = {}
    if dashboard.json_metadata:
        try:
            json_metadata = superset_json.loads(dashboard.json_metadata)
        except Exception:
            logger.debug("Could not parse json_metadata")

    # Remap native filters
    native_filters = json_metadata.get("native_filter_configuration", [])
    remapped_filters = remap_native_filters(
        native_filters, chart_id_to_uuid, dataset_id_to_uuid
    )

    # Remap chart_configuration (cross-filters)
    chart_configuration = json_metadata.get("chart_configuration", {})
    remapped_chart_config = remap_chart_configuration(
        chart_configuration, chart_id_to_uuid
    )

    # Remap global_chart_configuration
    global_chart_config = json_metadata.get("global_chart_configuration", {})
    remapped_global_config = remap_global_chart_configuration(
        global_chart_config, chart_id_to_uuid
    )

    # Build metadata section
    metadata: dict[str, Any] = {
        "timed_refresh_immune_slices": json_metadata.get(
            "timed_refresh_immune_slices", []
        ),
        "expanded_slices": json_metadata.get("expanded_slices", {}),
        "refresh_frequency": json_metadata.get("refresh_frequency", 0),
        "default_filters": json_metadata.get("default_filters", "{}"),
        "color_scheme": json_metadata.get("color_scheme", ""),
        "label_colors": json_metadata.get("label_colors", {}),
        "native_filter_configuration": remapped_filters,
        "shared_label_colors": json_metadata.get("shared_label_colors", []),
        "map_label_colors": json_metadata.get("map_label_colors", {}),
        "color_scheme_domain": json_metadata.get("color_scheme_domain", []),
        "cross_filters_enabled": json_metadata.get("cross_filters_enabled", False),
        "chart_configuration": remapped_chart_config,
        "global_chart_configuration": remapped_global_config,
    }

    return {
        "dashboard_title": dashboard.dashboard_title,
        "description": dashboard.description,
        "css": dashboard.css,
        "slug": dashboard.slug,
        "certified_by": dashboard.certified_by,
        "certification_details": dashboard.certification_details,
        "published": dashboard.published,
        "uuid": str(dashboard.uuid),
        "position": updated_position,
        "metadata": metadata,
        "version": "1.0.0",
    }


def _make_yaml_generator(config: dict[str, Any]) -> Callable[[], bytes]:
    """Create a generator function for YAML content with ASF license header."""
    yaml_content = yaml.safe_dump(config, default_flow_style=False, allow_unicode=True)
    return lambda: (YAML_LICENSE_HEADER + yaml_content).encode("utf-8")


def _make_bytes_generator(data: bytes) -> Callable[[], bytes]:
    """Create a generator function for raw bytes content."""
    return lambda: data


class ExportExampleCommand(BaseCommand):
    """Export dashboard as an example bundle with Parquet data and YAML configs.

    Output structure for single dataset:
        data.parquet - Raw data
        dataset.yaml - Dataset metadata
        dashboard.yaml - Dashboard definition
        charts/*.yaml - Chart definitions

    Output structure for multiple datasets:
        data/*.parquet - Raw data files
        datasets/*.yaml - Dataset metadata files
        dashboard.yaml - Dashboard definition
        charts/*.yaml - Chart definitions
    """

    def __init__(
        self,
        dashboard_id: int,
        export_data: bool = True,
        sample_rows: int | None = None,
    ):
        self._dashboard_id = dashboard_id
        self._export_data = export_data
        self._sample_rows = sample_rows
        self._dashboard: Dashboard | None = None

    def validate(self) -> None:
        self._dashboard = DashboardDAO.find_by_id(self._dashboard_id)
        if not self._dashboard:
            raise DashboardNotFoundError()

    def run(self) -> Iterator[tuple[str, Callable[[], bytes]]]:  # noqa: C901
        """Yield (filename, content_generator) tuples for ZIP packaging.

        Content generators return bytes (either YAML encoded or raw Parquet).
        """
        self.validate()
        assert self._dashboard is not None

        # Collect all charts and their datasets
        charts = self._dashboard.slices
        datasets: dict[int, SqlaTable] = {}
        chart_id_to_uuid: dict[int, str] = {}
        chart_to_dataset_uuid: dict[int, str] = {}

        for chart in charts:
            chart_id_to_uuid[chart.id] = str(chart.uuid)
            if chart.datasource:
                datasets[chart.datasource.id] = chart.datasource
                chart_to_dataset_uuid[chart.id] = str(chart.datasource.uuid)

        # Build dataset ID to UUID mapping
        dataset_id_to_uuid: dict[int, str] = {
            ds_id: str(ds.uuid) for ds_id, ds in datasets.items()
        }

        logger.info("Found %d charts and %d datasets", len(charts), len(datasets))

        # Classify datasets: physical vs virtual
        # Physical datasets need Parquet export; virtual datasets with all
        # dependencies in the export can preserve their SQL
        physical_datasets: dict[int, SqlaTable] = {}
        virtual_datasets: dict[int, SqlaTable] = {}

        for ds_id, dataset in datasets.items():
            if is_virtual_dataset(dataset):
                virtual_datasets[ds_id] = dataset
            else:
                physical_datasets[ds_id] = dataset

        # Get the set of physical table names for dependency checking
        physical_table_names = {ds.table_name for ds in physical_datasets.values()}

        # Determine which virtual datasets can be preserved vs need materialization
        # A virtual dataset can be preserved if all its referenced tables are
        # physical datasets in this export
        preserved_virtual: dict[int, SqlaTable] = {}
        materialized_virtual: dict[int, SqlaTable] = {}

        # Get database engine for SQL parsing (use first dataset's database)
        db_engine = "base"
        if datasets:
            first_dataset = next(iter(datasets.values()))
            if first_dataset.database:
                db_engine = first_dataset.database.backend or "base"

        for ds_id, dataset in virtual_datasets.items():
            if can_preserve_virtual_dataset(dataset, physical_table_names, db_engine):
                preserved_virtual[ds_id] = dataset
            else:
                materialized_virtual[ds_id] = dataset

        # Log classification summary
        logger.info(
            "Dataset classification: %d physical, %d virtual preserved, "
            "%d virtual materialized",
            len(physical_datasets),
            len(preserved_virtual),
            len(materialized_virtual),
        )

        # Datasets that need Parquet export = physical + materialized virtual
        datasets_needing_data = {**physical_datasets, **materialized_virtual}

        # Build unique filenames for datasets (handle table_name collisions)
        dataset_filenames: dict[int, str] = {}
        seen_table_names: dict[str, int] = {}  # table_name -> first dataset_id

        for ds_id, dataset in datasets.items():
            table_name = dataset.table_name
            if table_name in seen_table_names:
                # Collision! Use UUID suffix for uniqueness
                uuid_suffix = str(dataset.uuid)[:8]
                filename = f"{table_name}-{uuid_suffix}"
                logger.info(
                    "Table name collision for '%s', using '%s'", table_name, filename
                )
            else:
                filename = table_name
                seen_table_names[table_name] = ds_id
            dataset_filenames[ds_id] = filename

        # Export datasets
        multi_dataset = len(datasets) > 1

        if multi_dataset:
            # Multiple datasets: use datasets/ and data/ folders
            for ds_id, dataset in datasets.items():
                filename = dataset_filenames[ds_id]
                needs_data = ds_id in datasets_needing_data
                is_preserved = ds_id in preserved_virtual
                data_file = f"{filename}.parquet" if needs_data else None

                # Export YAML
                dataset_config = export_dataset_yaml(
                    dataset,
                    data_file=data_file,
                    preserve_virtual=is_preserved,
                )
                yield (
                    f"datasets/{filename}.yaml",
                    _make_yaml_generator(dataset_config),
                )

                # Export data only for datasets that need it
                if self._export_data and needs_data:
                    data = export_dataset_data(dataset, self._sample_rows)
                    if data:
                        yield (
                            f"data/{data_file}",
                            _make_bytes_generator(data),
                        )

        elif len(datasets) == 1:
            # Single dataset: use dataset.yaml and data.parquet at root
            ds_id = next(iter(datasets.keys()))
            dataset = datasets[ds_id]
            needs_data = ds_id in datasets_needing_data
            is_preserved = ds_id in preserved_virtual
            data_file = "data.parquet" if needs_data else None

            dataset_config = export_dataset_yaml(
                dataset,
                data_file=data_file,
                preserve_virtual=is_preserved,
            )
            yield ("dataset.yaml", _make_yaml_generator(dataset_config))

            if self._export_data and needs_data:
                data = export_dataset_data(dataset, self._sample_rows)
                if data:
                    yield ("data.parquet", _make_bytes_generator(data))

        # Export charts
        for chart in charts:
            dataset_uuid = chart_to_dataset_uuid.get(chart.id, "")
            chart_config = export_chart(chart, dataset_uuid)
            filename = sanitize_filename(chart.slice_name) + ".yaml"
            yield (f"charts/{filename}", _make_yaml_generator(chart_config))

        # Export dashboard
        dashboard_config = export_dashboard_yaml(
            self._dashboard, chart_id_to_uuid, dataset_id_to_uuid
        )
        yield ("dashboard.yaml", _make_yaml_generator(dashboard_config))
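

# Illustrative usage sketch (editorial addition, not part of the original
# module): one way the (filename, content_generator) pairs yielded by run()
# could be written into a ZIP archive. It assumes an active Superset
# application context and an existing dashboard; the dashboard ID, sample
# size, and output path below are hypothetical.
#
#     import zipfile
#
#     command = ExportExampleCommand(dashboard_id=1, sample_rows=1000)
#     with zipfile.ZipFile("example_bundle.zip", "w") as bundle:
#         for filename, content_fn in command.run():
#             bundle.writestr(filename, content_fn())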