Files
superset2/superset/cli/examples.py
Maxime Beauchemin 48d8c91b19 feat: migrate examples from Python to YAML format with enhanced CLI
Migrates Superset's example data system from Python-based scripts to YAML configuration files, providing a cleaner, more maintainable approach to managing example datasets, charts, and dashboards.

- Converted 9 Python example modules to YAML configurations
- Exported existing examples from database and added as YAML files:
  - 11 dashboards (USA Births Names, World Bank's Data, etc.)
  - 115 charts
  - 25 datasets
- Moved test-specific fixtures to `tests/fixtures/examples/`
- Removed theme_id from dashboard exports for compatibility

- **New command group**: `superset examples` with subcommands:
  - `load` - Load example data (replaces `load-examples`)
  - `clear-old` - Remove old Python-based examples
  - `clear` - Placeholder for future YAML clearing
  - `reload` - Clear and reload in one command
- **Backwards compatibility**: `superset load-examples` still works with deprecation warning
- **Safety mechanism**: Detects old examples and preserves them to avoid data loss

- Fixed JSON data loading - examples can now load `.json.gz` files from CDN
- Fixed Docker compose configuration for isolated development
- Fixed webpack WebSocket configuration for different ports

- Import operations now log what's being created vs updated:
  - "Creating new dashboard: Sales Dashboard"
  - "Updating existing chart: World's Population"
- Provides clear visibility into the import process

- Moved import logging to individual import functions (DRY principle)
- Non-destructive migration approach - no user data is deleted
- Deterministic UUID generation for consistent example data

- Tested migration from old Python examples to new YAML format
- Verified safety mechanism prevents accidental data overwrites
- Confirmed backwards compatibility with deprecated command
- All pre-commit checks pass

- Updated installation docs to use new CLI commands
- Added deprecation notice to UPDATING.md
- Updated development documentation

None - the old `load-examples` command continues to work with a deprecation warning.

For users with existing Python-based examples:
1. Run `superset examples clear-old --confirm` to remove old examples
2. Run `superset examples load` to load new YAML-based examples
2025-07-29 22:23:52 -07:00

325 lines
10 KiB
Python
Executable File

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
import click
from flask.cli import with_appcontext
import superset.utils.database as database_utils
from superset.utils.decorators import transaction
logger = logging.getLogger(__name__)
def load_examples_run(
load_test_data: bool = False,
load_big_data: bool = False,
only_metadata: bool = False,
force: bool = False,
cleanup: bool = False,
) -> None:
if only_metadata:
logger.info("Loading examples metadata")
else:
examples_db = database_utils.get_example_database()
logger.info(f"Loading examples metadata and related data into {examples_db}")
# pylint: disable=import-outside-toplevel
import superset.examples.data_loading as examples
# Clear old examples if requested
if cleanup:
clear_old_examples()
examples.load_css_templates()
if load_test_data:
# Import test fixtures from tests directory
from tests.fixtures.examples.energy import load_energy
from tests.fixtures.examples.supported_charts_dashboard import (
load_supported_charts_dashboard,
)
from tests.fixtures.examples.tabbed_dashboard import load_tabbed_dashboard
logger.info("Loading energy related dataset")
load_energy(only_metadata, force)
logger.info("Loading [Tabbed dashboard]")
load_tabbed_dashboard(only_metadata)
logger.info("Loading [Supported Charts Dashboard]")
load_supported_charts_dashboard()
else:
logger.info("Loading [Country Map data]")
examples.load_country_map_data(only_metadata, force)
if load_big_data:
# Import test fixture from tests directory
from tests.fixtures.examples.big_data import load_big_data as load_big_data_func
logger.info("Loading big synthetic data for tests")
load_big_data_func()
# load examples that are stored as YAML config files
logger.info("Loading examples from YAML configuration files")
examples.load_examples_from_configs(force, load_test_data)
@click.command()
@with_appcontext
@transaction()
@click.option("--load-test-data", "-t", is_flag=True, help="Load additional test data")
@click.option("--load-big-data", "-b", is_flag=True, help="Load additional big data")
@click.option(
"--only-metadata",
"-m",
is_flag=True,
help="Only load metadata, skip actual data",
)
@click.option(
"--force",
"-f",
is_flag=True,
help="Force load data even if table already exists",
)
def load_examples(
load_test_data: bool = False,
load_big_data: bool = False,
only_metadata: bool = False,
force: bool = False,
) -> None:
"""Loads a set of Slices and Dashboards and a supporting dataset"""
# Show deprecation warning
click.echo(
click.style(
"\nWARNING: 'superset load-examples' is deprecated. "
"Please use 'superset examples load' instead.\n",
fg="yellow",
),
err=True,
)
load_examples_run(load_test_data, load_big_data, only_metadata, force)
# New CLI structure
@click.group(name="examples", help="Manage example data")
def examples_cli() -> None:
"""Group for example-related commands."""
pass
@examples_cli.command(name="load", help="Load example data into the database")
@with_appcontext
@transaction()
@click.option("--load-test-data", "-t", is_flag=True, help="Load additional test data")
@click.option("--load-big-data", "-b", is_flag=True, help="Load additional big data")
@click.option(
"--only-metadata",
"-m",
is_flag=True,
help="Only load metadata, skip actual data",
)
@click.option(
"--force",
"-f",
is_flag=True,
help="Force load data even if table already exists",
)
def load(
load_test_data: bool = False,
load_big_data: bool = False,
only_metadata: bool = False,
force: bool = False,
) -> None:
"""Load example datasets, charts, and dashboards."""
load_examples_run(
load_test_data, load_big_data, only_metadata, force, cleanup=False
)
def clear_old_examples() -> bool:
"""
Clear old Python-generated examples.
Returns True if clear was performed, False otherwise.
"""
from superset import db
from superset.connectors.sqla.models import SqlaTable
from superset.examples.utils import _has_old_examples
from superset.models.core import Database
from superset.models.dashboard import Dashboard, dashboard_slices
from superset.models.slice import Slice
# Check if old examples exist
if not _has_old_examples():
logger.info("No old examples found to clear")
return False
# Find the examples database
examples_db = db.session.query(Database).filter_by(database_name="examples").first()
if not examples_db:
return False
logger.info("Found examples database (id=%s)", examples_db.id)
logger.info("Clearing old examples...")
# 1. Get all datasets from examples database
example_datasets = (
db.session.query(SqlaTable).filter_by(database_id=examples_db.id).all()
)
dataset_ids = [ds.id for ds in example_datasets]
logger.info("Found %d example datasets", len(example_datasets))
# 2. Find all charts using these datasets
example_charts = []
if dataset_ids:
example_charts = (
db.session.query(Slice)
.filter(
Slice.datasource_id.in_(dataset_ids),
Slice.datasource_type == "table",
)
.all()
)
logger.info("Found %d example charts", len(example_charts))
chart_ids = [chart.id for chart in example_charts]
# 3. Find dashboards that contain these charts
example_dashboards = []
if chart_ids:
# Get dashboards that have relationships with our example charts
example_dashboards = (
db.session.query(Dashboard)
.join(dashboard_slices)
.filter(dashboard_slices.c.slice_id.in_(chart_ids))
.distinct()
.all()
)
logger.info("Found %d example dashboards", len(example_dashboards))
# Remove dashboard-slice relationships first
db.session.execute(
dashboard_slices.delete().where(dashboard_slices.c.slice_id.in_(chart_ids))
)
logger.info(
"Removed dashboard-slice relationships for %d charts",
len(chart_ids),
)
# 4. Delete dashboards that are now empty (contain only example charts)
for dashboard in example_dashboards:
# Since we already deleted the relationships, check if dashboard is empty
remaining_charts = (
db.session.query(dashboard_slices.c.slice_id)
.filter(dashboard_slices.c.dashboard_id == dashboard.id)
.count()
)
if remaining_charts == 0:
db.session.delete(dashboard)
logger.info(
"Deleted dashboard: %s (slug: %s)",
dashboard.dashboard_title,
dashboard.slug,
)
else:
logger.info(
"Keeping dashboard %s as it contains non-example charts",
dashboard.dashboard_title,
)
# 5. Delete charts
for chart in example_charts:
db.session.delete(chart)
logger.info("Deleted %d example charts", len(example_charts))
# 6. Delete the database - this will cascade delete all datasets,
# columns, and metrics thanks to the cascade="all, delete-orphan"
db.session.delete(examples_db)
logger.info("Examples database and all related objects removed successfully")
return True
@examples_cli.command(name="clear-old", help="Clear old Python-based example data")
@with_appcontext
@transaction()
@click.option(
"--confirm",
is_flag=True,
help="Skip confirmation prompt",
)
def clear_old(confirm: bool = False) -> None:
"""Clear old Python-generated example datasets, charts, and dashboards."""
if not confirm:
click.confirm(
"This will delete old Python-based example data. Are you sure?",
abort=True,
)
try:
if clear_old_examples():
logger.info("Old examples cleared successfully")
else:
logger.info("No old examples found to clear")
except Exception as e:
logger.error(f"Failed to clear old examples: {e}")
raise
@examples_cli.command(name="clear", help="Clear all example data (NOT YET IMPLEMENTED)")
@with_appcontext
def clear() -> None:
"""Clear all example data including YAML-based examples."""
click.echo(
click.style(
"Clearing YAML-based examples is NOT YET IMPLEMENTED.\n"
"Use 'superset examples clear-old' to remove old Python-based examples.",
fg="yellow",
)
)
@examples_cli.command(name="reload", help="Clear and reload example data")
@with_appcontext
@transaction()
@click.option("--load-test-data", "-t", is_flag=True, help="Load additional test data")
@click.option("--load-big-data", "-b", is_flag=True, help="Load additional big data")
@click.option(
"--only-metadata",
"-m",
is_flag=True,
help="Only load metadata, skip actual data",
)
@click.option(
"--force",
"-f",
is_flag=True,
help="Force load data even if table already exists",
)
def reload(
load_test_data: bool = False,
load_big_data: bool = False,
only_metadata: bool = False,
force: bool = False,
) -> None:
"""Clear existing examples and load fresh ones."""
# This is essentially the old --cleanup behavior
load_examples_run(load_test_data, load_big_data, only_metadata, force, cleanup=True)