mirror of
https://github.com/apache/superset.git
synced 2026-04-20 00:24:38 +00:00
feat(examples): Modernize example data loading with Parquet and YAML configs (#36538)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import logging
|
||||
from typing import Any, Callable
|
||||
|
||||
import click
|
||||
from flask.cli import with_appcontext
|
||||
@@ -25,6 +26,44 @@ from superset.utils.decorators import transaction
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _should_skip_loader(
|
||||
loader_name: str, load_big_data: bool, only_metadata: bool
|
||||
) -> bool:
|
||||
"""Check if a loader should be skipped."""
|
||||
# Skip special loaders that aren't datasets
|
||||
if loader_name in ["load_css_templates", "load_examples_from_configs"]:
|
||||
return True
|
||||
|
||||
# Skip big data if not requested or when only metadata is requested
|
||||
if loader_name == "load_big_data" and (not load_big_data or only_metadata):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _load_dataset(
    loader: Callable[..., Any], loader_name: str, only_metadata: bool, force: bool
) -> None:
    """Load a single dataset with error handling."""
    import inspect

    # Derive a human-readable name by stripping the "load_" prefix.
    dataset_name = loader_name[5:].replace("_", " ").title()
    logger.info("Loading [%s]", dataset_name)

    # Only pass the flags the loader's signature actually accepts.
    accepted = inspect.signature(loader).parameters
    kwargs: dict[str, Any] = {
        key: value
        for key, value in (("only_metadata", only_metadata), ("force", force))
        if key in accepted
    }

    try:
        loader(**kwargs)
    except Exception as err:
        # Best-effort loading: a single failing example must not abort the rest.
        logger.warning("Failed to load %s: %s", dataset_name, err)
|
||||
|
||||
|
||||
def load_examples_run(
|
||||
load_test_data: bool = False,
|
||||
load_big_data: bool = False,
|
||||
@@ -40,54 +79,21 @@ def load_examples_run(
|
||||
# pylint: disable=import-outside-toplevel
|
||||
import superset.examples.data_loading as examples
|
||||
|
||||
# Always load CSS templates
|
||||
examples.load_css_templates()
|
||||
|
||||
if load_test_data:
|
||||
logger.info("Loading energy related dataset")
|
||||
examples.load_energy(only_metadata, force)
|
||||
# Auto-discover and load all datasets
|
||||
for loader_name in dir(examples):
|
||||
if not loader_name.startswith("load_"):
|
||||
continue
|
||||
|
||||
logger.info("Loading [World Bank's Health Nutrition and Population Stats]")
|
||||
examples.load_world_bank_health_n_pop(only_metadata, force)
|
||||
if _should_skip_loader(loader_name, load_big_data, only_metadata):
|
||||
continue
|
||||
|
||||
logger.info("Loading [Birth names]")
|
||||
examples.load_birth_names(only_metadata, force)
|
||||
loader = getattr(examples, loader_name)
|
||||
_load_dataset(loader, loader_name, only_metadata, force)
|
||||
|
||||
logger.info("Loading [International Sales]")
|
||||
examples.load_international_sales(only_metadata, force)
|
||||
|
||||
if load_test_data:
|
||||
logger.info("Loading [Tabbed dashboard]")
|
||||
examples.load_tabbed_dashboard(only_metadata)
|
||||
|
||||
logger.info("Loading [Supported Charts Dashboard]")
|
||||
examples.load_supported_charts_dashboard()
|
||||
else:
|
||||
logger.info("Loading [Random long/lat data]")
|
||||
examples.load_long_lat_data(only_metadata, force)
|
||||
|
||||
logger.info("Loading [Country Map data]")
|
||||
examples.load_country_map_data(only_metadata, force)
|
||||
|
||||
logger.info("Loading [San Francisco population polygons]")
|
||||
examples.load_sf_population_polygons(only_metadata, force)
|
||||
|
||||
logger.info("Loading [Flights data]")
|
||||
examples.load_flights(only_metadata, force)
|
||||
|
||||
logger.info("Loading [BART lines]")
|
||||
examples.load_bart_lines(only_metadata, force)
|
||||
|
||||
logger.info("Loading [Misc Charts] dashboard")
|
||||
examples.load_misc_dashboard()
|
||||
|
||||
logger.info("Loading DECK.gl demo")
|
||||
examples.load_deck_dash()
|
||||
|
||||
if load_big_data:
|
||||
logger.info("Loading big synthetic data for tests")
|
||||
examples.load_big_data()
|
||||
|
||||
# load examples that are stored as YAML config files
|
||||
# Load examples that are stored as YAML config files
|
||||
examples.load_examples_from_configs(force, load_test_data)
|
||||
|
||||
|
||||
|
||||
234
superset/cli/export_example.py
Normal file
234
superset/cli/export_example.py
Normal file
@@ -0,0 +1,234 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""CLI command to export a dashboard as an example.
|
||||
|
||||
This creates an example-ready folder structure that can be committed
|
||||
to superset/examples/ and loaded via the example loading system.
|
||||
|
||||
Usage:
|
||||
superset export-example --dashboard-id 123 --name my_example
|
||||
superset export-example --dashboard-slug my-dashboard --name my_example
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import click
|
||||
from flask.cli import with_appcontext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
APACHE_LICENSE_HEADER = """# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""
|
||||
|
||||
|
||||
def write_file_with_header(path: Path, content: bytes) -> None:
    """Write file, adding Apache license header for YAML files."""
    path.parent.mkdir(parents=True, exist_ok=True)

    # YAML files get the Apache license banner prepended; everything else
    # (e.g. Parquet data) is written verbatim.
    if path.suffix == ".yaml":
        payload = APACHE_LICENSE_HEADER.encode("utf-8") + content
    else:
        payload = content
    path.write_bytes(payload)

    logger.info("Wrote %s", path)
|
||||
|
||||
|
||||
@click.command()
@with_appcontext
@click.option("--dashboard-id", "-d", type=int, help="Dashboard ID to export")
@click.option("--dashboard-slug", "-s", type=str, help="Dashboard slug to export")
@click.option("--name", "-n", required=True, help="Name for the example folder")
@click.option(
    "--output-dir",
    "-o",
    default="superset/examples",
    help="Output directory (default: superset/examples)",
)
@click.option(
    "--export-data/--no-export-data",
    default=True,
    help="Export data to Parquet (default: True)",
)
@click.option(
    "--sample-rows", type=int, default=None, help="Limit data export to this many rows"
)
@click.option("--force", "-f", is_flag=True, help="Overwrite existing example folder")
def export_example(  # noqa: C901
    dashboard_id: Optional[int],
    dashboard_slug: Optional[str],
    name: str,
    output_dir: str,
    export_data: bool,
    sample_rows: Optional[int],
    force: bool,
) -> None:
    """Export a dashboard as an example.

    Creates a folder structure in superset/examples/ that can be loaded
    by the example loading system:

    \b
    Single dataset:
        <name>/
        ├── data.parquet # Raw data
        ├── dataset.yaml # Dataset metadata
        ├── dashboard.yaml # Dashboard definition
        └── charts/
            └── *.yaml # Chart definitions

    \b
    Multiple datasets:
        <name>/
        ├── data/
        │   ├── table1.parquet
        │   └── table2.parquet
        ├── datasets/
        │   ├── table1.yaml
        │   └── table2.yaml
        ├── dashboard.yaml
        └── charts/
            └── *.yaml

    Examples:

    \b
    # Export by dashboard ID
    superset export-example -d 1 -n my_example

    \b
    # Export by slug, limit data to 1000 rows
    superset export-example -s my-dashboard -n my_example --sample-rows 1000

    \b
    # Export metadata only (no data)
    superset export-example -d 1 -n my_example --no-export-data
    """
    # Import at runtime to avoid app initialization issues during CLI loading
    # pylint: disable=import-outside-toplevel
    from flask import g

    from superset import db, security_manager
    from superset.commands.dashboard.exceptions import DashboardNotFoundError
    from superset.commands.dashboard.export_example import ExportExampleCommand
    from superset.models.dashboard import Dashboard
    from superset.utils import json as superset_json

    # Run the export as the admin user so all objects are visible.
    # NOTE(review): assumes a user named "admin" exists — confirm for
    # deployments with a customized admin username.
    g.user = security_manager.find_user(username="admin")

    # Find the dashboard
    # NOTE(review): a dashboard id of 0 is falsy and would fall through to
    # the slug branch; harmless in practice but worth confirming.
    if dashboard_id:
        dashboard = db.session.query(Dashboard).get(dashboard_id)
    elif dashboard_slug:
        dashboard = db.session.query(Dashboard).filter_by(slug=dashboard_slug).first()
    else:
        raise click.UsageError("Must specify --dashboard-id or --dashboard-slug")

    if not dashboard:
        raise click.ClickException(
            f"Dashboard not found: {dashboard_id or dashboard_slug}"
        )

    logger.info("Exporting dashboard: %s", dashboard.dashboard_title)

    # Create output directory
    example_dir = Path(output_dir) / name
    if example_dir.exists() and not force:
        raise click.ClickException(
            f"Directory already exists: {example_dir}. Use --force to overwrite."
        )

    example_dir.mkdir(parents=True, exist_ok=True)

    # Run the export command
    command = ExportExampleCommand(
        dashboard_id=dashboard.id,
        export_data=export_data,
        sample_rows=sample_rows,
    )

    try:
        # Per-category tallies used only for the summary printed below.
        file_count = {"charts": 0, "datasets": 0, "data": 0}

        # command.run() yields (relative filename, lazy content callable);
        # content is produced only when requested.
        for filename, content_fn in command.run():
            file_path = example_dir / filename
            content = content_fn()
            write_file_with_header(file_path, content)

            # Track file counts for summary
            if filename.startswith("charts/"):
                file_count["charts"] += 1
            elif filename.startswith("datasets/") or filename == "dataset.yaml":
                file_count["datasets"] += 1
            elif filename.startswith("data/") or filename == "data.parquet":
                file_count["data"] += 1

    except DashboardNotFoundError as err:
        raise click.ClickException(
            f"Dashboard not found: {dashboard_id or dashboard_slug}"
        ) from err

    # Summary
    click.echo(f"\n✅ Exported to: {example_dir}")
    click.echo(" - dashboard.yaml")

    # More than one dataset means the multi-dataset layout (datasets/ + data/)
    # was produced; otherwise the flat single-dataset layout.
    if file_count["datasets"] > 1:
        click.echo(f" - datasets/ ({file_count['datasets']} datasets)")
        if export_data and file_count["data"]:
            click.echo(f" - data/ ({file_count['data']} parquet files)")
    else:
        click.echo(" - dataset.yaml")
        if export_data and file_count["data"]:
            click.echo(" - data.parquet")

    click.echo(f" - charts/ ({file_count['charts']} charts)")

    # Native filters summary
    if dashboard.json_metadata:
        try:
            meta = superset_json.loads(dashboard.json_metadata)
            filters = meta.get("native_filter_configuration", [])
            if filters:
                click.echo(f" - {len(filters)} native filters exported")
        except Exception:
            # Summary output is cosmetic; never fail the export over it.
            logger.debug("Could not parse json_metadata for filter count")

    click.echo("\nTo load this example, ensure the folder is in superset/examples/")
    click.echo("and it will be picked up by load_examples_from_configs().")
|
||||
90
superset/cli/test_loaders.py
Normal file
90
superset/cli/test_loaders.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""Test data loaders for stress testing and development.
|
||||
|
||||
This module contains specialized data loaders that generate synthetic data
|
||||
for testing Superset's capabilities with edge cases:
|
||||
- Wide tables (many columns)
|
||||
- Many tables (testing catalog performance)
|
||||
- Long table names (UI edge cases)
|
||||
|
||||
These loaders are invoked via CLI flags and are not part of the standard
|
||||
example datasets.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import string
|
||||
|
||||
import sqlalchemy.sql.sqltypes
|
||||
|
||||
from superset.utils.mock_data import add_data, ColumnInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Pool of SQLAlchemy column types cycled through when generating the
# synthetic wide table in load_big_data; covers integer, string, boolean,
# float, and temporal column kinds.
COLUMN_TYPES = [
    sqlalchemy.sql.sqltypes.INTEGER(),
    sqlalchemy.sql.sqltypes.VARCHAR(length=255),
    sqlalchemy.sql.sqltypes.TEXT(),
    sqlalchemy.sql.sqltypes.BOOLEAN(),
    sqlalchemy.sql.sqltypes.FLOAT(),
    sqlalchemy.sql.sqltypes.DATE(),
    sqlalchemy.sql.sqltypes.TIME(),
    sqlalchemy.sql.sqltypes.TIMESTAMP(),
]
|
||||
|
||||
|
||||
def load_big_data() -> None:
    """Generate synthetic stress-test data.

    Creates one 100-column table, 1000 two-column tables, and one table
    with a 60-character random name, exercising wide-table rendering,
    catalog performance, and long-name UI edge cases respectively.
    """
    logger.debug("Creating table `wide_table` with 100 columns")
    wide_columns: list[ColumnInfo] = [
        {
            "name": f"col{idx}",
            "type": COLUMN_TYPES[idx % len(COLUMN_TYPES)],
            "nullable": False,
            "default": None,
            "autoincrement": "auto",
            # Only the first column acts as the primary key.
            "primary_key": 1 if idx == 0 else 0,
        }
        for idx in range(100)
    ]
    add_data(columns=wide_columns, num_rows=1000, table_name="wide_table")

    logger.debug("Creating 1000 small tables")
    # Shared two-column schema (id + value) reused for every small table.
    small_columns: list[ColumnInfo] = [
        {
            "name": "id",
            "type": sqlalchemy.sql.sqltypes.INTEGER(),
            "nullable": False,
            "default": None,
            "autoincrement": "auto",
            "primary_key": 1,
        },
        {
            "name": "value",
            "type": sqlalchemy.sql.sqltypes.VARCHAR(length=255),
            "nullable": False,
            "default": None,
            "autoincrement": "auto",
            "primary_key": 0,
        },
    ]
    for table_idx in range(1000):
        add_data(columns=small_columns, num_rows=10, table_name=f"small_table_{table_idx}")

    logger.debug("Creating table with long name")
    # Non-cryptographic randomness is fine for a test table name.
    long_name = "".join(random.choices(string.ascii_letters + string.digits, k=60))  # noqa: S311
    add_data(columns=small_columns, num_rows=10, table_name=long_name)
|
||||
Reference in New Issue
Block a user