feat: handle subtle bug with load-examples (#16052)

(cherry picked from commit 69c5cd7922)
2026-07-18 12:45:44 +00:00 · 2021-08-03 12:44:44 -07:00
parent a87c5b89d6
commit a4e9e8f87f
1 changed files with 41 additions and 10 deletions
--- a/superset/commands/importers/v1/examples.py
+++ b/superset/commands/importers/v1/examples.py
@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Tuple

 from marshmallow import Schema
 from sqlalchemy.orm import Session
+from sqlalchemy.orm.exc import MultipleResultsFound
 from sqlalchemy.sql import select

 from superset import db
@@ -70,7 +71,7 @@ class ImportExamplesCommand(ImportModelsCommand):
            db.session.rollback()
            raise self.import_error()

-    # pylint: disable=too-many-locals, arguments-differ
+    # pylint: disable=too-many-locals, arguments-differ, too-many-branches
    @staticmethod
    def _import(
        session: Session,
@@ -86,19 +87,40 @@ class ImportExamplesCommand(ImportModelsCommand):
                database_ids[str(database.uuid)] = database.id

        # import datasets
-        # TODO (betodealmeida): once we have all examples being imported we can
-        # have a stable UUID for the database stored in the dataset YAML; for
-        # now we need to fetch the current ID.
-        examples_id = (
-            db.session.query(Database).filter_by(database_name="examples").one().id
+        # If database_uuid is not in the list of UUIDs it means that the examples
+        # database was created before its UUID was frozen, so it has a random UUID.
+        # We need to determine its ID so we can point the dataset to it.
+        examples_db = (
+            db.session.query(Database).filter_by(database_name="examples").first()
        )
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if file_name.startswith("datasets/"):
-                config["database_id"] = examples_id
+                # find the ID of the corresponding database
+                if config["database_uuid"] not in database_ids:
+                    if examples_db is None:
+                        raise Exception("Cannot find examples database")
+                    config["database_id"] = examples_db.id
+                else:
+                    config["database_id"] = database_ids[config["database_uuid"]]
+
                dataset = import_dataset(
                    session, config, overwrite=overwrite, force_data=force_data
                )
+
+                try:
+                    dataset = import_dataset(
+                        session, config, overwrite=overwrite, force_data=force_data
+                    )
+                except MultipleResultsFound:
+                    # Multiple results can be found for datasets. There was a bug in
+                    # load-examples that resulted in datasets being loaded with a NULL
+                    # schema. Users could then add a new dataset with the same name in
+                    # the correct schema, resulting in duplicates, since the uniqueness
+                    # constraint was not enforced correctly in the application logic.
+                    # See https://github.com/apache/superset/issues/16051.
+                    continue
+
                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type": "view" if dataset.is_sqllab_view else "table",
@@ -108,7 +130,10 @@ class ImportExamplesCommand(ImportModelsCommand):
        # import charts
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
-            if file_name.startswith("charts/"):
+            if (
+                file_name.startswith("charts/")
+                and config["dataset_uuid"] in dataset_info
+            ):
                # update datasource id, type, and name
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=overwrite)
@@ -123,8 +148,14 @@ class ImportExamplesCommand(ImportModelsCommand):
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
-                config = update_id_refs(config, chart_ids)
+                try:
+                    config = update_id_refs(config, chart_ids)
+                except KeyError:
+                    continue
+
                dashboard = import_dashboard(session, config, overwrite=overwrite)
+                dashboard.published = True
+
                for uuid in find_chart_uuids(config["position"]):
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
@@ -135,5 +166,5 @@ class ImportExamplesCommand(ImportModelsCommand):
            {"dashboard_id": dashboard_id, "slice_id": chart_id}
            for (dashboard_id, chart_id) in dashboard_chart_ids
        ]
-        # pylint: disable=no-value-for-parameter (sqlalchemy/issues/4656)
+        # pylint: disable=no-value-for-parameter # sqlalchemy/issues/4656
        session.execute(dashboard_slices.insert(), values)