feat: handle subtle bug with load-examples (#16052)

(cherry picked from commit 69c5cd7922)
This commit is contained in:
Beto Dealmeida
2021-08-03 12:44:44 -07:00
committed by henryyeh
parent a87c5b89d6
commit a4e9e8f87f

View File

@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Tuple
from marshmallow import Schema from marshmallow import Schema
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy.orm.exc import MultipleResultsFound
from sqlalchemy.sql import select from sqlalchemy.sql import select
from superset import db from superset import db
@@ -70,7 +71,7 @@ class ImportExamplesCommand(ImportModelsCommand):
db.session.rollback() db.session.rollback()
raise self.import_error() raise self.import_error()
# pylint: disable=too-many-locals, arguments-differ # pylint: disable=too-many-locals, arguments-differ, too-many-branches
@staticmethod @staticmethod
def _import( def _import(
session: Session, session: Session,
@@ -86,19 +87,40 @@ class ImportExamplesCommand(ImportModelsCommand):
database_ids[str(database.uuid)] = database.id database_ids[str(database.uuid)] = database.id
# import datasets # import datasets
# TODO (betodealmeida): once we have all examples being imported we can # If database_uuid is not in the list of UUIDs it means that the examples
# have a stable UUID for the database stored in the dataset YAML; for # database was created before its UUID was frozen, so it has a random UUID.
# now we need to fetch the current ID. # We need to determine its ID so we can point the dataset to it.
examples_id = ( examples_db = (
db.session.query(Database).filter_by(database_name="examples").one().id db.session.query(Database).filter_by(database_name="examples").first()
) )
dataset_info: Dict[str, Dict[str, Any]] = {} dataset_info: Dict[str, Dict[str, Any]] = {}
for file_name, config in configs.items(): for file_name, config in configs.items():
if file_name.startswith("datasets/"): if file_name.startswith("datasets/"):
config["database_id"] = examples_id # find the ID of the corresponding database
if config["database_uuid"] not in database_ids:
if examples_db is None:
raise Exception("Cannot find examples database")
config["database_id"] = examples_db.id
else:
config["database_id"] = database_ids[config["database_uuid"]]
dataset = import_dataset( dataset = import_dataset(
session, config, overwrite=overwrite, force_data=force_data session, config, overwrite=overwrite, force_data=force_data
) )
try:
dataset = import_dataset(
session, config, overwrite=overwrite, force_data=force_data
)
except MultipleResultsFound:
# Multiple results can be found for datasets. There was a bug in
# load-examples that resulted in datasets being loaded with a NULL
# schema. Users could then add a new dataset with the same name in
# the correct schema, resulting in duplicates, since the uniqueness
# constraint was not enforced correctly in the application logic.
# See https://github.com/apache/superset/issues/16051.
continue
dataset_info[str(dataset.uuid)] = { dataset_info[str(dataset.uuid)] = {
"datasource_id": dataset.id, "datasource_id": dataset.id,
"datasource_type": "view" if dataset.is_sqllab_view else "table", "datasource_type": "view" if dataset.is_sqllab_view else "table",
@@ -108,7 +130,10 @@ class ImportExamplesCommand(ImportModelsCommand):
# import charts # import charts
chart_ids: Dict[str, int] = {} chart_ids: Dict[str, int] = {}
for file_name, config in configs.items(): for file_name, config in configs.items():
if file_name.startswith("charts/"): if (
file_name.startswith("charts/")
and config["dataset_uuid"] in dataset_info
):
# update datasource id, type, and name # update datasource id, type, and name
config.update(dataset_info[config["dataset_uuid"]]) config.update(dataset_info[config["dataset_uuid"]])
chart = import_chart(session, config, overwrite=overwrite) chart = import_chart(session, config, overwrite=overwrite)
@@ -123,8 +148,14 @@ class ImportExamplesCommand(ImportModelsCommand):
dashboard_chart_ids: List[Tuple[int, int]] = [] dashboard_chart_ids: List[Tuple[int, int]] = []
for file_name, config in configs.items(): for file_name, config in configs.items():
if file_name.startswith("dashboards/"): if file_name.startswith("dashboards/"):
config = update_id_refs(config, chart_ids) try:
config = update_id_refs(config, chart_ids)
except KeyError:
continue
dashboard = import_dashboard(session, config, overwrite=overwrite) dashboard = import_dashboard(session, config, overwrite=overwrite)
dashboard.published = True
for uuid in find_chart_uuids(config["position"]): for uuid in find_chart_uuids(config["position"]):
chart_id = chart_ids[uuid] chart_id = chart_ids[uuid]
if (dashboard.id, chart_id) not in existing_relationships: if (dashboard.id, chart_id) not in existing_relationships:
@@ -135,5 +166,5 @@ class ImportExamplesCommand(ImportModelsCommand):
{"dashboard_id": dashboard_id, "slice_id": chart_id} {"dashboard_id": dashboard_id, "slice_id": chart_id}
for (dashboard_id, chart_id) in dashboard_chart_ids for (dashboard_id, chart_id) in dashboard_chart_ids
] ]
# pylint: disable=no-value-for-parameter (sqlalchemy/issues/4656) # pylint: disable=no-value-for-parameter # sqlalchemy/issues/4656
session.execute(dashboard_slices.insert(), values) session.execute(dashboard_slices.insert(), values)