feat: handle subtle bug with load-examples (#16052)

(cherry picked from commit 69c5cd7922)
This commit is contained in:
Beto Dealmeida
2021-08-03 12:44:44 -07:00
committed by henryyeh
parent a87c5b89d6
commit a4e9e8f87f

View File

@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Tuple
from marshmallow import Schema
from sqlalchemy.orm import Session
from sqlalchemy.orm.exc import MultipleResultsFound
from sqlalchemy.sql import select
from superset import db
@@ -70,7 +71,7 @@ class ImportExamplesCommand(ImportModelsCommand):
db.session.rollback()
raise self.import_error()
# pylint: disable=too-many-locals, arguments-differ
# pylint: disable=too-many-locals, arguments-differ, too-many-branches
@staticmethod
def _import(
session: Session,
@@ -86,19 +87,40 @@ class ImportExamplesCommand(ImportModelsCommand):
database_ids[str(database.uuid)] = database.id
# import datasets
# TODO (betodealmeida): once we have all examples being imported we can
# have a stable UUID for the database stored in the dataset YAML; for
# now we need to fetch the current ID.
examples_id = (
db.session.query(Database).filter_by(database_name="examples").one().id
# If database_uuid is not in the list of UUIDs it means that the examples
# database was created before its UUID was frozen, so it has a random UUID.
# We need to determine its ID so we can point the dataset to it.
examples_db = (
db.session.query(Database).filter_by(database_name="examples").first()
)
dataset_info: Dict[str, Dict[str, Any]] = {}
for file_name, config in configs.items():
if file_name.startswith("datasets/"):
config["database_id"] = examples_id
# find the ID of the corresponding database
if config["database_uuid"] not in database_ids:
if examples_db is None:
raise Exception("Cannot find examples database")
config["database_id"] = examples_db.id
else:
config["database_id"] = database_ids[config["database_uuid"]]
dataset = import_dataset(
session, config, overwrite=overwrite, force_data=force_data
)
try:
dataset = import_dataset(
session, config, overwrite=overwrite, force_data=force_data
)
except MultipleResultsFound:
# Multiple results can be found for datasets. There was a bug in
# load-examples that resulted in datasets being loaded with a NULL
# schema. Users could then add a new dataset with the same name in
# the correct schema, resulting in duplicates, since the uniqueness
# constraint was not enforced correctly in the application logic.
# See https://github.com/apache/superset/issues/16051.
continue
dataset_info[str(dataset.uuid)] = {
"datasource_id": dataset.id,
"datasource_type": "view" if dataset.is_sqllab_view else "table",
@@ -108,7 +130,10 @@ class ImportExamplesCommand(ImportModelsCommand):
# import charts
chart_ids: Dict[str, int] = {}
for file_name, config in configs.items():
if file_name.startswith("charts/"):
if (
file_name.startswith("charts/")
and config["dataset_uuid"] in dataset_info
):
# update datasource id, type, and name
config.update(dataset_info[config["dataset_uuid"]])
chart = import_chart(session, config, overwrite=overwrite)
@@ -123,8 +148,14 @@ class ImportExamplesCommand(ImportModelsCommand):
dashboard_chart_ids: List[Tuple[int, int]] = []
for file_name, config in configs.items():
if file_name.startswith("dashboards/"):
config = update_id_refs(config, chart_ids)
try:
config = update_id_refs(config, chart_ids)
except KeyError:
continue
dashboard = import_dashboard(session, config, overwrite=overwrite)
dashboard.published = True
for uuid in find_chart_uuids(config["position"]):
chart_id = chart_ids[uuid]
if (dashboard.id, chart_id) not in existing_relationships:
@@ -135,5 +166,5 @@ class ImportExamplesCommand(ImportModelsCommand):
{"dashboard_id": dashboard_id, "slice_id": chart_id}
for (dashboard_id, chart_id) in dashboard_chart_ids
]
# pylint: disable=no-value-for-parameter (sqlalchemy/issues/4656)
# pylint: disable=no-value-for-parameter # sqlalchemy/issues/4656
session.execute(dashboard_slices.insert(), values)