diff --git a/UPDATING.md b/UPDATING.md index 0680e44fb61..a175f7f440b 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -23,6 +23,7 @@ This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. ## Next +- [34782](https://github.com/apache/superset/pull/34782): Dataset exports now include the dataset ID in their file name (similar to charts and dashboards). If managing assets as code, make sure to rename existing dataset YAMLs to include the ID (and avoid duplicated files). - [34536](https://github.com/apache/superset/pull/34536): The `ENVIRONMENT_TAG_CONFIG` color values have changed to support only Ant Design semantic colors. Update your `superset_config.py`: - Change `"error.base"` to just `"error"` after this PR - Change any hex color values to one of: `"success"`, `"processing"`, `"error"`, `"warning"`, `"default"` diff --git a/superset/commands/dataset/export.py b/superset/commands/dataset/export.py index 70cce0291a0..5422f821577 100644 --- a/superset/commands/dataset/export.py +++ b/superset/commands/dataset/export.py @@ -46,7 +46,7 @@ class ExportDatasetsCommand(ExportModelsCommand): db_file_name = get_filename( model.database.database_name, model.database.id, skip_id=True ) - ds_file_name = get_filename(model.table_name, model.id, skip_id=True) + ds_file_name = get_filename(model.table_name, model.id) return f"datasets/{db_file_name}/{ds_file_name}.yaml" @staticmethod diff --git a/tests/integration_tests/charts/commands_tests.py b/tests/integration_tests/charts/commands_tests.py index f50a9ea9685..6eec119ca9a 100644 --- a/tests/integration_tests/charts/commands_tests.py +++ b/tests/integration_tests/charts/commands_tests.py @@ -74,7 +74,7 @@ class TestExportChartsCommand(SupersetTestCase): expected = [ "metadata.yaml", f"charts/Energy_Sankey_{example_chart.id}.yaml", - "datasets/examples/energy_usage.yaml", + f"datasets/examples/energy_usage_{example_chart.table.id}.yaml", "databases/examples.yaml", ] assert expected == list(contents.keys()) diff --git a/tests/integration_tests/dashboards/commands_tests.py b/tests/integration_tests/dashboards/commands_tests.py index bcf46a36cb5..fc2f89ce448 100644 --- a/tests/integration_tests/dashboards/commands_tests.py +++ b/tests/integration_tests/dashboards/commands_tests.py @@ -81,12 +81,15 @@ class TestExportDashboardsCommand(SupersetTestCase): expected_paths = { "metadata.yaml", f"dashboards/World_Banks_Data_{example_dashboard.id}.yaml", - "datasets/examples/wb_health_population.yaml", "databases/examples.yaml", } for chart in example_dashboard.slices: chart_slug = secure_filename(chart.slice_name) expected_paths.add(f"charts/{chart_slug}_{chart.id}.yaml") + dataset_slug = secure_filename(chart.table.table_name) + expected_paths.add( + f"datasets/examples/{dataset_slug}_{chart.table.id}.yaml" + ) assert expected_paths == set(contents.keys()) metadata = yaml.safe_load( diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py index 2316414a12c..2a26f418cc0 100644 --- a/tests/integration_tests/datasets/api_tests.py +++ b/tests/integration_tests/datasets/api_tests.py @@ -2390,6 +2390,63 @@ class TestDatasetApi(SupersetTestCase): # gamma users by default do not have access to this dataset assert rv.status_code in (403, 404) + def test_export_dataset_bundle_with_id_in_filename(self): + """ + Dataset API: Test that exported dataset filenames include the dataset ID + to prevent filename collisions when datasets have identical names. + """ + # Test fails for SQLite because of same table name + if backend() == "sqlite": + return + + first_connection = self.insert_database("test_db_connection_1") + second_connection = self.insert_database("test_db_connection_2") + first_dataset = self.insert_dataset( + table_name="test_dataset", + owners=[], + database=first_connection, + fetch_metadata=False, + ) + second_dataset = self.insert_dataset( + table_name="test_dataset", + owners=[], + database=second_connection, + fetch_metadata=False, + ) + + self.items_to_delete = [ + first_dataset, + second_dataset, + first_connection, + second_connection, + ] + + self.login(ADMIN_USERNAME) + argument = [first_dataset.id, second_dataset.id] + uri = f"api/v1/dataset/export/?q={prison.dumps(argument)}" + rv = self.get_assert_metric(uri, "export") + + assert rv.status_code == 200 + + buf = BytesIO(rv.data) + assert is_zipfile(buf) + + with ZipFile(buf, "r") as zip_file: + filenames = zip_file.namelist() + + assert any( + filename.endswith( + f"datasets/test_db_connection_1/test_dataset_{first_dataset.id}.yaml" + ) + for filename in filenames + ) + assert any( + filename.endswith( + f"datasets/test_db_connection_2/test_dataset_{second_dataset.id}.yaml" + ) + for filename in filenames + ) + @unittest.skip("Number of related objects depend on DB") @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") def test_get_dataset_related_objects(self): diff --git a/tests/integration_tests/datasets/commands_tests.py b/tests/integration_tests/datasets/commands_tests.py index fb802bd1bf8..a7ce72b5fad 100644 --- a/tests/integration_tests/datasets/commands_tests.py +++ b/tests/integration_tests/datasets/commands_tests.py @@ -78,11 +78,13 @@ class TestExportDatasetsCommand(SupersetTestCase): assert list(contents.keys()) == [ "metadata.yaml", - "datasets/examples/energy_usage.yaml", + f"datasets/examples/energy_usage_{example_dataset.id}.yaml", "databases/examples.yaml", ] - metadata = yaml.safe_load(contents["datasets/examples/energy_usage.yaml"]()) + metadata = yaml.safe_load( + contents[f"datasets/examples/energy_usage_{example_dataset.id}.yaml"]() + ) # sort columns for deterministic comparison metadata["columns"] = sorted(metadata["columns"], key=itemgetter("column_name")) @@ -218,7 +220,9 @@ class TestExportDatasetsCommand(SupersetTestCase): command = ExportDatasetsCommand([example_dataset.id]) contents = dict(command.run()) - metadata = yaml.safe_load(contents["datasets/examples/energy_usage.yaml"]()) + metadata = yaml.safe_load( + contents[f"datasets/examples/energy_usage_{example_dataset.id}.yaml"]() + ) assert list(metadata.keys()) == [ "table_name", "main_dttm_col", @@ -261,7 +265,7 @@ class TestExportDatasetsCommand(SupersetTestCase): assert list(contents.keys()) == [ "metadata.yaml", - "datasets/examples/energy_usage.yaml", + f"datasets/examples/energy_usage_{example_dataset.id}.yaml", ] diff --git a/tests/unit_tests/datasets/commands/export_test.py b/tests/unit_tests/datasets/commands/export_test.py index 04bc989ba5b..47d72bf9d44 100644 --- a/tests/unit_tests/datasets/commands/export_test.py +++ b/tests/unit_tests/datasets/commands/export_test.py @@ -132,6 +132,10 @@ def test_export(session: Session) -> None: extra=json.dumps({"warning_markdown": "*WARNING*"}), ) + # Add the table to the session and flush to get an ID + db.session.add(sqla_table) + db.session.flush() + export = [ (file[0], file[1]()) for file in list( @@ -148,7 +152,7 @@ def test_export(session: Session) -> None: assert export == [ ( - "datasets/my_database/my_table.yaml", + f"datasets/my_database/my_table_{sqla_table.id}.yaml", f"""table_name: my_table main_dttm_col: ds description: This is the description