From cc1c307c4f240a4d83a47b32c4a21c5fbfa9388e Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 5 May 2026 22:20:47 -0700 Subject: [PATCH] fix: strip instance-specific keys from chart export (#32972) The v1 export currently emits chart `params.datasource`, `params.slice_id`, `params.dashboards`, and the datasource references inside `query_context` even though the import path immediately overwrites them in `update_chart_config_dataset`. That makes round-tripping an export through a second environment produce a non-equal yaml diff for unchanged content. Strip those instance-specific keys from `params`, and remove the top-level / form_data / queries-level `datasource` references inside `query_context` (the rest of `query_context` carries real chart logic and is preserved). The dashboard `position.*.chartId` leak called out in the same issue is left alone here because removing it requires also migrating `metadata.{timed_refresh_immune_slices,filter_scopes, expanded_slices,default_filters}` chart-id references to UUIDs. Co-Authored-By: Claude --- superset/commands/chart/export.py | 44 +++++- .../unit_tests/charts/commands/export_test.py | 139 ++++++++++++++++++ 2 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 tests/unit_tests/charts/commands/export_test.py diff --git a/superset/commands/chart/export.py b/superset/commands/chart/export.py index 50b06cb30de..ee8801283d6 100644 --- a/superset/commands/chart/export.py +++ b/superset/commands/chart/export.py @@ -37,9 +37,40 @@ from superset.extensions import feature_flag_manager logger = logging.getLogger(__name__) -# keys present in the standard export that are not needed +# top-level keys present in the standard export that are not needed: +# they are either rewritten on import or contain instance-specific values. REMOVE_KEYS = ["datasource_type", "datasource_name", "url_params"] +# keys inside `params` that point at the source instance and are +# overwritten on import (see `update_chart_config_dataset`). +REMOVE_PARAMS_KEYS = ["datasource", "slice_id", "dashboards", "url_params"] + + +def _strip_query_context_datasource(query_context: str | None) -> str | None: + """Remove instance-specific datasource references from a query_context. + + The datasource id/type baked into ``query_context`` (top-level, queries, + and form_data) is rewritten on import via ``update_chart_config_dataset``, + so it is only noise in the export. + """ + if not query_context: + return query_context + try: + parsed = json.loads(query_context) + except (json.JSONDecodeError, TypeError): + logger.info("Unable to decode `query_context` field: %s", query_context) + return query_context + + parsed.pop("datasource", None) + if isinstance(parsed.get("form_data"), dict): + parsed["form_data"].pop("datasource", None) + parsed["form_data"].pop("slice_id", None) + for query in parsed.get("queries", []) or []: + if isinstance(query, dict): + query.pop("datasource", None) + + return json.dumps(parsed) + class ExportChartsCommand(ExportModelsCommand): dao = ChartDAO @@ -64,9 +95,18 @@ class ExportChartsCommand(ExportModelsCommand): key: value for key, value in payload.items() if key not in REMOVE_KEYS } + if payload.get("query_context"): + payload["query_context"] = _strip_query_context_datasource( + payload["query_context"] + ) + if payload.get("params"): try: - payload["params"] = json.loads(payload["params"]) + params = json.loads(payload["params"]) + if isinstance(params, dict): + for key in REMOVE_PARAMS_KEYS: + params.pop(key, None) + payload["params"] = params except json.JSONDecodeError: logger.info("Unable to decode `params` field: %s", payload["params"]) diff --git a/tests/unit_tests/charts/commands/export_test.py b/tests/unit_tests/charts/commands/export_test.py new file mode 100644 index 00000000000..e40153baa73 --- /dev/null +++ b/tests/unit_tests/charts/commands/export_test.py @@ -0,0 +1,139 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=import-outside-toplevel, unused-argument, unused-import + +from typing import Any, Optional + +from sqlalchemy.orm.session import Session + +from superset import db +from superset.utils import json + + +def _build_chart( + name: str, + params: dict[str, Any], + query_context: Optional[dict[str, Any]] = None, +) -> Any: + from superset.connectors.sqla.models import SqlaTable, TableColumn + from superset.models.core import Database + from superset.models.slice import Slice + + database = Database(database_name=f"db_{name}", sqlalchemy_uri="sqlite://") + db.session.add(database) + db.session.flush() + table = SqlaTable( + table_name=f"tbl_{name}", + columns=[TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP")], + database=database, + ) + db.session.add(table) + db.session.flush() + chart = Slice( + slice_name=name, + viz_type="table", + datasource_id=table.id, + datasource_type="table", + datasource_name=table.table_name, + params=json.dumps(params), + query_context=json.dumps(query_context) if query_context else None, + ) + db.session.add(chart) + db.session.flush() + return chart + + +def test_export_strips_instance_specific_params(session: Session) -> None: + """Chart export should strip instance-specific keys from params.""" + from superset.commands.chart.export import ExportChartsCommand + from superset.connectors.sqla.models import SqlaTable + + engine = db.session.get_bind() + SqlaTable.metadata.create_all(engine) # pylint: disable=no-member + + chart = _build_chart( + "leaky_chart", + params={ + "datasource": "1__table", + "slice_id": 42, + "dashboards": [1, 2, 3], + "url_params": {"foo": "bar"}, + "metric": "count", + "row_limit": 1000, + }, + ) + + file_name, content_fn = next( + ExportChartsCommand._export( # pylint: disable=protected-access + chart, export_related=False + ) + ) + import yaml + + payload = yaml.safe_load(content_fn()) + assert payload["params"] == {"metric": "count", "row_limit": 1000} + + +def test_export_strips_query_context_datasource(session: Session) -> None: + """Chart export should strip datasource refs from query_context.""" + from superset.commands.chart.export import ExportChartsCommand + from superset.connectors.sqla.models import SqlaTable + + engine = db.session.get_bind() + SqlaTable.metadata.create_all(engine) # pylint: disable=no-member + + chart = _build_chart( + "qc_chart", + params={"viz_type": "table"}, + query_context={ + "datasource": {"id": 1, "type": "table"}, + "form_data": { + "datasource": "1__table", + "slice_id": 42, + "viz_type": "table", + }, + "queries": [ + {"datasource": {"id": 1, "type": "table"}, "metrics": ["count"]}, + ], + }, + ) + + file_name, content_fn = next( + ExportChartsCommand._export( # pylint: disable=protected-access + chart, export_related=False + ) + ) + import yaml + + payload = yaml.safe_load(content_fn()) + qc = json.loads(payload["query_context"]) + assert "datasource" not in qc + assert "datasource" not in qc["form_data"] + assert "slice_id" not in qc["form_data"] + assert qc["form_data"]["viz_type"] == "table" + assert "datasource" not in qc["queries"][0] + assert qc["queries"][0]["metrics"] == ["count"] + + +def test_export_preserves_non_dict_params(session: Session) -> None: + """Non-dict params (e.g. legacy strings) should round-trip unchanged.""" + from superset.commands.chart.export import _strip_query_context_datasource + + # _strip_query_context_datasource should be a no-op on falsy / undecodable input. + assert _strip_query_context_datasource(None) is None + assert _strip_query_context_datasource("") == "" + assert _strip_query_context_datasource("not-json") == "not-json"