From 2f95d288dd71e2df8f256875d6b768111be8b65e Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 21 May 2026 16:34:38 +0300 Subject: [PATCH] fix(mcp): eager-load dataset.metrics to prevent Excel export DetachedInstanceError (#39483) Co-authored-by: Claude Opus 4.7 (1M context) --- superset/mcp_service/chart/chart_helpers.py | 15 +- .../mcp_service/chart/tool/get_chart_data.py | 18 ++- .../chart/tool/test_get_chart_data.py | 151 ++++++++++++++++++ 3 files changed, 179 insertions(+), 5 deletions(-) diff --git a/superset/mcp_service/chart/chart_helpers.py b/superset/mcp_service/chart/chart_helpers.py index b6f2b5b30dc..fca0f1087a2 100644 --- a/superset/mcp_service/chart/chart_helpers.py +++ b/superset/mcp_service/chart/chart_helpers.py @@ -44,20 +44,29 @@ QUERY_CONTEXT_EXTRA_FORM_DATA_OVERRIDE_KEYS = { } -def find_chart_by_identifier(identifier: int | str) -> Slice | None: +def find_chart_by_identifier( + identifier: int | str, + query_options: list[Any] | None = None, +) -> Slice | None: """Find a chart by numeric ID or UUID string. Accepts an integer ID, a string that looks like a digit (e.g. "123"), or a UUID string. Returns the Slice model instance or None. + + ``query_options`` is forwarded to the DAO so callers can eager-load + relationships needed after the request-scoped session is detached. """ from superset.daos.chart import ChartDAO # avoid circular import + extra: dict[str, Any] = ( + {"query_options": query_options} if query_options is not None else {} + ) if isinstance(identifier, int) or ( isinstance(identifier, str) and identifier.isdigit() ): chart_id = int(identifier) if isinstance(identifier, str) else identifier - return ChartDAO.find_by_id(chart_id) - return ChartDAO.find_by_id(identifier, id_column="uuid") + return ChartDAO.find_by_id(chart_id, **extra) + return ChartDAO.find_by_id(identifier, id_column="uuid", **extra) def get_cached_form_data(form_data_key: str) -> str | None: diff --git a/superset/mcp_service/chart/tool/get_chart_data.py b/superset/mcp_service/chart/tool/get_chart_data.py index 2d1b03db8fa..f4d3ac257b4 100644 --- a/superset/mcp_service/chart/tool/get_chart_data.py +++ b/superset/mcp_service/chart/tool/get_chart_data.py @@ -26,6 +26,7 @@ from typing import Any, Dict, List, TYPE_CHECKING from fastmcp import Context from flask import current_app from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import subqueryload from superset_core.mcp.decorators import tool, ToolAnnotations if TYPE_CHECKING: @@ -350,7 +351,18 @@ async def get_chart_data( # noqa: C901 # Build query context entirely from cached form_data return await _query_from_form_data(cached_form_data_dict, request, ctx) - # Find the chart by identifier + # Find the chart by identifier. + # Eagerly load the dataset's metrics relationship so Excel export + # (which may run after the request-scoped session is detached) can + # access dataset.metrics without triggering a lazy load. See + # apache/superset#39206 for the analogous database eager-load fix. + from superset.connectors.sqla.models import SqlaTable + from superset.models.slice import Slice + + chart_query_options = [ + subqueryload(Slice.table).subqueryload(SqlaTable.metrics), + ] + with event_logger.log_context(action="mcp.get_chart_data.chart_lookup"): await ctx.debug("Looking up chart: identifier=%s" % (request.identifier,)) if request.identifier is None: @@ -358,7 +370,9 @@ async def get_chart_data( # noqa: C901 error="Chart identifier is required", error_type="ValidationError", ) - chart = find_chart_by_identifier(request.identifier) + chart = find_chart_by_identifier( + request.identifier, query_options=chart_query_options + ) if not chart: await ctx.warning("Chart not found: identifier=%s" % (request.identifier,)) diff --git a/tests/unit_tests/mcp_service/chart/tool/test_get_chart_data.py b/tests/unit_tests/mcp_service/chart/tool/test_get_chart_data.py index 7a368093313..2f46fbd5c57 100644 --- a/tests/unit_tests/mcp_service/chart/tool/test_get_chart_data.py +++ b/tests/unit_tests/mcp_service/chart/tool/test_get_chart_data.py @@ -1173,6 +1173,157 @@ class TestChartDataCommandValidation: mock_command.run.assert_not_called() +@pytest.fixture +def mcp_server(): + from superset.mcp_service.app import mcp + + return mcp + + +@pytest.fixture +def mock_auth(): + """Mock MCP auth so Client.call_tool() doesn't need a real admin user.""" + import importlib + from contextlib import contextmanager + from unittest.mock import Mock, patch + + _gcd_module = importlib.import_module( + "superset.mcp_service.chart.tool.get_chart_data" + ) + + @contextmanager + def _noop_log_context(*_args: Any, **_kwargs: Any) -> Any: + yield lambda **_kw: None + + # Neutralize event_logger.log_context: the default DBEventLogger would + # otherwise insert a log row referencing our mock user_id and fail a + # FK constraint against the real users table. Patch via the module + # object directly — the `tool` package's __init__.py re-exports the + # get_chart_data function under the same name, which shadows the + # submodule binding in the package namespace, so a dotted-string patch + # target resolves to the function and mock.patch cannot find + # event_logger on it. + mock_event_logger = Mock() + mock_event_logger.log_context.side_effect = _noop_log_context + with ( + patch("superset.mcp_service.auth.get_user_from_request") as mock_get_user, + patch.object(_gcd_module, "event_logger", mock_event_logger), + ): + user = Mock() + user.id = 1 + user.username = "admin" + mock_get_user.return_value = user + yield mock_get_user + + +def _extract_metrics_load_path(load_opt: Any) -> list[str]: + """Walk a SQLAlchemy Load option and return the attr chain. + + e.g. subqueryload(Slice.table).subqueryload(SqlaTable.metrics) + -> ["table", "metrics"] + """ + path = getattr(load_opt, "path", ()) + return [elem.key for elem in path if hasattr(elem, "key")] + + +class TestChartLookupEagerLoading: + """Tests that get_chart_data eager-loads dataset.metrics on chart lookup. + + Regression tests for the Excel export DetachedInstanceError on + dataset.metrics. The chart's dataset metrics relationship must be + eager-loaded at fetch time so it remains accessible during Excel export + after the request-scoped session is detached. + """ + + @pytest.mark.asyncio + async def test_numeric_id_lookup_passes_metrics_eager_load( + self, mcp_server, mock_auth + ): + """Integer identifier lookup must eager-load Slice.table.metrics.""" + from unittest.mock import patch + + from fastmcp import Client + + with patch( + "superset.daos.chart.ChartDAO.find_by_id", return_value=None + ) as mock_find: + async with Client(mcp_server) as client: + await client.call_tool( + "get_chart_data", + {"request": {"identifier": 42, "format": "excel"}}, + ) + + mock_find.assert_called_once() + call = mock_find.call_args + assert call.args == (42,) + query_options = call.kwargs.get("query_options") + assert query_options is not None, ( + "Chart lookup must pass query_options for eager-loading." + ) + assert len(query_options) == 1 + load_path = _extract_metrics_load_path(query_options[0]) + assert load_path == ["table", "metrics"], ( + f"Expected subqueryload chain 'table' -> 'metrics', got {load_path}" + ) + + @pytest.mark.asyncio + async def test_uuid_lookup_passes_metrics_eager_load(self, mcp_server, mock_auth): + """UUID identifier lookup must also eager-load Slice.table.metrics.""" + from unittest.mock import patch + + from fastmcp import Client + + uuid = "a1b2c3d4-5678-90ab-cdef-1234567890ab" + + with patch( + "superset.daos.chart.ChartDAO.find_by_id", return_value=None + ) as mock_find: + async with Client(mcp_server) as client: + await client.call_tool( + "get_chart_data", + {"request": {"identifier": uuid, "format": "excel"}}, + ) + + mock_find.assert_called_once() + call = mock_find.call_args + assert call.args == (uuid,) + assert call.kwargs.get("id_column") == "uuid" + query_options = call.kwargs.get("query_options") + assert query_options is not None, ( + "UUID chart lookup must pass query_options for eager-loading." + ) + load_path = _extract_metrics_load_path(query_options[0]) + assert load_path == ["table", "metrics"], ( + f"Expected subqueryload chain 'table' -> 'metrics', got {load_path}" + ) + + @pytest.mark.asyncio + async def test_json_format_also_eager_loads_metrics(self, mcp_server, mock_auth): + """Eager-load is applied for every format, not just Excel. + + Applying unconditionally keeps the fix robust if additional code paths + start touching dataset.metrics, and avoids branching behavior that + would be easy to regress on. + """ + from unittest.mock import patch + + from fastmcp import Client + + with patch( + "superset.daos.chart.ChartDAO.find_by_id", return_value=None + ) as mock_find: + async with Client(mcp_server) as client: + await client.call_tool( + "get_chart_data", + {"request": {"identifier": 7, "format": "json"}}, + ) + + call = mock_find.call_args + query_options = call.kwargs.get("query_options") + assert query_options is not None + assert _extract_metrics_load_path(query_options[0]) == ["table", "metrics"] + + # --------------------------------------------------------------------------- # Tests for _recommend_visualizations # ---------------------------------------------------------------------------