fix(mcp): validate dataset exists in generate_explore_link before generating URL (#38951)

This commit is contained in:
Amin Ghadersohi
2026-03-30 15:29:29 +02:00
committed by GitHub
parent aa1a69555b
commit 89f7e5e7ba
2 changed files with 103 additions and 9 deletions

View File

@@ -97,7 +97,38 @@ async def generate_explore_link(
)
try:
await ctx.report_progress(1, 3, "Converting configuration to form data")
await ctx.report_progress(1, 4, "Validating dataset exists")
with event_logger.log_context(action="mcp.generate_explore_link.dataset_check"):
from superset.daos.dataset import DatasetDAO
dataset = None
if isinstance(request.dataset_id, int) or (
isinstance(request.dataset_id, str) and request.dataset_id.isdigit()
):
dataset_id_int = (
int(request.dataset_id)
if isinstance(request.dataset_id, str)
else request.dataset_id
)
dataset = DatasetDAO.find_by_id(dataset_id_int)
else:
dataset = DatasetDAO.find_by_id(request.dataset_id, id_column="uuid")
if not dataset:
await ctx.error(
"Dataset not found: dataset_id=%s" % (request.dataset_id,)
)
return {
"url": "",
"form_data": {},
"form_data_key": None,
"error": (
f"Dataset not found: {request.dataset_id}. "
"Use list_datasets to find valid dataset IDs."
),
}
await ctx.report_progress(2, 4, "Converting configuration to form data")
with event_logger.log_context(action="mcp.generate_explore_link.form_data"):
# Normalize column names to match canonical dataset column names
# This fixes case sensitivity issues (e.g., 'order_date' vs 'OrderDate')
@@ -131,7 +162,7 @@ async def generate_explore_link(
)
)
await ctx.report_progress(2, 3, "Generating explore URL")
await ctx.report_progress(3, 4, "Generating explore URL")
with event_logger.log_context(
action="mcp.generate_explore_link.url_generation"
):
@@ -149,7 +180,7 @@ async def generate_explore_link(
if form_data_key_list:
form_data_key = form_data_key_list[0]
await ctx.report_progress(3, 3, "URL generation complete")
await ctx.report_progress(4, 4, "URL generation complete")
await ctx.info(
"Explore link generated successfully: url_length=%s, dataset_id=%s, "
"form_data_key=%s"

View File

@@ -312,15 +312,19 @@ class TestGenerateExploreLink:
)
mock_create_form_data.assert_called_once()
@patch("superset.daos.dataset.DatasetDAO.find_by_id")
@patch(
"superset.mcp_service.commands.create_form_data.MCPCreateFormDataCommand.run"
)
@pytest.mark.asyncio
async def test_generate_explore_link_cache_failure_fallback(
self, mock_create_form_data, mcp_server
self, mock_create_form_data, mock_find_dataset, mcp_server
):
"""Test fallback when form_data cache creation fails."""
mock_create_form_data.side_effect = Exception("Cache storage failed")
mock_find_dataset.return_value = _mock_dataset(id=1)
from superset.commands.exceptions import CommandException
mock_create_form_data.side_effect = CommandException("Cache storage failed")
config = TableChartConfig(
chart_type="table", columns=[ColumnRef(name="test_col")]
@@ -339,16 +343,18 @@ class TestGenerateExploreLink:
== "http://localhost:9001/explore/?datasource_type=table&datasource_id=1"
)
@patch("superset.daos.dataset.DatasetDAO.find_by_id")
@patch(
"superset.mcp_service.commands.create_form_data.MCPCreateFormDataCommand.run"
)
@pytest.mark.asyncio
async def test_generate_explore_link_database_lock_fallback(
self, mock_create_form_data, mcp_server
self, mock_create_form_data, mock_find_dataset, mcp_server
):
"""Test fallback when database is locked."""
from sqlalchemy.exc import OperationalError
mock_find_dataset.return_value = _mock_dataset(id=5)
mock_create_form_data.side_effect = OperationalError(
"database is locked", None, None
)
@@ -584,15 +590,19 @@ class TestGenerateExploreLink:
)
mock_create_form_data.assert_called_once()
@patch("superset.daos.dataset.DatasetDAO.find_by_id")
@patch(
"superset.mcp_service.commands.create_form_data.MCPCreateFormDataCommand.run"
)
@pytest.mark.asyncio
async def test_fallback_url_different_datasets(
self, mock_create_form_data, mcp_server
self, mock_create_form_data, mock_find_dataset, mcp_server
):
"""Test fallback URLs are correct for different dataset IDs."""
mock_create_form_data.side_effect = Exception(
mock_find_dataset.return_value = _mock_dataset(id=1)
from superset.commands.exceptions import CommandException
mock_create_form_data.side_effect = CommandException(
"Always fail for fallback testing"
)
@@ -612,9 +622,13 @@ class TestGenerateExploreLink:
assert result.data["error"] is None
assert result.data["url"] == expected_url
@patch("superset.daos.dataset.DatasetDAO.find_by_id")
@pytest.mark.asyncio
async def test_generate_explore_link_tool_exception_handling(self, mcp_server):
async def test_generate_explore_link_tool_exception_handling(
self, mock_find_dataset, mcp_server
):
"""Test that tool-level exceptions are properly handled and return error."""
mock_find_dataset.return_value = _mock_dataset(id=1)
import sys
# Get the actual module object from sys.modules (not via __init__.py which
@@ -708,6 +722,55 @@ class TestGenerateExploreLink:
# Verify datasource field format: "{dataset_id}__table"
assert result.data["form_data"].get("datasource") == "1__table"
@patch("superset.daos.dataset.DatasetDAO.find_by_id")
@pytest.mark.asyncio
async def test_generate_explore_link_nonexistent_dataset(
self, mock_find_dataset, mcp_server
):
"""Test nonexistent dataset_id returns error instead of broken URL."""
mock_find_dataset.return_value = None
config = TableChartConfig(
chart_type="table", columns=[ColumnRef(name="test_col")]
)
request = GenerateExploreLinkRequest(dataset_id="99999", config=config)
async with Client(mcp_server) as client:
result = await client.call_tool(
"generate_explore_link", {"request": request.model_dump()}
)
assert result.data["url"] == ""
assert result.data["form_data"] == {}
assert result.data["form_data_key"] is None
assert "Dataset not found: 99999" in result.data["error"]
assert "list_datasets" in result.data["error"]
@patch("superset.daos.dataset.DatasetDAO.find_by_id")
@pytest.mark.asyncio
async def test_generate_explore_link_nonexistent_uuid_dataset(
self, mock_find_dataset, mcp_server
):
"""Test that nonexistent UUID dataset_id returns structured error."""
mock_find_dataset.return_value = None
config = TableChartConfig(
chart_type="table", columns=[ColumnRef(name="test_col")]
)
request = GenerateExploreLinkRequest(
dataset_id="00000000-0000-0000-0000-000000000000", config=config
)
async with Client(mcp_server) as client:
result = await client.call_tool(
"generate_explore_link", {"request": request.model_dump()}
)
assert result.data["url"] == ""
assert result.data["form_data"] == {}
assert result.data["form_data_key"] is None
assert "Dataset not found" in result.data["error"]
class TestGenerateExploreLinkColumnNormalization:
"""Tests that generate_explore_link normalizes column names.