Compare commits

...

3 Commits

Author SHA1 Message Date
Amin Ghadersohi
c0ef691886 test(mcp): add created_by_fk_or_owner to filter mocks and dashboard test
Add `created_by_fk_or_owner` to the mock return values in the chart and
dataset self-referencing filter tests so the exclusion is actually exercised
(previously the assertion was vacuously true since the key was never in the
mock). Also add a matching test for the dashboard schema core, which received
the same exclude_filter_columns fix but had no corresponding coverage.
2026-05-07 15:37:16 +00:00
Elizabeth Thompson
f6ef071d2b test(mcp): add get_schema filter_columns exclusion tests for chart and dataset
The existing test only covered dashboard. Add parallel tests for chart and
dataset to verify that created_by_fk, owner, and created_by_fk_or_owner are
excluded from filter_columns even when the DAO returns them.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 23:43:52 +00:00
Elizabeth Thompson
373c366907 fix(mcp): exclude self-referencing filter columns from get_schema output
After #39638, chart/dataset/dashboard ModelGetSchemaCore instances had no
exclude_filter_columns set, so get_schema(model_type='chart|dataset|dashboard')
still advertised created_by_fk and owner in filter_columns. LLMs would call
get_schema to discover available filters, see those columns, and pass real user
IDs directly — bypassing the created_by_me/owned_by_me server-side injection
added in #39638.

Pass SELF_REFERENCING_FILTER_COLUMNS as exclude_filter_columns for chart,
dataset, and dashboard schema cores (the database core already excluded its
user-directory columns via DATABASE_EXCLUDE_COLUMNS).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 23:10:32 +00:00
2 changed files with 91 additions and 0 deletions

View File

@@ -56,6 +56,7 @@ from superset.mcp_service.mcp_core import ModelGetSchemaCore
from superset.mcp_service.privacy import (
PrivacyError,
remove_chart_data_model_columns,
SELF_REFERENCING_FILTER_COLUMNS,
user_can_view_data_model_metadata,
)
@@ -77,6 +78,7 @@ def _get_chart_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]:
search_columns=CHART_SEARCH_COLUMNS,
default_sort="changed_on",
default_sort_direction="desc",
exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS),
logger=logger,
)
@@ -96,6 +98,7 @@ def _get_dataset_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]:
search_columns=DATASET_SEARCH_COLUMNS,
default_sort="changed_on",
default_sort_direction="desc",
exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS),
logger=logger,
)
@@ -115,6 +118,7 @@ def _get_dashboard_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]:
search_columns=DASHBOARD_SEARCH_COLUMNS,
default_sort="changed_on",
default_sort_direction="desc",
exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS),
logger=logger,
)

View File

@@ -355,6 +355,93 @@ class TestGetSchemaToolViaClient:
assert field not in info["filter_columns"]
assert field not in info["sortable_columns"]
@patch("superset.daos.chart.ChartDAO.get_filterable_columns_and_operators")
@pytest.mark.asyncio
async def test_get_schema_chart_omits_self_referencing_filter_columns(
self, mock_filters, mcp_server
):
"""Test that chart schema does not advertise self-referencing filter columns.
Even if the DAO returns created_by_fk or owner, they must be excluded so
LLMs cannot discover and use them to enumerate user IDs.
"""
mock_filters.return_value = {
"slice_name": ["eq", "ilike"],
"created_by_fk": ["eq"],
"owner": ["eq", "in"],
"created_by_fk_or_owner": ["eq"],
}
async with Client(mcp_server) as client:
result = await client.call_tool(
"get_schema", {"request": {"model_type": "chart"}}
)
data = json.loads(result.content[0].text)
info = data["schema_info"]
assert "slice_name" in info["filter_columns"]
for field in ("created_by_fk", "owner", "created_by_fk_or_owner"):
assert field not in info["filter_columns"]
@patch("superset.daos.dataset.DatasetDAO.get_filterable_columns_and_operators")
@pytest.mark.asyncio
async def test_get_schema_dataset_omits_self_referencing_filter_columns(
self, mock_filters, mcp_server
):
"""Test that dataset schema does not advertise self-referencing filter columns.
Even if the DAO returns created_by_fk or owner, they must be excluded so
LLMs cannot discover and use them to enumerate user IDs.
"""
mock_filters.return_value = {
"table_name": ["eq", "ilike"],
"created_by_fk": ["eq"],
"owner": ["eq", "in"],
"created_by_fk_or_owner": ["eq"],
}
async with Client(mcp_server) as client:
result = await client.call_tool(
"get_schema", {"request": {"model_type": "dataset"}}
)
data = json.loads(result.content[0].text)
info = data["schema_info"]
assert "table_name" in info["filter_columns"]
for field in ("created_by_fk", "owner", "created_by_fk_or_owner"):
assert field not in info["filter_columns"]
@patch("superset.daos.dashboard.DashboardDAO.get_filterable_columns_and_operators")
@pytest.mark.asyncio
async def test_get_schema_dashboard_omits_self_referencing_filter_columns(
self, mock_filters, mcp_server
):
"""Test dashboard schema omits self-referencing filter columns.
Even if the DAO returns created_by_fk or owner, they must be excluded
so LLMs cannot discover and use them to enumerate user IDs.
"""
mock_filters.return_value = {
"dashboard_title": ["eq", "ilike"],
"created_by_fk": ["eq"],
"owner": ["eq", "in"],
"created_by_fk_or_owner": ["eq"],
}
async with Client(mcp_server) as client:
result = await client.call_tool(
"get_schema", {"request": {"model_type": "dashboard"}}
)
data = json.loads(result.content[0].text)
info = data["schema_info"]
assert "dashboard_title" in info["filter_columns"]
for field in ("created_by_fk", "owner", "created_by_fk_or_owner"):
assert field not in info["filter_columns"]
class TestGetSchemaEdgeCases:
"""Test edge cases for get_schema tool."""