mirror of
https://github.com/apache/superset.git
synced 2026-05-13 11:55:16 +00:00
Compare commits
5 Commits
ts6-migrat
...
fix/mcp-li
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
18e8217498 | ||
|
|
32234ea0bb | ||
|
|
a2b0f64176 | ||
|
|
4d66dc0774 | ||
|
|
5542a2f3b1 |
@@ -383,6 +383,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"superset/mcp_service/app.py" = ["S608", "E501"] # LLM instruction text: SQL examples (S608) and long lines in multiline string (E501)
|
||||
"superset/mcp_service/*/tool/list_*.py" = ["E501"] # LLM docstring examples show full request shapes which exceed line length
|
||||
"scripts/*" = ["TID251"]
|
||||
"setup.py" = ["TID251"]
|
||||
"superset/config.py" = ["TID251"]
|
||||
|
||||
@@ -537,8 +537,11 @@ class ChartFilter(ColumnOperator):
|
||||
"datasource_name",
|
||||
] = Field(
|
||||
...,
|
||||
description="Column to filter on. Use get_schema(model_type='chart') for "
|
||||
"available filter columns.",
|
||||
description=(
|
||||
"Column to filter on. Valid values: 'slice_name', 'viz_type', "
|
||||
"'datasource_name'. Other column names are not valid filter columns "
|
||||
"and will cause a validation error."
|
||||
),
|
||||
)
|
||||
opr: ColumnOperatorEnum = Field(
|
||||
...,
|
||||
|
||||
@@ -91,8 +91,24 @@ async def list_charts(
|
||||
Returns chart metadata including id, name, viz_type, URL, and last
|
||||
modified time.
|
||||
|
||||
Sortable columns for order_column: id, slice_name, viz_type, description,
|
||||
changed_on, created_on
|
||||
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
|
||||
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
|
||||
keyword arguments — they will be rejected. Use the ``request`` wrapper::
|
||||
|
||||
# Correct usage
|
||||
list_charts(request={"search": "revenue", "page": 1, "page_size": 10})
|
||||
list_charts(request={"filters": [{"col": "slice_name", "opr": "sw", "value": "sales"}]})
|
||||
list_charts() # no arguments returns first page with defaults
|
||||
|
||||
# Wrong — causes pydantic validation errors
|
||||
list_charts(search="revenue", page=1) # DO NOT DO THIS
|
||||
|
||||
Valid filter columns for ``filters[].col``:
|
||||
``slice_name``, ``viz_type``, ``datasource_name``
|
||||
|
||||
Sortable columns for ``order_column``:
|
||||
``id``, ``slice_name``, ``viz_type``, ``description``,
|
||||
``changed_on``, ``created_on``
|
||||
"""
|
||||
request = request or _DEFAULT_LIST_CHARTS_REQUEST.model_copy(deep=True)
|
||||
await ctx.info(
|
||||
|
||||
@@ -176,8 +176,9 @@ class DashboardFilter(ColumnOperator):
|
||||
] = Field(
|
||||
...,
|
||||
description=(
|
||||
"Column to filter on. Use "
|
||||
"get_schema(model_type='dashboard') for available filter columns."
|
||||
"Column to filter on. Valid values: 'dashboard_title', 'published', "
|
||||
"'favorite'. Other column names are not valid filter columns and will "
|
||||
"cause a validation error."
|
||||
),
|
||||
)
|
||||
opr: ColumnOperatorEnum = Field(
|
||||
|
||||
@@ -85,8 +85,24 @@ async def list_dashboards(
|
||||
including title, slug, URL, and last modified time. Use select_columns to
|
||||
request additional fields.
|
||||
|
||||
Sortable columns for order_column: id, dashboard_title, slug, published,
|
||||
changed_on, created_on
|
||||
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
|
||||
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
|
||||
keyword arguments — they will be rejected. Use the ``request`` wrapper::
|
||||
|
||||
# Correct usage
|
||||
list_dashboards(request={"search": "sales", "page": 1, "page_size": 10})
|
||||
list_dashboards(request={"filters": [{"col": "dashboard_title", "opr": "sw", "value": "exec"}]})
|
||||
list_dashboards() # no arguments returns first page with defaults
|
||||
|
||||
# Wrong — causes pydantic validation errors
|
||||
list_dashboards(search="sales", page=1) # DO NOT DO THIS
|
||||
|
||||
Valid filter columns for ``filters[].col``:
|
||||
``dashboard_title``, ``published``, ``favorite``
|
||||
|
||||
Sortable columns for ``order_column``:
|
||||
``id``, ``dashboard_title``, ``slug``, ``published``,
|
||||
``changed_on``, ``created_on``
|
||||
"""
|
||||
request = request or _DEFAULT_LIST_DASHBOARDS_REQUEST.model_copy(deep=True)
|
||||
await ctx.info(
|
||||
|
||||
@@ -71,8 +71,11 @@ class DatasetFilter(ColumnOperator):
|
||||
"database_name",
|
||||
] = Field(
|
||||
...,
|
||||
description="Column to filter on. Use get_schema(model_type='dataset') for "
|
||||
"available filter columns.",
|
||||
description=(
|
||||
"Column to filter on. Valid values: 'table_name', 'schema', "
|
||||
"'database_name'. Other column names (e.g. 'created_by_fk', 'id') "
|
||||
"are not valid filter columns and will cause a validation error."
|
||||
),
|
||||
)
|
||||
opr: ColumnOperatorEnum = Field(
|
||||
...,
|
||||
|
||||
@@ -96,8 +96,23 @@ async def list_datasets(
|
||||
Returns dataset metadata including table name, schema, and last modified
|
||||
time.
|
||||
|
||||
Sortable columns for order_column: id, table_name, schema, changed_on,
|
||||
created_on
|
||||
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
|
||||
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
|
||||
keyword arguments — they will be rejected. Use the ``request`` wrapper::
|
||||
|
||||
# Correct usage
|
||||
list_datasets(request={"search": "sales", "page": 1, "page_size": 10})
|
||||
list_datasets(request={"filters": [{"col": "table_name", "opr": "sw", "value": "orders"}]})
|
||||
list_datasets() # no arguments returns first page with defaults
|
||||
|
||||
# Wrong — causes pydantic validation errors
|
||||
list_datasets(search="sales", page=1) # DO NOT DO THIS
|
||||
|
||||
Valid filter columns for ``filters[].col``:
|
||||
``table_name``, ``schema``, ``database_name``
|
||||
|
||||
Sortable columns for ``order_column``:
|
||||
``id``, ``table_name``, ``schema``, ``changed_on``, ``created_on``
|
||||
"""
|
||||
if ctx is None:
|
||||
raise RuntimeError("FastMCP context is required for list_datasets")
|
||||
|
||||
@@ -1349,7 +1349,8 @@ class TestDashboardSortableColumns:
|
||||
|
||||
# Check list_dashboards docstring for sortable columns documentation
|
||||
assert list_dashboards.__doc__ is not None
|
||||
assert "Sortable columns for order_column:" in list_dashboards.__doc__
|
||||
assert "Sortable columns for" in list_dashboards.__doc__
|
||||
assert "order_column" in list_dashboards.__doc__
|
||||
for col in SORTABLE_DASHBOARD_COLUMNS:
|
||||
assert col in list_dashboards.__doc__
|
||||
|
||||
|
||||
@@ -1700,7 +1700,8 @@ class TestDatasetSortableColumns:
|
||||
|
||||
# Check list_datasets docstring for sortable columns documentation
|
||||
assert list_datasets.__doc__ is not None
|
||||
assert "Sortable columns for order_column:" in list_datasets.__doc__
|
||||
assert "Sortable columns for" in list_datasets.__doc__
|
||||
assert "order_column" in list_datasets.__doc__
|
||||
for col in SORTABLE_DATASET_COLUMNS:
|
||||
assert col in list_datasets.__doc__
|
||||
|
||||
@@ -2080,3 +2081,90 @@ class TestListDatasetsOwnedByMe:
|
||||
request = ListDatasetsRequest(owned_by_me=True, created_by_me=True)
|
||||
assert request.owned_by_me is True
|
||||
assert request.created_by_me is True
|
||||
|
||||
|
||||
class TestListDatasetsRequestWrapper:
|
||||
"""
|
||||
Tests verifying that list_datasets requires a ``request`` wrapper object.
|
||||
|
||||
LLMs sometimes pass parameters like ``search``, ``page``, or ``page_size``
|
||||
as flat top-level kwargs instead of nesting them inside a ``request``
|
||||
object. These tests confirm the correct call shape through both the Pydantic
|
||||
schema and the actual MCP tool layer, and verify that invalid filter column
|
||||
names (e.g. ``created_by_fk``) are rejected.
|
||||
"""
|
||||
|
||||
def test_request_wrapper_with_search(self) -> None:
|
||||
"""Parameters passed inside request= are accepted by the schema."""
|
||||
request = ListDatasetsRequest(search="sales", page=1, page_size=10)
|
||||
assert request.search == "sales"
|
||||
assert request.page == 1
|
||||
assert request.page_size == 10
|
||||
|
||||
def test_request_wrapper_defaults(self) -> None:
|
||||
"""No-arg constructor produces valid schema defaults."""
|
||||
request = ListDatasetsRequest()
|
||||
assert request.search is None
|
||||
assert request.page == 1
|
||||
assert request.filters == []
|
||||
|
||||
def test_dataset_filter_valid_col(self) -> None:
|
||||
"""Valid col values are accepted by DatasetFilter."""
|
||||
for col in ("table_name", "schema", "database_name"):
|
||||
f = DatasetFilter(col=col, opr="sw", value="test")
|
||||
assert f.col == col
|
||||
|
||||
def test_dataset_filter_invalid_col_raises(self) -> None:
|
||||
"""Column names not in the Literal are rejected with a validation error.
|
||||
|
||||
This guards against LLMs passing ``created_by_fk`` or similar
|
||||
internal column names that are not exposed as filter fields.
|
||||
"""
|
||||
from pydantic import ValidationError
|
||||
|
||||
for bad_col in ("created_by_fk", "id", "database_id", "owner"):
|
||||
with pytest.raises(ValidationError):
|
||||
DatasetFilter(col=bad_col, opr="eq", value="1")
|
||||
|
||||
@patch("superset.daos.dataset.DatasetDAO.list")
|
||||
@pytest.mark.asyncio
|
||||
async def test_request_wrapper_enforced_by_tool(
|
||||
self, mock_list, mcp_server
|
||||
) -> None:
|
||||
"""The MCP tool layer accepts the request wrapper and returns results.
|
||||
|
||||
Verifies end-to-end that wrapping params in ``request={}`` works through
|
||||
the actual FastMCP tool call, not just schema validation.
|
||||
"""
|
||||
mock_list.return_value = ([], 0)
|
||||
async with Client(mcp_server) as client:
|
||||
result = await client.call_tool(
|
||||
"list_datasets",
|
||||
{"request": {"search": "sales", "page": 1, "page_size": 5}},
|
||||
)
|
||||
data = json.loads(result.content[0].text)
|
||||
assert data["count"] == 0
|
||||
assert data["datasets"] == []
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_flat_kwargs_rejected(self, mcp_server) -> None:
|
||||
"""Passing search/page/page_size as top-level kwargs raises a ToolError
|
||||
that specifically mentions the unexpected arguments.
|
||||
|
||||
This is the exact failure pattern from story #105712: LLMs call
|
||||
``list_datasets(search=..., page=..., page_size=...)`` instead of
|
||||
``list_datasets(request={...})``.
|
||||
"""
|
||||
with pytest.raises(ToolError) as exc_info:
|
||||
async with Client(mcp_server) as client:
|
||||
await client.call_tool(
|
||||
"list_datasets",
|
||||
{"search": "sales", "page": 1, "page_size": 10},
|
||||
)
|
||||
error_text = str(exc_info.value)
|
||||
# The error must call out the unexpected arguments, not some unrelated failure
|
||||
assert (
|
||||
"search" in error_text
|
||||
or "Unexpected" in error_text
|
||||
or "request" in error_text
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user