mirror of
https://github.com/apache/superset.git
synced 2026-07-01 12:25:32 +00:00
Compare commits
5 Commits
chore/ci/s
...
oss-40340
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9b6ff262fd | ||
|
|
576d40111b | ||
|
|
178fe56c9c | ||
|
|
de4da995b2 | ||
|
|
0f7f92011c |
@@ -130,20 +130,17 @@ Dashboard Management:
|
|||||||
- generate_dashboard: Create a dashboard from chart IDs (requires write access)
|
- generate_dashboard: Create a dashboard from chart IDs (requires write access)
|
||||||
- add_chart_to_existing_dashboard: Add a chart to an existing dashboard (requires write access)
|
- add_chart_to_existing_dashboard: Add a chart to an existing dashboard (requires write access)
|
||||||
|
|
||||||
Database Connections:
|
|
||||||
- list_databases: List database connections with advanced filters (1-based pagination)
|
|
||||||
- get_database_info: Get detailed database connection info by ID (backend, capabilities)
|
|
||||||
|
|
||||||
Dataset Management:
|
Dataset Management:
|
||||||
- list_datasets: List datasets with advanced filters (1-based pagination)
|
- list_datasets: List datasets with advanced filters (1-based pagination)
|
||||||
- get_dataset_info: Get detailed dataset information by ID (includes columns/metrics)
|
- get_dataset_info: Get detailed dataset information by ID (includes columns/metrics)
|
||||||
|
- create_dataset: Register a physical table as a dataset against an existing DB connection (requires write access)
|
||||||
- create_virtual_dataset: Save a SQL query as a virtual dataset for charting (requires write access)
|
- create_virtual_dataset: Save a SQL query as a virtual dataset for charting (requires write access)
|
||||||
- query_dataset: Query a dataset using its semantic layer (saved metrics, dimensions, filters) without needing a saved chart
|
- query_dataset: Query a dataset using its semantic layer (saved metrics, dimensions, filters) without needing a saved chart
|
||||||
|
|
||||||
Chart Management:
|
Chart Management:
|
||||||
- list_charts: List charts with advanced filters (1-based pagination)
|
- list_charts: List charts with advanced filters (1-based pagination)
|
||||||
- get_chart_info: Get detailed chart information by ID
|
- get_chart_info: Get detailed chart information by ID
|
||||||
- get_chart_preview: Get a visual preview of a chart as formatted content or URL
|
- get_chart_preview: Get a visual preview of a chart with image URL
|
||||||
- get_chart_data: Get underlying chart data in text-friendly format
|
- get_chart_data: Get underlying chart data in text-friendly format
|
||||||
- get_chart_sql: Get the rendered SQL query for a chart (without executing it)
|
- get_chart_sql: Get the rendered SQL query for a chart (without executing it)
|
||||||
- generate_chart: Create and save a new chart permanently (requires write access)
|
- generate_chart: Create and save a new chart permanently (requires write access)
|
||||||
@@ -163,30 +160,25 @@ System Information:
|
|||||||
- get_instance_info: Get instance-wide statistics, metadata, and current user identity
|
- get_instance_info: Get instance-wide statistics, metadata, and current user identity
|
||||||
- find_users: Resolve a person's name to user IDs for use as a filter value
|
- find_users: Resolve a person's name to user IDs for use as a filter value
|
||||||
- health_check: Simple health check tool (takes NO parameters, call without arguments)
|
- health_check: Simple health check tool (takes NO parameters, call without arguments)
|
||||||
- generate_bug_report: Build a PII-sanitized bug report to send to Preset support
|
|
||||||
(use when the user says the MCP is broken or asks how to report an issue)
|
|
||||||
|
|
||||||
Available Resources:
|
Available Resources:
|
||||||
- instance://metadata: Instance configuration, stats, and available dataset IDs
|
- instance/metadata: Access instance configuration and metadata
|
||||||
- chart://configs: Valid chart configuration examples and best practices
|
- chart/templates: Access chart configuration templates
|
||||||
|
|
||||||
Available Prompts:
|
Available Prompts:
|
||||||
- quickstart: Interactive guide for getting started with the MCP service
|
- quickstart: Interactive guide for getting started with the MCP service
|
||||||
- create_chart_guided: Step-by-step chart creation wizard
|
- create_chart_guided: Step-by-step chart creation wizard
|
||||||
|
|
||||||
IMPORTANT - Using Saved Metrics vs Columns:
|
Common Chart Types (viz_type) and Behaviors:
|
||||||
When get_dataset_info returns a dataset, it includes both 'columns' and 'metrics'.
|
|
||||||
- 'columns' are raw database columns (e.g., order_date, product_name, revenue)
|
|
||||||
- 'metrics' are pre-defined saved metrics with SQL expressions
|
|
||||||
(e.g., count, total_revenue)
|
|
||||||
|
|
||||||
When building chart configurations
|
Interactive Charts (support sorting, filtering, drill-down):
|
||||||
(generate_chart, generate_explore_link, update_chart):
|
- table: Standard table view with sorting and filtering
|
||||||
- For raw columns: use {{"name": "col_name", "aggregate": "SUM"}}
|
- pivot_table_v2: Pivot table with grouping and aggregations
|
||||||
- For saved metrics: use {{"name": "metric", "saved_metric": true}}
|
- echarts_timeseries_line: Time series line chart
|
||||||
Do NOT add an aggregate when using saved_metric=true
|
- echarts_timeseries_bar: Time series bar chart
|
||||||
(it's already defined in the metric).
|
- echarts_timeseries_area: Time series area chart
|
||||||
Do NOT use a saved metric name as if it were a column — it will fail.
|
- echarts_timeseries_scatter: Time series scatter plot
|
||||||
|
- mixed_timeseries: Combined line/bar time series
|
||||||
|
|
||||||
Example: If get_dataset_info returns metrics=[{{"metric_name": "count", ...}}], use:
|
Example: If get_dataset_info returns metrics=[{{"metric_name": "count", ...}}], use:
|
||||||
{{"name": "count", "saved_metric": true}} ← CORRECT
|
{{"name": "count", "saved_metric": true}} ← CORRECT
|
||||||
@@ -315,52 +307,11 @@ Chart Types in Existing Charts (viewable via list_charts/get_chart_info):
|
|||||||
- word_cloud, world_map, box_plot, bubble, mixed_timeseries
|
- word_cloud, world_map, box_plot, bubble, mixed_timeseries
|
||||||
|
|
||||||
Query Examples:
|
Query Examples:
|
||||||
- List all tables:
|
- List all interactive tables:
|
||||||
list_charts(request={{"filters": [{{"col": "viz_type",
|
filters=[{{"col": "viz_type", "opr": "in", "value": ["table", "pivot_table_v2"]}}]
|
||||||
"opr": "in",
|
|
||||||
"value": ["table", "pivot_table_v2"]}}]}})
|
|
||||||
- List time series charts:
|
- List time series charts:
|
||||||
list_charts(request={{"filters": [{{"col": "viz_type",
|
filters=[{{"col": "viz_type", "opr": "sw", "value": "echarts_timeseries"}}]
|
||||||
"opr": "sw", "value": "echarts_timeseries"}}]}})
|
- Search by name: search="sales"
|
||||||
- Search by name: list_charts(request={{"search": "sales"}})
|
|
||||||
- My charts: list_charts(request={{"created_by_me": true}})
|
|
||||||
- My dashboards: list_dashboards(request={{"created_by_me": true}})
|
|
||||||
- My databases: list_databases(request={{"created_by_me": true}})
|
|
||||||
To modify an existing chart (add filters, change metrics, etc.):
|
|
||||||
1. get_chart_info(request={{"identifier": <chart_id>}})
|
|
||||||
-> examine current configuration
|
|
||||||
2. update_chart(request={{
|
|
||||||
"identifier": <chart_id>, "config": {{...}}
|
|
||||||
}}) -> apply changes
|
|
||||||
Do NOT use execute_sql for chart modifications.
|
|
||||||
Use update_chart instead.
|
|
||||||
|
|
||||||
CRITICAL RULES - NEVER VIOLATE:
|
|
||||||
- NEVER fabricate or invent URLs. ALL URLs must come from tool call results.
|
|
||||||
If you need a link, call the appropriate tool (generate_explore_link, generate_chart,
|
|
||||||
open_sql_lab_with_context, etc.) and use the URL it returns.
|
|
||||||
- NEVER call generate_dashboard when the user wants to add a chart to an EXISTING
|
|
||||||
dashboard. Always use add_chart_to_existing_dashboard. Only call generate_dashboard
|
|
||||||
to create a brand-new dashboard, or after the user explicitly confirms they want
|
|
||||||
a new one (e.g., after a permission_denied=True response from
|
|
||||||
add_chart_to_existing_dashboard).
|
|
||||||
- To modify an existing chart's filters, metrics, or dimensions, use update_chart.
|
|
||||||
Do NOT use execute_sql for chart modifications.
|
|
||||||
- Parameter name reminders: ALWAYS use the EXACT parameter names from the tool schema.
|
|
||||||
Do NOT use Superset's internal form_data names.
|
|
||||||
|
|
||||||
IMPORTANT - Tool-Only Interaction:
|
|
||||||
- Do NOT generate code artifacts, HTML pages, JavaScript snippets, or any code intended
|
|
||||||
for the user to run. All visualization, data retrieval, and authentication are handled
|
|
||||||
by the provided MCP tools.
|
|
||||||
- Always call the appropriate tool directly instead of writing code. For example, use
|
|
||||||
generate_chart to create visualizations rather than generating plotting code.
|
|
||||||
- When a tool returns a URL (chart URL, dashboard URL, explore link, SQL Lab link),
|
|
||||||
return that URL to the user. Do NOT attempt to recreate the visualization in code.
|
|
||||||
- Do NOT generate HTML dashboards, embed scripts, or custom frontend code. Use
|
|
||||||
generate_dashboard and add_chart_to_existing_dashboard for dashboard operations.
|
|
||||||
- If a user asks for something the tools cannot do, explain the limitation and suggest
|
|
||||||
the closest available tool rather than generating code as a workaround.
|
|
||||||
|
|
||||||
General usage tips:
|
General usage tips:
|
||||||
- All listing tools use 1-based pagination (first page is 1)
|
- All listing tools use 1-based pagination (first page is 1)
|
||||||
@@ -368,7 +319,7 @@ General usage tips:
|
|||||||
- Use 'filters' parameter for advanced queries with filter columns from get_schema
|
- Use 'filters' parameter for advanced queries with filter columns from get_schema
|
||||||
- IDs can be integer or UUID format where supported
|
- IDs can be integer or UUID format where supported
|
||||||
- All tools return structured, Pydantic-typed responses
|
- All tools return structured, Pydantic-typed responses
|
||||||
- Chart previews can return ASCII text, Explore URLs, table data, or Vega-Lite specs
|
- Chart previews are served as PNG images via custom screenshot endpoints
|
||||||
|
|
||||||
Input format:
|
Input format:
|
||||||
- Tool request parameters accept structured objects (dicts/JSON)
|
- Tool request parameters accept structured objects (dicts/JSON)
|
||||||
@@ -377,10 +328,11 @@ Input format:
|
|||||||
{_feature_availability}Permission Awareness:
|
{_feature_availability}Permission Awareness:
|
||||||
{_instance_info_role_bullet}- ALWAYS check the user's roles BEFORE suggesting write operations (creating datasets,
|
{_instance_info_role_bullet}- ALWAYS check the user's roles BEFORE suggesting write operations (creating datasets,
|
||||||
charts, or dashboards). SQL execution is a separate permission — see execute_sql below.
|
charts, or dashboards). SQL execution is a separate permission — see execute_sql below.
|
||||||
- Write tools (generate_chart, generate_dashboard, update_chart, create_virtual_dataset,
|
- Write tools (generate_chart, generate_dashboard, update_chart, create_dataset,
|
||||||
save_sql_query, add_chart_to_existing_dashboard, update_chart_preview) require write
|
create_virtual_dataset, save_sql_query, add_chart_to_existing_dashboard,
|
||||||
permissions. These tools are only listed for users who have the necessary access.
|
update_chart_preview) require write permissions. These tools are only listed for
|
||||||
If a write tool does not appear in the tool list, the current user lacks write access.
|
users who have the necessary access. If a write tool does not appear in the tool
|
||||||
|
list, the current user lacks write access.
|
||||||
- execute_sql requires SQL Lab access (execute_sql_query permission), which is separate
|
- execute_sql requires SQL Lab access (execute_sql_query permission), which is separate
|
||||||
from write access. A user may have SQL Lab access without having write access to charts
|
from write access. A user may have SQL Lab access without having write access to charts
|
||||||
or dashboards, and vice versa.
|
or dashboards, and vice versa.
|
||||||
@@ -584,39 +536,13 @@ def create_mcp_app(
|
|||||||
|
|
||||||
|
|
||||||
# Create default MCP instance for backward compatibility
|
# Create default MCP instance for backward compatibility
|
||||||
|
# Tool modules can import this and use @mcp.tool decorators
|
||||||
mcp = create_mcp_app()
|
mcp = create_mcp_app()
|
||||||
|
|
||||||
# Initialize MCP dependency injection BEFORE importing tools/prompts
|
|
||||||
# This replaces the abstract @tool and @prompt decorators in superset_core.api.mcp
|
|
||||||
# with concrete implementations that can register with the mcp instance
|
|
||||||
from superset.core.mcp.core_mcp_injection import ( # noqa: E402
|
|
||||||
initialize_core_mcp_dependencies,
|
|
||||||
)
|
|
||||||
|
|
||||||
initialize_core_mcp_dependencies()
|
|
||||||
|
|
||||||
# Suppress known third-party deprecation warnings that leak to MCP clients.
|
|
||||||
# The MCP SDK captures Python warnings and forwards them to clients via
|
|
||||||
# server log entries, wasting LLM tokens and causing clients to act on
|
|
||||||
# irrelevant internal warnings. These warnings come from transitive imports
|
|
||||||
# triggered by tool/schema registration below.
|
|
||||||
import warnings # noqa: E402
|
|
||||||
|
|
||||||
warnings.filterwarnings(
|
|
||||||
"ignore",
|
|
||||||
category=DeprecationWarning,
|
|
||||||
module=r"marshmallow\..*",
|
|
||||||
)
|
|
||||||
warnings.filterwarnings(
|
|
||||||
"ignore",
|
|
||||||
category=FutureWarning,
|
|
||||||
module=r"google\..*",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Import all MCP tools to register them with the mcp instance
|
# Import all MCP tools to register them with the mcp instance
|
||||||
# NOTE: Always add new tool imports here when creating new MCP tools.
|
# NOTE: Always add new tool imports here when creating new MCP tools.
|
||||||
# Tools use the @tool decorator from `superset-core` and register automatically
|
# Tools use @mcp.tool decorators and register automatically on import.
|
||||||
# on import. Import prompts and resources to register them with the mcp instance
|
# Import prompts and resources to register them with the mcp instance
|
||||||
# NOTE: Always add new prompt/resource imports here when creating new prompts/resources.
|
# NOTE: Always add new prompt/resource imports here when creating new prompts/resources.
|
||||||
# Prompts use @mcp.prompt decorators and resources use @mcp.resource decorators.
|
# Prompts use @mcp.prompt decorators and resources use @mcp.resource decorators.
|
||||||
# They register automatically on import, similar to tools.
|
# They register automatically on import, similar to tools.
|
||||||
@@ -646,6 +572,7 @@ from superset.mcp_service.database.tool import ( # noqa: F401, E402
|
|||||||
list_databases,
|
list_databases,
|
||||||
)
|
)
|
||||||
from superset.mcp_service.dataset.tool import ( # noqa: F401, E402
|
from superset.mcp_service.dataset.tool import ( # noqa: F401, E402
|
||||||
|
create_dataset,
|
||||||
create_virtual_dataset,
|
create_virtual_dataset,
|
||||||
get_dataset_info,
|
get_dataset_info,
|
||||||
list_datasets,
|
list_datasets,
|
||||||
|
|||||||
@@ -324,6 +324,37 @@ class GetDatasetInfoRequest(MetadataCacheControl):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class CreateDatasetRequest(BaseModel):
    """Request schema for create_dataset to register a physical table as a dataset."""

    # Required: integer ID of the database connection the table is registered
    # against. No default is declared, so callers must always supply it.
    database_id: Annotated[
        int,
        Field(
            description="ID of the database connection to register the table against"
        ),
    ]
    # Optional schema/namespace of the table; declared with default=None.
    # NOTE(review): the attribute name ``schema`` is also used by pydantic's
    # own ``BaseModel`` API. Confirm that the pydantic versions this project
    # supports neither warn about the shadowed name nor mis-resolve the
    # declared default for it.
    schema: Annotated[
        str | None,
        Field(
            default=None,
            description="Schema (namespace) where the table lives, e.g. 'public'. "
            "Optional: omit to use the database default schema.",
        ),
    ]
    # Required: name of the physical table to register.
    table_name: Annotated[
        str,
        Field(description="Name of the physical table to register as a dataset"),
    ]
    # Optional list of owner user IDs; declared with default=None, meaning
    # "not supplied" (distinct from an explicit empty list).
    owners: Annotated[
        List[int] | None,
        Field(
            default=None,
            description="Optional list of owner user IDs. "
            "Defaults to the calling user.",
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
class CreateVirtualDatasetRequest(BaseModel):
|
class CreateVirtualDatasetRequest(BaseModel):
|
||||||
"""Request schema for create_virtual_dataset."""
|
"""Request schema for create_virtual_dataset."""
|
||||||
|
|
||||||
|
|||||||
@@ -15,14 +15,16 @@
|
|||||||
# specific language governing permissions and limitations
|
# specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
|
from .create_dataset import create_dataset
|
||||||
from .create_virtual_dataset import create_virtual_dataset
|
from .create_virtual_dataset import create_virtual_dataset
|
||||||
from .get_dataset_info import get_dataset_info
|
from .get_dataset_info import get_dataset_info
|
||||||
from .list_datasets import list_datasets
|
from .list_datasets import list_datasets
|
||||||
from .query_dataset import query_dataset
|
from .query_dataset import query_dataset
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
"create_dataset",
|
||||||
"create_virtual_dataset",
|
"create_virtual_dataset",
|
||||||
"list_datasets",
|
|
||||||
"get_dataset_info",
|
"get_dataset_info",
|
||||||
|
"list_datasets",
|
||||||
"query_dataset",
|
"query_dataset",
|
||||||
]
|
]
|
||||||
|
|||||||
142
superset/mcp_service/dataset/tool/create_dataset.py
Normal file
142
superset/mcp_service/dataset/tool/create_dataset.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Create dataset FastMCP tool
|
||||||
|
|
||||||
|
Registers a physical table as a Superset dataset against an existing
|
||||||
|
database connection — the programmatic equivalent of Data → Datasets → +Dataset.
|
||||||
|
Returns the same DatasetInfo shape as get_dataset_info so the caller can feed
|
||||||
|
the resulting dataset_id directly into generate_chart.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from fastmcp import Context
|
||||||
|
from superset_core.mcp.decorators import tool, ToolAnnotations
|
||||||
|
|
||||||
|
from superset.extensions import event_logger
|
||||||
|
from superset.mcp_service.dataset.schemas import (
|
||||||
|
CreateDatasetRequest,
|
||||||
|
DatasetError,
|
||||||
|
DatasetInfo,
|
||||||
|
serialize_dataset_object,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@tool(
    tags=["mutate"],
    class_permission_name="Dataset",
    method_permission_name="write",
    annotations=ToolAnnotations(
        title="Create dataset",
        readOnlyHint=False,
        destructiveHint=False,
    ),
)
async def create_dataset(
    request: CreateDatasetRequest, ctx: Context
) -> DatasetInfo | DatasetError:
    """Register a physical table as a Superset dataset.

    Wraps POST /api/v1/dataset/ — the same endpoint the UI uses when you click
    Data → Datasets → +Dataset. Returns full dataset metadata (same shape as
    get_dataset_info) so you can pass the resulting dataset_id straight into
    generate_chart.

    Required fields:
    - database_id: ID of the existing database connection
    - table_name: Exact name of the physical table to register

    Optional fields:
    - schema: Schema/namespace where the table lives (e.g. "public")
    - owners: List of user IDs to set as owners (defaults to calling user)

    Example:
    ```json
    {
        "database_id": 1,
        "schema": "public",
        "table_name": "orders"
    }
    ```

    Returns DatasetInfo on success or DatasetError on failure.
    Use list_databases to find the correct database_id.
    """
    await ctx.info(
        f"Creating dataset: database_id={request.database_id}, "
        f"schema={request.schema!r}, table_name={request.table_name!r}"
    )

    # These imports stay function-local (resolved at call time), but they must
    # sit OUTSIDE the ``try`` below: the ``except`` clauses reference these
    # exception classes by name, so if an import failed inside the ``try`` the
    # handler lookup itself would fail on an unbound local and hide the real
    # import error.
    from superset.commands.dataset.create import CreateDatasetCommand
    from superset.commands.dataset.exceptions import (
        DatasetCreateFailedError,
        DatasetExistsValidationError,
        DatasetInvalidError,
        TableNotFoundValidationError,
    )

    try:
        dataset_properties: dict[str, Any] = {
            "database": request.database_id,
            "schema": request.schema,
            "table_name": request.table_name,
        }
        # Only forward ``owners`` when the caller supplied it, so the command
        # receives no "owners" key at all in the default case.
        if request.owners is not None:
            dataset_properties["owners"] = request.owners

        with event_logger.log_context(action="mcp.create_dataset"):
            dataset = CreateDatasetCommand(dataset_properties).run()

        result = serialize_dataset_object(dataset)
        if result is None:
            # The dataset exists at this point; only building the response failed.
            return DatasetError.create(
                error="Dataset was created but could not be serialized",
                error_type="SerializationError",
            )

        logger.info(
            "Created dataset id=%s table=%s.%s",
            dataset.id,
            request.schema,
            request.table_name,
        )
        return result

    # Handlers run from most specific to most general; each reports the
    # failure to the MCP client and returns a typed DatasetError rather than
    # raising.
    # NOTE(review): confirm whether CreateDatasetCommand raises the two
    # specific validation errors directly or wraps them in DatasetInvalidError;
    # if wrapped, only the DatasetInvalidError handler is reachable in
    # production.
    except DatasetExistsValidationError as e:
        await ctx.error(f"Dataset already exists: {e}")
        return DatasetError.create(error=str(e), error_type="DatasetExistsError")
    except TableNotFoundValidationError as e:
        await ctx.error(f"Table not found: {e}")
        return DatasetError.create(error=str(e), error_type="TableNotFoundError")
    except DatasetInvalidError as e:
        await ctx.error(f"Dataset validation failed: {e}")
        return DatasetError.create(error=str(e), error_type="ValidationError")
    except DatasetCreateFailedError as e:
        await ctx.error(f"Dataset creation failed: {e}")
        return DatasetError.create(error=str(e), error_type="CreateFailedError")
    except Exception as e:
        # Tool boundary: log the full traceback server-side and hand the
        # client a structured error instead of letting the exception escape.
        logger.exception("Failed to create dataset: %s", e)
        await ctx.error(f"Unexpected error: {type(e).__name__}: {e}")
        return DatasetError.create(
            error=f"Failed to create dataset: {e}",
            error_type="InternalError",
        )
|
||||||
330
tests/unit_tests/mcp_service/dataset/tool/test_create_dataset.py
Normal file
330
tests/unit_tests/mcp_service/dataset/tool/test_create_dataset.py
Normal file
@@ -0,0 +1,330 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
"""Unit tests for create_dataset MCP tool."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from unittest.mock import MagicMock, Mock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastmcp import Client
|
||||||
|
from fastmcp.exceptions import ToolError
|
||||||
|
|
||||||
|
from superset.mcp_service.app import mcp
|
||||||
|
from superset.utils import json
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_mock_dataset(
|
||||||
|
dataset_id: int = 42,
|
||||||
|
table_name: str = "orders",
|
||||||
|
schema: str = "public",
|
||||||
|
database_name: str = "main_db",
|
||||||
|
) -> MagicMock:
|
||||||
|
dataset = MagicMock()
|
||||||
|
dataset.id = dataset_id
|
||||||
|
dataset.table_name = table_name
|
||||||
|
dataset.schema = schema
|
||||||
|
dataset.description = None
|
||||||
|
dataset.changed_by_name = "admin"
|
||||||
|
dataset.changed_on = None
|
||||||
|
dataset.changed_on_humanized = None
|
||||||
|
dataset.created_by_name = "admin"
|
||||||
|
dataset.created_on = None
|
||||||
|
dataset.created_on_humanized = None
|
||||||
|
dataset.tags = []
|
||||||
|
dataset.owners = []
|
||||||
|
dataset.is_virtual = False
|
||||||
|
dataset.database_id = 1
|
||||||
|
dataset.certified_by = None
|
||||||
|
dataset.certification_details = None
|
||||||
|
dataset.schema_perm = f"[{database_name}].[{schema}]"
|
||||||
|
dataset.url = f"/tablemodelview/edit/{dataset_id}"
|
||||||
|
dataset.database = MagicMock()
|
||||||
|
dataset.database.database_name = database_name
|
||||||
|
dataset.sql = None
|
||||||
|
dataset.main_dttm_col = None
|
||||||
|
dataset.offset = 0
|
||||||
|
dataset.cache_timeout = 0
|
||||||
|
dataset.params = {}
|
||||||
|
dataset.template_params = {}
|
||||||
|
dataset.extra = {}
|
||||||
|
dataset.uuid = f"dataset-uuid-{dataset_id}"
|
||||||
|
dataset.columns = []
|
||||||
|
dataset.metrics = []
|
||||||
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mcp_server():
    """Provide the module-level ``mcp`` instance imported by this test file."""
    return mcp
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def mock_auth():
    """Patch ``get_user_from_request`` to return a stub admin user in every test.

    Yields the patched callable so a test can inspect or reconfigure it.
    """
    admin_user = Mock(id=1, username="admin")
    with patch(
        "superset.mcp_service.auth.get_user_from_request",
        return_value=admin_user,
    ) as patched_get_user:
        yield patched_get_user
|
||||||
|
|
||||||
|
|
||||||
|
class TestCreateDataset:
|
||||||
|
"""Tests for the create_dataset MCP tool."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_dataset_success(self, mcp_server):
|
||||||
|
"""Happy path: tool creates dataset and returns DatasetInfo."""
|
||||||
|
mock_dataset = _make_mock_dataset()
|
||||||
|
mock_command = MagicMock()
|
||||||
|
mock_command.run.return_value = mock_dataset
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"superset.commands.dataset.create.CreateDatasetCommand",
|
||||||
|
return_value=mock_command,
|
||||||
|
) as mock_command_class,
|
||||||
|
patch(
|
||||||
|
"superset.mcp_service.utils.url_utils.get_superset_base_url",
|
||||||
|
return_value="http://localhost:8088",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
async with Client(mcp_server) as client:
|
||||||
|
result = await client.call_tool(
|
||||||
|
"create_dataset",
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"database_id": 1,
|
||||||
|
"schema": "public",
|
||||||
|
"table_name": "orders",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result.content is not None
|
||||||
|
data = json.loads(result.content[0].text)
|
||||||
|
assert data["id"] == 42
|
||||||
|
assert data["table_name"] == "orders"
|
||||||
|
assert data["schema"] == "public"
|
||||||
|
|
||||||
|
# Verify the command was called with the right properties
|
||||||
|
call_kwargs = mock_command_class.call_args[0][0]
|
||||||
|
assert call_kwargs["database"] == 1
|
||||||
|
assert call_kwargs["schema"] == "public"
|
||||||
|
assert call_kwargs["table_name"] == "orders"
|
||||||
|
assert "owners" not in call_kwargs
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_dataset_with_owners(self, mcp_server):
|
||||||
|
"""Owners list is forwarded to the command when supplied."""
|
||||||
|
mock_dataset = _make_mock_dataset()
|
||||||
|
mock_command = MagicMock()
|
||||||
|
mock_command.run.return_value = mock_dataset
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"superset.commands.dataset.create.CreateDatasetCommand",
|
||||||
|
return_value=mock_command,
|
||||||
|
) as mock_command_class,
|
||||||
|
patch(
|
||||||
|
"superset.mcp_service.utils.url_utils.get_superset_base_url",
|
||||||
|
return_value="http://localhost:8088",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
async with Client(mcp_server) as client:
|
||||||
|
result = await client.call_tool(
|
||||||
|
"create_dataset",
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"database_id": 2,
|
||||||
|
"schema": "sales",
|
||||||
|
"table_name": "transactions",
|
||||||
|
"owners": [5, 10],
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
data = json.loads(result.content[0].text)
|
||||||
|
assert data["id"] == 42
|
||||||
|
|
||||||
|
call_kwargs = mock_command_class.call_args[0][0]
|
||||||
|
assert call_kwargs["owners"] == [5, 10]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_dataset_already_exists(self, mcp_server):
|
||||||
|
"""Returns DatasetError when a dataset for the table already exists."""
|
||||||
|
from superset.commands.dataset.exceptions import DatasetExistsValidationError
|
||||||
|
from superset.sql.parse import Table
|
||||||
|
|
||||||
|
mock_command = MagicMock()
|
||||||
|
mock_command.run.side_effect = DatasetExistsValidationError(
|
||||||
|
Table("orders", "public", None)
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"superset.commands.dataset.create.CreateDatasetCommand",
|
||||||
|
return_value=mock_command,
|
||||||
|
):
|
||||||
|
async with Client(mcp_server) as client:
|
||||||
|
result = await client.call_tool(
|
||||||
|
"create_dataset",
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"database_id": 1,
|
||||||
|
"schema": "public",
|
||||||
|
"table_name": "orders",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
data = json.loads(result.content[0].text)
|
||||||
|
assert data["error_type"] == "DatasetExistsError"
|
||||||
|
assert "error" in data
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_create_dataset_table_not_found(self, mcp_server):
    """A missing-table failure from the command surfaces as TableNotFoundError."""
    from superset.commands.dataset.exceptions import TableNotFoundValidationError
    from superset.sql.parse import Table

    # The patched command raises from run(), as if the physical table were absent.
    failing_command = MagicMock()
    failing_command.run.side_effect = TableNotFoundValidationError(
        Table("missing_table", "public", None)
    )

    payload = {
        "request": {
            "database_id": 1,
            "schema": "public",
            "table_name": "missing_table",
        }
    }

    with patch(
        "superset.commands.dataset.create.CreateDatasetCommand",
        return_value=failing_command,
    ):
        async with Client(mcp_server) as client:
            result = await client.call_tool("create_dataset", payload)

    data = json.loads(result.content[0].text)
    assert data["error_type"] == "TableNotFoundError"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_create_dataset_unexpected_error(self, mcp_server):
    """A non-domain exception from the command is reported as InternalError."""
    # run() raises a plain RuntimeError; its message must reach the error payload.
    failing_command = MagicMock(
        **{"run.side_effect": RuntimeError("DB connection lost")}
    )

    payload = {
        "request": {
            "database_id": 1,
            "schema": "public",
            "table_name": "orders",
        }
    }

    with patch(
        "superset.commands.dataset.create.CreateDatasetCommand",
        return_value=failing_command,
    ):
        async with Client(mcp_server) as client:
            result = await client.call_tool("create_dataset", payload)

    data = json.loads(result.content[0].text)
    assert data["error_type"] == "InternalError"
    assert "DB connection lost" in data["error"]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_create_dataset_missing_required_fields(self, mcp_server):
    """Calling the tool without database_id and table_name raises ToolError."""
    # database_id and table_name are omitted intentionally
    incomplete_payload = {"request": {"schema": "public"}}

    async with Client(mcp_server) as client:
        with pytest.raises(ToolError):
            await client.call_tool("create_dataset", incomplete_payload)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_create_dataset_returns_full_dataset_info(self, mcp_server):
    """A successful call returns the dataset's core fields, columns and metrics."""
    # One column and one metric are enough to prove both lists are serialized.
    amount_column = MagicMock(
        column_name="amount",
        verbose_name="Amount",
        type="NUMERIC",
        is_dttm=False,
        groupby=True,
        filterable=True,
        description="Sale amount",
    )
    total_sales_metric = MagicMock(
        metric_name="total_sales",
        verbose_name="Total Sales",
        expression="SUM(amount)",
        description="Sum of amounts",
        d3format=None,
    )

    mock_dataset = _make_mock_dataset(
        dataset_id=99, table_name="sales", schema="dw"
    )
    mock_dataset.columns = [amount_column]
    mock_dataset.metrics = [total_sales_metric]

    # The patched command hands the prepared dataset back from run().
    succeeding_command = MagicMock()
    succeeding_command.run.return_value = mock_dataset

    payload = {
        "request": {
            "database_id": 1,
            "schema": "dw",
            "table_name": "sales",
        }
    }

    command_patch = patch(
        "superset.commands.dataset.create.CreateDatasetCommand",
        return_value=succeeding_command,
    )
    base_url_patch = patch(
        "superset.mcp_service.utils.url_utils.get_superset_base_url",
        return_value="http://localhost:8088",
    )

    with command_patch, base_url_patch:
        async with Client(mcp_server) as client:
            result = await client.call_tool("create_dataset", payload)

    data = json.loads(result.content[0].text)

    # Core scalar fields.
    assert data["id"] == 99
    assert data["table_name"] == "sales"
    assert data["schema"] == "dw"
    assert data["is_virtual"] is False

    # Nested column and metric lists.
    assert len(data["columns"]) == 1
    assert data["columns"][0]["column_name"] == "amount"
    assert len(data["metrics"]) == 1
    assert data["metrics"][0]["metric_name"] == "total_sales"
|
||||||
Reference in New Issue
Block a user