From 6ea9f2ade94aa5c46b3dbd4a54f9552dabe198f8 Mon Sep 17 00:00:00 2001 From: Kamil Gabryjelski Date: Tue, 31 Mar 2026 19:31:02 +0200 Subject: [PATCH] chore(mcp): clarify saved metrics vs columns in MCP instructions (#38981) Co-authored-by: Claude Opus 4.6 (1M context) --- superset/mcp_service/app.py | 20 ++++++++++++++++++- superset/mcp_service/dataset/schemas.py | 10 ++++++++-- .../dataset/tool/get_dataset_info.py | 5 +++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py index 4b05b5d7184..086bc7f668b 100644 --- a/superset/mcp_service/app.py +++ b/superset/mcp_service/app.py @@ -88,11 +88,29 @@ Available Prompts: - quickstart: Interactive guide for getting started with the MCP service - create_chart_guided: Step-by-step chart creation wizard +IMPORTANT - Using Saved Metrics vs Columns: +When get_dataset_info returns a dataset, it includes both 'columns' and 'metrics'. +- 'columns' are raw database columns (e.g., order_date, product_name, revenue) +- 'metrics' are pre-defined saved metrics with SQL expressions + (e.g., count, total_revenue) + +When building chart configurations +(generate_chart, generate_explore_link, update_chart): +- For raw columns: use {{"name": "col_name", "aggregate": "SUM"}} +- For saved metrics: use {{"name": "metric", "saved_metric": true}} + Do NOT add an aggregate when using saved_metric=true + (it's already defined in the metric). + Do NOT use a saved metric name as if it were a column — it will fail. + +Example: If get_dataset_info returns metrics=[{{"metric_name": "count", ...}}], use: + {{"name": "count", "saved_metric": true}} ← CORRECT + {{"name": "count", "aggregate": "COUNT"}} ← WRONG (count is not a column) + Recommended Workflows: To create a chart: 1. list_datasets -> find a dataset -2. get_dataset_info(id) -> examine columns and metrics +2. get_dataset_info(id) -> examine columns AND metrics (note which names are metrics!) 3. 
generate_explore_link(dataset_id, config) -> preview interactively 4. generate_chart(dataset_id, config, save_chart=True) -> save permanently diff --git a/superset/mcp_service/dataset/schemas.py b/superset/mcp_service/dataset/schemas.py index df5970ea061..5ae92cdd6fb 100644 --- a/superset/mcp_service/dataset/schemas.py +++ b/superset/mcp_service/dataset/schemas.py @@ -85,7 +85,11 @@ class TableColumnInfo(BaseModel): class SqlMetricInfo(BaseModel): - metric_name: str = Field(..., description="Metric name") + metric_name: str = Field( + ..., + description="Saved metric name. In chart configs, reference as " + '{"name": "<metric_name>", "saved_metric": true}.', + ) verbose_name: str | None = Field(None, description="Verbose name") expression: str | None = Field(None, description="SQL expression") description: str | None = Field(None, description="Metric description") @@ -134,7 +138,9 @@ class DatasetInfo(BaseModel): default_factory=list, description="Columns in the dataset" ) metrics: List[SqlMetricInfo] = Field( - default_factory=list, description="Metrics in the dataset" + default_factory=list, + description="Saved metrics (pre-defined aggregations). " + "NOT columns — use saved_metric=true in chart configs.", ) is_favorite: bool | None = Field( None, description="Whether this dataset is favorited by the current user" diff --git a/superset/mcp_service/dataset/tool/get_dataset_info.py b/superset/mcp_service/dataset/tool/get_dataset_info.py index ee74db8c1a0..c211c618d63 100644 --- a/superset/mcp_service/dataset/tool/get_dataset_info.py +++ b/superset/mcp_service/dataset/tool/get_dataset_info.py @@ -62,6 +62,11 @@ async def get_dataset_info( - DO NOT use schema.table_name format (e.g., "public.customers") - To find a dataset ID, use the list_datasets tool first + IMPORTANT - Saved Metrics vs Columns: + The response includes both 'columns' (raw database columns) and 'metrics' + (pre-defined saved metrics).
When building chart configs, use saved_metric=true + for metrics — do not treat them as columns. See instructions for details. + Example usage: ```json {