From 6ea9f2ade94aa5c46b3dbd4a54f9552dabe198f8 Mon Sep 17 00:00:00 2001
From: Kamil Gabryjelski <kamil.gabryjelski@gmail.com>
Date: Tue, 31 Mar 2026 19:31:02 +0200
Subject: [PATCH] chore(mcp): clarify saved metrics vs columns in MCP
 instructions (#38981)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 superset/mcp_service/app.py                   | 20 ++++++++++++++++++-
 superset/mcp_service/dataset/schemas.py       | 10 ++++++++--
 .../dataset/tool/get_dataset_info.py          |  5 +++++
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py
index 4b05b5d7184..086bc7f668b 100644
--- a/superset/mcp_service/app.py
+++ b/superset/mcp_service/app.py
@@ -88,11 +88,29 @@ Available Prompts:
 - quickstart: Interactive guide for getting started with the MCP service
 - create_chart_guided: Step-by-step chart creation wizard
 
+IMPORTANT - Using Saved Metrics vs Columns:
+When get_dataset_info returns a dataset, it includes both 'columns' and 'metrics'.
+- 'columns' are raw database columns (e.g., order_date, product_name, revenue)
+- 'metrics' are pre-defined saved metrics with SQL expressions
+  (e.g., count, total_revenue)
+
+When building chart configurations
+(generate_chart, generate_explore_link, update_chart):
+- For raw columns: use {{"name": "col_name", "aggregate": "SUM"}}
+- For saved metrics: use {{"name": "metric_name", "saved_metric": true}}
+  Do NOT add an aggregate when using saved_metric=true
+  (it's already defined in the metric).
+  Do NOT use a saved metric name as if it were a column — it will fail.
+
+Example: If get_dataset_info returns metrics=[{{"metric_name": "count", ...}}], use:
+  {{"name": "count", "saved_metric": true}}  ← CORRECT
+  {{"name": "count", "aggregate": "COUNT"}}  ← WRONG (count is not a column)
+
 Recommended Workflows:
 
 To create a chart:
 1. list_datasets -> find a dataset
-2. get_dataset_info(id) -> examine columns and metrics
+2. get_dataset_info(id) -> examine columns AND metrics (note which names are metrics!)
 3. generate_explore_link(dataset_id, config) -> preview interactively
 4. generate_chart(dataset_id, config, save_chart=True) -> save permanently
 
diff --git a/superset/mcp_service/dataset/schemas.py b/superset/mcp_service/dataset/schemas.py
index df5970ea061..5ae92cdd6fb 100644
--- a/superset/mcp_service/dataset/schemas.py
+++ b/superset/mcp_service/dataset/schemas.py
@@ -85,7 +85,11 @@ class TableColumnInfo(BaseModel):
 
 
 class SqlMetricInfo(BaseModel):
-    metric_name: str = Field(..., description="Metric name")
+    metric_name: str = Field(
+        ...,
+        description="Saved metric name. In chart configs, reference as "
+        '{"name": "<metric_name>", "saved_metric": true}.',
+    )
     verbose_name: str | None = Field(None, description="Verbose name")
     expression: str | None = Field(None, description="SQL expression")
     description: str | None = Field(None, description="Metric description")
@@ -134,7 +138,9 @@ class DatasetInfo(BaseModel):
         default_factory=list, description="Columns in the dataset"
     )
     metrics: List[SqlMetricInfo] = Field(
-        default_factory=list, description="Metrics in the dataset"
+        default_factory=list,
+        description="Saved metrics (pre-defined aggregations). "
+        "NOT columns — use saved_metric=true in chart configs.",
     )
     is_favorite: bool | None = Field(
         None, description="Whether this dataset is favorited by the current user"
diff --git a/superset/mcp_service/dataset/tool/get_dataset_info.py b/superset/mcp_service/dataset/tool/get_dataset_info.py
index ee74db8c1a0..c211c618d63 100644
--- a/superset/mcp_service/dataset/tool/get_dataset_info.py
+++ b/superset/mcp_service/dataset/tool/get_dataset_info.py
@@ -62,6 +62,11 @@ async def get_dataset_info(
     - DO NOT use schema.table_name format (e.g., "public.customers")
     - To find a dataset ID, use the list_datasets tool first
 
+    IMPORTANT - Saved Metrics vs Columns:
+    The response includes both 'columns' (raw database columns) and 'metrics'
+    (pre-defined saved metrics). When building chart configs, use saved_metric=true
+    for metrics — do not treat them as columns (see the MCP service instructions).
+
     Example usage:
     ```json
     {