diff --git a/.devcontainer/README.md b/.devcontainer/README.md
index e5dda78fe30..6b24183edc5 100644
--- a/.devcontainer/README.md
+++ b/.devcontainer/README.md
@@ -3,14 +3,3 @@
For complete documentation on using GitHub Codespaces with Apache Superset, please see:
**[Setting up a Development Environment - GitHub Codespaces](https://superset.apache.org/docs/contributing/development#github-codespaces-cloud-development)**
-
-## Pre-installed Development Environment
-
-When you create a new Codespace from this repository, it automatically:
-
-1. **Creates a Python virtual environment** using `uv venv`
-2. **Installs all development dependencies** via `uv pip install -r requirements/development.txt`
-3. **Sets up pre-commit hooks** with `pre-commit install`
-4. **Activates the virtual environment** automatically in all terminals
-
-The virtual environment is located at `/workspaces/{repository-name}/.venv` and is automatically activated through environment variables set in the devcontainer configuration.
diff --git a/.devcontainer/default/devcontainer.json b/.devcontainer/default/devcontainer.json
new file mode 100644
index 00000000000..d0988367947
--- /dev/null
+++ b/.devcontainer/default/devcontainer.json
@@ -0,0 +1,19 @@
+{
+  // NOTE(review): "extends" is not part of the Dev Container spec (devcontainers/spec#22) — verify the tooling honors it
+ "extends": "../devcontainer-base.json",
+
+ "name": "Apache Superset Development (Default)",
+
+ // Forward ports for development
+ "forwardPorts": [9001],
+ "portsAttributes": {
+ "9001": {
+ "label": "Superset (via Webpack Dev Server)",
+ "onAutoForward": "notify",
+ "visibility": "public"
+ }
+ },
+
+ // Auto-start Superset on Codespace resume
+ "postStartCommand": ".devcontainer/start-superset.sh"
+}
diff --git a/.devcontainer/devcontainer-base.json b/.devcontainer/devcontainer-base.json
new file mode 100644
index 00000000000..59ed6ee1d2f
--- /dev/null
+++ b/.devcontainer/devcontainer-base.json
@@ -0,0 +1,39 @@
+{
+ "name": "Apache Superset Development",
+ // Keep this in sync with the base image in Dockerfile (ARG PY_VER)
+ // Using the same base as Dockerfile, but non-slim for dev tools
+ "image": "python:3.11.13-bookworm",
+
+ "features": {
+ "ghcr.io/devcontainers/features/docker-in-docker:2": {
+ "moby": true,
+ "dockerDashComposeVersion": "v2"
+ },
+ "ghcr.io/devcontainers/features/node:1": {
+ "version": "20"
+ },
+ "ghcr.io/devcontainers/features/git:1": {},
+ "ghcr.io/devcontainers/features/common-utils:2": {
+ "configureZshAsDefaultShell": true
+ },
+ "ghcr.io/devcontainers/features/sshd:1": {
+ "version": "latest"
+ }
+ },
+
+ // Run commands after container is created
+ "postCreateCommand": "chmod +x .devcontainer/setup-dev.sh && .devcontainer/setup-dev.sh",
+
+ // VS Code customizations
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.python",
+ "ms-python.vscode-pylance",
+ "charliermarsh.ruff",
+ "dbaeumer.vscode-eslint",
+ "esbenp.prettier-vscode"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/setup-dev.sh b/.devcontainer/setup-dev.sh
index 91482551bee..f8521189009 100755
--- a/.devcontainer/setup-dev.sh
+++ b/.devcontainer/setup-dev.sh
@@ -3,76 +3,30 @@
echo "๐ง Setting up Superset development environment..."
-# System dependencies and uv are now pre-installed in the Docker image
-# This speeds up Codespace creation significantly!
+# NOTE(review): base image is python:3.11-bookworm (see devcontainer-base.json), not the "universal" image — confirm which tools are preinstalled
+echo "๐ฆ Installing Superset-specific dependencies..."
+sudo apt-get update
+sudo apt-get install -y \
+ libsasl2-dev \
+ libldap2-dev \
+ libpq-dev \
+ tmux \
+ gh
-# Create virtual environment using uv
-echo "๐ Creating Python virtual environment..."
-if ! uv venv; then
- echo "โ Failed to create virtual environment"
- exit 1
-fi
+# Install uv for fast Python package management
+echo "๐ฆ Installing uv..."
+curl -LsSf https://astral.sh/uv/install.sh | sh
-# Install Python dependencies
-echo "๐ฆ Installing Python dependencies..."
-if ! uv pip install -r requirements/development.txt; then
- echo "โ Failed to install Python dependencies"
- echo "๐ก You may need to run this manually after the Codespace starts"
- exit 1
-fi
-
-# Install pre-commit hooks
-echo "๐ช Installing pre-commit hooks..."
-if source .venv/bin/activate && pre-commit install; then
- echo "โ
Pre-commit hooks installed"
-else
- echo "โ ๏ธ Pre-commit hooks installation failed (non-critical)"
-fi
+# Ensure uv is on PATH (recent installers use ~/.local/bin; older ones used ~/.cargo/bin)
+echo 'export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"' >> ~/.bashrc
+echo 'export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"' >> ~/.zshrc
# Install Claude Code CLI via npm
echo "๐ค Installing Claude Code..."
-if npm install -g @anthropic-ai/claude-code; then
- echo "โ
Claude Code installed"
-else
- echo "โ ๏ธ Claude Code installation failed (non-critical)"
-fi
+npm install -g @anthropic-ai/claude-code
# Make the start script executable
chmod +x .devcontainer/start-superset.sh
-# Add bashrc additions for automatic venv activation
-echo "๐ง Setting up automatic environment activation..."
-if [ -f ~/.bashrc ]; then
- # Check if we've already added our additions
- if ! grep -q "Superset Codespaces environment setup" ~/.bashrc; then
- echo "" >> ~/.bashrc
- cat .devcontainer/bashrc-additions >> ~/.bashrc
- echo "โ
Added automatic venv activation to ~/.bashrc"
- else
- echo "โ
Bashrc additions already present"
- fi
-else
- # Create bashrc if it doesn't exist
- cat .devcontainer/bashrc-additions > ~/.bashrc
- echo "โ
Created ~/.bashrc with automatic venv activation"
-fi
-
-# Also add to zshrc since that's the default shell
-if [ -f ~/.zshrc ] || [ -n "$ZSH_VERSION" ]; then
- if ! grep -q "Superset Codespaces environment setup" ~/.zshrc; then
- echo "" >> ~/.zshrc
- cat .devcontainer/bashrc-additions >> ~/.zshrc
- echo "โ
Added automatic venv activation to ~/.zshrc"
- fi
-fi
-
echo "โ
Development environment setup complete!"
-echo ""
-echo "๐ The virtual environment will be automatically activated in new terminals"
-echo ""
-echo "๐ To activate in this terminal, run:"
-echo " source ~/.bashrc"
-echo ""
-echo "๐ To start Superset:"
-echo " start-superset"
-echo ""
+echo "๐ Run '.devcontainer/start-superset.sh' to start Superset"
diff --git a/.devcontainer/start-superset.sh b/.devcontainer/start-superset.sh
index 6ba990cae10..b480b04aacb 100755
--- a/.devcontainer/start-superset.sh
+++ b/.devcontainer/start-superset.sh
@@ -1,14 +1,14 @@
#!/bin/bash
# Startup script for Superset in Codespaces
-# Log to a file for debugging
-LOG_FILE="/tmp/superset-startup.log"
-echo "[$(date)] Starting Superset startup script" >> "$LOG_FILE"
-echo "[$(date)] User: $(whoami), PWD: $(pwd)" >> "$LOG_FILE"
-
echo "๐ Starting Superset in Codespaces..."
echo "๐ Frontend will be available at port 9001"
+# Check if MCP is enabled
+if [ "$ENABLE_MCP" = "true" ]; then
+ echo "๐ค MCP Service will be available at port 5008"
+fi
+
# Find the workspace directory (Codespaces clones as 'superset', not 'superset-2')
WORKSPACE_DIR=$(find /workspaces -maxdepth 1 -name "superset*" -type d | head -1)
if [ -n "$WORKSPACE_DIR" ]; then
@@ -18,71 +18,32 @@ else
echo "๐ Using current directory: $(pwd)"
fi
-# Wait for Docker to be available
-echo "โณ Waiting for Docker to start..."
-echo "[$(date)] Waiting for Docker..." >> "$LOG_FILE"
-max_attempts=30
-attempt=0
-while ! docker info > /dev/null 2>&1; do
- if [ $attempt -eq $max_attempts ]; then
- echo "โ Docker failed to start after $max_attempts attempts"
- echo "[$(date)] Docker failed to start after $max_attempts attempts" >> "$LOG_FILE"
- echo "๐ Please restart the Codespace or run this script manually later"
- exit 1
- fi
- echo " Attempt $((attempt + 1))/$max_attempts..."
- echo "[$(date)] Docker check attempt $((attempt + 1))/$max_attempts" >> "$LOG_FILE"
- sleep 2
- attempt=$((attempt + 1))
-done
-echo "โ
Docker is ready!"
-echo "[$(date)] Docker is ready" >> "$LOG_FILE"
-
-# Check if Superset containers are already running
-if docker ps | grep -q "superset"; then
- echo "โ
Superset containers are already running!"
- echo ""
- echo "๐ To access Superset:"
- echo " 1. Click the 'Ports' tab at the bottom of VS Code"
- echo " 2. Find port 9001 and click the globe icon to open"
- echo " 3. Wait 10-20 minutes for initial startup"
- echo ""
- echo "๐ Login credentials: admin/admin"
- exit 0
+# Best-effort Docker readiness check (single 5s wait, no retry; docker-compose below fails fast if Docker is still down)
+if ! docker info > /dev/null 2>&1; then
+ echo "โณ Waiting for Docker to start..."
+ sleep 5
fi
# Clean up any existing containers
echo "๐งน Cleaning up existing containers..."
-docker-compose -f docker-compose-light.yml down
+docker-compose -f docker-compose-light.yml --profile mcp down
# Start services
-echo "๐๏ธ Starting Superset in background (daemon mode)..."
+echo "๐๏ธ Building and starting services..."
echo ""
+echo "๐ Once started, login with:"
+echo " Username: admin"
+echo " Password: admin"
+echo ""
+echo "๐ Running in foreground with live logs (Ctrl+C to stop)..."
-# Start in detached mode
-docker-compose -f docker-compose-light.yml up -d
-
-echo ""
-echo "โ
Docker Compose started successfully!"
-echo ""
-echo "๐ Important information:"
-echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
-echo "โฑ๏ธ Initial startup takes 10-20 minutes"
-echo "๐ Check the 'Ports' tab for your Superset URL (port 9001)"
-echo "๐ค Login: admin / admin"
-echo ""
-echo "๐ Useful commands:"
-echo " docker-compose -f docker-compose-light.yml logs -f # Follow logs"
-echo " docker-compose -f docker-compose-light.yml ps # Check status"
-echo " docker-compose -f docker-compose-light.yml down # Stop services"
-echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
-echo ""
-echo "๐ค Keeping terminal open for 60 seconds to test persistence..."
-sleep 60
-echo "โ
Test complete - check if this terminal is still visible!"
-
-# Show final status
-docker-compose -f docker-compose-light.yml ps
+# Run docker-compose and capture exit code
+if [ "$ENABLE_MCP" = "true" ]; then
+ echo "๐ค Starting with MCP Service enabled..."
+ docker-compose -f docker-compose-light.yml --profile mcp up
+else
+ docker-compose -f docker-compose-light.yml up
+fi
EXIT_CODE=$?
# If it failed, provide helpful instructions
diff --git a/.devcontainer/with-mcp/devcontainer.json b/.devcontainer/with-mcp/devcontainer.json
new file mode 100644
index 00000000000..c3f8b654ebc
--- /dev/null
+++ b/.devcontainer/with-mcp/devcontainer.json
@@ -0,0 +1,29 @@
+{
+  // NOTE(review): "extends" is not part of the Dev Container spec (devcontainers/spec#22) — verify the tooling honors it
+ "extends": "../devcontainer-base.json",
+
+ "name": "Apache Superset Development with MCP",
+
+ // Forward ports for development
+ "forwardPorts": [9001, 5008],
+ "portsAttributes": {
+ "9001": {
+ "label": "Superset (via Webpack Dev Server)",
+ "onAutoForward": "notify",
+ "visibility": "public"
+ },
+ "5008": {
+ "label": "MCP Service (Model Context Protocol)",
+ "onAutoForward": "notify",
+ "visibility": "private"
+ }
+ },
+
+ // Auto-start Superset with MCP on Codespace resume
+ "postStartCommand": "ENABLE_MCP=true .devcontainer/start-superset.sh",
+
+ // Environment variables
+ "containerEnv": {
+ "ENABLE_MCP": "true"
+ }
+}
diff --git a/docker/docker-bootstrap.sh b/docker/docker-bootstrap.sh
index 9d18b66626c..d8524d928f8 100755
--- a/docker/docker-bootstrap.sh
+++ b/docker/docker-bootstrap.sh
@@ -86,6 +86,10 @@ case "${1}" in
echo "Starting web app..."
/usr/bin/run-server.sh
;;
+ mcp)
+ echo "Starting MCP service..."
+ superset mcp run --host 0.0.0.0 --port ${MCP_PORT:-5008} --debug
+ ;;
*)
echo "Unknown Operation!!!"
;;
diff --git a/superset/mcp_service/CLAUDE.md b/superset/mcp_service/CLAUDE.md
new file mode 100644
index 00000000000..94b33a1bce4
--- /dev/null
+++ b/superset/mcp_service/CLAUDE.md
@@ -0,0 +1,523 @@
+# MCP Service - LLM Agent Guide
+
+This guide helps LLM agents understand the Superset MCP (Model Context Protocol) service architecture and development conventions.
+
+## โ ๏ธ CRITICAL: Apache License Headers
+
+**EVERY Python file in the MCP service MUST have the Apache Software Foundation license header.**
+
+This includes:
+- All `.py` files (tool files, schemas, __init__.py files, etc.)
+- **NEVER remove existing license headers during refactoring or edits**
+- **ALWAYS add license headers when creating new files**
+- **ALWAYS verify license headers are present after editing files**
+
+If you see a file without a license header, ADD IT IMMEDIATELY. If you accidentally remove one during editing, ADD IT BACK.
+
+Use this exact template at the top of EVERY Python file:
+
+```python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+```
+
+**Note**: LLM instruction files like `CLAUDE.md`, `AGENTS.md`, etc. are excluded from this requirement (listed in `.rat-excludes`) to avoid token overhead, but ALL other Python files require it.
+
+## Architecture Overview
+
+The MCP service provides programmatic access to Superset via the Model Context Protocol, allowing AI assistants to interact with dashboards, charts, datasets, SQL Lab, and instance metadata.
+
+### Key Components
+
+```
+superset/mcp_service/
+โโโ app.py # FastMCP app factory and tool registration
+โโโ auth.py # Authentication and authorization
+โโโ mcp_config.py # Default configuration
+โโโ mcp_core.py # Reusable core classes for tools
+โโโ flask_singleton.py # Flask app singleton for MCP context
+โโโ chart/ # Chart-related tools
+โ โโโ schemas.py # Pydantic schemas for chart responses
+โ โโโ tool/ # Chart tool implementations
+โ โโโ __init__.py # Tool exports
+โ โโโ list_charts.py
+โ โโโ get_chart_info.py
+โโโ dashboard/ # Dashboard-related tools
+โ โโโ schemas.py
+โ โโโ tool/
+โโโ dataset/ # Dataset-related tools
+โ โโโ schemas.py
+โ โโโ tool/
+โโโ system/ # System/instance tools
+ โโโ schemas.py
+ โโโ tool/
+```
+
+## Critical Convention: Tool, Prompt, and Resource Registration
+
+**IMPORTANT**: When creating new MCP tools, prompts, or resources, you MUST add their imports to `app.py` for auto-registration. Do NOT add them to `server.py` - that approach doesn't work properly.
+
+### How to Add a New Tool
+
+1. **Create the tool file** in the appropriate directory (e.g., `chart/tool/my_new_tool.py`)
+2. **Decorate with `@mcp.tool`** to register it with FastMCP
+3. **Add import to `app.py`** at the bottom of the file where other tools are imported (around line 210-242)
+
+**Example**:
+```python
+# superset/mcp_service/chart/tool/my_new_tool.py
+from superset.mcp_service.app import mcp
+from superset.mcp_service.auth import mcp_auth_hook
+
+@mcp.tool
+@mcp_auth_hook
+def my_new_tool(param: str) -> dict:
+ """Tool description for LLMs."""
+ return {"result": "success"}
+```
+
+**Then add to app.py**:
+```python
+# superset/mcp_service/app.py (at the bottom, around line 210-242)
+from superset.mcp_service.chart.tool import ( # noqa: F401, E402
+ get_chart_info,
+ list_charts,
+ my_new_tool, # ADD YOUR TOOL HERE
+)
+```
+
+**Why this matters**: Tools use `@mcp.tool` decorators and register automatically on import. The import MUST be in `app.py` at the bottom of the file (after the `mcp` instance is created). If you don't import the tool in `app.py`, it won't be available to MCP clients. DO NOT add imports to `server.py` - that file is for running the server only.
+
+### How to Add a New Prompt
+
+1. **Create the prompt file** in the appropriate directory (e.g., `chart/prompts/my_new_prompt.py`)
+2. **Decorate with `@mcp.prompt`** to register it with FastMCP
+3. **Add import to module's `__init__.py`** (e.g., `chart/prompts/__init__.py`)
+4. **Ensure module is imported in `app.py`** (around line 244-253)
+
+**Example**:
+```python
+# superset/mcp_service/chart/prompts/my_new_prompt.py
+from superset.mcp_service.app import mcp
+from superset.mcp_service.auth import mcp_auth_hook
+
+@mcp.prompt("my_new_prompt")
+@mcp_auth_hook
+async def my_new_prompt_handler(ctx: Context) -> str:
+ """Interactive prompt for doing something."""
+ return "Prompt instructions here..."
+```
+
+**Then add to `chart/prompts/__init__.py`**:
+```python
+# superset/mcp_service/chart/prompts/__init__.py
+from . import create_chart_guided # existing
+from . import my_new_prompt # ADD YOUR PROMPT HERE
+```
+
+**Verify module import exists in `app.py`** (around line 248):
+```python
+# superset/mcp_service/app.py
+from superset.mcp_service.chart import prompts as chart_prompts # This imports all prompts
+```
+
+### How to Add a New Resource
+
+1. **Create the resource file** in the appropriate directory (e.g., `chart/resources/my_new_resource.py`)
+2. **Decorate with `@mcp.resource`** to register it with FastMCP
+3. **Add import to module's `__init__.py`** (e.g., `chart/resources/__init__.py`)
+4. **Ensure module is imported in `app.py`** (around line 244-253)
+
+**Example**:
+```python
+# superset/mcp_service/chart/resources/my_new_resource.py
+from superset.mcp_service.app import mcp
+from superset.mcp_service.auth import mcp_auth_hook
+
+@mcp.resource("superset://chart/my_resource")
+@mcp_auth_hook
+def get_my_resource() -> str:
+ """Resource description for LLMs."""
+ return "Resource data here..."
+```
+
+**Then add to `chart/resources/__init__.py`**:
+```python
+# superset/mcp_service/chart/resources/__init__.py
+from . import chart_configs # existing
+from . import my_new_resource # ADD YOUR RESOURCE HERE
+```
+
+**Verify module import exists in `app.py`** (around line 249):
+```python
+# superset/mcp_service/app.py
+from superset.mcp_service.chart import resources as chart_resources # This imports all resources
+```
+
+**Why this matters**: Prompts and resources work similarly to tools - they use decorators and register on import. The module-level imports (`chart/prompts/__init__.py`, `chart/resources/__init__.py`) ensure individual files are imported when the module is imported. The `app.py` imports ensure the modules are loaded when the MCP service starts.
+
+## Tool Development Patterns
+
+### 1. Use Core Classes for Reusability
+
+The `mcp_core.py` module provides reusable patterns:
+
+- **`ModelListCore`**: For listing resources (dashboards, charts, datasets)
+- **`ModelGetInfoCore`**: For getting resource details by ID/UUID
+- **`ModelGetAvailableFiltersCore`**: For retrieving filterable columns
+
+**Example**:
+```python
+from superset.mcp_service.mcp_core import ModelListCore
+from superset.daos.dashboard import DashboardDAO
+from superset.mcp_service.dashboard.schemas import DashboardList
+
+list_core = ModelListCore(
+ dao_class=DashboardDAO,
+ output_schema=DashboardList,
+ logger=logger,
+)
+
+@mcp.tool
+@mcp_auth_hook
+def list_dashboards(filters: List[DashboardFilter], page: int = 1) -> DashboardList:
+ return list_core.run_tool(filters=filters, page=page, page_size=10)
+```
+
+### 2. Always Use Authentication
+
+**Every tool must use `@mcp_auth_hook`** to ensure:
+- User authentication from JWT or configured admin user
+- Permission checking via JWT scopes
+- Audit logging of tool access
+
+```python
+from superset.mcp_service.auth import mcp_auth_hook
+
+@mcp.tool
+@mcp_auth_hook # REQUIRED
+def my_tool() -> dict:
+ # g.user is set by mcp_auth_hook
+ return {"user": g.user.username}
+```
+
+### 3. Use Pydantic Schemas
+
+**All tool inputs and outputs must be Pydantic models** for:
+- Automatic validation
+- LLM-friendly schema generation
+- Type safety
+
+**Convention**: Place schemas in `{module}/schemas.py`
+
+```python
+from pydantic import BaseModel, Field
+
+class MyToolRequest(BaseModel):
+ param: str = Field(..., description="Parameter description for LLMs")
+
+class MyToolResponse(BaseModel):
+ result: str = Field(..., description="Result description")
+ timestamp: datetime = Field(
+ default_factory=lambda: datetime.now(timezone.utc),
+ description="Response timestamp"
+ )
+```
+
+### 4. Follow the DAO Pattern
+
+**Use Superset's DAO (Data Access Object) layer** instead of direct database queries:
+
+```python
+from superset.daos.dashboard import DashboardDAO
+
+# GOOD: Use DAO
+dashboard = DashboardDAO.find_by_id(dashboard_id)
+
+# BAD: Don't query directly
+dashboard = db.session.query(Dashboard).filter_by(id=dashboard_id).first()
+```
+
+### 5. Python Type Hints (Python 3.10+ Style)
+
+**CRITICAL**: Always use modern Python 3.10+ union syntax for type hints.
+
+```python
+# GOOD - Modern Python 3.10+ syntax
+from typing import List, Dict, Any
+from pydantic import BaseModel, Field
+
+class MySchema(BaseModel):
+ name: str | None = Field(None, description="Optional name")
+ tags: List[str] = Field(default_factory=list)
+ metadata: Dict[str, Any] = Field(default_factory=dict)
+
+def my_function(
+ id: int,
+ filters: List[str] | None = None,
+ options: Dict[str, Any] | None = None
+) -> MySchema | None:
+ pass
+
+# BAD - Old-style Optional (DO NOT USE)
+from typing import Optional, List, Dict, Any
+
+class MySchema(BaseModel):
+ name: Optional[str] = Field(None, description="Optional name") # Wrong!
+
+def my_function(
+ id: int,
+ filters: Optional[List[str]] = None, # Wrong!
+ options: Optional[Dict[str, Any]] = None # Wrong!
+) -> Optional[MySchema]: # Wrong!
+ pass
+```
+
+**Key rules:**
+- Use `T | None` instead of `Optional[T]`
+- Do NOT import `Optional` from typing
+- Still import `List`, `Dict`, `Any`, etc. from typing (for now)
+- All new code must follow this pattern
+
+### 6. Error Handling
+
+**Use consistent error schemas**:
+
+```python
+class MyError(BaseModel):
+ error: str = Field(..., description="Error message")
+ error_type: str = Field(..., description="Type of error")
+ timestamp: datetime = Field(
+ default_factory=lambda: datetime.now(timezone.utc),
+ description="Error timestamp"
+ )
+
+@mcp.tool
+@mcp_auth_hook
+def my_tool(id: int) -> MyResponse:
+ try:
+ result = process_data(id)
+ return MyResponse(data=result)
+ except NotFound:
+ raise ValueError(f"Resource {id} not found")
+```
+
+## Testing Conventions
+
+### Unit Tests
+
+Place unit tests in `tests/unit_tests/mcp_service/{module}/tool/test_{tool_name}.py`
+
+**Test structure**:
+```python
+from unittest.mock import MagicMock, patch
+import pytest
+
+class TestMyTool:
+ @pytest.fixture
+ def mock_dao(self):
+ """Create mock DAO for testing."""
+ dao = MagicMock()
+ dao.find_by_id.return_value = create_mock_object()
+ return dao
+
+ @patch("superset.mcp_service.chart.tool.my_tool.ChartDAO")
+ def test_my_tool_success(self, mock_dao_class, mock_dao):
+ """Test successful tool execution."""
+ mock_dao_class.return_value = mock_dao
+
+ result = my_tool(id=1)
+
+ assert result.data is not None
+ mock_dao.find_by_id.assert_called_once_with(1)
+```
+
+### Integration Tests
+
+Use Flask test client for integration tests:
+
+```python
+def test_tool_with_flask_context(app):
+ """Test tool with full Flask app context."""
+ with app.app_context():
+ result = my_tool(id=1)
+ assert result is not None
+```
+
+## Common Pitfalls to Avoid
+
+### 1. โ Forgetting Tool Import in app.py
+**Problem**: Tool exists but isn't available to MCP clients.
+**Solution**: Always add tool import to `app.py` (at the bottom) after creating it. Never add to `server.py`.
+
+### 2. โ Adding Tool Imports to server.py
+**Problem**: Tools won't register properly, causing runtime errors.
+**Solution**: Tool imports must be in `app.py` at the bottom of the file, not in `server.py`. The `server.py` file is only for running the server.
+
+### 3. โ Missing @mcp_auth_hook Decorator
+**Problem**: Tool bypasses authentication and authorization.
+**Solution**: Always use `@mcp_auth_hook` on every tool.
+
+### 4. โ Using `Optional` Instead of Union Syntax
+**Problem**: Old-style Optional[T] is not Python 3.10+ style.
+**Solution**: Use `T | None` instead of `Optional[T]` for all type hints.
+```python
+# GOOD - Modern Python 3.10+ syntax
+def my_function(param: str | None = None) -> int | None:
+ pass
+
+# BAD - Old-style Optional
+from typing import Optional
+def my_function(param: Optional[str] = None) -> Optional[int]:
+ pass
+```
+
+### 5. โ Using `any` Types in Schemas
+**Problem**: Violates TypeScript modernization goals, no validation.
+**Solution**: Use proper Pydantic types with Field descriptions.
+
+### 6. โ Direct Database Queries
+**Problem**: Bypasses Superset's security and caching layers.
+**Solution**: Use DAO classes (ChartDAO, DashboardDAO, etc.).
+
+### 7. โ Not Using Core Classes
+**Problem**: Duplicating list/get_info/filter logic across tools.
+**Solution**: Use ModelListCore, ModelGetInfoCore, ModelGetAvailableFiltersCore.
+
+### 8. โ Missing Apache License Headers
+**Problem**: CI fails on license check.
+**Solution**: Add Apache license header to all new .py files. Use this exact template at the top of every new Python file:
+
+```python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+```
+
+**Note**: LLM instruction files like `CLAUDE.md`, `AGENTS.md`, etc. are excluded from this requirement (listed in `.rat-excludes`) to avoid token overhead.
+
+### 9. โ Using `@mcp.tool()` with Empty Parentheses
+**Problem**: Inconsistent decorator style.
+**Solution**: Use `@mcp.tool` without parentheses unless passing arguments.
+```python
+# GOOD
+@mcp.tool
+def my_tool():
+ pass
+
+# BAD
+@mcp.tool()
+def my_tool():
+ pass
+```
+
+### 10. โ Circular Imports
+**Problem**: Importing `mcp` from `app.py` creates circular dependency.
+**Solution**: Import `mcp` at module level in tool files:
+```python
+# GOOD
+from superset.mcp_service.app import mcp
+
+@mcp.tool
+def my_tool():
+ pass
+
+# BAD - causes circular import
+from superset.mcp_service.app import mcp, some_other_function
+```
+
+## Configuration
+
+Default configuration is in `mcp_config.py`. Users can override in `superset_config.py`:
+
+```python
+# superset_config.py
+MCP_ADMIN_USERNAME = "your_admin"
+MCP_AUTH_ENABLED = True
+MCP_JWT_PUBLIC_KEY = "your_public_key"
+```
+
+## Tool Discovery
+
+MCP clients discover tools via:
+1. **Tool listing**: All tools with `@mcp.tool` are automatically listed
+2. **Schema introspection**: Pydantic schemas generate JSON Schema for LLMs
+3. **Instructions**: `DEFAULT_INSTRUCTIONS` in `app.py` documents available tools
+
+## Resources for Learning
+
+- **MCP Specification**: https://modelcontextprotocol.io/
+- **FastMCP Documentation**: https://github.com/jlowin/fastmcp
+- **Superset DAO Patterns**: See `superset/daos/` for examples
+- **Pydantic Documentation**: https://docs.pydantic.dev/
+
+## Quick Checklist for New Tools
+
+- [ ] Created tool file in `{module}/tool/{tool_name}.py`
+- [ ] Added `@mcp.tool` decorator
+- [ ] Added `@mcp_auth_hook` decorator
+- [ ] Created Pydantic request/response schemas in `{module}/schemas.py`
+- [ ] Used DAO classes instead of direct queries
+- [ ] Added tool import to `app.py` (around line 210-242)
+- [ ] Added Apache license header to new files
+- [ ] Created unit tests in `tests/unit_tests/mcp_service/{module}/tool/test_{tool_name}.py`
+- [ ] Updated `DEFAULT_INSTRUCTIONS` in `app.py` if adding new capability
+- [ ] Tested locally with MCP client (e.g., Claude Desktop)
+
+## Quick Checklist for New Prompts
+
+- [ ] Created prompt file in `{module}/prompts/{prompt_name}.py`
+- [ ] Added `@mcp.prompt("prompt_name")` decorator
+- [ ] Added `@mcp_auth_hook` decorator
+- [ ] Made function async: `async def prompt_handler(ctx: Context) -> str`
+- [ ] Added import to `{module}/prompts/__init__.py`
+- [ ] Verified module import exists in `app.py` (around line 244-253)
+- [ ] Added Apache license header to new file
+- [ ] Updated `DEFAULT_INSTRUCTIONS` in `app.py` to list the new prompt
+- [ ] Tested locally with MCP client (e.g., Claude Desktop)
+
+## Quick Checklist for New Resources
+
+- [ ] Created resource file in `{module}/resources/{resource_name}.py`
+- [ ] Added `@mcp.resource("superset://{path}")` decorator with unique URI
+- [ ] Added `@mcp_auth_hook` decorator
+- [ ] Implemented resource data retrieval logic
+- [ ] Added import to `{module}/resources/__init__.py`
+- [ ] Verified module import exists in `app.py` (around line 244-253)
+- [ ] Added Apache license header to new file
+- [ ] Updated `DEFAULT_INSTRUCTIONS` in `app.py` to list the new resource
+- [ ] Tested locally with MCP client (e.g., Claude Desktop)
+
+## Getting Help
+
+- Check existing tool implementations for patterns (chart/tool/, dashboard/tool/)
+- Review core classes in `mcp_core.py` for reusable functionality
+- See `CLAUDE.md` in project root for general Superset development guidelines
+- Consult Superset documentation: https://superset.apache.org/docs/
diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py
index 57f55d59102..a091b9943a6 100644
--- a/superset/mcp_service/app.py
+++ b/superset/mcp_service/app.py
@@ -77,6 +77,40 @@ Available Prompts:
- superset_quickstart: Interactive guide for getting started with the MCP service
- create_chart_guided: Step-by-step chart creation wizard
+Common Chart Types (viz_type) and Behaviors:
+
+Interactive Charts (support sorting, filtering, drill-down):
+- table: Standard table view with sorting and filtering
+- pivot_table_v2: Pivot table with grouping and aggregations
+- echarts_timeseries_line: Time series line chart
+- echarts_timeseries_bar: Time series bar chart
+- echarts_timeseries_area: Time series area chart
+- echarts_timeseries_scatter: Time series scatter plot
+- mixed_timeseries: Combined line/bar time series
+
+Common Visualization Types:
+- big_number: Single metric display
+- big_number_total: Total value display
+- pie: Pie chart for proportions
+- echarts_timeseries: Generic time series chart
+- funnel: Funnel chart for conversion analysis
+- gauge_chart: Gauge/speedometer visualization
+- heatmap_v2: Heat map for correlation analysis
+- sankey_v2: Sankey diagram for flow visualization
+- sunburst_v2: Sunburst chart for hierarchical data
+- treemap_v2: Tree map for hierarchical proportions
+- word_cloud: Word cloud visualization
+- world_map: Geographic world map
+- box_plot: Box plot for distribution analysis
+- bubble: Bubble chart for 3-dimensional data
+
+Query Examples:
+- List all interactive tables:
+ filters=[{"col": "viz_type", "opr": "in", "value": ["table", "pivot_table_v2"]}]
+- List time series charts:
+ filters=[{"col": "viz_type", "opr": "sw", "value": "echarts_timeseries"}]
+- Search by name: search="sales"
+
General usage tips:
- All listing tools use 1-based pagination (first page is 1)
- Use 'filters' parameter for advanced queries (see *_available_filters tools)
@@ -202,13 +236,56 @@ def create_mcp_app(
# Create default MCP instance for backward compatibility
# Tool modules can import this and use @mcp.tool decorators
-mcp = create_mcp_app()
+mcp = create_mcp_app(stateless_http=True)
-from superset.mcp_service.chart.tool import ( # noqa: F401, E402
- get_chart_info,
- list_charts,
+# Import all MCP tools to register them with the mcp instance
+# NOTE: Always add new tool imports here when creating new MCP tools.
+# Tools use @mcp.tool decorators and register automatically on import.
+# Import prompts and resources to register them with the mcp instance
+# NOTE: Always add new prompt/resource imports here when creating new prompts/resources.
+# Prompts use @mcp.prompt decorators and resources use @mcp.resource decorators.
+# They register automatically on import, similar to tools.
+from superset.mcp_service.chart import ( # noqa: F401, E402
+ prompts as chart_prompts,
+ resources as chart_resources,
+)
+from superset.mcp_service.chart.tool import ( # noqa: F401, E402
+ generate_chart,
+ get_chart_available_filters,
+ get_chart_data,
+ get_chart_info,
+ get_chart_preview,
+ list_charts,
+ update_chart,
+ update_chart_preview,
+)
+from superset.mcp_service.dashboard.tool import ( # noqa: F401, E402
+ add_chart_to_existing_dashboard,
+ generate_dashboard,
+ get_dashboard_available_filters,
+ get_dashboard_info,
+ list_dashboards,
+)
+from superset.mcp_service.dataset.tool import ( # noqa: F401, E402
+ get_dataset_available_filters,
+ get_dataset_info,
+ list_datasets,
+)
+from superset.mcp_service.explore.tool import ( # noqa: F401, E402
+ generate_explore_link,
+)
+from superset.mcp_service.sql_lab.tool import ( # noqa: F401, E402
+ execute_sql,
+ open_sql_lab_with_context,
+)
+from superset.mcp_service.system import ( # noqa: F401, E402
+ prompts as system_prompts,
+ resources as system_resources,
+)
+from superset.mcp_service.system.tool import ( # noqa: F401, E402
+ get_superset_instance_info,
+ health_check,
)
-from superset.mcp_service.system.tool import health_check # noqa: F401, E402
def init_fastmcp_server(
diff --git a/superset/mcp_service/auth.py b/superset/mcp_service/auth.py
index 9b1fc1de9ba..1a1e68054f4 100644
--- a/superset/mcp_service/auth.py
+++ b/superset/mcp_service/auth.py
@@ -28,11 +28,14 @@ Future enhancements (to be added in separate PRs):
"""
import logging
-from typing import Any, Callable, TypeVar
+from typing import Any, Callable, TYPE_CHECKING, TypeVar
from flask import g
from flask_appbuilder.security.sqla.models import User
+if TYPE_CHECKING:
+ from superset.connectors.sqla.models import SqlaTable
+
# Type variable for decorated functions
F = TypeVar("F", bound=Callable[..., Any])
@@ -45,15 +48,14 @@ def get_user_from_request() -> User:
TODO (future PR): Add JWT token extraction and validation.
TODO (future PR): Add user impersonation support.
+ TODO (future PR): Add fallback user configuration.
- For now, this uses MCP_DEV_USERNAME from configuration for development.
-
- Raises:
- ValueError: If MCP_DEV_USERNAME is not configured or user doesn't exist
+ For now, this returns the admin user for development.
"""
from flask import current_app
+ from sqlalchemy.orm import joinedload
- from superset import security_manager
+ from superset.extensions import db
# TODO: Extract from JWT token once authentication is implemented
# For now, use MCP_DEV_USERNAME from configuration
@@ -62,14 +64,57 @@ def get_user_from_request() -> User:
if not username:
raise ValueError("Username not configured")
- user = security_manager.find_user(username)
+ # Query user directly with eager loading to ensure fresh session-bound object
+ # Do NOT use security_manager.find_user() as it may return cached/detached user
+ user = (
+ db.session.query(User)
+ .options(joinedload(User.roles), joinedload(User.groups))
+ .filter(User.username == username)
+ .first()
+ )
if not user:
- raise ValueError(f"User '{username}' not found")
+ raise ValueError(
+ f"User '{username}' not found. "
+ f"Please create admin user with: superset fab create-admin"
+ )
return user
+def has_dataset_access(dataset: "SqlaTable") -> bool:
+ """
+ Validate user has access to the dataset.
+
+ This function checks if the current user (from Flask g.user context)
+ has permission to access the given dataset using Superset's security manager.
+
+ Args:
+ dataset: The SqlaTable dataset to check access for
+
+ Returns:
+ True if user has access, False otherwise
+
+ Security Note:
+ This should be called after mcp_auth_hook has set g.user.
+ Returns False on any error to fail securely.
+ """
+ try:
+ from superset import security_manager
+
+ # Check if user has read access to the dataset
+ if hasattr(g, "user") and g.user:
+ # Use Superset's security manager to check dataset access
+ return security_manager.can_access_datasource(datasource=dataset)
+
+ # If no user context, deny access
+ return False
+
+ except Exception as e:
+ logger.warning("Error checking dataset access: %s", e)
+ return False # Deny access on error
+
+
def mcp_auth_hook(tool_func: F) -> F:
"""
Authentication and authorization decorator for MCP tools.
@@ -87,17 +132,48 @@ def mcp_auth_hook(tool_func: F) -> F:
@functools.wraps(tool_func)
def wrapper(*args: Any, **kwargs: Any) -> Any:
- # Get user and set Flask context
+ from superset.extensions import db
+
+ # Get user and set Flask context OUTSIDE try block
user = get_user_from_request()
+
+ # Force load relationships NOW while session is definitely active
+ _ = user.roles
+ if hasattr(user, "groups"):
+ _ = user.groups
+
g.user = user
- # TODO: Add permission checks here in future PR
- # TODO: Add audit logging here in future PR
+ try:
+ # TODO: Add permission checks here in future PR
+ # TODO: Add audit logging here in future PR
- logger.debug(
- "MCP tool call: user=%s, tool=%s", user.username, tool_func.__name__
- )
+ logger.debug(
+ "MCP tool call: user=%s, tool=%s", user.username, tool_func.__name__
+ )
- return tool_func(*args, **kwargs)
+ result = tool_func(*args, **kwargs)
+
+ return result
+
+ except Exception:
+ # On error, rollback and cleanup session
+ # pylint: disable=consider-using-transaction
+ try:
+ db.session.rollback()
+ db.session.remove()
+ except Exception as e:
+ logger.warning("Error cleaning up session after exception: %s", e)
+ raise
+
+ finally:
+ # Only rollback if session is still active (no exception occurred)
+ # Do NOT call remove() on success to avoid detaching user
+ try:
+ if db.session.is_active:
+ # pylint: disable=consider-using-transaction
+ db.session.rollback()
+ except Exception as e:
+ logger.warning("Error in finally block: %s", e)
return wrapper # type: ignore[return-value]
diff --git a/superset/mcp_service/chart/chart_utils.py b/superset/mcp_service/chart/chart_utils.py
new file mode 100644
index 00000000000..633e3c6a9c6
--- /dev/null
+++ b/superset/mcp_service/chart/chart_utils.py
@@ -0,0 +1,484 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Shared chart utilities for MCP tools
+
+This module contains shared logic for chart configuration mapping and explore link
+generation that can be used by both generate_chart and generate_explore_link tools.
+"""
+
+from typing import Any, Dict
+
+from superset.mcp_service.chart.schemas import (
+ ChartCapabilities,
+ ChartSemantics,
+ ColumnRef,
+ TableChartConfig,
+ XYChartConfig,
+)
+from superset.mcp_service.utils.url_utils import get_superset_base_url
+from superset.utils import json
+
+
+def generate_explore_link(dataset_id: int | str, form_data: Dict[str, Any]) -> str:
+ """Generate an explore link for the given dataset and form data."""
+ base_url = get_superset_base_url()
+ numeric_dataset_id = None
+
+ try:
+ from superset.commands.explore.form_data.parameters import CommandParameters
+
+ # Find the dataset to get its numeric ID
+ from superset.daos.dataset import DatasetDAO
+ from superset.mcp_service.commands.create_form_data import (
+ MCPCreateFormDataCommand,
+ )
+ from superset.utils.core import DatasourceType
+
+ dataset = None
+
+ if isinstance(dataset_id, int) or (
+ isinstance(dataset_id, str) and dataset_id.isdigit()
+ ):
+ numeric_dataset_id = (
+ int(dataset_id) if isinstance(dataset_id, str) else dataset_id
+ )
+ dataset = DatasetDAO.find_by_id(numeric_dataset_id)
+ else:
+ # Try UUID lookup using DAO flexible method
+ dataset = DatasetDAO.find_by_id(dataset_id, id_column="uuid")
+ if dataset:
+ numeric_dataset_id = dataset.id
+
+ if not dataset or numeric_dataset_id is None:
+ # Fallback to basic explore URL
+ return (
+ f"{base_url}/explore/?datasource_type=table&datasource_id={dataset_id}"
+ )
+
+ # Add datasource to form_data
+ form_data_with_datasource = {
+ **form_data,
+ "datasource": f"{numeric_dataset_id}__table",
+ }
+
+ # Try to create form_data in cache using MCP-specific CreateFormDataCommand
+ cmd_params = CommandParameters(
+ datasource_type=DatasourceType.TABLE,
+ datasource_id=numeric_dataset_id,
+ chart_id=0, # 0 for new charts
+ tab_id=None,
+ form_data=json.dumps(form_data_with_datasource),
+ )
+
+ # Create the form_data cache entry and get the key
+ form_data_key = MCPCreateFormDataCommand(cmd_params).run()
+
+ # Return URL with just the form_data_key
+ return f"{base_url}/explore/?form_data_key={form_data_key}"
+
+ except Exception:
+ # Fallback to basic explore URL with numeric ID if available
+ if numeric_dataset_id is not None:
+ return (
+ f"{base_url}/explore/?datasource_type=table"
+ f"&datasource_id={numeric_dataset_id}"
+ )
+ else:
+ return (
+ f"{base_url}/explore/?datasource_type=table&datasource_id={dataset_id}"
+ )
+
+
+def map_config_to_form_data(
+ config: TableChartConfig | XYChartConfig,
+) -> Dict[str, Any]:
+ """Map chart config to Superset form_data."""
+ if isinstance(config, TableChartConfig):
+ return map_table_config(config)
+ elif isinstance(config, XYChartConfig):
+ return map_xy_config(config)
+ else:
+ raise ValueError(f"Unsupported config type: {type(config)}")
+
+
+def map_table_config(config: TableChartConfig) -> Dict[str, Any]:
+ """Map table chart config to form_data with defensive validation."""
+ # Early validation to prevent empty charts
+ if not config.columns:
+ raise ValueError("Table chart must have at least one column")
+
+ # Separate columns with aggregates from raw columns
+ raw_columns = []
+ aggregated_metrics = []
+
+ for col in config.columns:
+ if col.aggregate:
+ # Column has aggregation - treat as metric
+ aggregated_metrics.append(create_metric_object(col))
+ else:
+ # No aggregation - treat as raw column
+ raw_columns.append(col.name)
+
+ # Final validation - ensure we have some data to display
+ if not raw_columns and not aggregated_metrics:
+ raise ValueError("Table chart configuration resulted in no displayable columns")
+
+ form_data: Dict[str, Any] = {
+ "viz_type": "table",
+ }
+
+ # Handle raw columns (no aggregation)
+ if raw_columns and not aggregated_metrics:
+ # Pure raw columns - show individual rows
+ form_data.update(
+ {
+ "all_columns": raw_columns,
+ "query_mode": "raw",
+ "include_time": False,
+ "order_desc": True,
+ "row_limit": 1000, # Reasonable limit for raw data
+ }
+ )
+
+ # Handle aggregated columns only
+ elif aggregated_metrics and not raw_columns:
+ # Pure aggregation - show totals
+ form_data.update(
+ {
+ "metrics": aggregated_metrics,
+ "query_mode": "aggregate",
+ }
+ )
+
+ # Handle mixed columns (raw + aggregated)
+ elif raw_columns and aggregated_metrics:
+ # Mixed mode - group by raw columns, aggregate metrics
+ form_data.update(
+ {
+ "all_columns": raw_columns,
+ "metrics": aggregated_metrics,
+ "groupby": raw_columns,
+ "query_mode": "aggregate",
+ }
+ )
+
+ if config.filters:
+ form_data["adhoc_filters"] = [
+ {
+ "clause": "WHERE",
+ "expressionType": "SIMPLE",
+ "subject": filter_config.column,
+ "operator": map_filter_operator(filter_config.op),
+ "comparator": filter_config.value,
+ }
+ for filter_config in config.filters
+ if filter_config is not None
+ ]
+
+ if config.sort_by:
+ form_data["order_by_cols"] = config.sort_by
+
+ return form_data
+
+
+def create_metric_object(col: ColumnRef) -> Dict[str, Any]:
+ """Create a metric object for a column with enhanced validation."""
+ # Ensure aggregate is valid - default to SUM if not specified or invalid
+ valid_aggregates = {
+ "SUM",
+ "COUNT",
+ "AVG",
+ "MIN",
+ "MAX",
+ "COUNT_DISTINCT",
+ "STDDEV",
+ "VAR",
+ "MEDIAN",
+ "PERCENTILE",
+ }
+ aggregate = col.aggregate or "SUM"
+
+ # Validate aggregate function (final safety check)
+ if aggregate.upper() not in valid_aggregates:
+ aggregate = "SUM" # Safe fallback
+
+ return {
+ "aggregate": aggregate.upper(),
+ "column": {
+ "column_name": col.name,
+ },
+ "expressionType": "SIMPLE",
+ "label": col.label or f"{aggregate.upper()}({col.name})",
+ "optionName": f"metric_{col.name}",
+ "sqlExpression": None,
+ "hasCustomLabel": bool(col.label),
+ "datasourceWarning": False,
+ }
+
+
+def add_axis_config(form_data: Dict[str, Any], config: XYChartConfig) -> None:
+ """Add axis configurations to form_data."""
+ if config.x_axis:
+ if config.x_axis.title:
+ form_data["x_axis_title"] = config.x_axis.title
+ if config.x_axis.format:
+ form_data["x_axis_format"] = config.x_axis.format
+
+ if config.y_axis:
+ if config.y_axis.title:
+ form_data["y_axis_title"] = config.y_axis.title
+ if config.y_axis.format:
+ form_data["y_axis_format"] = config.y_axis.format
+ if config.y_axis.scale == "log":
+ form_data["y_axis_scale"] = "log"
+
+
+def add_legend_config(form_data: Dict[str, Any], config: XYChartConfig) -> None:
+ """Add legend configuration to form_data."""
+ if config.legend:
+ if not config.legend.show:
+ form_data["show_legend"] = False
+ if config.legend.position:
+ form_data["legend_orientation"] = config.legend.position
+
+
+def map_xy_config(config: XYChartConfig) -> Dict[str, Any]:
+ """Map XY chart config to form_data with defensive validation."""
+ # Early validation to prevent empty charts
+ if not config.y:
+ raise ValueError("XY chart must have at least one Y-axis metric")
+
+ # Map chart kind to viz_type
+ viz_type_map = {
+ "line": "echarts_timeseries_line",
+ "bar": "echarts_timeseries_bar",
+ "area": "echarts_area",
+ "scatter": "echarts_timeseries_scatter",
+ }
+
+ # Convert Y columns to metrics with validation
+ metrics = []
+ for col in config.y:
+ if not col.name.strip(): # Validate column name is not empty
+ raise ValueError("Y-axis column name cannot be empty")
+ metrics.append(create_metric_object(col))
+
+ # Final validation - ensure we have metrics to display
+ if not metrics:
+ raise ValueError("XY chart configuration resulted in no displayable metrics")
+
+ form_data: Dict[str, Any] = {
+ "viz_type": viz_type_map.get(config.kind, "echarts_timeseries_line"),
+ "x_axis": config.x.name,
+ "metrics": metrics,
+ }
+
+ # CRITICAL FIX: For time series charts, handle groupby carefully to avoid duplicates
+ # The x_axis field already tells Superset which column to use for time grouping
+ groupby_columns = []
+
+ # Only add groupby columns if there's an explicit group_by specified
+ # The x_axis column should NOT be duplicated in groupby as it causes
+ # "Duplicate column/metric labels" errors in Superset
+ # Only add group_by column if it's specified AND different from x_axis
+ # NEVER add the x_axis column to groupby as it creates duplicate labels
+ if config.group_by and config.group_by.name != config.x.name:
+ groupby_columns.append(config.group_by.name)
+
+ # Set the groupby in form_data only if we have valid columns
+ # Don't set empty groupby - let Superset handle x_axis grouping automatically
+ if groupby_columns:
+ form_data["groupby"] = groupby_columns
+
+ # Add filters if specified
+ if config.filters:
+ form_data["adhoc_filters"] = [
+ {
+ "clause": "WHERE",
+ "expressionType": "SIMPLE",
+ "subject": filter_config.column,
+ "operator": map_filter_operator(filter_config.op),
+ "comparator": filter_config.value,
+ }
+ for filter_config in config.filters
+ if filter_config is not None
+ ]
+
+ # Add configurations
+ add_axis_config(form_data, config)
+ add_legend_config(form_data, config)
+
+ return form_data
+
+
+def map_filter_operator(op: str) -> str:
+ """Map filter operator to Superset format."""
+ operator_map = {
+ "=": "==",
+ ">": ">",
+ "<": "<",
+ ">=": ">=",
+ "<=": "<=",
+ "!=": "!=",
+ }
+ return operator_map.get(op, op)
+
+
+def generate_chart_name(config: TableChartConfig | XYChartConfig) -> str:
+ """Generate a chart name based on the configuration."""
+ if isinstance(config, TableChartConfig):
+ return f"Table Chart - {', '.join(col.name for col in config.columns)}"
+ elif isinstance(config, XYChartConfig):
+ chart_type = config.kind.capitalize()
+ x_col = config.x.name
+ y_cols = ", ".join(col.name for col in config.y)
+ return f"{chart_type} Chart - {x_col} vs {y_cols}"
+ else:
+ return "Chart"
+
+
+def analyze_chart_capabilities(chart: Any | None, config: Any) -> ChartCapabilities:
+ """Analyze chart capabilities based on type and configuration."""
+ if chart:
+ viz_type = getattr(chart, "viz_type", "unknown")
+ else:
+ # Map config chart_type to viz_type
+ chart_type = getattr(config, "chart_type", "unknown")
+ if chart_type == "xy":
+ kind = getattr(config, "kind", "line")
+ viz_type_map = {
+ "line": "echarts_timeseries_line",
+ "bar": "echarts_timeseries_bar",
+ "area": "echarts_area",
+ "scatter": "echarts_timeseries_scatter",
+ }
+ viz_type = viz_type_map.get(kind, "echarts_timeseries_line")
+ elif chart_type == "table":
+ viz_type = "table"
+ else:
+ viz_type = "unknown"
+
+ # Determine interaction capabilities based on chart type
+ interactive_types = [
+ "echarts_timeseries_line",
+ "echarts_timeseries_bar",
+ "echarts_area",
+ "echarts_timeseries_scatter",
+ "deck_scatter",
+ "deck_hex",
+ ]
+
+ supports_interaction = viz_type in interactive_types
+ supports_drill_down = viz_type in ["table", "pivot_table_v2"]
+ supports_real_time = viz_type in [
+ "echarts_timeseries_line",
+ "echarts_timeseries_bar",
+ ]
+
+ # Determine optimal formats
+ optimal_formats = ["url"] # Always include static image
+ if supports_interaction:
+ optimal_formats.extend(["interactive", "vega_lite"])
+ optimal_formats.extend(["ascii", "table"])
+
+ # Classify data types
+ data_types = []
+ if hasattr(config, "x") and config.x:
+ data_types.append("categorical" if not config.x.aggregate else "metric")
+ if hasattr(config, "y") and config.y:
+ data_types.extend(["metric"] * len(config.y))
+ if "time" in viz_type or "timeseries" in viz_type:
+ data_types.append("time_series")
+
+ return ChartCapabilities(
+ supports_interaction=supports_interaction,
+ supports_real_time=supports_real_time,
+ supports_drill_down=supports_drill_down,
+ supports_export=True, # All charts can be exported
+ optimal_formats=optimal_formats,
+ data_types=list(set(data_types)),
+ )
+
+
+def analyze_chart_semantics(chart: Any | None, config: Any) -> ChartSemantics:
+ """Generate semantic understanding of the chart."""
+ if chart:
+ viz_type = getattr(chart, "viz_type", "unknown")
+ else:
+ # Map config chart_type to viz_type
+ chart_type = getattr(config, "chart_type", "unknown")
+ if chart_type == "xy":
+ kind = getattr(config, "kind", "line")
+ viz_type_map = {
+ "line": "echarts_timeseries_line",
+ "bar": "echarts_timeseries_bar",
+ "area": "echarts_area",
+ "scatter": "echarts_timeseries_scatter",
+ }
+ viz_type = viz_type_map.get(kind, "echarts_timeseries_line")
+ elif chart_type == "table":
+ viz_type = "table"
+ else:
+ viz_type = "unknown"
+
+ # Generate primary insight based on chart type
+ insights_map = {
+ "echarts_timeseries_line": "Shows trends and changes over time",
+ "echarts_timeseries_bar": "Compares values across categories or time periods",
+ "table": "Displays detailed data in tabular format",
+ "pie": "Shows proportional relationships within a dataset",
+ "echarts_area": "Emphasizes cumulative totals and part-to-whole relationships",
+ }
+
+ primary_insight = insights_map.get(
+ viz_type, f"Visualizes data using {viz_type} format"
+ )
+
+ # Generate data story
+ columns = []
+ if hasattr(config, "x") and config.x:
+ columns.append(config.x.name)
+ if hasattr(config, "y") and config.y:
+ columns.extend([col.name for col in config.y])
+
+ if columns:
+ ellipsis = "..." if len(columns) > 3 else ""
+ data_story = (
+ f"This {viz_type} chart analyzes {', '.join(columns[:3])}{ellipsis}"
+ )
+ else:
+ data_story = "This chart provides insights into the selected dataset"
+
+ # Generate recommended actions
+ recommended_actions = [
+ "Review data patterns and trends",
+ "Consider filtering or drilling down for more detail",
+ "Export chart for reporting or sharing",
+ ]
+
+ if viz_type in ["echarts_timeseries_line", "echarts_timeseries_bar"]:
+ recommended_actions.append("Analyze seasonal patterns or cyclical trends")
+
+ return ChartSemantics(
+ primary_insight=primary_insight,
+ data_story=data_story,
+ recommended_actions=recommended_actions,
+ anomalies=[], # Would need actual data analysis to populate
+ statistical_summary={}, # Would need actual data analysis to populate
+ )
diff --git a/superset/mcp_service/chart/preview_utils.py b/superset/mcp_service/chart/preview_utils.py
new file mode 100644
index 00000000000..3db475c0da1
--- /dev/null
+++ b/superset/mcp_service/chart/preview_utils.py
@@ -0,0 +1,561 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Preview utilities for chart generation without saving.
+
+This module provides utilities for generating chart previews
+from form data without requiring a saved chart object.
+"""
+
+import logging
+from typing import Any, Dict, List
+
+from superset.commands.chart.data.get_data_command import ChartDataCommand
+from superset.mcp_service.chart.schemas import (
+ ASCIIPreview,
+ ChartError,
+ TablePreview,
+ VegaLitePreview,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def generate_preview_from_form_data(
+ form_data: Dict[str, Any], dataset_id: int, preview_format: str
+) -> Any:
+ """
+ Generate preview from form data without a saved chart.
+
+ Args:
+ form_data: Chart configuration form data
+ dataset_id: Dataset ID
+ preview_format: Preview format (ascii, table, etc.)
+
+ Returns:
+ Preview object or ChartError
+ """
+ try:
+ # Execute query to get data
+ from superset.connectors.sqla.models import SqlaTable
+ from superset.extensions import db
+
+ dataset = db.session.query(SqlaTable).get(dataset_id)
+ if not dataset:
+ return ChartError(
+ error=f"Dataset {dataset_id} not found", error_type="DatasetNotFound"
+ )
+
+ # Create query context from form data using factory
+ from superset.common.query_context_factory import QueryContextFactory
+
+ factory = QueryContextFactory()
+ query_context_obj = factory.create(
+ datasource={"id": dataset_id, "type": "table"},
+ queries=[
+ {
+ "columns": form_data.get("columns", []),
+ "metrics": form_data.get("metrics", []),
+ "orderby": form_data.get("orderby", []),
+ "row_limit": form_data.get("row_limit", 100),
+ "filters": form_data.get("adhoc_filters", []),
+ "time_range": form_data.get("time_range", "No filter"),
+ }
+ ],
+ form_data=form_data,
+ )
+
+ # Execute query
+ command = ChartDataCommand(query_context_obj)
+ result = command.run()
+
+ if not result or not result.get("queries"):
+ return ChartError(
+ error="No data returned from query", error_type="EmptyResult"
+ )
+
+ query_result = result["queries"][0]
+ data = query_result.get("data", [])
+
+ # Generate preview based on format
+ if preview_format == "ascii":
+ return _generate_ascii_preview_from_data(data, form_data)
+ elif preview_format == "table":
+ return _generate_table_preview_from_data(data, form_data)
+ elif preview_format == "vega_lite":
+ return _generate_vega_lite_preview_from_data(data, form_data)
+ else:
+ return ChartError(
+ error=f"Unsupported preview format: {preview_format}",
+ error_type="UnsupportedFormat",
+ )
+
+ except Exception as e:
+ logger.error("Preview generation from form data failed: %s", e)
+ return ChartError(
+ error=f"Failed to generate preview: {str(e)}", error_type="PreviewError"
+ )
+
+
+def _generate_ascii_preview_from_data(
+ data: List[Dict[str, Any]], form_data: Dict[str, Any]
+) -> ASCIIPreview:
+ """Generate ASCII preview from raw data."""
+ viz_type = form_data.get("viz_type", "table")
+
+ # Handle different chart types
+ if viz_type in ["bar", "dist_bar", "column"]:
+ content = _generate_safe_ascii_bar_chart(data)
+ elif viz_type in ["line", "area"]:
+ content = _generate_safe_ascii_line_chart(data)
+ elif viz_type == "pie":
+ content = _generate_safe_ascii_pie_chart(data)
+ else:
+ content = _generate_safe_ascii_table(data)
+
+ return ASCIIPreview(
+ ascii_content=content, width=80, height=20, supports_color=False
+ )
+
+
+def _calculate_column_widths(
+ display_columns: List[str], data: List[Dict[str, Any]]
+) -> Dict[str, int]:
+ """Calculate optimal width for each column."""
+ column_widths = {}
+ for col in display_columns:
+ # Start with column name length
+ max_width = len(str(col))
+
+ # Check data values to determine width
+ for row in data[:20]: # Sample first 20 rows
+ val = row.get(col, "")
+ if isinstance(val, float):
+ val_str = f"{val:.2f}"
+ elif isinstance(val, int):
+ val_str = str(val)
+ else:
+ val_str = str(val)
+ max_width = max(max_width, len(val_str))
+
+ # Set reasonable bounds
+ column_widths[col] = min(max(max_width, 8), 25)
+ return column_widths
+
+
+def _format_value(val: Any, width: int) -> str:
+ """Format a value based on its type."""
+ if isinstance(val, float):
+ if abs(val) >= 1000000:
+ val_str = f"{val:.2e}" # Scientific notation for large numbers
+ elif abs(val) >= 1000:
+ val_str = f"{val:,.2f}" # Thousands separator
+ else:
+ val_str = f"{val:.2f}"
+ elif isinstance(val, int):
+ if abs(val) >= 1000:
+ val_str = f"{val:,}" # Thousands separator
+ else:
+ val_str = str(val)
+ elif val is None:
+ val_str = "NULL"
+ else:
+ val_str = str(val)
+
+ # Truncate if too long
+ if len(val_str) > width:
+ val_str = val_str[: width - 2] + ".."
+ return val_str
+
+
+def _generate_table_preview_from_data(
+ data: List[Dict[str, Any]], form_data: Dict[str, Any]
+) -> TablePreview:
+ """Generate table preview from raw data with improved formatting."""
+ if not data:
+ return TablePreview(
+ table_data="No data available", row_count=0, supports_sorting=False
+ )
+
+ # Get columns
+ columns = list(data[0].keys()) if data else []
+
+ # Determine optimal column widths and how many columns to show
+ max_columns = 8 # Show more columns than before
+ display_columns = columns[:max_columns]
+
+ # Calculate optimal width for each column
+ column_widths = _calculate_column_widths(display_columns, data)
+
+ # Format table with proper alignment
+ lines = ["Table Preview", "=" * 80]
+
+ # Header with dynamic width
+ header_parts = []
+ separator_parts = []
+ for col in display_columns:
+ width = column_widths[col]
+ col_name = str(col)
+ if len(col_name) > width:
+ col_name = col_name[: width - 2] + ".."
+ header_parts.append(f"{col_name:<{width}}")
+ separator_parts.append("-" * width)
+
+ lines.append(" | ".join(header_parts))
+ lines.append("-+-".join(separator_parts))
+
+ # Data rows with proper formatting
+ rows_shown = min(len(data), 15) # Show more rows
+ for row in data[:rows_shown]:
+ row_parts = []
+ for col in display_columns:
+ width = column_widths[col]
+ val = row.get(col, "")
+ val_str = _format_value(val, width)
+ row_parts.append(f"{val_str:<{width}}")
+ lines.append(" | ".join(row_parts))
+
+ # Summary information
+ if len(data) > rows_shown:
+ lines.append(f"... and {len(data) - rows_shown} more rows")
+
+ if len(columns) > max_columns:
+ lines.append(f"... and {len(columns) - max_columns} more columns")
+
+ lines.append("")
+ lines.append(f"Total: {len(data)} × {len(columns)} columns")
+
+ return TablePreview(
+ table_data="\n".join(lines), row_count=len(data), supports_sorting=True
+ )
+
+
+def _generate_safe_ascii_bar_chart(data: List[Dict[str, Any]]) -> str:
+ """Generate ASCII bar chart with proper error handling."""
+ if not data:
+ return "No data available for bar chart"
+
+ lines = ["ASCII Bar Chart", "=" * 50]
+
+ # Extract values safely
+ values = []
+ labels = []
+
+ for row in data[:10]:
+ label = None
+ value = None
+
+ for _, val in row.items():
+ if isinstance(val, (int, float)) and not _is_nan(val) and value is None:
+ value = val
+ elif isinstance(val, str) and label is None:
+ label = val
+
+ if value is not None:
+ values.append(value)
+ labels.append(label or f"Item {len(values)}")
+
+ if not values:
+ return "No numeric data found for bar chart"
+
+ # Generate bars
+ max_val = max(values)
+ if max_val == 0:
+ return "All values are zero"
+
+ for label, value in zip(labels, values, strict=False):
+ bar_length = int((value / max_val) * 30)
+ bar = "█" * bar_length
+ lines.append(f"{label[:10]:>10} |{bar:<30} {value:.2f}")
+
+ return "\n".join(lines)
+
+
+def _generate_safe_ascii_line_chart(data: List[Dict[str, Any]]) -> str:
+ """Generate ASCII line chart with proper NaN handling."""
+ if not data:
+ return "No data available for line chart"
+
+ lines = ["ASCII Line Chart", "=" * 50]
+ values = _extract_numeric_values_safe(data)
+
+ if not values:
+ return "No valid numeric data found for line chart"
+
+ range_str = _format_range_display(values)
+ lines.append(range_str)
+
+ sparkline = _generate_sparkline_safe(values)
+ lines.append(sparkline)
+
+ return "\n".join(lines)
+
+
+def _extract_numeric_values_safe(data: List[Dict[str, Any]]) -> List[float]:
+ """Extract numeric values safely from data."""
+ values = []
+ for row in data[:20]:
+ for _, val in row.items():
+ if isinstance(val, (int, float)) and not _is_nan(val):
+ values.append(val)
+ break
+ return values
+
+
+def _format_range_display(values: List[float]) -> str:
+ """Format range display safely."""
+ min_val = min(values)
+ max_val = max(values)
+
+ if _is_nan(min_val) or _is_nan(max_val):
+ return "Range: Unable to calculate"
+ else:
+ return f"Range: {min_val:.2f} to {max_val:.2f}"
+
+
+def _generate_sparkline_safe(values: List[float]) -> str:
+ """Generate sparkline from values."""
+ if not values:
+ return ""
+
+ min_val = min(values)
+
+ if (max_val := max(values)) != min_val:
+ sparkline = ""
+ for val in values:
+ normalized = (val - min_val) / (max_val - min_val)
+ if normalized < 0.2:
+ sparkline += "▁"
+ elif normalized < 0.4:
+ sparkline += "▃"
+ elif normalized < 0.6:
+ sparkline += "▅"
+ elif normalized < 0.8:
+ sparkline += "▇"
+ else:
+ sparkline += "█"
+ return sparkline
+ else:
+ return "▄" * len(values)  # Flat line if all values are same
+
+
def _generate_safe_ascii_pie_chart(data: List[Dict[str, Any]]) -> str:
    """Render a text-based pie chart as proportional horizontal bars.

    Each of the first eight rows donates its first non-NaN numeric cell as
    a slice value and its first string cell as the slice label. Rows with
    no positive numeric value are skipped.
    """
    if not data:
        return "No data available for pie chart"

    output = ["ASCII Pie Chart", "=" * 50]

    slice_values: List[float] = []
    slice_labels: List[str] = []

    for row in data[:8]:
        row_value = None
        row_label = None

        for cell in row.values():
            if row_value is None and isinstance(cell, (int, float)) and not _is_nan(cell):
                row_value = cell
            elif row_label is None and isinstance(cell, str):
                row_label = cell

        # Only positive values make sense as pie slices.
        if row_value is not None and row_value > 0:
            slice_values.append(row_value)
            slice_labels.append(row_label or f"Slice {len(slice_values)}")

    if not slice_values:
        return "No valid data for pie chart"

    total = sum(slice_values)
    if total == 0:
        return "Total is zero"

    for slice_label, slice_value in zip(slice_labels, slice_values, strict=False):
        percentage = (slice_value / total) * 100
        # One bar character per ~3 percentage points keeps lines short.
        bar = "โ" * int(percentage / 3)
        output.append(f"{slice_label[:15]:>15}: {bar} {percentage:.1f}%")

    return "\n".join(output)
+
+
+def _generate_safe_ascii_table(data: List[Dict[str, Any]]) -> str:
+ """Generate ASCII table with safe formatting."""
+ if not data:
+ return "No data available"
+
+ lines = ["Data Table", "=" * 50]
+
+ # Get columns
+ columns = list(data[0].keys()) if data else []
+
+ # Format header
+ header = " | ".join(str(col)[:10] for col in columns[:5])
+ lines.append(header)
+ lines.append("-" * len(header))
+
+ # Format rows
+ for row in data[:10]:
+ row_str = " | ".join(str(row.get(col, ""))[:10] for col in columns[:5])
+ lines.append(row_str)
+
+ if len(data) > 10:
+ lines.append(f"... {len(data) - 10} more rows")
+
+ return "\n".join(lines)
+
+
+def _is_nan(value: Any) -> bool:
+ """Check if a value is NaN."""
+ try:
+ import math
+
+ return math.isnan(float(value))
+ except (ValueError, TypeError):
+ return False
+
+
def _generate_vega_lite_preview_from_data(  # noqa: C901
    data: List[Dict[str, Any]], form_data: Dict[str, Any]
) -> VegaLitePreview:
    """Generate Vega-Lite preview from raw data and form_data.

    Builds a self-contained Vega-Lite v5 spec with the data embedded
    inline, mapping the Superset viz type to a mark and inferring the
    x/y/color encodings from ``form_data`` and the first data row.
    """
    viz_type = form_data.get("viz_type", "table")

    # Map Superset viz types to Vega-Lite marks
    viz_to_mark = {
        "echarts_timeseries_line": "line",
        "echarts_timeseries_bar": "bar",
        "echarts_area": "area",
        "echarts_timeseries_scatter": "point",
        "bar": "bar",
        "line": "line",
        "area": "area",
        "scatter": "point",
        "pie": "arc",
        "table": "text",
    }

    mark = viz_to_mark.get(viz_type, "bar")

    # Basic Vega-Lite spec with the result rows embedded inline
    spec = {
        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
        "data": {"values": data},
        "mark": mark,
    }

    # Get x_axis and metrics from form_data
    x_axis = form_data.get("x_axis")
    metrics = form_data.get("metrics", [])
    groupby = form_data.get("groupby", [])

    # Build encoding based on available fields
    encoding = {}

    # Handle X-axis
    if x_axis and x_axis in (data[0] if data else {}):
        # Detect field type from the first row's value for this column.
        field_type = "nominal"  # default
        if data and len(data) > 0:
            sample_val = data[0].get(x_axis)
            if isinstance(sample_val, str):
                # Heuristic: date-like separators suggest a temporal axis.
                if any(char in str(sample_val) for char in ["-", "/", ":"]):
                    field_type = "temporal"
                else:
                    field_type = "nominal"
            elif isinstance(sample_val, (int, float)):
                field_type = "quantitative"

        encoding["x"] = {
            "field": x_axis,
            "type": field_type,
            "title": x_axis,
        }

    # Handle Y-axis (metrics)
    if metrics and data:
        # Find the first metric column in the data
        metric_col = None
        for col in data[0].keys():
            # Check if this is a metric column (usually has aggregation in name)
            if any(
                agg in str(col).upper()
                for agg in ["SUM", "AVG", "COUNT", "MIN", "MAX", "TOTAL"]
            ):
                metric_col = col
                break
            # Or check if it's numeric
            elif isinstance(data[0].get(col), (int, float)):
                metric_col = col
                break

        if metric_col:
            encoding["y"] = {
                "field": metric_col,
                "type": "quantitative",
                "title": metric_col,
            }

    # Handle color encoding for groupby
    if groupby and len(groupby) > 0 and groupby[0] in (data[0] if data else {}):
        encoding["color"] = {
            "field": groupby[0],
            "type": "nominal",
            "title": groupby[0],
        }

    # Special handling for pie charts
    if mark == "arc" and data:
        # Pie charts encode the measure as an angle (theta), not a y position.
        if "y" in encoding:
            encoding["theta"] = encoding.pop("y")
            encoding["theta"]["stack"] = True
        if "x" in encoding:
            # Use x as color for pie
            encoding["color"] = {
                "field": encoding["x"]["field"],
                "type": "nominal",
            }
            del encoding["x"]

    # Add encoding to spec
    if encoding:
        spec["encoding"] = encoding

    # Add responsive sizing - Vega-Lite supports "container" as a special width value
    spec["width"] = "container"
    spec["height"] = 400  # type: ignore

    # Add hover interactivity. Vega-Lite v5 replaced the v4 "selection"
    # block with top-level "params", and the "single" selection type with
    # "point" — the old syntax is rejected by v5 renderers, so emitting it
    # alongside the v5 $schema produced an invalid spec.
    if mark in ["line", "point", "bar", "area"]:
        spec["params"] = [
            {
                "name": "highlight",
                "select": {"type": "point", "on": "mouseover"},
            }
        ]

    return VegaLitePreview(
        specification=spec,
        data_url=None,
        supports_streaming=False,
    )
diff --git a/superset/mcp_service/chart/prompts/__init__.py b/superset/mcp_service/chart/prompts/__init__.py
new file mode 100644
index 00000000000..ce7fa27ec7f
--- /dev/null
+++ b/superset/mcp_service/chart/prompts/__init__.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Chart prompts for Superset MCP service"""
+
+# Import to register prompts when module is imported
+from . import create_chart_guided # noqa: F401
diff --git a/superset/mcp_service/chart/prompts/create_chart_guided.py b/superset/mcp_service/chart/prompts/create_chart_guided.py
new file mode 100644
index 00000000000..7213b856f4c
--- /dev/null
+++ b/superset/mcp_service/chart/prompts/create_chart_guided.py
@@ -0,0 +1,195 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Chart prompts for visualization guidance
+"""
+
+import logging
+
+from superset.mcp_service.app import mcp
+from superset.mcp_service.auth import mcp_auth_hook
+
+logger = logging.getLogger(__name__)
+
+
@mcp.prompt("create_chart_guided")
@mcp_auth_hook
async def create_chart_guided_prompt(
    chart_type: str = "auto", business_goal: str = "exploration"
) -> str:
    """
    AI-powered chart creation guide following Anthropic's agent design principles.

    This prompt implements:
    - Transparency: Clear reasoning at each step
    - Proactive Intelligence: Suggests insights before being asked
    - Context Awareness: Maintains conversational flow
    - Business Focus: Translates data into actionable insights
    - Validation: Verifies choices before proceeding
    - Natural Interaction: Conversational, not configuration-driven

    Args:
        chart_type: Preferred chart type (auto, line, bar, pie, table, scatter, area)
        business_goal: Purpose (exploration, reporting, monitoring, presentation)

    Returns:
        A markdown prompt string personalized with the selected chart type
        and business goal.
    """

    # Enhanced chart intelligence with business context.
    # Each entry feeds the "Your Visualization Goal" section of the prompt.
    chart_intelligence = {
        "line": {
            "description": "Time series visualization for trend analysis",
            "best_for": "Tracking performance over time, identifying patterns",
            "business_value": "Reveals growth trends, seasonality, and patterns",
            "data_requirements": "Temporal column + continuous metrics",
        },
        "bar": {
            "description": "Category comparison visualization",
            "best_for": "Ranking, comparisons, and performance by category",
            "business_value": "Identifies top performers, bottlenecks, and gaps",
            "data_requirements": "Categorical dimensions + aggregatable metrics",
        },
        "scatter": {
            "description": "Correlation and relationship analysis",
            "best_for": "Finding relationships, outlier detection, clustering",
            "business_value": "Uncovers hidden correlations and identifies anomalies",
            "data_requirements": "Two continuous variables, optional grouping",
        },
        "table": {
            "description": "Detailed data exploration and exact values",
            "best_for": "Detailed analysis, data validation, precise values",
            "business_value": "Provides granular insights and detailed reporting",
            "data_requirements": "Any combination of dimensions and metrics",
        },
        "area": {
            "description": "Volume and composition over time",
            "best_for": "Showing cumulative effects, stacked comparisons",
            "business_value": "Visualizes contribution and total volume trends",
            "data_requirements": "Temporal dimension + stackable metrics",
        },
        "auto": {
            "description": "AI-powered visualization recommendation",
            "best_for": "When you're not sure what chart type to use",
            "business_value": "Optimizes chart choice based on data characteristics",
            "data_requirements": "I'll analyze your data and recommend the best type",
        },
    }

    # Business context intelligence — parallels chart_intelligence but keyed
    # by the caller's stated purpose.
    goal_intelligence = {
        "exploration": {
            "approach": "Interactive discovery and pattern finding",
            "features": "Filters, drill-downs, multiple perspectives",
            "outcome": "Uncover hidden insights and generate hypotheses",
        },
        "reporting": {
            "approach": "Clear, professional, and consistent presentation",
            "features": "Clean design, appropriate aggregation, clear labels",
            "outcome": "Reliable, repeatable business reporting",
        },
        "monitoring": {
            "approach": "Real-time tracking with clear thresholds",
            "features": "Alert conditions, trend indicators, key metrics",
            "outcome": "Proactive issue detection and performance tracking",
        },
        "presentation": {
            "approach": "Compelling visual storytelling",
            "features": "Engaging colors, clear messaging, audience-appropriate detail",
            "outcome": "Persuasive data-driven presentations for stakeholders",
        },
    }

    # Unrecognized inputs fall back to the "auto" / "exploration" entries
    # rather than raising, so the prompt always renders.
    selected_chart = chart_intelligence.get(chart_type, chart_intelligence["auto"])
    selected_goal = goal_intelligence.get(
        business_goal, goal_intelligence["exploration"]
    )

    # NOTE(review): the emoji in the template below render as mojibake in
    # this view — confirm the source file is UTF-8 encoded.
    return f"""๐ฏ **AI-Powered Chart Creation Assistant**

I'm your intelligent data visualization partner! Let me help you create charts.

**Your Visualization Goal:**
๐ **Chart Focus**: {chart_type.title()} - {selected_chart["description"]}
๐ฏ **Business Purpose**: {business_goal.title()} - {selected_goal["approach"]}
๐ก **Expected Value**: {selected_chart["business_value"]}

---

## ๐ My Intelligent Approach

### **Phase 1: Data Intelligence** ๐
I'll automatically analyze your dataset to understand:
- **Data characteristics** (types, distributions, quality)
- **Business relationships** (correlations, hierarchies, trends)
- **Visualization opportunities** (what stories your data can tell)
- **Performance considerations** (size, complexity, aggregation needs)

*Why this matters: The right chart depends on your data's unique characteristics*

### **Phase 2: Smart Recommendations** ๐ง
Based on your data analysis, I'll:
- **Recommend optimal chart types** with confidence scores and reasoning
- **Suggest meaningful metrics** that align with your business goal
- **Identify interesting patterns** you might want to highlight
- **Propose filters** to focus on what matters most

*Why this matters: I'll spot opportunities you might miss and save you time*

### **Phase 3: Intelligent Configuration** โ๏ธ
I'll configure your chart with:
- **Business-appropriate aggregations** (daily, weekly, monthly for time series)
- **Meaningful labels and formatting** (currency, percentages, readable names)
- **Performance optimizations** (appropriate limits, caching strategies)
- **Visual best practices** (colors, scales, legends that enhance understanding)

*Why this matters: Proper configuration makes charts both beautiful and actionable*

### **Phase 4: Validation & Refinement** ๐ฏ
Before finalizing, I'll:
- **Verify the chart answers your business question**
- **Check data quality and completeness**
- **Suggest improvements** based on visualization best practices
- **Provide preview** so you can see exactly what you're getting

*Why this matters: Great charts require iteration and validation*

---

## ๐ฌ Let's Begin Your Data Story

I'm ready to be your proactive data exploration partner. Here's how we can start:

**Option 1: Quick Start** โก
Tell me: *"What business question are you trying to answer?"*
(e.g., "How are our sales trending?" or "Which products perform best?")

**Option 2: Dataset Exploration** ๐
I can show you available datasets: `list_datasets`
Or explore a specific one: `get_dataset_info [dataset_id]`

**Option 3: Visual Inspiration** ๐จ
Browse pre-built chart configurations: `superset://chart/configs` resource
Perfect for when you want to see examples of great charts!

**Option 4: Autonomous Discovery** ๐ค
Just point me to a dataset and say *"Find something interesting"*
I'll explore autonomously and surface the most compelling insights!

---

๐ก **Pro Tip**: Great charts combine business intuition with data analysis!

**What's your data challenge today?** ๐"""
diff --git a/superset/mcp_service/chart/resources/__init__.py b/superset/mcp_service/chart/resources/__init__.py
new file mode 100644
index 00000000000..4b886b25762
--- /dev/null
+++ b/superset/mcp_service/chart/resources/__init__.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Chart resources for Superset MCP service"""
+
+# Import to register resources when module is imported
+from . import chart_configs # noqa: F401
diff --git a/superset/mcp_service/chart/resources/chart_configs.py b/superset/mcp_service/chart/resources/chart_configs.py
new file mode 100644
index 00000000000..49eb109ae80
--- /dev/null
+++ b/superset/mcp_service/chart/resources/chart_configs.py
@@ -0,0 +1,362 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Chart resources for valid ChartConfig examples and templates
+"""
+
+import logging
+
+from superset.mcp_service.app import mcp
+from superset.mcp_service.auth import mcp_auth_hook
+
+logger = logging.getLogger(__name__)
+
+
@mcp.resource("superset://chart/configs")
@mcp_auth_hook
def get_chart_configs_resource() -> str:
    """
    Provide valid ChartConfig examples that match the exact schema.

    This resource gives LLMs access to:
    - Valid ChartConfig examples for XYChartConfig and TableChartConfig
    - Working configurations that pass schema validation
    - Examples with proper ColumnRef, FilterConfig, AxisConfig, and LegendConfig
    - Best practices for each chart type configuration

    Returns:
        A JSON document (string) bundling example configs, a schema
        reference, best practices, common patterns, and metadata.
    """

    # Valid XYChartConfig examples - these match the exact schema
    xy_chart_configs = {
        "line_chart": {
            "description": "Basic line chart for time series analysis",
            "config": {
                "chart_type": "xy",
                "kind": "line",
                "x": {"name": "created_on", "label": "Date Created"},
                "y": [
                    {
                        "name": "count_metric",
                        "aggregate": "COUNT",
                        "label": "Total Count",
                    }
                ],
            },
            "use_cases": [
                "Time series trends",
                "Historical analysis",
                "Growth tracking",
            ],
        },
        "bar_chart": {
            "description": "Bar chart for category comparison",
            "config": {
                "chart_type": "xy",
                "kind": "bar",
                "x": {"name": "category", "label": "Category"},
                "y": [{"name": "sales", "aggregate": "SUM", "label": "Total Sales"}],
                "x_axis": {"title": "Product Categories", "scale": "linear"},
                "y_axis": {
                    "title": "Revenue ($)",
                    "format": "$,.0f",
                    "scale": "linear",
                },
            },
            "use_cases": ["Category comparison", "Rankings", "Performance metrics"],
        },
        "multi_metric_line": {
            "description": "Multi-metric line chart with grouping",
            "config": {
                "chart_type": "xy",
                "kind": "line",
                "x": {"name": "date_column", "label": "Date"},
                "y": [
                    {"name": "revenue", "aggregate": "SUM", "label": "Revenue"},
                    {
                        "name": "users",
                        "aggregate": "COUNT_DISTINCT",
                        "label": "Unique Users",
                    },
                ],
                "group_by": {"name": "region", "label": "Region"},
                "legend": {"show": True, "position": "right"},
                "filters": [{"column": "status", "op": "=", "value": "active"}],
            },
            "use_cases": [
                "Multi-dimensional analysis",
                "Regional comparisons",
                "KPI tracking",
            ],
        },
        "scatter_plot": {
            "description": "Scatter plot for correlation analysis",
            "config": {
                "chart_type": "xy",
                "kind": "scatter",
                "x": {
                    "name": "advertising_spend",
                    "aggregate": "AVG",
                    "label": "Avg Ad Spend",
                },
                "y": [
                    {
                        "name": "conversion_rate",
                        "aggregate": "AVG",
                        "label": "Avg Conversion Rate",
                    }
                ],
                "group_by": {"name": "campaign_type", "label": "Campaign Type"},
                "x_axis": {"title": "Average Advertising Spend", "format": "$,.0f"},
                "y_axis": {"title": "Conversion Rate", "format": ".2%"},
            },
            "use_cases": [
                "Correlation analysis",
                "Outlier detection",
                "Performance relationships",
            ],
        },
        "area_chart": {
            "description": "Area chart for volume visualization",
            "config": {
                "chart_type": "xy",
                "kind": "area",
                "x": {"name": "month", "label": "Month"},
                "y": [
                    {"name": "signups", "aggregate": "SUM", "label": "Monthly Signups"}
                ],
                "filters": [
                    {"column": "year", "op": ">=", "value": 2023},
                    {"column": "active", "op": "=", "value": True},
                ],
            },
            "use_cases": ["Volume trends", "Cumulative metrics", "Stacked comparisons"],
        },
    }

    # Valid TableChartConfig examples - these match the exact schema
    table_chart_configs = {
        "basic_table": {
            "description": "Basic data table with multiple columns",
            "config": {
                "chart_type": "table",
                "columns": [
                    {"name": "name", "label": "Customer Name"},
                    {"name": "email", "label": "Email Address"},
                    {"name": "orders", "aggregate": "COUNT", "label": "Total Orders"},
                    {"name": "revenue", "aggregate": "SUM", "label": "Total Revenue"},
                ],
                "sort_by": ["Total Revenue"],
            },
            "use_cases": [
                "Detailed data views",
                "Customer lists",
                "Transaction records",
            ],
        },
        "aggregated_table": {
            "description": "Table with aggregated metrics and filters",
            "config": {
                "chart_type": "table",
                "columns": [
                    {"name": "region", "label": "Sales Region"},
                    {
                        "name": "sales_amount",
                        "aggregate": "SUM",
                        "label": "Total Sales",
                    },
                    {
                        "name": "sales_amount",
                        "aggregate": "AVG",
                        "label": "Average Sale",
                    },
                    {
                        "name": "customer_id",
                        "aggregate": "COUNT_DISTINCT",
                        "label": "Unique Customers",
                    },
                ],
                "filters": [
                    {"column": "sale_date", "op": ">=", "value": "2024-01-01"},
                    {"column": "status", "op": "!=", "value": "cancelled"},
                ],
                "sort_by": ["Total Sales", "Sales Region"],
            },
            "use_cases": ["Summary reports", "Regional analysis", "Performance tables"],
        },
    }

    # Schema reference for developers.
    # NOTE(review): kept as hand-maintained prose — must be updated in
    # lockstep with the Pydantic models in chart/schemas.py.
    schema_reference = {
        "ChartConfig": {
            "description": "Union type - XYChartConfig or TableChartConfig by type",
            "discriminator": "chart_type",
            "types": ["xy", "table"],
        },
        "XYChartConfig": {
            "required_fields": ["chart_type", "x", "y"],
            "optional_fields": [
                "kind",
                "group_by",
                "x_axis",
                "y_axis",
                "legend",
                "filters",
            ],
            "chart_type": "xy",
            "kind_options": ["line", "bar", "area", "scatter"],
            "validation_rules": [
                "All column labels must be unique across x, y, and group_by",
                "Y-axis must have at least one column",
                "Column names must match pattern: ^[a-zA-Z0-9_][a-zA-Z0-9_\\s\\-\\.]*$",
            ],
        },
        "TableChartConfig": {
            "required_fields": ["chart_type", "columns"],
            "optional_fields": ["filters", "sort_by"],
            "chart_type": "table",
            "validation_rules": [
                "Must have at least one column",
                "All column labels must be unique",
                "Column names must match pattern: ^[a-zA-Z0-9_][a-zA-Z0-9_\\s\\-\\.]*$",
            ],
        },
        "ColumnRef": {
            "required_fields": ["name"],
            "optional_fields": ["label", "dtype", "aggregate"],
            "aggregate_options": [
                "SUM",
                "COUNT",
                "AVG",
                "MIN",
                "MAX",
                "COUNT_DISTINCT",
                "STDDEV",
                "VAR",
                "MEDIAN",
                "PERCENTILE",
            ],
            "validation_rules": [
                "Name cannot be empty and must follow pattern",
                "Labels are HTML-escaped to prevent XSS",
                "Aggregates are validated against allowed functions",
            ],
        },
        "FilterConfig": {
            "required_fields": ["column", "op", "value"],
            "operator_options": ["=", ">", "<", ">=", "<=", "!="],
            "value_types": ["string", "number", "boolean"],
            "validation_rules": [
                "Column names are sanitized to prevent injection",
                "Values are checked for malicious patterns",
                "String values are HTML-escaped",
            ],
        },
        "AxisConfig": {
            "optional_fields": ["title", "scale", "format"],
            "scale_options": ["linear", "log"],
            "format_examples": ["$,.2f", ".2%", ",.0f", ".1f"],
        },
        "LegendConfig": {
            "optional_fields": ["show", "position"],
            "show_default": True,
            "position_options": ["top", "bottom", "left", "right"],
            "position_default": "right",
        },
    }

    # Best practices for each configuration type
    best_practices = {
        "xy_charts": [
            "Use descriptive labels for axes and metrics",
            "Choose appropriate aggregation functions for your data",
            "Limit the number of Y-axis metrics (3-5 maximum)",
            "Use filters to focus on relevant data",
            "Configure axis formatting for better readability",
            "Consider grouping when comparing categories",
            "Use chart kinds: line for trends, bar for comparisons, scatter plots",
        ],
        "table_charts": [
            "Include essential columns only to avoid clutter",
            "Use meaningful column labels",
            "Apply sorting to highlight important data",
            "Use filters to limit result sets",
            "Mix dimensions and aggregated metrics appropriately",
            "Ensure unique labels to avoid conflicts",
            "Consider performance with large datasets",
        ],
        "general": [
            "Always specify chart_type as the first field",
            "Use consistent naming conventions for columns",
            "Validate column names exist in your dataset",
            "Test configurations with actual data",
            "Consider caching for frequently accessed charts",
            "Apply security best practices - avoid user input in column names",
        ],
    }

    # Common patterns and examples
    common_patterns = {
        "time_series": {
            "description": "Standard time-based analysis",
            "x_column_types": ["date", "datetime", "timestamp"],
            "recommended_aggregations": ["SUM", "COUNT", "AVG"],
            "best_chart_types": ["line", "area", "bar"],
        },
        "categorical_analysis": {
            "description": "Comparing discrete categories",
            "x_column_types": ["string", "category", "enum"],
            "recommended_aggregations": ["SUM", "COUNT", "COUNT_DISTINCT", "AVG"],
            "best_chart_types": ["bar", "table"],
        },
        "correlation_analysis": {
            "description": "Finding relationships between variables",
            "requirements": ["Two numerical metrics"],
            "recommended_aggregations": ["AVG", "SUM", "MEDIAN"],
            "best_chart_types": ["scatter"],
        },
    }

    # Assemble the full resource payload returned to the MCP client.
    resource_data = {
        "xy_chart_configs": xy_chart_configs,
        "table_chart_configs": table_chart_configs,
        "schema_reference": schema_reference,
        "best_practices": best_practices,
        "common_patterns": common_patterns,
        "metadata": {
            "version": "1.0",
            "schema_version": "ChartConfig v1.0",
            "last_updated": "2025-08-07",
            "usage_notes": [
                "All examples are valid ChartConfig objects that pass validation",
                "Copy these configurations directly into generate_chart requests",
                "Modify column names and labels to match your actual dataset",
                "Test configurations with get_dataset_info to verify columns",
                "All examples follow security best practices and input validation",
            ],
            "validation_info": [
                "Column names must match: ^[a-zA-Z0-9_][a-zA-Z0-9_\\s\\-\\.]*$",
                "Labels are automatically HTML-escaped for security",
                "Filter values are sanitized to prevent injection attacks",
                "All field lengths are validated against schema limits",
                "Duplicate labels are automatically detected and rejected",
            ],
        },
    }

    # Imported locally to avoid module-import cost at service startup;
    # presumably a stdlib-json-compatible wrapper — confirm dumps() API.
    from superset.utils import json

    return json.dumps(resource_data, indent=2)
diff --git a/superset/mcp_service/chart/schemas.py b/superset/mcp_service/chart/schemas.py
index a2afd40a447..b7ccbb363ba 100644
--- a/superset/mcp_service/chart/schemas.py
+++ b/superset/mcp_service/chart/schemas.py
@@ -21,6 +21,8 @@ Pydantic schemas for chart-related responses
from __future__ import annotations
+import html
+import re
from datetime import datetime, timezone
from typing import Annotated, Any, Dict, List, Literal, Protocol
@@ -28,12 +30,19 @@ from pydantic import (
BaseModel,
ConfigDict,
Field,
+ field_validator,
model_validator,
PositiveInt,
)
from superset.daos.base import ColumnOperator, ColumnOperatorEnum
-from superset.mcp_service.common.cache_schemas import MetadataCacheControl
+from superset.mcp_service.common.cache_schemas import (
+ CacheStatus,
+ FormDataCacheControl,
+ MetadataCacheControl,
+ QueryCacheControl,
+)
+from superset.mcp_service.common.error_schemas import ChartGenerationError
from superset.mcp_service.system.schemas import (
PaginationInfo,
TagInfo,
@@ -101,6 +110,25 @@ class ChartInfo(BaseModel):
model_config = ConfigDict(from_attributes=True, ser_json_timedelta="iso8601")
class GetChartAvailableFiltersRequest(BaseModel):
    """
    Request schema for get_chart_available_filters tool.

    Currently has no parameters but provides consistent API for future extensibility.
    """

    model_config = ConfigDict(
        # Reject unknown fields so client typos fail fast at validation time.
        extra="forbid",
        str_strip_whitespace=True,
    )
+
+
class ChartAvailableFiltersResponse(BaseModel):
    """Response schema mapping each column to its available filter operators."""

    column_operators: Dict[str, Any] = Field(
        ..., description="Available filter operators and metadata for each column"
    )
+
+
class ChartError(BaseModel):
error: str = Field(..., description="Error message")
error_type: str = Field(..., description="Type of error")
@@ -111,6 +139,63 @@ class ChartError(BaseModel):
model_config = ConfigDict(ser_json_timedelta="iso8601")
class ChartCapabilities(BaseModel):
    """Describes what the chart can do for LLM understanding.

    All fields are advisory flags/hints for clients deciding how to
    present or interact with the chart.
    """

    supports_interaction: bool = Field(description="Chart supports user interaction")
    supports_real_time: bool = Field(description="Chart supports live data updates")
    supports_drill_down: bool = Field(
        description="Chart supports drill-down navigation"
    )
    supports_export: bool = Field(description="Chart can be exported to other formats")
    optimal_formats: List[str] = Field(description="Recommended preview formats")
    data_types: List[str] = Field(
        description="Types of data shown (time_series, categorical, etc)"
    )
+
+
class ChartSemantics(BaseModel):
    """Semantic information for LLM reasoning.

    Narrative/analytical annotations derived from the chart's data,
    intended to be consumed verbatim by an LLM client.
    """

    primary_insight: str = Field(
        description="Main insight or pattern the chart reveals"
    )
    data_story: str = Field(description="Narrative description of what the data shows")
    recommended_actions: List[str] = Field(
        description="Suggested next steps based on data"
    )
    anomalies: List[str] = Field(description="Notable outliers or unusual patterns")
    statistical_summary: Dict[str, Any] = Field(
        description="Key statistics (mean, median, trends)"
    )
+
+
class PerformanceMetadata(BaseModel):
    """Performance information for LLM cost understanding."""

    query_duration_ms: int = Field(description="Query execution time")
    estimated_cost: str | None = Field(None, description="Resource cost estimate")
    # NOTE(review): typed as a free-form str although CacheStatus is imported
    # in this module — confirm whether this should use the enum.
    cache_status: str = Field(description="Cache hit/miss status")
    optimization_suggestions: List[str] = Field(
        default_factory=list, description="Performance improvement tips"
    )
+
+
class AccessibilityMetadata(BaseModel):
    """Accessibility information for inclusive visualization."""

    color_blind_safe: bool = Field(description="Uses colorblind-safe palette")
    # alt_text should be suitable for direct use as a screen-reader label.
    alt_text: str = Field(description="Screen reader description")
    high_contrast_available: bool = Field(description="High contrast version available")
+
+
class VersionedResponse(BaseModel):
    """Base class for versioned API responses.

    Subclasses inherit the schema/API version stamps; defaults are
    "2.0" and "v1" respectively.
    """

    schema_version: str = Field("2.0", description="Response schema version")
    api_version: str = Field("v1", description="MCP API version")
+
+
class GetChartInfoRequest(BaseModel):
"""Request schema for get_chart_info with support for ID or UUID."""
@@ -124,12 +209,13 @@ def serialize_chart_object(chart: ChartLike | None) -> ChartInfo | None:
if not chart:
return None
- # TODO (Phase 3): Generate MCP service screenshot URL
- # For now, use chart's native URL instead of screenshot URL
- # Screenshot functionality will be added in Phase 3 PR
+ # Generate MCP service screenshot URL instead of chart's native URL
+ from superset.mcp_service.utils.url_utils import get_chart_screenshot_url
chart_id = getattr(chart, "id", None)
- chart_url = getattr(chart, "url", None)
+ screenshot_url = None
+ if chart_id:
+ screenshot_url = get_chart_screenshot_url(chart_id)
return ChartInfo(
id=chart_id,
@@ -137,7 +223,7 @@ def serialize_chart_object(chart: ChartLike | None) -> ChartInfo | None:
viz_type=getattr(chart, "viz_type", None),
datasource_name=getattr(chart, "datasource_name", None),
datasource_type=getattr(chart, "datasource_type", None),
- url=chart_url,
+ url=screenshot_url,
description=getattr(chart, "description", None),
cache_timeout=getattr(chart, "cache_timeout", None),
form_data=getattr(chart, "form_data", None),
@@ -214,6 +300,409 @@ class ChartList(BaseModel):
model_config = ConfigDict(ser_json_timedelta="iso8601")
+# --- Simplified schemas for generate_chart tool ---
+
+
+# Common pieces
+class ColumnRef(BaseModel):
+ name: str = Field(
+ ...,
+ description="Column name",
+ min_length=1,
+ max_length=255,
+ pattern=r"^[a-zA-Z0-9_][a-zA-Z0-9_\s\-\.]*$",
+ )
+ label: str | None = Field(
+ None, description="Display label for the column", max_length=500
+ )
+ dtype: str | None = Field(None, description="Data type hint")
+ aggregate: (
+ Literal[
+ "SUM",
+ "COUNT",
+ "AVG",
+ "MIN",
+ "MAX",
+ "COUNT_DISTINCT",
+ "STDDEV",
+ "VAR",
+ "MEDIAN",
+ "PERCENTILE",
+ ]
+ | None
+ ) = Field(
+ None,
+ description="SQL aggregation function. Only these validated functions are "
+ "supported to prevent SQL errors.",
+ )
+
+ @field_validator("name")
+ @classmethod
+ def sanitize_name(cls, v: str) -> str:
+ """Sanitize column name to prevent XSS and SQL injection."""
+ if not v or not v.strip():
+ raise ValueError("Column name cannot be empty")
+
+ # Length check first to prevent ReDoS attacks
+ if len(v) > 255:
+ raise ValueError(
+ f"Column name too long ({len(v)} characters). "
+ f"Maximum allowed length is 255 characters."
+ )
+
+ # Remove HTML tags and decode entities
+ sanitized = html.escape(v.strip())
+
+ # Check for dangerous HTML tags using substring checks (safe)
+ dangerous_tags = ["",
+ "