diff --git a/superset/mcp_service/jwt_verifier.py b/superset/mcp_service/jwt_verifier.py index abd8019d9e4..23b517f3889 100644 --- a/superset/mcp_service/jwt_verifier.py +++ b/superset/mcp_service/jwt_verifier.py @@ -26,8 +26,10 @@ HTTP responses always return generic errors per RFC 6750 Section 3.1. """ import base64 +import html as html_module import logging import time +from collections.abc import Callable from contextvars import ContextVar from typing import Any, cast @@ -44,8 +46,9 @@ from mcp.server.auth.middleware.bearer_auth import BearerAuthBackend from starlette.authentication import AuthenticationError from starlette.middleware import Middleware from starlette.middleware.authentication import AuthenticationMiddleware -from starlette.requests import HTTPConnection -from starlette.responses import JSONResponse +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import HTTPConnection, Request +from starlette.responses import HTMLResponse, JSONResponse, Response from superset.utils import json @@ -61,21 +64,247 @@ _jwt_failure_reason: ContextVar[str | None] = ContextVar( "_jwt_failure_reason", default=None ) +_HTML_STYLES = """ + *, *::before, *::after { box-sizing: border-box; } + body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + background: #f5f5f5; + color: #1a1a1a; + margin: 0; + padding: 40px 16px; + line-height: 1.6; + } + .card { + max-width: 640px; + margin: 0 auto; + background: #ffffff; + border-radius: 8px; + box-shadow: 0 1px 4px rgba(0,0,0,.12); + padding: 40px 40px 32px; + } + h1 { font-size: 1.4rem; margin: 0 0 8px; } + .badge { + display: inline-block; + background: #e8f4fd; + color: #0070c0; + font-size: .75rem; + font-weight: 600; + padding: 2px 8px; + border-radius: 4px; + margin-bottom: 20px; + letter-spacing: .04em; + text-transform: uppercase; + } + p { margin: 0 0 20px; color: #444; } + h2 { font-size: 1rem; margin: 24px 0 8px; color: #1a1a1a; } + pre { + background: #f0f0f0; + border-radius: 6px; + padding: 16px; + font-size: .85rem; + overflow-x: auto; + margin: 0 0 24px; + } + code { + font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; + } + .note { + font-size: .85rem; + color: #666; + border-left: 3px solid #ddd; + padding-left: 12px; + margin-top: 24px; + } + .logo { + max-height: 48px; max-width: 200px; margin-bottom: 20px; display: block; + }""" -def _json_auth_error_handler( - conn: HTTPConnection, exc: AuthenticationError -) -> JSONResponse: - """JSON 401 error handler for authentication failures. +_DEFAULT_CLIENTS = [ + "Claude Desktop", + "Claude Code (CLI)", + "Cursor", +] - Per RFC 6750 Section 3.1, error responses MUST NOT leak server - configuration or token claim values. Only generic error codes are - returned to clients. Detailed failure reasons are logged server-side - only for debugging. +_DEFAULT_HELLO_PAGE_CONFIG: dict[str, Any] = { + # Page heading and browser tab title + "title": "Superset MCP Server", + # Key name used in the mcpServers config snippet (e.g. "superset", "my-company") + "server_key": "superset", + # Include "transport": "streamable-http" in the config snippet. + # Recommended: Claude Desktop defaults to SSE so the transport must be explicit. + "show_transport": True, + # Supported MCP clients listed on the page + "clients": _DEFAULT_CLIENTS, +} + + +def _build_config_snippet( + auth_enabled: bool, server_key: str, show_transport: bool +) -> str: + # superset.utils.json.dumps() ensures the key is a valid JSON string. + from superset.utils import json as superset_json + + key_json = superset_json.dumps(server_key) + inner_parts = [' "url": ""'] + if show_transport: + inner_parts.append(' "transport": "streamable-http"') + if auth_enabled: + inner_parts.append( + ' "headers": {\n' + ' "Authorization": "Bearer "\n' + " }" + ) + inner = ",\n".join(inner_parts) + return f'{{\n "mcpServers": {{\n {key_json}: {{\n{inner}\n }}\n }}\n}}' + + +def _build_browser_hello_html( + auth_enabled: bool, + page_config: dict[str, Any] | None = None, +) -> str: + cfg = {**_DEFAULT_HELLO_PAGE_CONFIG, **(page_config or {})} + title: str = html_module.escape(str(cfg["title"])) + server_key: str = cfg["server_key"] + show_transport: bool = cfg["show_transport"] + clients: list[str] = [html_module.escape(str(c)) for c in cfg["clients"]] + app_name: str = html_module.escape(str(cfg.get("app_name", "Apache Superset"))) + logo_url: str | None = None + if logo_url_raw := cfg.get("logo_url"): + logo_url_stripped = str(logo_url_raw).strip() + if logo_url_stripped.startswith(("http://", "https://")): + logo_url = html_module.escape(logo_url_stripped) + + # html.escape() ensures server_key and all other content in the snippet + # cannot break out of the
 block (json.dumps does not escape HTML).
+    config_block = html_module.escape(
+        _build_config_snippet(auth_enabled, server_key, show_transport)
+    )
+
+    if auth_enabled:
+        connect_desc = (
+            "Add the following to your MCP client configuration, "
+            "replacing the URL and API key with your actual values:"
+        )
+        note = (
+            "Replace <this-url> with the full URL of this page "
+            "and <your-api-key> with a valid API key or JWT token."
+        )
+    else:
+        connect_desc = (
+            "Add the following to your MCP client configuration, "
+            "replacing the URL with your actual server URL:"
+        )
+        note = "Replace <this-url> with the full URL of this page."
+
+    client_items = "\n".join(f"      
  • {c}
  • " for c in clients) + logo_html = ( + f'\n ' if logo_url else "" + ) + + return f""" + + + + + {title} + + + +
    + {logo_html}
    MCP API Endpoint
    +

    {title}

    +

    + This is the Model Context Protocol (MCP) endpoint for + {app_name}. It is an API designed for AI coding assistants — + not a web page to browse directly. +

    +

    How to connect

    +

    {connect_desc}

    +
    {config_block}
    +

    Supported clients

    +

    This endpoint works with any MCP-compatible client, including:

    +
      +{client_items} +
    • Any client that supports the streamable-http transport
    • +
    +
    + {note} +
    +
    + +""" + + +# Pre-built for _auth_error_handler (auth-required context, default config) +_MCP_BROWSER_HELLO_HTML = _build_browser_hello_html(auth_enabled=True) + + +def _prefers_browser_html(conn: HTTPConnection) -> bool: + """Return True when the request looks like a browser navigation. + + Checks both the HTTP method (GET/HEAD only) and the Accept header + (text/html present, application/json and text/event-stream absent). + Case-insensitive to handle unusual but valid header values. + """ + if conn.scope.get("method") not in ("GET", "HEAD"): + return False + accept = conn.headers.get("accept", "").lower() + return ( + "text/html" in accept + and "application/json" not in accept + and "text/event-stream" not in accept + ) + + +class BrowserHelloMiddleware(BaseHTTPMiddleware): + """Starlette middleware that returns a browser-friendly hello page. + + Intercepts GET/HEAD requests with a browser Accept header before they + reach FastMCP's router (which returns 405 for GET). Works regardless + of whether MCP_AUTH_ENABLED is True or False. + + When auth_enabled=True the page includes Bearer token setup instructions. + When auth_enabled=False the page omits the Authorization header from the + config snippet since no credentials are required. + """ + + def __init__( + self, + app: Any, + auth_enabled: bool = False, + page_config: dict[str, Any] | None = None, + ) -> None: + super().__init__(app) + self._html = _build_browser_hello_html( + auth_enabled=auth_enabled, page_config=page_config + ) + + async def dispatch( + self, request: Request, call_next: Callable[..., Any] + ) -> Response: + if request.method in ("GET", "HEAD") and _prefers_browser_html(request): + return HTMLResponse(content=self._html, status_code=200) + return await call_next(request) + + +def _auth_error_handler(conn: HTTPConnection, exc: AuthenticationError) -> Response: + """Auth error handler for unauthenticated MCP requests. + + Returns a friendly HTML page for browser navigation requests so users + who open the MCP URL in a browser see setup instructions instead of a + raw JSON 401. + + For all other clients (API, SSE, non-GET methods) returns a standard + JSON 401 per RFC 6750 Section 3.1. References: - RFC 6750 Section 3.1: https://datatracker.ietf.org/doc/html/rfc6750#section-3.1 - CVE-2022-29266, CVE-2019-7644: verbose JWT errors led to exploits """ + if _prefers_browser_html(conn): + return HTMLResponse(status_code=200, content=_MCP_BROWSER_HELLO_HTML) + # Log detailed reason server-side only logger.warning("JWT authentication failed: %s", exc) @@ -91,6 +320,28 @@ def _json_auth_error_handler( ) +class MCPJWTVerifier(JWTVerifier): + """JWTVerifier with Superset MCP auth error handling. + + Provides browser-friendly HTML responses for unauthenticated browser + navigation requests (GET/HEAD with Accept: text/html), while maintaining + RFC 6750-compliant JSON 401 responses for API and SSE clients. + + Use this as the base for all Superset JWT verifiers so the browser hello + page is active regardless of which verifier variant is configured. + """ + + def get_middleware(self) -> list[Any]: + return [ + Middleware( + AuthenticationMiddleware, + backend=BearerAuthBackend(self), + on_error=_auth_error_handler, + ), + Middleware(AuthContextMiddleware), + ] + + class DetailedBearerAuthBackend(BearerAuthBackend): """ Bearer auth backend that raises AuthenticationError with specific @@ -124,7 +375,7 @@ class DetailedBearerAuthBackend(BearerAuthBackend): return None -class DetailedJWTVerifier(JWTVerifier): +class DetailedJWTVerifier(MCPJWTVerifier): """ JWT verifier with tiered server-side logging for each validation step. @@ -300,7 +551,7 @@ class DetailedJWTVerifier(JWTVerifier): Middleware( AuthenticationMiddleware, backend=DetailedBearerAuthBackend(self), - on_error=_json_auth_error_handler, + on_error=_auth_error_handler, ), Middleware(AuthContextMiddleware), ] diff --git a/superset/mcp_service/mcp_config.py b/superset/mcp_service/mcp_config.py index d12b44bbc87..997755f1b1c 100644 --- a/superset/mcp_service/mcp_config.py +++ b/superset/mcp_service/mcp_config.py @@ -343,10 +343,10 @@ def create_default_mcp_auth_factory(app: Flask) -> Optional[Any]: auth_provider = DetailedJWTVerifier(**common_kwargs) else: - # Default JWTVerifier: minimal logging, generic error responses. - from fastmcp.server.auth.providers.jwt import JWTVerifier + # MCPJWTVerifier: minimal logging + browser-friendly error page. + from superset.mcp_service.jwt_verifier import MCPJWTVerifier - auth_provider = JWTVerifier(**common_kwargs) + auth_provider = MCPJWTVerifier(**common_kwargs) return auth_provider except Exception: diff --git a/superset/mcp_service/server.py b/superset/mcp_service/server.py index dc4bffbb598..b8cb2cc0133 100644 --- a/superset/mcp_service/server.py +++ b/superset/mcp_service/server.py @@ -32,6 +32,7 @@ from fastmcp.exceptions import ToolError from fastmcp.server.middleware import Middleware from superset.mcp_service.app import create_mcp_app, init_fastmcp_server +from superset.mcp_service.jwt_verifier import BrowserHelloMiddleware from superset.mcp_service.mcp_config import ( get_mcp_factory_config, MCP_STORE_CONFIG, @@ -717,6 +718,53 @@ def build_middleware_list() -> list[Middleware]: ] +def _build_starlette_middleware( + flask_app: Any | None = None, auth_provider: Any | None = None +) -> list[Any]: + from starlette.middleware import Middleware as StarletteMiddleware + + if flask_app is None: + from superset.mcp_service.flask_singleton import get_flask_app + + flask_app = get_flask_app() + # Auth is active only when an instantiated provider was passed in. + # Config-flag presence is not sufficient — MCP_AUTH_FACTORY may return + # None, and use_factory_config auth lives outside Flask config entirely. + auth_enabled = auth_provider is not None + app_name: str = flask_app.config.get("APP_NAME", "Superset") + app_icon: str = flask_app.config.get("APP_ICON", "") + base_page_config: dict[str, Any] = { + "title": f"{app_name} MCP Server", + "server_key": app_name.lower().replace(" ", "-"), + "app_name": app_name, + } + if app_icon: + if app_icon.startswith(("http://", "https://")): + base_page_config["logo_url"] = app_icon + elif app_icon.startswith("/"): + # Relative path — combine with Superset webserver address if configured + superset_addr = flask_app.config.get( + "SUPERSET_WEBSERVER_ADDRESS", "" + ).rstrip("/") + if superset_addr: + base_page_config["logo_url"] = f"{superset_addr}{app_icon}" + mcp_hello_page = flask_app.config.get("MCP_HELLO_PAGE") + if mcp_hello_page is not None and not isinstance(mcp_hello_page, dict): + logger.warning( + "MCP_HELLO_PAGE must be a dict, ignoring value of type %s", + type(mcp_hello_page).__name__, + ) + mcp_hello_page = None + page_config: dict[str, Any] = {**base_page_config, **(mcp_hello_page or {})} + return [ + StarletteMiddleware( + BrowserHelloMiddleware, + auth_enabled=auth_enabled, + page_config=page_config, + ) + ] + + def run_server( host: str = "127.0.0.1", port: int = 5008, @@ -750,6 +798,9 @@ def run_server( logging.info("Creating MCP app from factory configuration...") factory_config = get_mcp_factory_config() mcp_instance = create_mcp_app(**factory_config) + # Capture the actual auth object so the hello page reflects real auth state + auth_provider = factory_config.get("auth") + flask_app = None # Apply tool search transform if configured tool_search_config = MCP_TOOL_SEARCH_CONFIG @@ -794,6 +845,11 @@ def run_server( # Create EventStore for session management (Redis for multi-pod, None for in-memory) event_store = create_event_store(event_store_config) + starlette_middleware = _build_starlette_middleware( + flask_app=flask_app, + auth_provider=auth_provider, + ) + env_key = f"FASTMCP_RUNNING_{port}" if not os.environ.get(env_key): os.environ[env_key] = "1" @@ -807,6 +863,7 @@ def run_server( transport="streamable-http", event_store=event_store, stateless_http=True, + middleware=starlette_middleware, ) uvicorn.run(app, host=host, port=port) else: @@ -817,6 +874,7 @@ def run_server( host=host, port=port, stateless_http=True, + middleware=starlette_middleware, ) except Exception as e: logging.error("FastMCP failed: %s", e) diff --git a/tests/unit_tests/mcp_service/test_jwt_verifier.py b/tests/unit_tests/mcp_service/test_jwt_verifier.py index b6ba58aa7f2..0eb934696bc 100644 --- a/tests/unit_tests/mcp_service/test_jwt_verifier.py +++ b/tests/unit_tests/mcp_service/test_jwt_verifier.py @@ -26,7 +26,7 @@ import pytest from authlib.jose.errors import BadSignatureError, DecodeError, ExpiredTokenError from superset.mcp_service.jwt_verifier import ( - _json_auth_error_handler, + _auth_error_handler, _jwt_failure_reason, DetailedBearerAuthBackend, DetailedJWTVerifier, @@ -400,7 +400,7 @@ def test_get_middleware_returns_custom_components(hs256_verifier): == "DetailedBearerAuthBackend" ) # on_error should be the RFC 6750-compliant generic handler - assert auth_middleware.kwargs["on_error"] is _json_auth_error_handler + assert auth_middleware.kwargs["on_error"] is _auth_error_handler class _FakeHeaders(dict[str, str]): @@ -496,7 +496,7 @@ def test_error_handler_never_leaks_jwt_details(): for reason in sensitive_reasons: exc = AuthenticationError(reason) - response = _json_auth_error_handler(mock_conn, exc) + response = _auth_error_handler(mock_conn, exc) assert response.status_code == 401 diff --git a/tests/unit_tests/mcp_service/test_jwt_verifier_browser_hello.py b/tests/unit_tests/mcp_service/test_jwt_verifier_browser_hello.py new file mode 100644 index 00000000000..baece40b1be --- /dev/null +++ b/tests/unit_tests/mcp_service/test_jwt_verifier_browser_hello.py @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Tests for browser-friendly hello page in _auth_error_handler.""" + +from unittest.mock import MagicMock + +from starlette.authentication import AuthenticationError +from starlette.responses import HTMLResponse, JSONResponse + +from superset.mcp_service.jwt_verifier import _auth_error_handler + + +def _make_conn(accept: str, method: str = "GET") -> MagicMock: + conn = MagicMock() + conn.headers = {"accept": accept} if accept else {} + conn.scope = {"method": method} + return conn + + +def test_browser_accept_returns_html_200() -> None: + conn = _make_conn("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + exc = AuthenticationError("no token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, HTMLResponse) + assert response.status_code == 200 + assert b"MCP" in response.body + assert b"mcpServers" in response.body + + +def test_browser_accept_html_only_returns_200() -> None: + conn = _make_conn("text/html") + exc = AuthenticationError("no token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, HTMLResponse) + assert response.status_code == 200 + + +def test_json_accept_returns_401() -> None: + conn = _make_conn("application/json") + exc = AuthenticationError("bad token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, JSONResponse) + assert response.status_code == 401 + + +def test_event_stream_accept_returns_401() -> None: + conn = _make_conn("text/event-stream") + exc = AuthenticationError("bad token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, JSONResponse) + assert response.status_code == 401 + + +def test_no_accept_header_returns_401() -> None: + conn = MagicMock() + conn.headers = {} + conn.scope = {"method": "GET"} + exc = AuthenticationError("bad token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, JSONResponse) + assert response.status_code == 401 + + +def test_json_401_body_is_rfc6750_compliant() -> None: + conn = _make_conn("application/json") + exc = AuthenticationError("expired") + response = _auth_error_handler(conn, exc) + assert response.status_code == 401 + assert response.headers.get("www-authenticate") == 'Bearer error="invalid_token"' + + +def test_html_accepted_alongside_other_types_but_not_json() -> None: + conn = _make_conn("text/html, */*") + exc = AuthenticationError("no token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, HTMLResponse) + assert response.status_code == 200 + + +def test_accept_both_html_and_json_returns_401() -> None: + # When a client lists both, treat it as an API client (application/json wins) + conn = _make_conn("text/html, application/json") + exc = AuthenticationError("bad token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, JSONResponse) + assert response.status_code == 401 + + +def test_post_with_html_accept_returns_401() -> None: + # Non-GET/HEAD methods always get JSON 401, even with text/html Accept + conn = _make_conn("text/html", method="POST") + exc = AuthenticationError("no token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, JSONResponse) + assert response.status_code == 401 + + +def test_head_with_html_accept_returns_html_200() -> None: + conn = _make_conn("text/html", method="HEAD") + exc = AuthenticationError("no token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, HTMLResponse) + assert response.status_code == 200 + + +def test_accept_header_case_insensitive() -> None: + conn = _make_conn("TEXT/HTML") + exc = AuthenticationError("no token") + response = _auth_error_handler(conn, exc) + assert isinstance(response, HTMLResponse) + assert response.status_code == 200