fix(mcp): sanitize read path output for LLM context (#39738)

This commit is contained in:
Richard Fogaca Nienkotter
2026-04-29 19:06:19 -03:00
committed by GitHub
parent 81a08f0a0e
commit c2b9272f4c
22 changed files with 2781 additions and 321 deletions

View File

@@ -22,7 +22,7 @@ Tool for generating SQL Lab URLs with pre-populated sql and context.
"""
import logging
from urllib.parse import urlencode
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
from fastmcp import Context
from superset_core.mcp.decorators import tool, ToolAnnotations
@@ -32,10 +32,51 @@ from superset.mcp_service.sql_lab.schemas import (
OpenSqlLabRequest,
SqlLabResponse,
)
from superset.mcp_service.utils import sanitize_for_llm_context
from superset.mcp_service.utils.url_utils import get_superset_base_url
logger = logging.getLogger(__name__)
# Names of SQL Lab URL query parameters whose values are passed through
# sanitize_for_llm_context when a generated URL is rewritten; any other
# query parameter is carried over with its parsed value unchanged.
SQL_LAB_QUERY_PARAMS_TO_SANITIZE = frozenset({"sql", "title"})
def _sanitize_sql_lab_url_for_llm_context(url: str) -> str:
"""Wrap user-controlled SQL Lab query values while preserving navigation."""
if not url:
return url
parsed = urlsplit(url)
query_params = parse_qsl(parsed.query, keep_blank_values=True)
if not query_params:
return url
sanitized_params = [
(
name,
sanitize_for_llm_context(value, field_path=(name,))
if name in SQL_LAB_QUERY_PARAMS_TO_SANITIZE
else value,
)
for name, value in query_params
]
return urlunsplit(parsed._replace(query=urlencode(sanitized_params)))
def _sanitize_sql_lab_response_for_llm_context(
    response: SqlLabResponse,
) -> SqlLabResponse:
    """Build a copy of *response* with its text fields sanitized.

    The response is dumped to a plain dict, then:

    * ``url`` is rewritten by ``_sanitize_sql_lab_url_for_llm_context``;
    * ``title`` and ``error`` are each passed through
      ``sanitize_for_llm_context`` with the field name as the field path.

    Every other dumped field is carried over as-is, and the resulting
    mapping is validated back into a ``SqlLabResponse``.

    Args:
        response: The response produced by the SQL Lab tool.

    Returns:
        A new ``SqlLabResponse`` validated from the sanitized payload.
    """
    dumped = response.model_dump(mode="python")
    # Entries are evaluated in order: url first, then title, then error.
    sanitized = {
        **dumped,
        "url": _sanitize_sql_lab_url_for_llm_context(dumped.get("url", "")),
        "title": sanitize_for_llm_context(
            dumped.get("title"),
            field_path=("title",),
        ),
        "error": sanitize_for_llm_context(
            dumped.get("error"),
            field_path=("error",),
        ),
    }
    return SqlLabResponse.model_validate(sanitized)
@tool(
tags=["explore"],
@@ -61,12 +102,17 @@ def open_sql_lab_with_context(
# Validate database exists and is accessible
database = DatabaseDAO.find_by_id(request.database_connection_id)
if not database:
return SqlLabResponse(
url="",
database_id=request.database_connection_id,
schema_name=request.schema_name,
title=request.title,
error=f"Database with ID {request.database_connection_id} not found",
error_message = (
f"Database with ID {request.database_connection_id} not found"
)
return _sanitize_sql_lab_response_for_llm_context(
SqlLabResponse(
url="",
database_id=request.database_connection_id,
schema_name=request.schema_name,
title=request.title,
error=error_message,
)
)
# Build query parameters for SQL Lab URL
@@ -109,12 +155,14 @@ def open_sql_lab_with_context(
"Generated SQL Lab URL for database %s", request.database_connection_id
)
return SqlLabResponse(
url=url,
database_id=request.database_connection_id,
schema_name=request.schema_name,
title=request.title,
error=None,
return _sanitize_sql_lab_response_for_llm_context(
SqlLabResponse(
url=url,
database_id=request.database_connection_id,
schema_name=request.schema_name,
title=request.title,
error=None,
)
)
except Exception as e:
@@ -128,10 +176,12 @@ def open_sql_lab_with_context(
"Database rollback failed during error handling", exc_info=True
)
logger.error("Error generating SQL Lab URL: %s", e)
return SqlLabResponse(
url="",
database_id=request.database_connection_id,
schema_name=request.schema_name,
title=request.title,
error=f"Failed to generate SQL Lab URL: {str(e)}",
return _sanitize_sql_lab_response_for_llm_context(
SqlLabResponse(
url="",
database_id=request.database_connection_id,
schema_name=request.schema_name,
title=request.title,
error=f"Failed to generate SQL Lab URL: {str(e)}",
)
)