fix(mcp): unwrap ToolResult payload before truncation in ResponseSizeGuardMiddleware

FastMCP converts tool return values into ToolResult objects before
middleware sees them. The actual data (e.g. DashboardInfo) is serialized
as a JSON string inside content[0].text. The ResponseSizeGuardMiddleware
was operating on the ToolResult wrapper instead of the actual payload,
causing two problems:

1. Token estimation serialized the payload twice (the JSON string inside
   content[0].text was escaped again), producing inflated estimates
2. Truncation phases (truncate charts list, truncate strings, etc.)
   could not find the right keys because they were looking at the
   ToolResult structure, not the dashboard/chart/dataset data

This caused get_dashboard_info to produce broken truncated responses
for dashboards with many charts — the middleware would char-truncate
content[0].text mid-JSON instead of intelligently reducing the payload.

The fix extracts the payload from content[0].text, parses it back to a
dict, runs the 5-phase truncation on the actual data, then re-wraps the
result into a ToolResult.
This commit is contained in:
Amin Ghadersohi
2026-04-22 19:42:39 -04:00
committed by Elizabeth
parent bc2ffc66e5
commit 5348b92e3a

View File

@@ -951,6 +951,59 @@ class ResponseSizeGuardMiddleware(Middleware):
excluded_tools = [excluded_tools]
self.excluded_tools = set(excluded_tools or [])
@staticmethod
def _extract_payload_from_tool_result(
    response: Any,
) -> tuple[dict[str, Any], bool] | None:
    """Parse the JSON object stored in a ToolResult's first content item.

    The text of ``response.content[0]`` is decoded as JSON so that callers
    can work on the decoded dict instead of on the ToolResult wrapper.

    Args:
        response: Any tool response; only ``ToolResult`` instances are
            unwrapped.

    Returns:
        ``(payload_dict, True)`` when the first content item carries
        non-empty text that decodes to a JSON object. ``None`` when
        ``response`` is not a ToolResult, has no content, its first item
        has no (or empty) ``text``, the text is not valid JSON, or the
        decoded value is not a dict.
    """
    from fastmcp.tools.tool import ToolResult

    from superset.utils.json import loads as json_loads

    if not isinstance(response, ToolResult):
        return None

    items = response.content
    if not items:
        return None

    # A missing ``text`` attribute and an empty/None text are treated alike.
    raw_text = getattr(items[0], "text", None)
    if not raw_text:
        return None

    try:
        decoded = json_loads(raw_text)
    except (ValueError, TypeError):
        return None

    # Only JSON objects qualify; lists and scalars are rejected.
    return (decoded, True) if isinstance(decoded, dict) else None
@staticmethod
def _rewrap_as_tool_result(payload: dict[str, Any], original: Any) -> Any:
    """Build a new ToolResult whose single text item is ``payload`` as JSON.

    Args:
        payload: The dict to serialize into the result's text content.
        original: The response the payload came from. Only its ``meta`` is
            reused, and only when it is a ``ToolResult``; otherwise the new
            result gets ``meta=None``.

    Returns:
        A ``ToolResult`` containing exactly one ``TextContent`` item.
    """
    from fastmcp.tools.tool import ToolResult
    from mcp.types import TextContent

    from superset.utils.json import dumps as json_dumps

    serialized = json_dumps(payload)
    text_items = [TextContent(type="text", text=serialized)]

    if isinstance(original, ToolResult):
        carried_meta = original.meta
    else:
        carried_meta = None

    return ToolResult(content=text_items, meta=carried_meta)
def _try_truncate_info_response(
self,
tool_name: str,
@@ -960,15 +1013,28 @@ class ResponseSizeGuardMiddleware(Middleware):
"""Attempt to dynamically truncate an info tool response to fit the limit.
Returns the truncated response if successful, None otherwise.
When the response is a ToolResult (the normal case — FastMCP wraps
every tool return value), the actual data lives inside
``content[0].text`` as a JSON string. We parse that string, run the
truncation phases on the resulting dict, then re-wrap the result.
"""
from superset.mcp_service.utils.token_utils import (
estimate_response_tokens,
truncate_oversized_response,
)
# Unwrap ToolResult so truncation operates on the real payload
extracted = self._extract_payload_from_tool_result(response)
if extracted is not None:
payload, _ = extracted
truncation_target = payload
else:
truncation_target = response
try:
truncated, was_truncated, notes = truncate_oversized_response(
response, self.token_limit
truncation_target, self.token_limit
)
except (MemoryError, RecursionError) as trunc_error:
logger.warning(
@@ -1015,6 +1081,10 @@ class ResponseSizeGuardMiddleware(Middleware):
truncated["_response_truncated"] = True
truncated["_truncation_notes"] = notes
# Re-wrap into ToolResult if we unwrapped one
if extracted is not None and isinstance(truncated, dict):
return self._rewrap_as_tool_result(truncated, response)
return truncated
async def on_call_tool(
@@ -1038,8 +1108,14 @@ class ResponseSizeGuardMiddleware(Middleware):
format_size_limit_error,
)
# When the response is a ToolResult, estimate tokens on the actual
# payload inside content[0].text rather than on the ToolResult
# wrapper (which would double-serialize the JSON string).
extracted = self._extract_payload_from_tool_result(response)
estimation_target = extracted[0] if extracted is not None else response
try:
estimated_tokens = estimate_response_tokens(response)
estimated_tokens = estimate_response_tokens(estimation_target)
except MemoryError as me:
logger.warning(
"MemoryError while estimating tokens for %s: %s", tool_name, me