fix(mcp): unwrap ToolResult payload before truncation in ResponseSizeGuardMiddleware

FastMCP converts tool return values into ToolResult objects before middleware sees them. The actual data (e.g. DashboardInfo) is serialized as a JSON string inside content[0].text. The ResponseSizeGuardMiddleware was operating on the ToolResult wrapper instead of the actual payload, causing two problems:

1. Token estimation was double-serializing (the JSON string inside text was escaped again), producing inflated estimates.
2. Truncation phases (truncate charts list, truncate strings, etc.) could not find the right keys because they were looking at the ToolResult structure, not the dashboard/chart/dataset data.

This caused get_dashboard_info to produce broken truncated responses for dashboards with many charts — the middleware would char-truncate content[0].text mid-JSON instead of intelligently reducing the payload.

The fix extracts the payload from content[0].text, parses it back to a dict, runs the 5-phase truncation on the actual data, then re-wraps the result into a ToolResult.
2026-05-21 15:55:10 +00:00 · 2026-04-22 19:42:39 -04:00
parent bc2ffc66e5
commit 5348b92e3a
1 changed files with 78 additions and 2 deletions
--- a/superset/mcp_service/middleware.py
+++ b/superset/mcp_service/middleware.py
@@ -951,6 +951,59 @@ class ResponseSizeGuardMiddleware(Middleware):
            excluded_tools = [excluded_tools]
        self.excluded_tools = set(excluded_tools or [])

+    @staticmethod
+    def _extract_payload_from_tool_result(
+        response: Any,
+    ) -> tuple[dict[str, Any], bool] | None:
+        """Extract the JSON payload dict from a ToolResult's content[0].text.
+
+        FastMCP converts tool return values into ToolResult before middleware
+        sees them; the real data (e.g. a DashboardInfo dict) is a JSON string
+        in ``content[0].text``.  Truncation must run on that parsed dict, not
+        on the wrapper, or phases like "truncate charts list" find no keys.
+        Only the first content item is inspected.
+
+        Returns ``(payload_dict, True)`` on success (the flag is always True),
+        or ``None`` if not a ToolResult, no text, bad JSON, or a non-dict value.
+        """
+        from fastmcp.tools.tool import ToolResult
+
+        from superset.utils.json import loads as json_loads
+
+        if not isinstance(response, ToolResult):
+            return None
+
+        if (
+            not response.content
+            or not hasattr(response.content[0], "text")
+            or not response.content[0].text
+        ):
+            return None
+
+        try:
+            payload = json_loads(response.content[0].text)
+        except (ValueError, TypeError):
+            return None
+
+        if not isinstance(payload, dict):
+            return None
+
+        return payload, True
+
+    @staticmethod
+    def _rewrap_as_tool_result(payload: dict[str, Any], original: Any) -> Any:
+        """Serialize payload into a one-item text ToolResult; only meta is kept."""
+        from fastmcp.tools.tool import ToolResult
+        from mcp.types import TextContent
+
+        from superset.utils.json import dumps as json_dumps
+
+        text = json_dumps(payload)
+        return ToolResult(
+            content=[TextContent(type="text", text=text)],
+            meta=original.meta if isinstance(original, ToolResult) else None,
+        )
+
    def _try_truncate_info_response(
        self,
        tool_name: str,
@@ -960,15 +1013,28 @@ class ResponseSizeGuardMiddleware(Middleware):
        """Attempt to dynamically truncate an info tool response to fit the limit.

        Returns the truncated response if successful, None otherwise.
+
+        When the response is a ToolResult (the normal case — FastMCP wraps
+        every tool return value), the actual data lives inside
+        ``content[0].text`` as a JSON string.  We parse that string, run the
+        truncation phases on the resulting dict, then re-wrap the result.
        """
        from superset.mcp_service.utils.token_utils import (
            estimate_response_tokens,
            truncate_oversized_response,
        )

+        # Unwrap ToolResult so truncation operates on the real payload
+        extracted = self._extract_payload_from_tool_result(response)
+        if extracted is not None:
+            payload, _ = extracted
+            truncation_target = payload
+        else:
+            truncation_target = response
+
        try:
            truncated, was_truncated, notes = truncate_oversized_response(
-                response, self.token_limit
+                truncation_target, self.token_limit
            )
        except (MemoryError, RecursionError) as trunc_error:
            logger.warning(
@@ -1015,6 +1081,10 @@ class ResponseSizeGuardMiddleware(Middleware):
            truncated["_response_truncated"] = True
            truncated["_truncation_notes"] = notes

+        # Re-wrap into ToolResult if we unwrapped one
+        if extracted is not None and isinstance(truncated, dict):
+            return self._rewrap_as_tool_result(truncated, response)
+
        return truncated

    async def on_call_tool(
@@ -1038,8 +1108,14 @@ class ResponseSizeGuardMiddleware(Middleware):
            format_size_limit_error,
        )

+        # When the response is a ToolResult, estimate tokens on the actual
+        # payload inside content[0].text rather than on the ToolResult
+        # wrapper (which would double-serialize the JSON string).
+        extracted = self._extract_payload_from_tool_result(response)
+        estimation_target = extracted[0] if extracted is not None else response
+
        try:
-            estimated_tokens = estimate_response_tokens(response)
+            estimated_tokens = estimate_response_tokens(estimation_target)
        except MemoryError as me:
            logger.warning(
                "MemoryError while estimating tokens for %s: %s", tool_name, me