fix(mcp): normalize call_tool proxy arguments to prevent encoding TypeError (#38775)

2026-04-19 16:14:52 +00:00 · 2026-03-20 15:42:40 -04:00
parent 28d67d59cd
commit 0d5721910e
2 changed files with 210 additions and 8 deletions
--- a/superset/mcp_service/server.py
+++ b/superset/mcp_service/server.py
@@ -25,7 +25,7 @@ For multi-pod deployments, configure MCP_EVENT_STORE_CONFIG with Redis URL.
 import logging
 import os
 from collections.abc import Sequence
-from typing import Any
+from typing import Annotated, Any

 import uvicorn

@@ -41,6 +41,7 @@ from superset.mcp_service.middleware import (
    LoggingMiddleware,
 )
 from superset.mcp_service.storage import _create_redis_store
+from superset.utils import json

 logger = logging.getLogger(__name__)

@@ -213,6 +214,37 @@ def _fix_call_tool_arguments(tool: Any) -> Any:
    return tool


+def _normalize_call_tool_arguments(
+    arguments: dict[str, Any] | None,
+    tool_schema: dict[str, Any] | None,
+) -> dict[str, Any] | None:
+    """JSON-serialize dict/list values when the tool schema accepts both
+    string and object variants (anyOf or oneOf with a string type).
+
+    When the BM25/regex ``call_tool`` proxy forwards arguments to the
+    actual tool, dict/list values must be serialized if the tool's schema
+    declares ``anyOf``/``oneOf`` with a string variant
+    (e.g. ``request: str | RequestModel``).
+
+    Without this, the MCP transport calls ``bytes(dict, 'utf-8')``
+    which raises ``TypeError: encoding without a string argument``.
+    """
+    if not arguments or not isinstance(tool_schema, dict):
+        return arguments
+
+    properties = tool_schema.get("properties", {})
+    result = dict(arguments)
+    for key, value in result.items():
+        if not isinstance(value, (dict, list)) or key not in properties:
+            continue
+        prop_schema = properties[key]
+        variants = prop_schema.get("oneOf") or prop_schema.get("anyOf") or []
+        has_string = any(v.get("type") == "string" for v in variants)
+        if has_string:
+            result[key] = json.dumps(value)
+    return result
+
+
 def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> None:
    """Apply tool search transform to reduce initial context size.

@@ -221,12 +253,16 @@ def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> N
    discover other tools on-demand via natural language search.

    Uses subclassing (not monkey-patching) to override ``_make_call_tool``
-    and fix the ``arguments`` schema for MCP bridge compatibility.
+    and fix the ``arguments`` schema for MCP bridge compatibility, and
+    normalize forwarded arguments to prevent encoding errors.

    NOTE: ``_make_call_tool`` is a private API in FastMCP 3.x
    (fastmcp>=3.1.0,<4.0). If FastMCP changes or removes this method
    in a future major version, these subclasses will need to be updated.
    """
+    from fastmcp.server.context import Context
+    from fastmcp.tools.tool import Tool, ToolResult
+
    strategy = config.get("strategy", "bm25")
    kwargs: dict[str, Any] = {
        "max_results": config.get("max_results", 5),
@@ -236,24 +272,61 @@ def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> N
        "search_result_serializer": _serialize_tools_without_output_schema,
    }

+    def _make_normalizing_call_tool(transform: Any) -> Tool:
+        """Create a call_tool proxy that normalizes arguments before forwarding.
+
+        This fixes two issues:
+        1. anyOf schema incompatibility with MCP bridges (schema fix).
+        2. ``encoding without a string argument`` TypeError when dict/list
+           values are forwarded for parameters declared as
+           ``str | SomeModel`` (argument normalization).
+        """
+
+        async def call_tool(
+            name: Annotated[str, "The name of the tool to call"],
+            arguments: Annotated[
+                dict[str, Any] | None, "Arguments to pass to the tool"
+            ] = None,
+            ctx: Context = None,
+        ) -> ToolResult:
+            """Call a tool by name with the given arguments.
+
+            Use this to execute tools discovered via search_tools.
+            """
+            if name in {transform._call_tool_name, transform._search_tool_name}:
+                raise ValueError(
+                    f"'{name}' is a synthetic search tool and cannot be "
+                    f"called via the call_tool proxy"
+                )
+            if arguments:
+                target_tool = await ctx.fastmcp.get_tool(name)
+                if target_tool is not None:
+                    arguments = _normalize_call_tool_arguments(
+                        arguments, target_tool.parameters
+                    )
+            return await ctx.fastmcp.call_tool(name, arguments)
+
+        tool = Tool.from_function(fn=call_tool, name=transform._call_tool_name)
+        return _fix_call_tool_arguments(tool)
+
    if strategy == "regex":
        from fastmcp.server.transforms.search import RegexSearchTransform

        class _FixedRegexSearchTransform(RegexSearchTransform):
-            """Regex search with fixed call_tool arguments schema."""
+            """Regex search with fixed call_tool schema and arg normalization."""

-            def _make_call_tool(self) -> Any:
-                return _fix_call_tool_arguments(super()._make_call_tool())
+            def _make_call_tool(self) -> Tool:
+                return _make_normalizing_call_tool(self)

        transform = _FixedRegexSearchTransform(**kwargs)
    else:
        from fastmcp.server.transforms.search import BM25SearchTransform

        class _FixedBM25SearchTransform(BM25SearchTransform):
-            """BM25 search with fixed call_tool arguments schema."""
+            """BM25 search with fixed call_tool schema and arg normalization."""

-            def _make_call_tool(self) -> Any:
-                return _fix_call_tool_arguments(super()._make_call_tool())
+            def _make_call_tool(self) -> Tool:
+                return _make_normalizing_call_tool(self)

        transform = _FixedBM25SearchTransform(**kwargs)

--- a/tests/unit_tests/mcp_service/test_tool_search_transform.py
+++ b/tests/unit_tests/mcp_service/test_tool_search_transform.py
@@ -26,8 +26,10 @@ from superset.mcp_service.mcp_config import MCP_TOOL_SEARCH_CONFIG
 from superset.mcp_service.server import (
    _apply_tool_search_transform,
    _fix_call_tool_arguments,
+    _normalize_call_tool_arguments,
    _serialize_tools_without_output_schema,
 )
+from superset.utils import json


 def test_tool_search_config_defaults():
@@ -171,3 +173,130 @@ def test_serialize_tools_handles_no_output_schema():
    assert len(result) == 1
    assert result[0]["name"] == "simple_tool"
    assert "outputSchema" not in result[0]
+
+
+# -- _normalize_call_tool_arguments tests --
+
+
+def test_normalize_serializes_dict_with_anyof_string():
+    """Dict value is JSON-serialized when schema has anyOf with string type."""
+    arguments = {"request": {"dataset_id": 1, "config": {"key": "val"}}}
+    schema = {
+        "properties": {
+            "request": {
+                "anyOf": [
+                    {"type": "string"},
+                    {"$ref": "#/$defs/SomeModel"},
+                ]
+            }
+        }
+    }
+
+    result = _normalize_call_tool_arguments(arguments, schema)
+
+    assert isinstance(result["request"], str)
+    assert json.loads(result["request"]) == {
+        "dataset_id": 1,
+        "config": {"key": "val"},
+    }
+
+
+def test_normalize_serializes_dict_with_oneof_string():
+    """Dict value is JSON-serialized when schema has oneOf with string type."""
+    arguments = {"request": {"name": "test"}}
+    schema = {
+        "properties": {
+            "request": {
+                "oneOf": [
+                    {"type": "string"},
+                    {"type": "object"},
+                ]
+            }
+        }
+    }
+
+    result = _normalize_call_tool_arguments(arguments, schema)
+
+    assert isinstance(result["request"], str)
+
+
+def test_normalize_leaves_dict_without_string_variant():
+    """Dict value is left as-is when schema has no string variant."""
+    arguments = {"config": {"key": "val"}}
+    schema = {
+        "properties": {
+            "config": {
+                "anyOf": [
+                    {"type": "object"},
+                    {"type": "null"},
+                ]
+            }
+        }
+    }
+
+    result = _normalize_call_tool_arguments(arguments, schema)
+
+    assert isinstance(result["config"], dict)
+    assert result["config"] == {"key": "val"}
+
+
+def test_normalize_leaves_non_dict_unchanged():
+    """Non-dict/list values pass through unchanged."""
+    arguments = {"name": "test", "count": 42, "flag": True}
+    schema = {
+        "properties": {
+            "name": {"type": "string"},
+            "count": {"type": "integer"},
+            "flag": {"type": "boolean"},
+        }
+    }
+
+    result = _normalize_call_tool_arguments(arguments, schema)
+
+    assert result == {"name": "test", "count": 42, "flag": True}
+
+
+def test_normalize_returns_none_for_none_arguments():
+    """None arguments returns None."""
+    result = _normalize_call_tool_arguments(None, {"properties": {}})
+
+    assert result is None
+
+
+def test_normalize_returns_arguments_for_non_dict_schema():
+    """Non-dict schema returns arguments unchanged."""
+    arguments = {"request": {"key": "val"}}
+
+    result = _normalize_call_tool_arguments(arguments, None)
+
+    assert result is arguments
+
+
+def test_normalize_serializes_list_with_anyof_string():
+    """List value is JSON-serialized when schema has anyOf with string type."""
+    arguments = {"items": [1, 2, 3]}
+    schema = {
+        "properties": {
+            "items": {
+                "anyOf": [
+                    {"type": "string"},
+                    {"type": "array", "items": {"type": "integer"}},
+                ]
+            }
+        }
+    }
+
+    result = _normalize_call_tool_arguments(arguments, schema)
+
+    assert isinstance(result["items"], str)
+    assert json.loads(result["items"]) == [1, 2, 3]
+
+
+def test_normalize_ignores_keys_not_in_schema():
+    """Dict values for keys not in schema properties are left unchanged."""
+    arguments = {"unknown_key": {"nested": True}}
+    schema = {"properties": {"other_key": {"type": "string"}}}
+
+    result = _normalize_call_tool_arguments(arguments, schema)
+
+    assert isinstance(result["unknown_key"], dict)