fix(mcp): normalize call_tool proxy arguments to prevent encoding TypeError (#38775)

This commit is contained in:
Amin Ghadersohi
2026-03-20 15:42:40 -04:00
committed by GitHub
parent 28d67d59cd
commit 0d5721910e
2 changed files with 210 additions and 8 deletions

View File

@@ -25,7 +25,7 @@ For multi-pod deployments, configure MCP_EVENT_STORE_CONFIG with Redis URL.
import logging
import os
from collections.abc import Sequence
from typing import Any
from typing import Annotated, Any
import uvicorn
@@ -41,6 +41,7 @@ from superset.mcp_service.middleware import (
LoggingMiddleware,
)
from superset.mcp_service.storage import _create_redis_store
from superset.utils import json
logger = logging.getLogger(__name__)
@@ -213,6 +214,37 @@ def _fix_call_tool_arguments(tool: Any) -> Any:
return tool
def _normalize_call_tool_arguments(
arguments: dict[str, Any] | None,
tool_schema: dict[str, Any] | None,
) -> dict[str, Any] | None:
"""JSON-serialize dict/list values when the tool schema accepts both
string and object variants (anyOf or oneOf with a string type).
When the BM25/regex ``call_tool`` proxy forwards arguments to the
actual tool, dict/list values must be serialized if the tool's schema
declares ``anyOf``/``oneOf`` with a string variant
(e.g. ``request: str | RequestModel``).
Without this, the MCP transport calls ``bytes(dict, 'utf-8')``
which raises ``TypeError: encoding without a string argument``.
"""
if not arguments or not isinstance(tool_schema, dict):
return arguments
properties = tool_schema.get("properties", {})
result = dict(arguments)
for key, value in result.items():
if not isinstance(value, (dict, list)) or key not in properties:
continue
prop_schema = properties[key]
variants = prop_schema.get("oneOf") or prop_schema.get("anyOf") or []
has_string = any(v.get("type") == "string" for v in variants)
if has_string:
result[key] = json.dumps(value)
return result
def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> None:
"""Apply tool search transform to reduce initial context size.
@@ -221,12 +253,16 @@ def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> N
discover other tools on-demand via natural language search.
Uses subclassing (not monkey-patching) to override ``_make_call_tool``
and fix the ``arguments`` schema for MCP bridge compatibility.
and fix the ``arguments`` schema for MCP bridge compatibility, and
normalize forwarded arguments to prevent encoding errors.
NOTE: ``_make_call_tool`` is a private API in FastMCP 3.x
(fastmcp>=3.1.0,<4.0). If FastMCP changes or removes this method
in a future major version, these subclasses will need to be updated.
"""
from fastmcp.server.context import Context
from fastmcp.tools.tool import Tool, ToolResult
strategy = config.get("strategy", "bm25")
kwargs: dict[str, Any] = {
"max_results": config.get("max_results", 5),
@@ -236,24 +272,61 @@ def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> N
"search_result_serializer": _serialize_tools_without_output_schema,
}
def _make_normalizing_call_tool(transform: Any) -> Tool:
"""Create a call_tool proxy that normalizes arguments before forwarding.
This fixes two issues:
1. anyOf schema incompatibility with MCP bridges (schema fix).
2. ``encoding without a string argument`` TypeError when dict/list
values are forwarded for parameters declared as
``str | SomeModel`` (argument normalization).
"""
async def call_tool(
name: Annotated[str, "The name of the tool to call"],
arguments: Annotated[
dict[str, Any] | None, "Arguments to pass to the tool"
] = None,
ctx: Context = None,
) -> ToolResult:
"""Call a tool by name with the given arguments.
Use this to execute tools discovered via search_tools.
"""
if name in {transform._call_tool_name, transform._search_tool_name}:
raise ValueError(
f"'{name}' is a synthetic search tool and cannot be "
f"called via the call_tool proxy"
)
if arguments:
target_tool = await ctx.fastmcp.get_tool(name)
if target_tool is not None:
arguments = _normalize_call_tool_arguments(
arguments, target_tool.parameters
)
return await ctx.fastmcp.call_tool(name, arguments)
tool = Tool.from_function(fn=call_tool, name=transform._call_tool_name)
return _fix_call_tool_arguments(tool)
if strategy == "regex":
from fastmcp.server.transforms.search import RegexSearchTransform
class _FixedRegexSearchTransform(RegexSearchTransform):
"""Regex search with fixed call_tool arguments schema."""
"""Regex search with fixed call_tool schema and arg normalization."""
def _make_call_tool(self) -> Any:
return _fix_call_tool_arguments(super()._make_call_tool())
def _make_call_tool(self) -> Tool:
return _make_normalizing_call_tool(self)
transform = _FixedRegexSearchTransform(**kwargs)
else:
from fastmcp.server.transforms.search import BM25SearchTransform
class _FixedBM25SearchTransform(BM25SearchTransform):
"""BM25 search with fixed call_tool arguments schema."""
"""BM25 search with fixed call_tool schema and arg normalization."""
def _make_call_tool(self) -> Any:
return _fix_call_tool_arguments(super()._make_call_tool())
def _make_call_tool(self) -> Tool:
return _make_normalizing_call_tool(self)
transform = _FixedBM25SearchTransform(**kwargs)

View File

@@ -26,8 +26,10 @@ from superset.mcp_service.mcp_config import MCP_TOOL_SEARCH_CONFIG
from superset.mcp_service.server import (
_apply_tool_search_transform,
_fix_call_tool_arguments,
_normalize_call_tool_arguments,
_serialize_tools_without_output_schema,
)
from superset.utils import json
def test_tool_search_config_defaults():
@@ -171,3 +173,130 @@ def test_serialize_tools_handles_no_output_schema():
assert len(result) == 1
assert result[0]["name"] == "simple_tool"
assert "outputSchema" not in result[0]
# -- _normalize_call_tool_arguments tests --
def test_normalize_serializes_dict_with_anyof_string():
"""Dict value is JSON-serialized when schema has anyOf with string type."""
arguments = {"request": {"dataset_id": 1, "config": {"key": "val"}}}
schema = {
"properties": {
"request": {
"anyOf": [
{"type": "string"},
{"$ref": "#/$defs/SomeModel"},
]
}
}
}
result = _normalize_call_tool_arguments(arguments, schema)
assert isinstance(result["request"], str)
assert json.loads(result["request"]) == {
"dataset_id": 1,
"config": {"key": "val"},
}
def test_normalize_serializes_dict_with_oneof_string():
"""Dict value is JSON-serialized when schema has oneOf with string type."""
arguments = {"request": {"name": "test"}}
schema = {
"properties": {
"request": {
"oneOf": [
{"type": "string"},
{"type": "object"},
]
}
}
}
result = _normalize_call_tool_arguments(arguments, schema)
assert isinstance(result["request"], str)
def test_normalize_leaves_dict_without_string_variant():
"""Dict value is left as-is when schema has no string variant."""
arguments = {"config": {"key": "val"}}
schema = {
"properties": {
"config": {
"anyOf": [
{"type": "object"},
{"type": "null"},
]
}
}
}
result = _normalize_call_tool_arguments(arguments, schema)
assert isinstance(result["config"], dict)
assert result["config"] == {"key": "val"}
def test_normalize_leaves_non_dict_unchanged():
"""Non-dict/list values pass through unchanged."""
arguments = {"name": "test", "count": 42, "flag": True}
schema = {
"properties": {
"name": {"type": "string"},
"count": {"type": "integer"},
"flag": {"type": "boolean"},
}
}
result = _normalize_call_tool_arguments(arguments, schema)
assert result == {"name": "test", "count": 42, "flag": True}
def test_normalize_returns_none_for_none_arguments():
"""None arguments returns None."""
result = _normalize_call_tool_arguments(None, {"properties": {}})
assert result is None
def test_normalize_returns_arguments_for_non_dict_schema():
"""Non-dict schema returns arguments unchanged."""
arguments = {"request": {"key": "val"}}
result = _normalize_call_tool_arguments(arguments, None)
assert result is arguments
def test_normalize_serializes_list_with_anyof_string():
"""List value is JSON-serialized when schema has anyOf with string type."""
arguments = {"items": [1, 2, 3]}
schema = {
"properties": {
"items": {
"anyOf": [
{"type": "string"},
{"type": "array", "items": {"type": "integer"}},
]
}
}
}
result = _normalize_call_tool_arguments(arguments, schema)
assert isinstance(result["items"], str)
assert json.loads(result["items"]) == [1, 2, 3]
def test_normalize_ignores_keys_not_in_schema():
"""Dict values for keys not in schema properties are left unchanged."""
arguments = {"unknown_key": {"nested": True}}
schema = {"properties": {"other_key": {"type": "string"}}}
result = _normalize_call_tool_arguments(arguments, schema)
assert isinstance(result["unknown_key"], dict)