mirror of
https://github.com/apache/superset.git
synced 2026-04-19 16:14:52 +00:00
fix(mcp): normalize call_tool proxy arguments to prevent encoding TypeError (#38775)
This commit is contained in:
@@ -25,7 +25,7 @@ For multi-pod deployments, configure MCP_EVENT_STORE_CONFIG with Redis URL.
|
||||
import logging
|
||||
import os
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
from typing import Annotated, Any
|
||||
|
||||
import uvicorn
|
||||
|
||||
@@ -41,6 +41,7 @@ from superset.mcp_service.middleware import (
|
||||
LoggingMiddleware,
|
||||
)
|
||||
from superset.mcp_service.storage import _create_redis_store
|
||||
from superset.utils import json
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -213,6 +214,37 @@ def _fix_call_tool_arguments(tool: Any) -> Any:
|
||||
return tool
|
||||
|
||||
|
||||
def _normalize_call_tool_arguments(
|
||||
arguments: dict[str, Any] | None,
|
||||
tool_schema: dict[str, Any] | None,
|
||||
) -> dict[str, Any] | None:
|
||||
"""JSON-serialize dict/list values when the tool schema accepts both
|
||||
string and object variants (anyOf or oneOf with a string type).
|
||||
|
||||
When the BM25/regex ``call_tool`` proxy forwards arguments to the
|
||||
actual tool, dict/list values must be serialized if the tool's schema
|
||||
declares ``anyOf``/``oneOf`` with a string variant
|
||||
(e.g. ``request: str | RequestModel``).
|
||||
|
||||
Without this, the MCP transport calls ``bytes(dict, 'utf-8')``
|
||||
which raises ``TypeError: encoding without a string argument``.
|
||||
"""
|
||||
if not arguments or not isinstance(tool_schema, dict):
|
||||
return arguments
|
||||
|
||||
properties = tool_schema.get("properties", {})
|
||||
result = dict(arguments)
|
||||
for key, value in result.items():
|
||||
if not isinstance(value, (dict, list)) or key not in properties:
|
||||
continue
|
||||
prop_schema = properties[key]
|
||||
variants = prop_schema.get("oneOf") or prop_schema.get("anyOf") or []
|
||||
has_string = any(v.get("type") == "string" for v in variants)
|
||||
if has_string:
|
||||
result[key] = json.dumps(value)
|
||||
return result
|
||||
|
||||
|
||||
def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> None:
|
||||
"""Apply tool search transform to reduce initial context size.
|
||||
|
||||
@@ -221,12 +253,16 @@ def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> N
|
||||
discover other tools on-demand via natural language search.
|
||||
|
||||
Uses subclassing (not monkey-patching) to override ``_make_call_tool``
|
||||
and fix the ``arguments`` schema for MCP bridge compatibility.
|
||||
and fix the ``arguments`` schema for MCP bridge compatibility, and
|
||||
normalize forwarded arguments to prevent encoding errors.
|
||||
|
||||
NOTE: ``_make_call_tool`` is a private API in FastMCP 3.x
|
||||
(fastmcp>=3.1.0,<4.0). If FastMCP changes or removes this method
|
||||
in a future major version, these subclasses will need to be updated.
|
||||
"""
|
||||
from fastmcp.server.context import Context
|
||||
from fastmcp.tools.tool import Tool, ToolResult
|
||||
|
||||
strategy = config.get("strategy", "bm25")
|
||||
kwargs: dict[str, Any] = {
|
||||
"max_results": config.get("max_results", 5),
|
||||
@@ -236,24 +272,61 @@ def _apply_tool_search_transform(mcp_instance: Any, config: dict[str, Any]) -> N
|
||||
"search_result_serializer": _serialize_tools_without_output_schema,
|
||||
}
|
||||
|
||||
def _make_normalizing_call_tool(transform: Any) -> Tool:
|
||||
"""Create a call_tool proxy that normalizes arguments before forwarding.
|
||||
|
||||
This fixes two issues:
|
||||
1. anyOf schema incompatibility with MCP bridges (schema fix).
|
||||
2. ``encoding without a string argument`` TypeError when dict/list
|
||||
values are forwarded for parameters declared as
|
||||
``str | SomeModel`` (argument normalization).
|
||||
"""
|
||||
|
||||
async def call_tool(
|
||||
name: Annotated[str, "The name of the tool to call"],
|
||||
arguments: Annotated[
|
||||
dict[str, Any] | None, "Arguments to pass to the tool"
|
||||
] = None,
|
||||
ctx: Context = None,
|
||||
) -> ToolResult:
|
||||
"""Call a tool by name with the given arguments.
|
||||
|
||||
Use this to execute tools discovered via search_tools.
|
||||
"""
|
||||
if name in {transform._call_tool_name, transform._search_tool_name}:
|
||||
raise ValueError(
|
||||
f"'{name}' is a synthetic search tool and cannot be "
|
||||
f"called via the call_tool proxy"
|
||||
)
|
||||
if arguments:
|
||||
target_tool = await ctx.fastmcp.get_tool(name)
|
||||
if target_tool is not None:
|
||||
arguments = _normalize_call_tool_arguments(
|
||||
arguments, target_tool.parameters
|
||||
)
|
||||
return await ctx.fastmcp.call_tool(name, arguments)
|
||||
|
||||
tool = Tool.from_function(fn=call_tool, name=transform._call_tool_name)
|
||||
return _fix_call_tool_arguments(tool)
|
||||
|
||||
if strategy == "regex":
|
||||
from fastmcp.server.transforms.search import RegexSearchTransform
|
||||
|
||||
class _FixedRegexSearchTransform(RegexSearchTransform):
|
||||
"""Regex search with fixed call_tool arguments schema."""
|
||||
"""Regex search with fixed call_tool schema and arg normalization."""
|
||||
|
||||
def _make_call_tool(self) -> Any:
|
||||
return _fix_call_tool_arguments(super()._make_call_tool())
|
||||
def _make_call_tool(self) -> Tool:
|
||||
return _make_normalizing_call_tool(self)
|
||||
|
||||
transform = _FixedRegexSearchTransform(**kwargs)
|
||||
else:
|
||||
from fastmcp.server.transforms.search import BM25SearchTransform
|
||||
|
||||
class _FixedBM25SearchTransform(BM25SearchTransform):
|
||||
"""BM25 search with fixed call_tool arguments schema."""
|
||||
"""BM25 search with fixed call_tool schema and arg normalization."""
|
||||
|
||||
def _make_call_tool(self) -> Any:
|
||||
return _fix_call_tool_arguments(super()._make_call_tool())
|
||||
def _make_call_tool(self) -> Tool:
|
||||
return _make_normalizing_call_tool(self)
|
||||
|
||||
transform = _FixedBM25SearchTransform(**kwargs)
|
||||
|
||||
|
||||
@@ -26,8 +26,10 @@ from superset.mcp_service.mcp_config import MCP_TOOL_SEARCH_CONFIG
|
||||
from superset.mcp_service.server import (
|
||||
_apply_tool_search_transform,
|
||||
_fix_call_tool_arguments,
|
||||
_normalize_call_tool_arguments,
|
||||
_serialize_tools_without_output_schema,
|
||||
)
|
||||
from superset.utils import json
|
||||
|
||||
|
||||
def test_tool_search_config_defaults():
|
||||
@@ -171,3 +173,130 @@ def test_serialize_tools_handles_no_output_schema():
|
||||
assert len(result) == 1
|
||||
assert result[0]["name"] == "simple_tool"
|
||||
assert "outputSchema" not in result[0]
|
||||
|
||||
|
||||
# -- _normalize_call_tool_arguments tests --
|
||||
|
||||
|
||||
def test_normalize_serializes_dict_with_anyof_string():
|
||||
"""Dict value is JSON-serialized when schema has anyOf with string type."""
|
||||
arguments = {"request": {"dataset_id": 1, "config": {"key": "val"}}}
|
||||
schema = {
|
||||
"properties": {
|
||||
"request": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{"$ref": "#/$defs/SomeModel"},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, schema)
|
||||
|
||||
assert isinstance(result["request"], str)
|
||||
assert json.loads(result["request"]) == {
|
||||
"dataset_id": 1,
|
||||
"config": {"key": "val"},
|
||||
}
|
||||
|
||||
|
||||
def test_normalize_serializes_dict_with_oneof_string():
|
||||
"""Dict value is JSON-serialized when schema has oneOf with string type."""
|
||||
arguments = {"request": {"name": "test"}}
|
||||
schema = {
|
||||
"properties": {
|
||||
"request": {
|
||||
"oneOf": [
|
||||
{"type": "string"},
|
||||
{"type": "object"},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, schema)
|
||||
|
||||
assert isinstance(result["request"], str)
|
||||
|
||||
|
||||
def test_normalize_leaves_dict_without_string_variant():
|
||||
"""Dict value is left as-is when schema has no string variant."""
|
||||
arguments = {"config": {"key": "val"}}
|
||||
schema = {
|
||||
"properties": {
|
||||
"config": {
|
||||
"anyOf": [
|
||||
{"type": "object"},
|
||||
{"type": "null"},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, schema)
|
||||
|
||||
assert isinstance(result["config"], dict)
|
||||
assert result["config"] == {"key": "val"}
|
||||
|
||||
|
||||
def test_normalize_leaves_non_dict_unchanged():
|
||||
"""Non-dict/list values pass through unchanged."""
|
||||
arguments = {"name": "test", "count": 42, "flag": True}
|
||||
schema = {
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"count": {"type": "integer"},
|
||||
"flag": {"type": "boolean"},
|
||||
}
|
||||
}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, schema)
|
||||
|
||||
assert result == {"name": "test", "count": 42, "flag": True}
|
||||
|
||||
|
||||
def test_normalize_returns_none_for_none_arguments():
|
||||
"""None arguments returns None."""
|
||||
result = _normalize_call_tool_arguments(None, {"properties": {}})
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_normalize_returns_arguments_for_non_dict_schema():
|
||||
"""Non-dict schema returns arguments unchanged."""
|
||||
arguments = {"request": {"key": "val"}}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, None)
|
||||
|
||||
assert result is arguments
|
||||
|
||||
|
||||
def test_normalize_serializes_list_with_anyof_string():
|
||||
"""List value is JSON-serialized when schema has anyOf with string type."""
|
||||
arguments = {"items": [1, 2, 3]}
|
||||
schema = {
|
||||
"properties": {
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{"type": "array", "items": {"type": "integer"}},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, schema)
|
||||
|
||||
assert isinstance(result["items"], str)
|
||||
assert json.loads(result["items"]) == [1, 2, 3]
|
||||
|
||||
|
||||
def test_normalize_ignores_keys_not_in_schema():
|
||||
"""Dict values for keys not in schema properties are left unchanged."""
|
||||
arguments = {"unknown_key": {"nested": True}}
|
||||
schema = {"properties": {"other_key": {"type": "string"}}}
|
||||
|
||||
result = _normalize_call_tool_arguments(arguments, schema)
|
||||
|
||||
assert isinstance(result["unknown_key"], dict)
|
||||
|
||||
Reference in New Issue
Block a user