mirror of
https://github.com/apache/superset.git
synced 2026-05-12 19:35:17 +00:00
fix(mcp): strip json_metadata and position_json from get_dashboard_info response (#39101)
(cherry picked from commit 680cef0ee0)
This commit is contained in:
committed by
Michael S. Molina
parent
252044702a
commit
e2d6afa38d
152
superset/mcp_service/utils/response_utils.py
Normal file
152
superset/mcp_service/utils/response_utils.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Utilities for building MCP tool responses with explicit omission metadata.
|
||||
|
||||
When MCP tool responses strip large fields to reduce context window usage,
|
||||
the LLM agent should be told *what* was omitted and *why* — otherwise it
|
||||
cannot distinguish "field is empty" from "field was stripped for size".
|
||||
|
||||
This module provides a reusable builder for omission metadata that any
|
||||
MCP tool serializer can use.
|
||||
|
||||
Industry context (as of 2026):
|
||||
- The MCP spec has no standard for field omission signaling.
|
||||
- Silent omission is considered an anti-pattern (Grafana MCP #557).
|
||||
- Production servers (mcp-git-polite, Blockscout, Axiom) converge on
|
||||
explicit omission indicators with size hints and retrieval guidance.
|
||||
- Anthropic's "Writing Tools for Agents" blog recommends surfacing
|
||||
what was stripped so agents can decide whether to fetch full data.
|
||||
|
||||
Usage example::
|
||||
|
||||
from superset.mcp_service.utils.response_utils import OmittedFieldsBuilder
|
||||
|
||||
omitted = (
|
||||
OmittedFieldsBuilder()
|
||||
.add_raw_field(
|
||||
"position_json",
|
||||
raw_value=dashboard.position_json,
|
||||
reason="Internal layout tree — not useful for analysis.",
|
||||
)
|
||||
.add_extracted_field(
|
||||
"json_metadata",
|
||||
raw_value=dashboard.json_metadata,
|
||||
reason="native_filters and cross_filters_enabled extracted above.",
|
||||
)
|
||||
.build()
|
||||
)
|
||||
# Returns: {"position_json": "Omitted (~42 KB) — ...", ...}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict
|
||||
|
||||
|
||||
def _byte_size_label(value: str | None) -> str:
|
||||
"""Return a human-readable size label for a string value."""
|
||||
if not value or not isinstance(value, str):
|
||||
return "empty"
|
||||
size_bytes = len(value.encode("utf-8", errors="replace"))
|
||||
if size_bytes < 1024:
|
||||
return f"{size_bytes} B"
|
||||
return f"{size_bytes / 1024:.0f} KB"
|
||||
|
||||
|
||||
class OmittedFieldsBuilder:
|
||||
"""Builder for constructing omission metadata dicts.
|
||||
|
||||
Produces a ``Dict[str, str]`` mapping field names to human-readable
|
||||
descriptions of what was omitted, including approximate sizes.
|
||||
|
||||
Two field types are supported:
|
||||
|
||||
- **Raw fields** (``add_raw_field``): The field was stripped entirely
|
||||
with no replacement. The agent has no way to access this data
|
||||
unless a companion tool exists.
|
||||
|
||||
- **Extracted fields** (``add_extracted_field``): The raw blob was
|
||||
stripped, but useful subsets were extracted into structured fields
|
||||
on the same response object (e.g. ``native_filters`` extracted
|
||||
from ``json_metadata``).
|
||||
|
||||
All methods return ``self`` for fluent chaining.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._fields: Dict[str, str] = {}
|
||||
|
||||
def add_raw_field(
|
||||
self,
|
||||
field_name: str,
|
||||
raw_value: str | None,
|
||||
reason: str,
|
||||
) -> "OmittedFieldsBuilder":
|
||||
"""Record a field that was stripped with no replacement.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
field_name:
|
||||
The original field name (e.g. ``"position_json"``).
|
||||
raw_value:
|
||||
The raw value that was omitted (used only to compute size).
|
||||
Pass ``None`` if the field was empty/unset.
|
||||
reason:
|
||||
Why the field was omitted, written for an LLM audience.
|
||||
"""
|
||||
size = _byte_size_label(raw_value)
|
||||
has_data = isinstance(raw_value, str) and len(raw_value) > 0
|
||||
if has_data:
|
||||
self._fields[field_name] = f"Omitted (~{size}) — {reason}"
|
||||
else:
|
||||
self._fields[field_name] = f"Omitted ({size}) — {reason}"
|
||||
return self
|
||||
|
||||
def add_extracted_field(
|
||||
self,
|
||||
field_name: str,
|
||||
raw_value: str | None,
|
||||
reason: str,
|
||||
) -> "OmittedFieldsBuilder":
|
||||
"""Record a field whose useful parts were extracted into other fields.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
field_name:
|
||||
The original raw field name (e.g. ``"json_metadata"``).
|
||||
raw_value:
|
||||
The raw value that was omitted (used only to compute size).
|
||||
reason:
|
||||
Explanation of what was extracted and where, for LLM context.
|
||||
"""
|
||||
size = _byte_size_label(raw_value)
|
||||
has_data = isinstance(raw_value, str) and len(raw_value) > 0
|
||||
if has_data:
|
||||
self._fields[field_name] = (
|
||||
f"Omitted (~{size}), useful parts extracted — {reason}"
|
||||
)
|
||||
else:
|
||||
self._fields[field_name] = (
|
||||
f"Omitted ({size}), useful parts extracted — {reason}"
|
||||
)
|
||||
return self
|
||||
|
||||
def build(self) -> Dict[str, str]:
|
||||
"""Return the omission metadata dict."""
|
||||
return dict(self._fields)
|
||||
Reference in New Issue
Block a user