Compare commits

...

1 Commits

Author SHA1 Message Date
Joe Li
d666ca9187 fix(mcp): surface actionable error when adhoc_filters passed to generate_chart
The validation pipeline caught unknown-field errors from parse_chart_config
but used a template_key ("validation_error") that didn't exist in
ChartErrorBuilder.TEMPLATES, causing the error to fall through to the
default "An error occurred" message with empty details and suggestions.

Additionally, Pydantic's discriminated union error strings start with a
very long type name listing all variants, which caused the 200-char
security truncation to cut off the actual helpful message before it
reached the user.

Fixes:
- Add "validation_error" template to ChartErrorBuilder.TEMPLATES
- Extract meaningful error messages from Pydantic's __cause__ chain
  before truncation (only accesses err["msg"], never err["input"])
- Catch config validation errors specifically in the pipeline with a
  targeted error_type/error_code instead of the generic catch-all
- Skip generic-message replacement for config errors so field-specific
  "did you mean?" suggestions are preserved

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 23:07:48 -07:00
3 changed files with 187 additions and 7 deletions

View File

@@ -73,9 +73,49 @@ def _get_generic_error_message(error_str: str) -> str | None:
return None
def _sanitize_validation_error(error: Exception) -> str:
"""SECURITY FIX: Sanitize validation errors to prevent disclosure."""
error_str = str(error)
def _extract_config_validation_message(error: Exception) -> str | None:
"""Extract the meaningful validation message from a config parsing error.
Pydantic's TypeAdapter for discriminated unions produces very long type names
in error strings (all variant names listed), which causes the 200-char
truncation to cut off the actual error message. This function extracts just
the user-actionable part.
"""
from pydantic import ValidationError as PydanticValidationError
# Check the __cause__ chain for a Pydantic ValidationError
cause = getattr(error, "__cause__", None)
if not isinstance(cause, PydanticValidationError):
return None
# Extract just the message parts from Pydantic's structured errors
messages = []
for err in cause.errors()[:3]:
msg = err.get("msg", "")
# Pydantic prefixes model_validator errors with "Value error, "
if msg.startswith("Value error, "):
msg = msg[len("Value error, ") :]
if msg:
messages.append(msg)
return " | ".join(messages) if messages else None
def _sanitize_validation_error(error: Exception, *, skip_generic: bool = False) -> str:
"""SECURITY FIX: Sanitize validation errors to prevent disclosure.
Args:
error: The exception to sanitize.
skip_generic: When True, skip the generic-message replacement.
Use this for config validation errors where the specific message
(e.g. "Unknown field 'X' — did you mean 'Y'?") is more useful
than a generic "Validation failed due to ..." message.
"""
# For config validation errors, try to extract the meaningful message
# before falling back to the raw string (which may be very long due to
# Pydantic's discriminated union type names).
extracted = _extract_config_validation_message(error)
error_str = extracted if extracted else str(error)
# SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
if len(error_str) > 200:
@@ -98,9 +138,10 @@ def _sanitize_validation_error(error: Exception) -> str:
error_str = _redact_sql_select(error_str, error_str_upper)
error_str = _redact_sql_where(error_str, error_str_upper)
# Return generic message for common error types
if generic := _get_generic_error_message(error_str):
return generic
# Return generic message for common error types (unless caller opted out)
if not skip_generic:
if generic := _get_generic_error_message(error_str):
return generic
return error_str
@@ -174,7 +215,27 @@ class ValidationPipeline:
# Parse the raw config dict into a typed ChartConfig for
# downstream validators that need typed access.
typed_config = parse_chart_config(request.config)
try:
typed_config = parse_chart_config(request.config)
except (ValueError, TypeError) as config_err:
from superset.mcp_service.utils.error_builder import (
ChartErrorBuilder,
)
sanitized_reason = _sanitize_validation_error(
config_err, skip_generic=True
)
error = ChartErrorBuilder.build_error(
error_type="config_validation_error",
template_key="validation_error",
template_vars={"reason": sanitized_reason},
custom_suggestions=[
"Use the 'filters' field for structured filters "
"(column/op/value) — do NOT use 'adhoc_filters'",
],
error_code="CONFIG_VALIDATION_ERROR",
)
return ValidationResult(is_valid=False, request=request, error=error)
# Fetch dataset context once and reuse across validation layers
dataset_context = ValidationPipeline._get_dataset_context(

View File

@@ -174,6 +174,15 @@ class ChartErrorBuilder:
"Modify your data selection or aggregation",
],
},
# Validation pipeline errors
"validation_error": {
"message": "Configuration validation failed: {reason}",
"details": "{reason}",
"suggestions": [
"Check field names in your chart configuration",
"Read the chart://configs resource for valid fields and examples",
],
},
# Chart generation errors
"generation_failed": {
"message": "Chart generation failed: {reason}",

View File

@@ -0,0 +1,110 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Regression tests for validation pipeline error surfacing.
Ensures that unknown fields (like adhoc_filters) produce actionable error
messages instead of opaque "An error occurred" responses.
See: sc-103356
"""
import pytest
from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
@pytest.fixture
def xy_chart_request_with_adhoc_filters():
"""Request matching the sc-103356 reproduction case."""
return {
"dataset_id": 2,
"chart_name": "Japan's RPG Dominance Over Time",
"save_chart": True,
"config": {
"chart_type": "xy",
"kind": "line",
"x": {"name": "year"},
"y": [{"aggregate": "SUM", "label": "Japan RPG Sales", "name": "jp_sales"}],
"adhoc_filters": [
{
"clause": "WHERE",
"comparator": "Role-Playing",
"expressionType": "SIMPLE",
"operator": "==",
"subject": "genre",
}
],
},
}
def test_adhoc_filters_produces_actionable_error(xy_chart_request_with_adhoc_filters):
"""adhoc_filters must produce error mentioning 'filters'."""
result = ValidationPipeline.validate_request_with_warnings(
xy_chart_request_with_adhoc_filters
)
assert not result.is_valid
assert result.error is not None
# Must NOT be the opaque "An error occurred" message
assert result.error.message != "An error occurred"
# Must mention the correct field name so the LLM can self-correct
assert (
"filters" in result.error.message.lower()
or "filters" in (result.error.details or "").lower()
)
# Error code should indicate config validation, not a system error
assert result.error.error_code == "CONFIG_VALIDATION_ERROR"
def test_unknown_field_error_includes_suggestion(xy_chart_request_with_adhoc_filters):
"""Error response must include suggestions to help LLM self-correct."""
result = ValidationPipeline.validate_request_with_warnings(
xy_chart_request_with_adhoc_filters
)
assert result.error is not None
assert len(result.error.suggestions) > 0
# At least one suggestion should mention the correct approach
suggestion_text = " ".join(result.error.suggestions).lower()
assert "filters" in suggestion_text
# The adhoc_filters guidance should appear as a custom suggestion
assert any("adhoc_filters" in s for s in result.error.suggestions)
def test_arbitrary_unknown_field_produces_helpful_error():
"""Any unknown field should produce a helpful error, not opaque message."""
request_data = {
"dataset_id": 1,
"config": {
"chart_type": "xy",
"x": {"name": "date"},
"y": [{"name": "revenue", "aggregate": "SUM"}],
"totally_fake_field": "value",
},
}
result = ValidationPipeline.validate_request_with_warnings(request_data)
assert not result.is_valid
assert result.error is not None
assert result.error.message != "An error occurred"
assert "totally_fake_field" in (result.error.details or "") or "Unknown field" in (
result.error.message + (result.error.details or "")
)