fix(mcp): surface actionable error when adhoc_filters passed to generate_chart

The validation pipeline caught unknown-field errors from parse_chart_config but used a template_key ("validation_error") that didn't exist in ChartErrorBuilder.TEMPLATES, causing the error to fall through to the default "An error occurred" message with empty details and suggestions. Additionally, Pydantic's discriminated union error strings start with a very long type name listing all variants, which caused the 200-char security truncation to cut off the actual helpful message before it reached the user. Fixes: - Add "validation_error" template to ChartErrorBuilder.TEMPLATES - Extract meaningful error messages from Pydantic's __cause__ chain before truncation (only accesses err["msg"], never err["input"]) - Catch config validation errors specifically in the pipeline with a targeted error_type/error_code instead of the generic catch-all - Skip generic-message replacement for config errors so field-specific "did you mean?" suggestions are preserved Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-29 21:14:22 +00:00 · 2026-04-20 23:07:48 -07:00
3 changed files with 187 additions and 7 deletions
--- a/superset/mcp_service/chart/validation/pipeline.py
+++ b/superset/mcp_service/chart/validation/pipeline.py
@@ -73,9 +73,49 @@ def _get_generic_error_message(error_str: str) -> str | None:
    return None


-def _sanitize_validation_error(error: Exception) -> str:
-    """SECURITY FIX: Sanitize validation errors to prevent disclosure."""
-    error_str = str(error)
+def _extract_config_validation_message(error: Exception) -> str | None:
+    """Extract the meaningful validation message from a config parsing error.
+
+    Pydantic's TypeAdapter for discriminated unions produces very long type names
+    in error strings (all variant names listed), which causes the 200-char
+    truncation to cut off the actual error message. This function extracts just
+    the user-actionable part.
+    """
+    from pydantic import ValidationError as PydanticValidationError
+
+    # Check the __cause__ chain for a Pydantic ValidationError
+    cause = getattr(error, "__cause__", None)
+    if not isinstance(cause, PydanticValidationError):
+        return None
+
+    # Extract just the message parts from Pydantic's structured errors
+    messages = []
+    for err in cause.errors()[:3]:
+        msg = err.get("msg", "")
+        # Pydantic prefixes model_validator errors with "Value error, "
+        if msg.startswith("Value error, "):
+            msg = msg[len("Value error, ") :]
+        if msg:
+            messages.append(msg)
+
+    return " | ".join(messages) if messages else None
+
+
+def _sanitize_validation_error(error: Exception, *, skip_generic: bool = False) -> str:
+    """SECURITY FIX: Sanitize validation errors to prevent disclosure.
+
+    Args:
+        error: The exception to sanitize.
+        skip_generic: When True, skip the generic-message replacement.
+            Use this for config validation errors where the specific message
+            (e.g. "Unknown field 'X' — did you mean 'Y'?") is more useful
+            than a generic "Validation failed due to ..." message.
+    """
+    # For config validation errors, try to extract the meaningful message
+    # before falling back to the raw string (which may be very long due to
+    # Pydantic's discriminated union type names).
+    extracted = _extract_config_validation_message(error)
+    error_str = extracted if extracted else str(error)

    # SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
    if len(error_str) > 200:
@@ -98,9 +138,10 @@ def _sanitize_validation_error(error: Exception) -> str:
    error_str = _redact_sql_select(error_str, error_str_upper)
    error_str = _redact_sql_where(error_str, error_str_upper)

-    # Return generic message for common error types
-    if generic := _get_generic_error_message(error_str):
-        return generic
+    # Return generic message for common error types (unless caller opted out)
+    if not skip_generic:
+        if generic := _get_generic_error_message(error_str):
+            return generic

    return error_str

@@ -174,7 +215,27 @@ class ValidationPipeline:

            # Parse the raw config dict into a typed ChartConfig for
            # downstream validators that need typed access.
-            typed_config = parse_chart_config(request.config)
+            try:
+                typed_config = parse_chart_config(request.config)
+            except (ValueError, TypeError) as config_err:
+                from superset.mcp_service.utils.error_builder import (
+                    ChartErrorBuilder,
+                )
+
+                sanitized_reason = _sanitize_validation_error(
+                    config_err, skip_generic=True
+                )
+                error = ChartErrorBuilder.build_error(
+                    error_type="config_validation_error",
+                    template_key="validation_error",
+                    template_vars={"reason": sanitized_reason},
+                    custom_suggestions=[
+                        "Use the 'filters' field for structured filters "
+                        "(column/op/value) — do NOT use 'adhoc_filters'",
+                    ],
+                    error_code="CONFIG_VALIDATION_ERROR",
+                )
+                return ValidationResult(is_valid=False, request=request, error=error)

            # Fetch dataset context once and reuse across validation layers
            dataset_context = ValidationPipeline._get_dataset_context(
--- a/superset/mcp_service/utils/error_builder.py
+++ b/superset/mcp_service/utils/error_builder.py
@@ -174,6 +174,15 @@ class ChartErrorBuilder:
                "Modify your data selection or aggregation",
            ],
        },
+        # Validation pipeline errors
+        "validation_error": {
+            "message": "Configuration validation failed: {reason}",
+            "details": "{reason}",
+            "suggestions": [
+                "Check field names in your chart configuration",
+                "Read the chart://configs resource for valid fields and examples",
+            ],
+        },
        # Chart generation errors
        "generation_failed": {
            "message": "Chart generation failed: {reason}",
--- a/tests/unit_tests/mcp_service/chart/validation/test_pipeline_unknown_fields.py
+++ b/tests/unit_tests/mcp_service/chart/validation/test_pipeline_unknown_fields.py
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Regression tests for validation pipeline error surfacing.
+
+Ensures that unknown fields (like adhoc_filters) produce actionable error
+messages instead of opaque "An error occurred" responses.
+
+See: sc-103356
+"""
+
+import pytest
+
+from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
+
+
+@pytest.fixture
+def xy_chart_request_with_adhoc_filters():
+    """Request matching the sc-103356 reproduction case."""
+    return {
+        "dataset_id": 2,
+        "chart_name": "Japan's RPG Dominance Over Time",
+        "save_chart": True,
+        "config": {
+            "chart_type": "xy",
+            "kind": "line",
+            "x": {"name": "year"},
+            "y": [{"aggregate": "SUM", "label": "Japan RPG Sales", "name": "jp_sales"}],
+            "adhoc_filters": [
+                {
+                    "clause": "WHERE",
+                    "comparator": "Role-Playing",
+                    "expressionType": "SIMPLE",
+                    "operator": "==",
+                    "subject": "genre",
+                }
+            ],
+        },
+    }
+
+
+def test_adhoc_filters_produces_actionable_error(xy_chart_request_with_adhoc_filters):
+    """adhoc_filters must produce error mentioning 'filters'."""
+    result = ValidationPipeline.validate_request_with_warnings(
+        xy_chart_request_with_adhoc_filters
+    )
+
+    assert not result.is_valid
+    assert result.error is not None
+    # Must NOT be the opaque "An error occurred" message
+    assert result.error.message != "An error occurred"
+    # Must mention the correct field name so the LLM can self-correct
+    assert (
+        "filters" in result.error.message.lower()
+        or "filters" in (result.error.details or "").lower()
+    )
+    # Error code should indicate config validation, not a system error
+    assert result.error.error_code == "CONFIG_VALIDATION_ERROR"
+
+
+def test_unknown_field_error_includes_suggestion(xy_chart_request_with_adhoc_filters):
+    """Error response must include suggestions to help LLM self-correct."""
+    result = ValidationPipeline.validate_request_with_warnings(
+        xy_chart_request_with_adhoc_filters
+    )
+
+    assert result.error is not None
+    assert len(result.error.suggestions) > 0
+    # At least one suggestion should mention the correct approach
+    suggestion_text = " ".join(result.error.suggestions).lower()
+    assert "filters" in suggestion_text
+    # The adhoc_filters guidance should appear as a custom suggestion
+    assert any("adhoc_filters" in s for s in result.error.suggestions)
+
+
+def test_arbitrary_unknown_field_produces_helpful_error():
+    """Any unknown field should produce a helpful error, not opaque message."""
+    request_data = {
+        "dataset_id": 1,
+        "config": {
+            "chart_type": "xy",
+            "x": {"name": "date"},
+            "y": [{"name": "revenue", "aggregate": "SUM"}],
+            "totally_fake_field": "value",
+        },
+    }
+
+    result = ValidationPipeline.validate_request_with_warnings(request_data)
+
+    assert not result.is_valid
+    assert result.error is not None
+    assert result.error.message != "An error occurred"
+    assert "totally_fake_field" in (result.error.details or "") or "Unknown field" in (
+        result.error.message + (result.error.details or "")
+    )