mirror of
https://github.com/apache/superset.git
synced 2026-04-29 21:14:22 +00:00
Compare commits
1 Commits
fix/postgr
...
fix-mcp-ad
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d666ca9187 |
@@ -73,9 +73,49 @@ def _get_generic_error_message(error_str: str) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _sanitize_validation_error(error: Exception) -> str:
|
||||
"""SECURITY FIX: Sanitize validation errors to prevent disclosure."""
|
||||
error_str = str(error)
|
||||
def _extract_config_validation_message(error: Exception) -> str | None:
|
||||
"""Extract the meaningful validation message from a config parsing error.
|
||||
|
||||
Pydantic's TypeAdapter for discriminated unions produces very long type names
|
||||
in error strings (all variant names listed), which causes the 200-char
|
||||
truncation to cut off the actual error message. This function extracts just
|
||||
the user-actionable part.
|
||||
"""
|
||||
from pydantic import ValidationError as PydanticValidationError
|
||||
|
||||
# Check the __cause__ chain for a Pydantic ValidationError
|
||||
cause = getattr(error, "__cause__", None)
|
||||
if not isinstance(cause, PydanticValidationError):
|
||||
return None
|
||||
|
||||
# Extract just the message parts from Pydantic's structured errors
|
||||
messages = []
|
||||
for err in cause.errors()[:3]:
|
||||
msg = err.get("msg", "")
|
||||
# Pydantic prefixes model_validator errors with "Value error, "
|
||||
if msg.startswith("Value error, "):
|
||||
msg = msg[len("Value error, ") :]
|
||||
if msg:
|
||||
messages.append(msg)
|
||||
|
||||
return " | ".join(messages) if messages else None
|
||||
|
||||
|
||||
def _sanitize_validation_error(error: Exception, *, skip_generic: bool = False) -> str:
|
||||
"""SECURITY FIX: Sanitize validation errors to prevent disclosure.
|
||||
|
||||
Args:
|
||||
error: The exception to sanitize.
|
||||
skip_generic: When True, skip the generic-message replacement.
|
||||
Use this for config validation errors where the specific message
|
||||
(e.g. "Unknown field 'X' — did you mean 'Y'?") is more useful
|
||||
than a generic "Validation failed due to ..." message.
|
||||
"""
|
||||
# For config validation errors, try to extract the meaningful message
|
||||
# before falling back to the raw string (which may be very long due to
|
||||
# Pydantic's discriminated union type names).
|
||||
extracted = _extract_config_validation_message(error)
|
||||
error_str = extracted if extracted else str(error)
|
||||
|
||||
# SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
|
||||
if len(error_str) > 200:
|
||||
@@ -98,9 +138,10 @@ def _sanitize_validation_error(error: Exception) -> str:
|
||||
error_str = _redact_sql_select(error_str, error_str_upper)
|
||||
error_str = _redact_sql_where(error_str, error_str_upper)
|
||||
|
||||
# Return generic message for common error types
|
||||
if generic := _get_generic_error_message(error_str):
|
||||
return generic
|
||||
# Return generic message for common error types (unless caller opted out)
|
||||
if not skip_generic:
|
||||
if generic := _get_generic_error_message(error_str):
|
||||
return generic
|
||||
|
||||
return error_str
|
||||
|
||||
@@ -174,7 +215,27 @@ class ValidationPipeline:
|
||||
|
||||
# Parse the raw config dict into a typed ChartConfig for
|
||||
# downstream validators that need typed access.
|
||||
typed_config = parse_chart_config(request.config)
|
||||
try:
|
||||
typed_config = parse_chart_config(request.config)
|
||||
except (ValueError, TypeError) as config_err:
|
||||
from superset.mcp_service.utils.error_builder import (
|
||||
ChartErrorBuilder,
|
||||
)
|
||||
|
||||
sanitized_reason = _sanitize_validation_error(
|
||||
config_err, skip_generic=True
|
||||
)
|
||||
error = ChartErrorBuilder.build_error(
|
||||
error_type="config_validation_error",
|
||||
template_key="validation_error",
|
||||
template_vars={"reason": sanitized_reason},
|
||||
custom_suggestions=[
|
||||
"Use the 'filters' field for structured filters "
|
||||
"(column/op/value) — do NOT use 'adhoc_filters'",
|
||||
],
|
||||
error_code="CONFIG_VALIDATION_ERROR",
|
||||
)
|
||||
return ValidationResult(is_valid=False, request=request, error=error)
|
||||
|
||||
# Fetch dataset context once and reuse across validation layers
|
||||
dataset_context = ValidationPipeline._get_dataset_context(
|
||||
|
||||
@@ -174,6 +174,15 @@ class ChartErrorBuilder:
|
||||
"Modify your data selection or aggregation",
|
||||
],
|
||||
},
|
||||
# Validation pipeline errors
|
||||
"validation_error": {
|
||||
"message": "Configuration validation failed: {reason}",
|
||||
"details": "{reason}",
|
||||
"suggestions": [
|
||||
"Check field names in your chart configuration",
|
||||
"Read the chart://configs resource for valid fields and examples",
|
||||
],
|
||||
},
|
||||
# Chart generation errors
|
||||
"generation_failed": {
|
||||
"message": "Chart generation failed: {reason}",
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Regression tests for validation pipeline error surfacing.
|
||||
|
||||
Ensures that unknown fields (like adhoc_filters) produce actionable error
|
||||
messages instead of opaque "An error occurred" responses.
|
||||
|
||||
See: sc-103356
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def xy_chart_request_with_adhoc_filters():
|
||||
"""Request matching the sc-103356 reproduction case."""
|
||||
return {
|
||||
"dataset_id": 2,
|
||||
"chart_name": "Japan's RPG Dominance Over Time",
|
||||
"save_chart": True,
|
||||
"config": {
|
||||
"chart_type": "xy",
|
||||
"kind": "line",
|
||||
"x": {"name": "year"},
|
||||
"y": [{"aggregate": "SUM", "label": "Japan RPG Sales", "name": "jp_sales"}],
|
||||
"adhoc_filters": [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "Role-Playing",
|
||||
"expressionType": "SIMPLE",
|
||||
"operator": "==",
|
||||
"subject": "genre",
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_adhoc_filters_produces_actionable_error(xy_chart_request_with_adhoc_filters):
|
||||
"""adhoc_filters must produce error mentioning 'filters'."""
|
||||
result = ValidationPipeline.validate_request_with_warnings(
|
||||
xy_chart_request_with_adhoc_filters
|
||||
)
|
||||
|
||||
assert not result.is_valid
|
||||
assert result.error is not None
|
||||
# Must NOT be the opaque "An error occurred" message
|
||||
assert result.error.message != "An error occurred"
|
||||
# Must mention the correct field name so the LLM can self-correct
|
||||
assert (
|
||||
"filters" in result.error.message.lower()
|
||||
or "filters" in (result.error.details or "").lower()
|
||||
)
|
||||
# Error code should indicate config validation, not a system error
|
||||
assert result.error.error_code == "CONFIG_VALIDATION_ERROR"
|
||||
|
||||
|
||||
def test_unknown_field_error_includes_suggestion(xy_chart_request_with_adhoc_filters):
|
||||
"""Error response must include suggestions to help LLM self-correct."""
|
||||
result = ValidationPipeline.validate_request_with_warnings(
|
||||
xy_chart_request_with_adhoc_filters
|
||||
)
|
||||
|
||||
assert result.error is not None
|
||||
assert len(result.error.suggestions) > 0
|
||||
# At least one suggestion should mention the correct approach
|
||||
suggestion_text = " ".join(result.error.suggestions).lower()
|
||||
assert "filters" in suggestion_text
|
||||
# The adhoc_filters guidance should appear as a custom suggestion
|
||||
assert any("adhoc_filters" in s for s in result.error.suggestions)
|
||||
|
||||
|
||||
def test_arbitrary_unknown_field_produces_helpful_error():
|
||||
"""Any unknown field should produce a helpful error, not opaque message."""
|
||||
request_data = {
|
||||
"dataset_id": 1,
|
||||
"config": {
|
||||
"chart_type": "xy",
|
||||
"x": {"name": "date"},
|
||||
"y": [{"name": "revenue", "aggregate": "SUM"}],
|
||||
"totally_fake_field": "value",
|
||||
},
|
||||
}
|
||||
|
||||
result = ValidationPipeline.validate_request_with_warnings(request_data)
|
||||
|
||||
assert not result.is_valid
|
||||
assert result.error is not None
|
||||
assert result.error.message != "An error occurred"
|
||||
assert "totally_fake_field" in (result.error.details or "") or "Unknown field" in (
|
||||
result.error.message + (result.error.details or "")
|
||||
)
|
||||
Reference in New Issue
Block a user