superset2/superset/mcp_service/utils/error_builder.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Unified error builder for chart operations.
Consolidates error handling logic from multiple files.
"""

import html
import logging
import re
from typing import Any, Dict, List

from superset.mcp_service.common.error_schemas import (
    ChartGenerationError,
    ValidationError,
)

logger = logging.getLogger(__name__)


def _sanitize_user_input(value: Any) -> str:
    """Sanitize user input to prevent XSS and injection attacks in error messages."""
    if value is None:
        return "None"

    # Convert to string and limit length to prevent ReDoS attacks
    str_value = str(value)
    if len(str_value) > 200:
        str_value = str_value[:200] + "...[truncated]"

    # HTML escape to prevent XSS
    str_value = html.escape(str_value)

    # Check for dangerous HTML tags with simple substring checks
    html_tags = ["<script", "</script>"]
    str_lower = str_value.lower()
    for tag in html_tags:
        if tag in str_lower:
            str_value = "[FILTERED]"
            break

    # Check for dangerous URL schemes using regex with word boundaries
    # This ensures we match actual URL schemes, not arbitrary substrings
    dangerous_url_patterns = [
        r"\b(javascript|vbscript|data):",  # URL schemes
        r"on\w+\s*=",  # Event handlers
    ]
    for pattern in dangerous_url_patterns:
        if re.search(pattern, str_value, re.IGNORECASE):
            str_value = "[FILTERED]"
            break

    return str_value


def _sanitize_template_vars(vars_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``vars_dict`` whose values are sanitized strings.

    Lists and tuples are reduced to their first ten items, each item is
    sanitized on its own, and the results are joined with ``", "``. Every
    other value is handed to ``_sanitize_user_input`` as-is.
    """

    def _clean(raw: Any) -> str:
        # Sequences become one comma-separated string, capped at 10 entries.
        if isinstance(raw, (list, tuple)):
            return ", ".join(_sanitize_user_input(entry) for entry in raw[:10])
        # Scalars, None and any other object share the same sanitizer.
        return _sanitize_user_input(raw)

    return {name: _clean(raw) for name, raw in vars_dict.items()}


class ChartErrorBuilder:
    """Unified error builder for consistent error messages across chart operations.

    Each entry in ``TEMPLATES`` provides a ``message``, a ``details`` string
    and a list of ``suggestions``; all three may contain ``str.format``
    placeholders. ``build_error`` sanitizes the supplied variables, renders
    the template and wraps the result in a ``ChartGenerationError``. The
    ``*_error`` classmethods are shortcuts for the most common templates.
    """

    # Error templates organized by category
    TEMPLATES: Dict[str, Dict[str, Any]] = {
        # Validation errors
        "missing_field": {
            "message": "Missing required field: {field}",
            "details": "{field_description}",
            "suggestions": [
                "Add the '{field}' field to your configuration",
                "Check the API documentation for required fields",
                "{specific_suggestion}",
            ],
        },
        "invalid_type": {
            "message": "Invalid type for field '{field}'",
            "details": "Expected {expected_type}, got {actual_type}",
            "suggestions": [
                "Change '{field}' to be a {expected_type}",
                "Example: {example}",
            ],
        },
        "invalid_value": {
            "message": "Invalid value for '{field}'",
            "details": "Value '{value}' is not allowed. {reason}",
            "suggestions": [
                "Use one of the allowed values: {allowed_values}",
                "{specific_suggestion}",
            ],
        },
        # Dataset errors
        "dataset_not_found": {
            "message": "Dataset not found: {dataset_id}",
            "details": "No dataset found with identifier '{dataset_id}'. Please "
            "verify the dataset ID or UUID is correct.",
            "suggestions": [
                "Check that the dataset ID is correct",
                "Verify you have access to this dataset",
                "Use the list_datasets tool to find available datasets",
            ],
        },
        "column_not_found": {
            "message": "Column '{column}' not found in dataset",
            "details": "The column '{column}' does not exist in the dataset schema",
            "suggestions": [
                "Check column name spelling and case sensitivity",
                "Use get_dataset_info to see available columns",
                "Did you mean: {suggestions}?",
            ],
        },
        # Runtime errors
        "empty_result": {
            "message": "Query would return no data",
            "details": "{reason}",
            "suggestions": [
                "Check your filter conditions",
                "Verify the data exists for your criteria",
                "Try broader filter values or remove some filters",
            ],
        },
        "performance_warning": {
            "message": "Configuration may cause performance issues",
            "details": "{reason}",
            "suggestions": [
                "Consider adding filters to limit data",
                "Use aggregations to reduce data volume",
                "{specific_suggestion}",
            ],
        },
        # Chart-specific errors
        "invalid_chart_type": {
            "message": "Invalid chart type: '{chart_type}'",
            "details": "Chart type must be either 'xy' or 'table'",
            "suggestions": [
                "Use 'chart_type': 'xy' for line, bar, area, or scatter charts",
                "Use 'chart_type': 'table' for tabular data display",
            ],
        },
        "incompatible_configuration": {
            "message": "Chart configuration incompatible with data",
            "details": "{reason}",
            "suggestions": [
                "{primary_suggestion}",
                "Consider using a different chart type",
                "Modify your data selection or aggregation",
            ],
        },
        # Chart generation errors
        "generation_failed": {
            "message": "Chart generation failed: {reason}",
            "details": "Failed to create {chart_type} chart for dataset {dataset_id}. "
            "{reason}",
            "suggestions": [
                "Check that the dataset exists and is accessible",
                "Verify chart configuration is valid for the selected chart type",
                "Ensure all referenced columns exist in the dataset",
                "Check Superset logs for detailed error information",
            ],
        },
    }

    @classmethod
    def build_error(
        cls,
        error_type: str,
        template_key: str,
        template_vars: Dict[str, Any] | None = None,
        custom_suggestions: List[str] | None = None,
        error_code: str | None = None,
        validation_errors: List[ValidationError] | None = None,
    ) -> ChartGenerationError:
        """
        Build a standardized error using templates.

        Args:
            error_type: Type of error for categorization
            template_key: Key to error template. An unknown key is logged and
                rendered with the generic defaults instead of raising.
            template_vars: Variables to format into template
            custom_suggestions: Additional suggestions to append
            error_code: Optional error code; defaults to
                ``CHART_<TEMPLATE_KEY>`` when omitted
            validation_errors: Optional list of validation errors

        Returns:
            ChartGenerationError with formatted message
        """
        template = cls.TEMPLATES.get(template_key)
        if template is None:
            # Keep building a generic error, but leave a trace for debugging.
            logger.warning("Unknown error template key: %s", template_key)
            template = {}

        # SECURITY FIX: Sanitize template variables to prevent XSS/injection
        vars_dict = _sanitize_template_vars(template_vars or {})

        message = cls._format_message(template, vars_dict)
        details = cls._format_details(template, vars_dict)
        suggestions = cls._format_suggestions(template, vars_dict, custom_suggestions)
        error_code = cls._generate_error_code(error_code, template_key)

        return ChartGenerationError(
            error_type=error_type,
            message=message,
            details=details,
            suggestions=suggestions,
            error_code=error_code,
            validation_errors=validation_errors or [],
        )

    @classmethod
    def _render_text(
        cls,
        template: Dict[str, Any],
        key: str,
        default: str,
        fallback: str,
        vars_dict: Dict[str, Any],
    ) -> str:
        """Render ``template[key]`` with ``vars_dict``.

        A list value is joined with spaces before formatting. Formatting is
        attempted even when ``vars_dict`` is empty, so a template that still
        needs variables yields ``fallback`` rather than leaking raw
        ``{placeholder}`` text.
        """
        raw = template.get(key, default)
        text: str = " ".join(raw) if isinstance(raw, list) else str(raw)
        if not text:
            return text
        try:
            # SECURITY FIX: vars_dict is already sanitized by caller
            return text.format(**vars_dict)
        except (KeyError, IndexError, ValueError, TypeError) as e:
            logger.warning("Template formatting failed: %s", e)
            return fallback

    @classmethod
    def _format_message(
        cls, template: Dict[str, Any], vars_dict: Dict[str, Any]
    ) -> str:
        """Format the error message from template.

        Returns a generic message when the template cannot be rendered with
        the supplied variables.
        """
        return cls._render_text(
            template,
            "message",
            "An error occurred",
            "An error occurred during chart operation",
            vars_dict,
        )

    @classmethod
    def _format_details(
        cls, template: Dict[str, Any], vars_dict: Dict[str, Any]
    ) -> str:
        """Format the error details from template.

        Returns an empty string when the template has no details, and a
        generic placeholder sentence when the details cannot be rendered.
        """
        return cls._render_text(
            template,
            "details",
            "",
            "Additional error details unavailable",
            vars_dict,
        )

    @classmethod
    def _format_suggestions(
        cls,
        template: Dict[str, Any],
        vars_dict: Dict[str, Any],
        custom_suggestions: List[str] | None,
    ) -> List[str]:
        """Format suggestions from template and add custom ones.

        Template suggestions whose placeholders cannot be filled, or that
        render to an empty string or the literal ``"None"``, are dropped.
        At most five custom suggestions are appended (after sanitizing) and
        the combined list is capped at ten entries.
        """
        suggestions: List[str] = []
        for suggestion in template.get("suggestions", []):
            if "{" not in suggestion:
                suggestions.append(suggestion)
                continue
            try:
                # SECURITY FIX: vars_dict is already sanitized by caller
                rendered = suggestion.format(**vars_dict)
            except (KeyError, IndexError, ValueError, TypeError):
                # Skip suggestions with unfilled/malformed placeholders rather
                # than exposing raw template text or errors
                continue
            if rendered and rendered != "None":
                suggestions.append(rendered)

        if custom_suggestions:
            # SECURITY FIX: Sanitize custom suggestions too
            sanitized_custom = [
                _sanitize_user_input(s) for s in custom_suggestions[:5]
            ]  # Limit count
            suggestions.extend(sanitized_custom)

        return suggestions[:10]  # Limit total suggestions to prevent response bloat

    @classmethod
    def _generate_error_code(cls, error_code: str | None, template_key: str) -> str:
        """Return ``error_code`` if given, else ``CHART_<TEMPLATE_KEY>``."""
        if error_code:
            return error_code
        return f"CHART_{template_key.upper()}"

    @classmethod
    def missing_field_error(
        cls,
        field: str,
        field_description: str,
        specific_suggestion: str | None = None,
    ) -> ChartGenerationError:
        """Build a missing field error.

        Args:
            field: Name of the missing field
            field_description: Text used as the error details
            specific_suggestion: Optional extra suggestion; defaults to a
                generic "Add '<field>' to your request" hint
        """
        return cls.build_error(
            error_type="missing_field",
            template_key="missing_field",
            template_vars={
                "field": field,
                "field_description": field_description,
                "specific_suggestion": specific_suggestion
                or f"Add '{field}' to your request",
            },
        )

    @classmethod
    def invalid_type_error(
        cls,
        field: str,
        expected_type: str,
        actual_type: str,
        example: str | None = None,
    ) -> ChartGenerationError:
        """Build an invalid type error.

        Args:
            field: Name of the offending field
            expected_type: Type the field should have
            actual_type: Type that was actually supplied
            example: Optional usage example; defaults to
                ``'<field>': <<expected_type>>``
        """
        return cls.build_error(
            error_type="invalid_type",
            template_key="invalid_type",
            template_vars={
                "field": field,
                "expected_type": expected_type,
                "actual_type": actual_type,
                "example": example or f"'{field}': <{expected_type}>",
            },
        )

    @classmethod
    def column_not_found_error(
        cls, column: str, suggestions: List[str] | None = None
    ) -> ChartGenerationError:
        """Build a column not found error.

        Args:
            column: Name of the column that could not be found
            suggestions: Optional candidate column names; only the first
                three are shown in a "Did you mean: ...?" suggestion
        """
        template_vars: Dict[str, Any] = {"column": column}
        if suggestions:
            template_vars["suggestions"] = ", ".join(
                str(candidate) for candidate in suggestions[:3]
            )
        # With no candidates the "suggestions" variable is left out on
        # purpose: the "Did you mean: {suggestions}?" line then fails to
        # format and is skipped, instead of rendering a meaningless
        # "Did you mean: Check available columns?".
        return cls.build_error(
            error_type="column_not_found",
            template_key="column_not_found",
            template_vars=template_vars,
        )

    @classmethod
    def dataset_not_found_error(cls, dataset_id: Any) -> ChartGenerationError:
        """Build a dataset not found error for the given ID or UUID."""
        return cls.build_error(
            error_type="dataset_not_found",
            template_key="dataset_not_found",
            template_vars={"dataset_id": str(dataset_id)},
        )