# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Unified schema validation for chart configurations.
Consolidates pre-validation, schema validation, and error enhancement.
"""

import logging
from typing import Any, Dict, Tuple

from pydantic import ValidationError as PydanticValidationError

from superset.mcp_service.chart.schemas import (
    GenerateChartRequest,
)
from superset.mcp_service.common.error_schemas import ChartGenerationError

logger = logging.getLogger(__name__)


class SchemaValidator:
    """Unified schema validator with pre-validation and enhanced error messages.

    All methods are static; the class only serves as a namespace. The public
    entry point is :meth:`validate_request`; the underscore-prefixed methods
    are its building blocks.
    """

    @staticmethod
    def validate_request(
        request_data: Dict[str, Any],
    ) -> Tuple[bool, GenerateChartRequest | None, ChartGenerationError | None]:
        """
        Validate request data with pre-validation and enhanced error handling.

        Args:
            request_data: Raw request payload; expected to be a dict carrying
                'dataset_id' and 'config'.

        Returns:
            Tuple of (is_valid, parsed_request, error). On success the tuple is
            ``(True, request, None)``; on failure it is ``(False, None, error)``.
        """
        # Pre-validate to catch common issues early, with targeted messages.
        is_valid, pre_error = SchemaValidator._pre_validate(request_data)
        if not is_valid:
            return False, None, pre_error

        # Try Pydantic validation
        try:
            request = GenerateChartRequest(**request_data)
        except PydanticValidationError as e:
            # Enhance the error message
            enhanced = SchemaValidator._enhance_validation_error(e, request_data)
            return False, None, enhanced
        return True, request, None

    @staticmethod
    def _pre_validate(
        data: Dict[str, Any],
    ) -> Tuple[bool, ChartGenerationError | None]:
        """Pre-validate request data before Pydantic processing.

        Checks, in order: the payload is a dict, 'dataset_id' and 'config' are
        present, 'config' is a dict, 'chart_type' is present and is one of
        'xy' / 'table'. It then delegates to the chart-type specific
        pre-validator.

        Returns:
            ``(True, None)`` when every check passes, otherwise
            ``(False, error)`` for the first failing check.
        """
        if not isinstance(data, dict):
            return False, ChartGenerationError(
                error_type="invalid_request_format",
                message="Request must be a JSON object",
                details="The request body must be a valid JSON object, not a string "
                "or array",
                suggestions=[
                    "Ensure you're sending a JSON object with 'dataset_id' and "
                    "'config' fields",
                    "Check that Content-Type header is set to 'application/json'",
                ],
                error_code="INVALID_REQUEST_FORMAT",
            )

        # Check for required top-level fields
        if "dataset_id" not in data:
            return False, ChartGenerationError(
                error_type="missing_dataset_id",
                message="Missing required field: dataset_id",
                details="The 'dataset_id' field is required to identify which dataset "
                "to use",
                suggestions=[
                    "Add 'dataset_id' field with the ID of your dataset",
                    "Use list_datasets tool to find available dataset IDs",
                    "Example: {'dataset_id': 1, 'config': {...}}",
                ],
                error_code="MISSING_DATASET_ID",
            )

        if "config" not in data:
            return False, ChartGenerationError(
                error_type="missing_config",
                message="Missing required field: config",
                details="The 'config' field is required to specify chart configuration",
                suggestions=[
                    "Add 'config' field with chart type and settings",
                    "Example: {'dataset_id': 1, 'config': {'chart_type': 'xy', ...}}",
                ],
                error_code="MISSING_CONFIG",
            )

        # 'config' is known to be present at this point.
        config = data["config"]
        if not isinstance(config, dict):
            return False, ChartGenerationError(
                error_type="invalid_config_format",
                message="Config must be a JSON object",
                details="The 'config' field must be a valid JSON object with chart "
                "settings",
                suggestions=[
                    "Ensure config is an object, not a string or array",
                    "Example: 'config': {'chart_type': 'xy', 'x': {...}, 'y': [...]}",
                ],
                error_code="INVALID_CONFIG_FORMAT",
            )

        # Check chart_type early; any falsy value is treated as missing.
        chart_type = config.get("chart_type")
        if not chart_type:
            return False, ChartGenerationError(
                error_type="missing_chart_type",
                message="Missing required field: chart_type",
                details="Chart configuration must specify 'chart_type' as either 'xy' "
                "or 'table'",
                suggestions=[
                    "Add 'chart_type': 'xy' for line/bar/area/scatter charts",
                    "Add 'chart_type': 'table' for table visualizations",
                    "Example: 'config': {'chart_type': 'xy', ...}",
                ],
                error_code="MISSING_CHART_TYPE",
            )

        # List membership (not a dict/set lookup) so that an unhashable
        # chart_type value is reported as invalid instead of raising TypeError.
        if chart_type not in ["xy", "table"]:
            return False, ChartGenerationError(
                error_type="invalid_chart_type",
                message=f"Invalid chart_type: '{chart_type}'",
                details=f"Chart type '{chart_type}' is not supported. Must be 'xy' or "
                f"'table'",
                suggestions=[
                    "Use 'chart_type': 'xy' for line, bar, area, or scatter charts",
                    "Use 'chart_type': 'table' for tabular data display",
                    "Check spelling and ensure lowercase",
                ],
                error_code="INVALID_CHART_TYPE",
            )

        # Pre-validate structure based on chart type. Only 'xy' and 'table'
        # can reach this point, so a two-way branch covers every case.
        if chart_type == "xy":
            return SchemaValidator._pre_validate_xy_config(config)
        return SchemaValidator._pre_validate_table_config(config)

    @staticmethod
    def _pre_validate_xy_config(
        config: Dict[str, Any],
    ) -> Tuple[bool, ChartGenerationError | None]:
        """Pre-validate XY chart configuration.

        Requires both 'x' and 'y' keys and requires 'y' to be a list.

        Returns:
            ``(True, None)`` when the structure is acceptable, otherwise
            ``(False, error)``.
        """
        missing_fields = []

        if "x" not in config:
            missing_fields.append("'x' (X-axis column)")
        if "y" not in config:
            missing_fields.append("'y' (Y-axis metrics)")

        if missing_fields:
            return False, ChartGenerationError(
                error_type="missing_xy_fields",
                message=f"XY chart missing required "
                f"fields: {', '.join(missing_fields)}",
                details="XY charts require both X-axis (dimension) and Y-axis ("
                "metrics) specifications",
                suggestions=[
                    "Add 'x' field: {'name': 'column_name'} for X-axis",
                    "Add 'y' field: [{'name': 'metric_column', 'aggregate': 'SUM'}] "
                    "for Y-axis",
                    "Example: {'chart_type': 'xy', 'x': {'name': 'date'}, "
                    "'y': [{'name': 'sales', 'aggregate': 'SUM'}]}",
                ],
                error_code="MISSING_XY_FIELDS",
            )

        # Validate Y is a list ('y' is guaranteed present by the check above).
        if not isinstance(config["y"], list):
            return False, ChartGenerationError(
                error_type="invalid_y_format",
                message="Y-axis must be a list of metrics",
                details="The 'y' field must be an array of metric specifications",
                suggestions=[
                    "Wrap Y-axis metric in array: 'y': [{'name': 'column', "
                    "'aggregate': 'SUM'}]",
                    "Multiple metrics supported: 'y': [metric1, metric2, ...]",
                ],
                error_code="INVALID_Y_FORMAT",
            )

        return True, None

    @staticmethod
    def _pre_validate_table_config(
        config: Dict[str, Any],
    ) -> Tuple[bool, ChartGenerationError | None]:
        """Pre-validate table chart configuration.

        Requires a 'columns' key whose value is a list.

        Returns:
            ``(True, None)`` when the structure is acceptable, otherwise
            ``(False, error)``.
        """
        if "columns" not in config:
            return False, ChartGenerationError(
                error_type="missing_columns",
                message="Table chart missing required field: columns",
                details="Table charts require a 'columns' array to specify which "
                "columns to display",
                suggestions=[
                    "Add 'columns' field with array of column specifications",
                    "Example: 'columns': [{'name': 'product'}, {'name': 'sales', "
                    "'aggregate': 'SUM'}]",
                    "Each column can have optional 'aggregate' for metrics",
                ],
                error_code="MISSING_COLUMNS",
            )

        if not isinstance(config["columns"], list):
            return False, ChartGenerationError(
                error_type="invalid_columns_format",
                message="Columns must be a list",
                details="The 'columns' field must be an array of column specifications",
                suggestions=[
                    "Ensure columns is an array: 'columns': [...]",
                    "Each column should be an object with 'name' field",
                ],
                error_code="INVALID_COLUMNS_FORMAT",
            )

        return True, None

    @staticmethod
    def _enhance_validation_error(
        error: PydanticValidationError, request_data: Dict[str, Any]
    ) -> ChartGenerationError:
        """Convert Pydantic validation errors to user-friendly messages.

        If any reported error is a discriminated-union error and the request's
        chart_type is 'xy' or 'table', a chart-type specific error is
        returned. Otherwise a generic error summarising the first three
        Pydantic errors is returned.

        Args:
            error: The Pydantic validation error raised while parsing.
            request_data: The payload that failed validation; only
                ``config.chart_type`` is read from it.

        Returns:
            A ChartGenerationError describing the failure.
        """
        errors = error.errors()

        # Resolve chart_type once. The isinstance guards keep this helper from
        # raising AttributeError when it is handed a payload whose 'config'
        # is not a dict (i.e. data that did not go through _pre_validate).
        config = request_data.get("config") if isinstance(request_data, dict) else None
        chart_type = (
            config.get("chart_type", "unknown")
            if isinstance(config, dict)
            else "unknown"
        )

        # Check for discriminated union errors (generic "'table' was expected")
        has_union_error = any(
            err.get("type") == "union_tag_invalid"
            or "discriminator" in str(err.get("ctx", {}))
            for err in errors
        )

        if has_union_error:
            # This is the generic union error - provide better message
            if chart_type == "xy":
                return ChartGenerationError(
                    error_type="xy_validation_error",
                    message="XY chart configuration validation failed",
                    details="The XY chart configuration is missing required "
                    "fields or has invalid structure",
                    suggestions=[
                        "Ensure 'x' field exists with {'name': 'column_name'}",
                        "Ensure 'y' field is an array: [{'name': 'metric', "
                        "'aggregate': 'SUM'}]",
                        "Check that all column names are strings",
                        "Verify aggregate functions are valid: SUM, COUNT, AVG, "
                        "MIN, MAX",
                    ],
                    error_code="XY_VALIDATION_ERROR",
                )
            if chart_type == "table":
                return ChartGenerationError(
                    error_type="table_validation_error",
                    message="Table chart configuration validation failed",
                    details="The table chart configuration is missing required "
                    "fields or has invalid structure",
                    suggestions=[
                        "Ensure 'columns' field is an array of column "
                        "specifications",
                        "Each column needs {'name': 'column_name'}",
                        "Optional: add 'aggregate' for metrics",
                        "Example: 'columns': [{'name': 'product'}, {'name': "
                        "'sales', 'aggregate': 'SUM'}]",
                    ],
                    error_code="TABLE_VALIDATION_ERROR",
                )

        # Default enhanced error: summarise the first 3 errors as "loc: msg".
        error_details = []
        for err in errors[:3]:
            loc = " -> ".join(str(location) for location in err.get("loc", []))
            msg = err.get("msg", "Validation failed")
            error_details.append(f"{loc}: {msg}")

        return ChartGenerationError(
            error_type="validation_error",
            message="Chart configuration validation failed",
            details="; ".join(error_details),
            suggestions=[
                "Check that all required fields are present",
                "Ensure field types match the schema",
                "Use get_dataset_info to verify column names",
                "Refer to the API documentation for field requirements",
            ],
            error_code="VALIDATION_ERROR",
        )