feat(mcp): MCP service implementation (PRs 3-9 consolidated) (#35877)

2026-04-24 18:44:53 +00:00 · 2025-11-01 02:33:21 +11:00
parent 30d584afd1
commit fee4e7d8e2
106 changed files with 21826 additions and 223 deletions
--- a/superset/mcp_service/chart/validation/runtime/__init__.py
+++ b/superset/mcp_service/chart/validation/runtime/__init__.py
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Runtime validation module for chart configurations.
+Validates performance, compatibility, and user experience issues.
+"""
+
+import logging
+from typing import List, Tuple
+
+from superset.mcp_service.chart.schemas import (
+    ChartConfig,
+    XYChartConfig,
+)
+from superset.mcp_service.common.error_schemas import ChartGenerationError
+
+logger = logging.getLogger(__name__)
+
+
+class RuntimeValidator:
+    """Runs every runtime check against a chart configuration.
+
+    Each individual check is best-effort: if its validator module cannot be
+    imported or raises, the problem is logged and that check contributes no
+    warnings, so a failing check never rejects a chart on its own.
+    """
+
+    @staticmethod
+    def validate_runtime_issues(
+        config: ChartConfig, dataset_id: int | str
+    ) -> Tuple[bool, ChartGenerationError | None]:
+        """
+        Run all runtime checks and fold their findings into a single result.
+
+        Format and cardinality checks only run for XY charts; the chart type
+        check runs for every configuration.
+
+        Args:
+            config: Chart configuration to validate
+            dataset_id: Dataset identifier, forwarded to the individual checks
+
+        Returns:
+            ``(True, None)`` when no check produced a warning; otherwise
+            ``(False, error)``, where the error is built from at most three
+            warnings and at most five suggestions.
+        """
+        all_warnings: List[str] = []
+        all_suggestions: List[str] = []
+
+        if isinstance(config, XYChartConfig):
+            # Format-type compatibility only ever contributes warnings
+            all_warnings += RuntimeValidator._validate_format_compatibility(config)
+
+            # Cardinality suggestions are kept only when a warning came with them
+            card_warnings, card_suggestions = RuntimeValidator._validate_cardinality(
+                config, dataset_id
+            )
+            if card_warnings:
+                all_warnings += card_warnings
+                all_suggestions += card_suggestions
+
+        # Chart type appropriateness is checked for all chart types
+        kind_warnings, kind_suggestions = RuntimeValidator._validate_chart_type(
+            config, dataset_id
+        )
+        if kind_warnings:
+            all_warnings += kind_warnings
+            all_suggestions += kind_suggestions
+
+        if not all_warnings:
+            return True, None
+
+        # Imported lazily: only needed once there is something to report
+        from superset.mcp_service.utils.error_builder import (
+            ChartErrorBuilder,
+        )
+
+        # Report the first three warnings; "..." signals that more were cut
+        reason = "; ".join(all_warnings[:3])
+        if len(all_warnings) > 3:
+            reason += "..."
+
+        return False, ChartErrorBuilder.build_error(
+            error_type="runtime_semantic_warning",
+            template_key="performance_warning",
+            template_vars={"reason": reason},
+            custom_suggestions=all_suggestions[:5],  # Limit suggestions
+            error_code="RUNTIME_SEMANTIC_WARNING",
+        )
+
+    @staticmethod
+    def _validate_format_compatibility(config: XYChartConfig) -> List[str]:
+        """Collect format-type compatibility warnings for an XY chart.
+
+        Returns an empty list when the format validator cannot be imported or
+        raises; both cases are logged as warnings.
+        """
+        found: List[str] = []
+
+        try:
+            # Import here to avoid circular imports
+            from .format_validator import FormatTypeValidator
+
+            # Only the warnings are used; the validity flag is ignored
+            _, reported = FormatTypeValidator.validate_format_compatibility(config)
+            if reported:
+                found.extend(reported)
+        except ImportError:
+            logger.warning("Format validator not available")
+        except Exception as e:
+            logger.warning("Format validation failed: %s", e)
+
+        return found
+
+    @staticmethod
+    def _validate_cardinality(
+        config: XYChartConfig, dataset_id: int | str
+    ) -> Tuple[List[str], List[str]]:
+        """Collect cardinality warnings and suggestions for an XY chart.
+
+        Returns:
+            Tuple of (warnings, suggestions); both are empty when the check
+            passes, cannot be imported, or raises.
+        """
+        found_warnings: List[str] = []
+        found_suggestions: List[str] = []
+
+        try:
+            # Import here to avoid circular imports
+            from .cardinality_validator import CardinalityValidator
+
+            # The chart kind is passed on as the chart type; configs without
+            # a ``kind`` attribute use "default"
+            chart_kind = getattr(config, "kind", "default")
+            x_name = config.x.name
+            series_name = config.group_by.name if config.group_by else None
+
+            acceptable, details = CardinalityValidator.check_cardinality(
+                dataset_id=dataset_id,
+                x_column=x_name,
+                chart_type=chart_kind,
+                group_by_column=series_name,
+            )
+
+            if not acceptable and details:
+                found_warnings.extend(details.get("warnings", []))
+                found_suggestions.extend(details.get("suggestions", []))
+
+        except ImportError:
+            logger.warning("Cardinality validator not available")
+        except Exception as e:
+            logger.warning("Cardinality validation failed: %s", e)
+
+        return found_warnings, found_suggestions
+
+    @staticmethod
+    def _validate_chart_type(
+        config: ChartConfig, dataset_id: int | str
+    ) -> Tuple[List[str], List[str]]:
+        """Collect warnings and suggestions about the chosen chart type.
+
+        Returns:
+            Tuple of (warnings, suggestions); both are empty when the chart
+            type is considered appropriate, or when the suggester cannot be
+            imported or raises.
+        """
+        found_warnings: List[str] = []
+        found_suggestions: List[str] = []
+
+        try:
+            # Import here to avoid circular imports
+            from .chart_type_suggester import ChartTypeSuggester
+
+            appropriate, details = ChartTypeSuggester.analyze_and_suggest(
+                config, dataset_id
+            )
+
+            if not appropriate and details:
+                found_warnings.extend(details.get("issues", []))
+                found_suggestions.extend(details.get("suggestions", []))
+
+                # Fold the recommended chart types into one extra suggestion
+                if recommended := details.get("recommended_types", []):
+                    recommendations = ", ".join(recommended)
+                    found_suggestions.append(
+                        f"Recommended chart types for this data: {recommendations}"
+                    )
+
+        except ImportError:
+            logger.warning("Chart type suggester not available")
+        except Exception as e:
+            logger.warning("Chart type validation failed: %s", e)
+
+        return found_warnings, found_suggestions
--- a/superset/mcp_service/chart/validation/runtime/cardinality_validator.py
+++ b/superset/mcp_service/chart/validation/runtime/cardinality_validator.py
@@ -0,0 +1,195 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Cardinality validation to prevent unusable visualizations from high-cardinality data.
+"""
+
+import logging
+from typing import Any, Dict, List, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+class CardinalityValidator:
+    """
+    Validates cardinality of dimensions to prevent charts with too many categories
+    that become unreadable or cause performance issues.
+
+    No database query is made: columns are flagged purely from their names.
+    A name is split into words (on non-alphanumeric characters and camelCase
+    boundaries) and compared word-by-word with HIGH_CARDINALITY_PATTERNS, so
+    ``user_id`` and ``userId`` are flagged while ``video`` or ``paid_amount``
+    are not. Names written without any separator (``userid``) are not
+    recognized.
+    """
+
+    # Thresholds for different chart types
+    CARDINALITY_THRESHOLDS = {
+        "bar": 50,  # Bar charts become unreadable with >50 bars
+        "line": 100,  # Line charts can handle more points
+        "scatter": 500,  # Scatter plots can show many points
+        "area": 30,  # Area charts need fewer categories
+        "table": 1000,  # Tables can handle many rows with pagination
+        "default": 50,  # Conservative default
+    }
+
+    # Known high-cardinality column patterns
+    HIGH_CARDINALITY_PATTERNS = [
+        "id",
+        "uuid",
+        "guid",
+        "email",
+        "phone",
+        "address",
+        "session",
+        "transaction",
+        "order_number",
+        "invoice",
+        "timestamp",
+        "datetime",
+        "created_at",
+        "updated_at",
+    ]
+
+    # Pattern families that each get their own tailored suggestions
+    _IDENTIFIER_PATTERNS = ["id", "uuid", "guid"]
+    _CONTACT_PATTERNS = ["email", "phone", "address"]
+    _TEMPORAL_PATTERNS = ["timestamp", "datetime", "created_at", "updated_at"]
+
+    @staticmethod
+    def check_cardinality(
+        dataset_id: int | str,
+        x_column: str,
+        chart_type: str = "default",
+        group_by_column: str | None = None,
+    ) -> Tuple[bool, Dict[str, Any] | None]:
+        """
+        Check cardinality of X-axis and group_by columns.
+
+        Only the name-based heuristic is applied; ``dataset_id`` is currently
+        unused because no actual cardinality query is made.
+
+        Returns:
+            Tuple of (is_ok, warning_info). ``warning_info`` is ``None`` when
+            nothing was flagged, otherwise a dict with ``"warnings"`` and
+            ``"suggestions"`` lists. Any unexpected failure is logged and
+            reported as OK so validation never blocks on its own errors.
+        """
+        try:
+            # Quick pattern check first (no DB query needed)
+            pattern_warnings = CardinalityValidator._check_column_patterns(
+                x_column, group_by_column
+            )
+
+            if not pattern_warnings:
+                # For non-pattern columns, we could do actual cardinality check
+                # but that requires DB access - for now just return OK
+                # In production, you'd want to cache cardinality stats
+                return True, None
+
+            # Build suggestions for every column that was actually flagged, so
+            # a group-by-only warning is not answered with X-axis advice.
+            find = CardinalityValidator._find_pattern
+            patterns = CardinalityValidator.HIGH_CARDINALITY_PATTERNS
+            flagged = []
+            if find(x_column, patterns):
+                flagged.append((x_column, "X-axis"))
+            if group_by_column and find(group_by_column, patterns):
+                flagged.append((group_by_column, "group by"))
+
+            suggestions: List[str] = []
+            for column, axis in flagged:
+                for suggestion in CardinalityValidator._get_suggestions(
+                    column, chart_type, pattern_based=True, axis=axis
+                ):
+                    # Both columns may share generic advice; list it once
+                    if suggestion not in suggestions:
+                        suggestions.append(suggestion)
+
+            return False, {"warnings": pattern_warnings, "suggestions": suggestions}
+
+        except Exception as e:
+            logger.warning("Cardinality check failed: %s", e)
+            # Don't block on validation failures
+            return True, None
+
+    @staticmethod
+    def _find_pattern(column: str | None, patterns: List[str]) -> str | None:
+        """Return the first of ``patterns`` found as whole word(s) in ``column``.
+
+        The name is lowercased and split on non-alphanumeric characters and on
+        lower-to-upper camelCase boundaries. A pattern matches when its words
+        appear consecutively; a trailing plural ``s``/``es`` is tolerated.
+        Returns ``None`` for an empty name or when nothing matches.
+        """
+        if not column:
+            return None
+
+        chars: List[str] = []
+        previous = ""
+        for char in column:
+            if not char.isalnum():
+                chars.append("_")
+            else:
+                if char.isupper() and (previous.islower() or previous.isdigit()):
+                    chars.append("_")  # camelCase boundary: "userId" -> "user_id"
+                chars.append(char.lower())
+            previous = char
+
+        # Collapse repeated separators and pad so every word is "_"-delimited
+        padded = "_" + "_".join(filter(None, "".join(chars).split("_"))) + "_"
+
+        for pattern in patterns:
+            if any(f"_{pattern}{suffix}_" in padded for suffix in ("", "s", "es")):
+                return pattern
+        return None
+
+    @staticmethod
+    def _check_column_patterns(
+        x_column: str, group_by_column: str | None = None
+    ) -> List[str]:
+        """Check for known high-cardinality column patterns.
+
+        Returns one warning per flagged column (X-axis first, then group-by);
+        an empty list means neither name matched a pattern.
+        """
+        warnings = []
+        patterns = CardinalityValidator.HIGH_CARDINALITY_PATTERNS
+
+        # Check X-axis column
+        x_pattern = CardinalityValidator._find_pattern(x_column, patterns)
+        if x_pattern:
+            warnings.append(
+                f"Column '{x_column}' appears to be a high-cardinality field "
+                f"(contains '{x_pattern}'). This may create an unreadable chart "
+                f"with too many categories on the X-axis."
+            )
+
+        # Check group_by column if present
+        group_pattern = CardinalityValidator._find_pattern(group_by_column, patterns)
+        if group_pattern:
+            warnings.append(
+                f"Group by column '{group_by_column}' appears to be a "
+                f"high-cardinality field (contains '{group_pattern}'). This may "
+                f"create too many series to visualize effectively."
+            )
+
+        return warnings
+
+    @staticmethod
+    def _get_suggestions(
+        column: str,
+        chart_type: str,
+        pattern_based: bool = False,
+        axis: str = "X-axis",
+    ) -> List[str]:
+        """Get suggestions for handling high cardinality.
+
+        Args:
+            column: Name of the flagged column
+            chart_type: Chart type used to look up the category threshold
+            pattern_based: When true, prefer advice tailored to the pattern
+                family (identifier, contact, temporal) the name belongs to
+            axis: Role of the column in the chart, used in the wording
+
+        Returns:
+            A non-empty list; general advice is used when ``pattern_based`` is
+            false or the name belongs to no specific family.
+        """
+        find = CardinalityValidator._find_pattern
+
+        if pattern_based:
+            # Suggestions when we detected high-cardinality patterns
+            if find(column, CardinalityValidator._IDENTIFIER_PATTERNS):
+                return [
+                    f"Consider using a different column for the {axis}",
+                    f"If you need to analyze by {column}, use filters to limit "
+                    f"the data",
+                    "A table chart might be more appropriate for ID-based data",
+                ]
+            if find(column, CardinalityValidator._CONTACT_PATTERNS):
+                return [
+                    "Consider grouping by a higher-level category (e.g., "
+                    "domain for emails)",
+                    f"Use filters to focus on specific {column} values",
+                    "Aggregate the data before visualization",
+                ]
+            if find(column, CardinalityValidator._TEMPORAL_PATTERNS):
+                return [
+                    "Consider truncating timestamps to date or hour level",
+                    "Use time-based grouping (daily, weekly, monthly)",
+                    "Apply date range filters to limit the data",
+                ]
+            # Other patterns (session, invoice, ...) get the general advice
+            # below, so a warning is never left without suggestions
+
+        # General high-cardinality suggestions
+        thresholds = CardinalityValidator.CARDINALITY_THRESHOLDS
+        threshold = thresholds.get(chart_type, thresholds["default"])
+        return [
+            f"This chart type works best with fewer than {threshold} "
+            f"categories",
+            "Consider using filters to reduce the number of values",
+            "Try grouping or categorizing the data at a higher level",
+            "A table or pivot table might better display high-cardinality data",
+        ]
+
+    @staticmethod
+    def suggest_chart_type(cardinality: int) -> List[str]:
+        """Suggest appropriate chart types based on cardinality.
+
+        The bands are: up to 10, up to 30, up to 100, and anything larger.
+        """
+        if cardinality <= 10:
+            return ["bar", "pie", "donut", "area"]
+        if cardinality <= 30:
+            return ["bar", "line", "area"]
+        if cardinality <= 100:
+            return ["line", "scatter"]
+        return ["table", "pivot_table", "heatmap"]
--- a/superset/mcp_service/chart/validation/runtime/chart_type_suggester.py
+++ b/superset/mcp_service/chart/validation/runtime/chart_type_suggester.py
@@ -0,0 +1,437 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Chart type suggestions based on data characteristics and user intent.
+"""
+
+import logging
+from typing import Any, Dict, List, Tuple
+
+from superset.mcp_service.chart.schemas import (
+    ChartConfig,
+    ColumnRef,
+    TableChartConfig,
+    XYChartConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class ChartTypeSuggester:
+    """
+    Suggests appropriate chart types based on data characteristics
+    and identifies potential mismatches between chart type and data.
+
+    Everything here is a name-based heuristic; the dataset itself is never
+    queried. Column names are split into words (on non-alphanumeric characters
+    and camelCase boundaries) and compared word-by-word with the keyword lists
+    below, so ``order_date`` counts as temporal and ``userId`` as an ID, while
+    ``sentiment`` or ``video`` match nothing. Names written without any
+    separator (``userid``) are not recognized.
+    """
+
+    # Keyword lists used to classify a column from the words in its name
+    _TEMPORAL_WORDS = [
+        "date",
+        "time",
+        "datetime",
+        "timestamp",
+        "year",
+        "month",
+        "day",
+        "hour",
+        "created",
+        "updated",
+    ]
+    _CATEGORICAL_WORDS = [
+        "category",
+        "type",
+        "status",
+        "department",
+        "region",
+        "country",
+        "state",
+    ]
+    _ID_WORDS = ["id", "uuid", "guid", "key"]
+    _NEGATIVE_WORDS = ["loss", "debt", "negative"]
+
+    @staticmethod
+    def _has_keyword(name: str, keywords: List[str]) -> bool:
+        """Return True when any keyword is a whole word of ``name``.
+
+        The name is lowercased and split on non-alphanumeric characters and on
+        lower-to-upper camelCase boundaries; a trailing plural ``s``/``es`` on
+        a word is tolerated.
+        """
+        chars: List[str] = []
+        previous = ""
+        for char in name:
+            if not char.isalnum():
+                chars.append("_")
+            else:
+                if char.isupper() and (previous.islower() or previous.isdigit()):
+                    chars.append("_")  # camelCase boundary: "userId" -> "user_id"
+                chars.append(char.lower())
+            previous = char
+
+        words = set("".join(chars).split("_"))
+        return any(
+            keyword + suffix in words
+            for keyword in keywords
+            for suffix in ("", "s", "es")
+        )
+
+    @staticmethod
+    def analyze_and_suggest(
+        config: ChartConfig,
+        dataset_id: int | str,  # noqa: ARG002
+    ) -> Tuple[bool, Dict[str, Any] | None]:
+        """
+        Analyze chart configuration and suggest better chart types if needed.
+
+        Only XY and table configurations are analyzed; any other configuration
+        is reported as appropriate. ``dataset_id`` is accepted but unused.
+
+        Returns:
+            Tuple of (is_appropriate, suggestion_info). ``suggestion_info`` is
+            ``None`` when the chart is appropriate, otherwise a dict with
+            ``"issues"``, ``"suggestions"`` and ``"recommended_types"``.
+        """
+        try:
+            if isinstance(config, XYChartConfig):
+                return ChartTypeSuggester._analyze_xy_chart(config)
+            if isinstance(config, TableChartConfig):
+                return ChartTypeSuggester._analyze_table_chart(config)
+            return True, None
+        except Exception as e:
+            logger.warning("Chart type analysis failed: %s", e)
+            return True, None  # Don't block on suggestion failures
+
+    @staticmethod
+    def _analyze_xy_chart(
+        config: XYChartConfig,
+    ) -> Tuple[bool, Dict[str, Any] | None]:
+        """Analyze XY chart appropriateness.
+
+        General suggestions are always computed but only returned together
+        with at least one issue.
+        """
+        x_analysis = ChartTypeSuggester._analyze_x_axis(config.x.name)
+        y_analysis = ChartTypeSuggester._analyze_y_axis(config.y)
+
+        # Check chart type specific issues
+        issues, suggestions = ChartTypeSuggester._check_chart_type_issues(
+            config, x_analysis, y_analysis
+        )
+
+        # Add general suggestions
+        suggestions = suggestions + ChartTypeSuggester._get_general_suggestions(
+            x_analysis, y_analysis
+        )
+
+        if not issues:
+            return True, None
+
+        return False, {
+            "issues": issues,
+            "suggestions": suggestions,
+            "recommended_types": ChartTypeSuggester._get_recommended_types(
+                x_analysis["is_temporal"],
+                x_analysis["is_categorical"],
+                y_analysis["has_count"],
+                y_analysis["num_metrics"],
+            ),
+        }
+
+    @staticmethod
+    def _analyze_x_axis(x_name: str) -> Dict[str, Any]:
+        """Analyze X-axis characteristics.
+
+        Returns a dict with the boolean flags ``is_temporal``,
+        ``is_categorical`` and ``is_id`` (derived from the words in the column
+        name) plus the original ``name``.
+        """
+        has_keyword = ChartTypeSuggester._has_keyword
+        return {
+            "is_temporal": has_keyword(x_name, ChartTypeSuggester._TEMPORAL_WORDS),
+            "is_categorical": has_keyword(
+                x_name, ChartTypeSuggester._CATEGORICAL_WORDS
+            ),
+            "is_id": has_keyword(x_name, ChartTypeSuggester._ID_WORDS),
+            "name": x_name,
+        }
+
+    @staticmethod
+    def _analyze_y_axis(y_columns: List[ColumnRef]) -> Dict[str, Any]:
+        """Analyze Y-axis characteristics.
+
+        Returns a dict with ``has_count`` (any column aggregated with COUNT or
+        COUNT_DISTINCT) and ``num_metrics`` (number of Y columns).
+        """
+        return {
+            "has_count": any(
+                col.aggregate in ["COUNT", "COUNT_DISTINCT"] for col in y_columns
+            ),
+            "num_metrics": len(y_columns),
+        }
+
+    @staticmethod
+    def _check_chart_type_issues(
+        config: XYChartConfig, x_analysis: Dict[str, Any], y_analysis: Dict[str, Any]
+    ) -> Tuple[List[str], List[str]]:
+        """Check for chart type specific issues.
+
+        Dispatches on ``config.kind`` to the matching helper; kinds without a
+        helper produce no issues.
+
+        Returns:
+            Tuple of (issues, suggestions)
+        """
+        # Extract analysis values
+        x_is_temporal = x_analysis["is_temporal"]
+        x_is_categorical = x_analysis["is_categorical"]
+        x_is_id = x_analysis["is_id"]
+        num_metrics = y_analysis["num_metrics"]
+
+        kind = config.kind
+        if kind == "line":
+            return ChartTypeSuggester._check_line_chart_issues(
+                config, x_is_temporal, x_is_categorical, x_is_id
+            )
+        if kind == "scatter":
+            return ChartTypeSuggester._check_scatter_chart_issues(
+                config, x_is_categorical, num_metrics
+            )
+        if kind == "area":
+            return ChartTypeSuggester._check_area_chart_issues(config, x_is_temporal)
+        if kind == "bar":
+            return ChartTypeSuggester._check_bar_chart_issues(config, x_is_id)
+
+        return [], []
+
+    @staticmethod
+    def _check_line_chart_issues(
+        config: XYChartConfig,
+        x_is_temporal: bool,
+        x_is_categorical: bool,
+        x_is_id: bool,
+    ) -> Tuple[List[str], List[str]]:
+        """Check line chart specific issues.
+
+        Flags a categorical (and non-temporal) X-axis, or otherwise an ID
+        column on the X-axis.
+        """
+        issues = []
+        suggestions = []
+
+        if not x_is_temporal and x_is_categorical:
+            issues.append(
+                f"Line chart with categorical X-axis '{config.x.name}' may not "
+                f"show meaningful trends"
+            )
+            suggestions.extend(
+                [
+                    "Consider using a bar chart for categorical comparisons",
+                    "Line charts work best with temporal or continuous data",
+                ]
+            )
+        elif x_is_id:
+            issues.append(
+                f"Line chart with ID field '{config.x.name}' on X-axis will not "
+                f"show meaningful patterns"
+            )
+            suggestions.extend(
+                [
+                    "Use a table to display individual records",
+                    "Or aggregate the data by a meaningful dimension",
+                ]
+            )
+
+        return issues, suggestions
+
+    @staticmethod
+    def _check_scatter_chart_issues(
+        config: XYChartConfig, x_is_categorical: bool, num_metrics: int
+    ) -> Tuple[List[str], List[str]]:
+        """Check scatter chart specific issues.
+
+        Flags a categorical X-axis and, independently, more than one Y metric.
+        """
+        issues = []
+        suggestions = []
+
+        if x_is_categorical:
+            issues.append(
+                f"Scatter plot with categorical X-axis '{config.x.name}' may not "
+                f"effectively show correlations"
+            )
+            suggestions.extend(
+                [
+                    "Scatter plots work best with two continuous variables",
+                    "Consider a bar chart for categorical vs numeric data",
+                ]
+            )
+        if num_metrics > 1:
+            issues.append("Scatter plots with multiple Y metrics can be confusing")
+            suggestions.extend(
+                [
+                    "Consider using only one Y metric for clarity",
+                    "Or use a line/bar chart to compare multiple metrics",
+                ]
+            )
+
+        return issues, suggestions
+
+    @staticmethod
+    def _check_area_chart_issues(
+        config: XYChartConfig, x_is_temporal: bool
+    ) -> Tuple[List[str], List[str]]:
+        """Check area chart specific issues.
+
+        Flags a non-temporal X-axis, and every Y column whose name suggests
+        values that can be negative.
+        """
+        issues = []
+        suggestions = []
+
+        if not x_is_temporal:
+            issues.append(
+                f"Area chart with non-temporal X-axis '{config.x.name}' may be "
+                f"misleading"
+            )
+            suggestions.extend(
+                [
+                    "Area charts imply cumulative or part-to-whole relationships over "
+                    "time",
+                    "Consider a stacked bar chart for categorical data",
+                ]
+            )
+
+        # Check for potential negative values
+        negative_columns = [
+            col
+            for col in config.y
+            if ChartTypeSuggester._has_keyword(
+                col.name, ChartTypeSuggester._NEGATIVE_WORDS
+            )
+        ]
+        for col in negative_columns:
+            issues.append(
+                f"Area chart with potentially negative values in '{col.name}' "
+                f"can create visual confusion"
+            )
+        if negative_columns:
+            # The advice is the same for every flagged column: add it once
+            suggestions.extend(
+                [
+                    "Use a line chart for data that can go negative",
+                    "Or ensure all values are positive before using area chart",
+                ]
+            )
+
+        return issues, suggestions
+
+    @staticmethod
+    def _check_bar_chart_issues(
+        config: XYChartConfig, x_is_id: bool
+    ) -> Tuple[List[str], List[str]]:
+        """Check bar chart specific issues (an ID column on the X-axis)."""
+        issues = []
+        suggestions = []
+
+        if x_is_id:
+            issues.append(
+                f"Bar chart with ID field '{config.x.name}' may create too many bars"
+            )
+            suggestions.extend(
+                [
+                    "Consider aggregating by a higher-level category",
+                    "Or use filters to limit the number of bars displayed",
+                ]
+            )
+
+        return issues, suggestions
+
+    @staticmethod
+    def _get_general_suggestions(
+        x_analysis: Dict[str, Any], y_analysis: Dict[str, Any]
+    ) -> List[str]:
+        """Get general suggestions based on data patterns.
+
+        At most one suggestion is returned; the first matching pattern wins.
+        """
+        x_is_temporal = x_analysis["is_temporal"]
+        x_is_categorical = x_analysis["is_categorical"]
+        has_count = y_analysis["has_count"]
+        num_metrics = y_analysis["num_metrics"]
+
+        if has_count and x_is_categorical:
+            return [
+                "This looks like frequency analysis - bar charts work well for counts "
+                "by category"
+            ]
+        if x_is_temporal and num_metrics == 1:
+            return [
+                "Single metric over time - line charts are ideal for showing trends"
+            ]
+        if x_is_temporal and num_metrics > 3:
+            return [
+                "Many metrics over time - consider focusing on 2-3 key metrics for "
+                "clarity"
+            ]
+
+        return []
+
+    @staticmethod
+    def _analyze_table_chart(
+        config: TableChartConfig,
+    ) -> Tuple[bool, Dict[str, Any] | None]:
+        """Analyze table chart appropriateness.
+
+        Issues are raised for tables that have aggregated columns with at most
+        two raw columns, and for tables with more than ten columns. The
+        ID-heavy check only adds suggestions, which are returned only when one
+        of those issues is present.
+        """
+        issues = []
+        suggestions = []
+
+        # Count different column types
+        raw_columns = sum(1 for col in config.columns if not col.aggregate)
+        metric_columns = sum(1 for col in config.columns if col.aggregate)
+        total_columns = len(config.columns)
+
+        # Check if data might be better visualized
+        if metric_columns > 0 and raw_columns <= 2:
+            # Mostly metrics with few dimensions - could be visualized
+            issues.append(
+                "Table with mostly aggregated metrics could be visualized as a chart"
+            )
+            suggestions.append("Consider a bar chart to compare metric values visually")
+            suggestions.append("Or use a line chart if there's a time dimension")
+
+        # Check for ID-heavy tables
+        id_columns = sum(
+            1
+            for col in config.columns
+            if ChartTypeSuggester._has_keyword(col.name, ChartTypeSuggester._ID_WORDS)
+        )
+        if id_columns > total_columns / 2:
+            suggestions.append(
+                "Table appears to be ID-heavy - ensure this is for detailed record "
+                "inspection"
+            )
+            suggestions.append(
+                "For analysis, consider aggregating by meaningful dimensions instead"
+            )
+
+        # Very wide tables
+        if total_columns > 10:
+            issues.append(
+                f"Table with {total_columns} columns may be difficult to read"
+            )
+            suggestions.append("Consider showing only the most important columns")
+            suggestions.append("Or break into multiple focused views")
+
+        if not issues:
+            return True, None
+
+        return False, {
+            "issues": issues,
+            "suggestions": suggestions,
+            "recommended_types": ["table", "pivot_table"]
+            if metric_columns > 0
+            else ["table"],
+        }
+
+    @staticmethod
+    def _get_recommended_types(
+        x_is_temporal: bool, x_is_categorical: bool, has_count: bool, num_metrics: int
+    ) -> List[str]:
+        """Get recommended chart types based on data characteristics.
+
+        The result always contains ``"table"``.
+        """
+        recommendations = []
+
+        if x_is_temporal:
+            recommendations.extend(["line", "area", "bar"])
+            if num_metrics == 1:
+                recommendations.append("scatter")  # For trend analysis
+        elif x_is_categorical:
+            recommendations.extend(["bar", "table"])
+            if has_count and num_metrics == 1:
+                recommendations.append("pie")  # For proportion analysis
+        else:
+            # Continuous or unclear X-axis
+            recommendations.extend(["scatter", "line", "table"])
+
+        # Always include table as fallback
+        if "table" not in recommendations:
+            recommendations.append("table")
+
+        return recommendations
+
+    @staticmethod
+    def get_chart_type_description(chart_type: str) -> str:
+        """Get a description of when to use each chart type.
+
+        Unknown chart types get a generic description that names the type.
+        """
+        descriptions = {
+            "line": "Best for showing trends over time or continuous data",
+            "bar": "Ideal for comparing values across categories",
+            "area": "Shows cumulative totals and part-to-whole relationships over time",
+            "scatter": "Reveals correlations between two continuous variables",
+            "table": "Displays detailed data or many dimensions at once",
+            "pie": "Shows proportions of a whole (use sparingly, max 5-7 slices)",
+            "pivot_table": "Summarizes data across multiple dimensions",
+        }
+        return descriptions.get(
+            chart_type, f"Visualizes data using {chart_type} format"
+        )
--- a/superset/mcp_service/chart/validation/runtime/format_validator.py
+++ b/superset/mcp_service/chart/validation/runtime/format_validator.py
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Format-type compatibility validation to prevent misleading data presentation.
+"""
+
+import logging
+import re
+from typing import List, Tuple
+
+from superset.mcp_service.chart.schemas import ColumnRef, XYChartConfig
+
+logger = logging.getLogger(__name__)
+
+
+class FormatTypeValidator:
+    """
+    Validates that format strings are appropriate for the data type and aggregation.
+    Prevents issues like currency formatting on COUNT data or percentage on absolute
+    values. Every check is a regex heuristic that yields advisory warning strings.
+    """
+
+    # Format patterns and their appropriate uses
+    CURRENCY_PATTERNS = [
+        r"\$",  # Dollar sign
+        r"€",  # Euro
+        r"£",  # Pound
+        r"¥",  # Yen
+        r"[,.]2f",  # Two decimal places (common for currency)
+        r"\$[,.]",  # Dollar with thousands separator
+    ]
+
+    PERCENTAGE_PATTERNS = [
+        r"%",  # Percentage sign
+        r"\.0%",  # Percentage with no decimals
+        r"\.1%",  # Percentage with 1 decimal
+        r"\.2%",  # Percentage with 2 decimals
+    ]
+
+    INTEGER_PATTERNS = [
+        r"\.0f",  # No decimals
+        r",d",  # Integer with thousands separator
+        r"[,.]0f",  # Integer format variations
+    ]
+
+    _COUNT_AGGREGATES = ("COUNT", "COUNT_DISTINCT")  # always whole numbers
+    _ABSOLUTE_AGGREGATES = ("SUM", *_COUNT_AGGREGATES)  # may hold absolute values
+
+    # A "%" that opens a time-format directive ("%Y", "%-d", "%%"); stripped before
+    # percentage detection so date formats are not mistaken for the percent type.
+    _TIME_DIRECTIVE = re.compile(r"%[-_0^#]?[A-Za-z%]")
+
+    @staticmethod
+    def validate_format_compatibility(
+        config: XYChartConfig,
+    ) -> Tuple[bool, List[str] | None]:
+        """
+        Validate that axis formats are appropriate for the data types.
+
+        Returns:
+            Tuple of (is_valid, warnings_list); warnings_list is None when valid.
+        """
+        warnings: List[str] = []
+        # Validate Y-axis format against metrics
+        if config.y_axis and config.y_axis.format:
+            warnings += FormatTypeValidator._validate_y_axis_format(
+                config.y_axis.format, config.y
+            )
+
+        # Validate X-axis format (usually temporal or categorical)
+        if config.x_axis and config.x_axis.format:
+            warnings += FormatTypeValidator._validate_x_axis_format(
+                config.x_axis.format, config.x
+            )
+
+        return not warnings, warnings or None
+
+    @staticmethod
+    def _validate_y_axis_format(
+        format_string: str, y_columns: List[ColumnRef]
+    ) -> List[str]:
+        """Collect currency, percentage and decimal warnings for the Y-axis metrics."""
+        checks = (
+            FormatTypeValidator._check_currency_format_issues,
+            FormatTypeValidator._check_percentage_format_issues,
+            FormatTypeValidator._check_decimal_format_issues,
+        )
+        return [
+            warning for check in checks for warning in check(format_string, y_columns)
+        ]
+
+    @staticmethod
+    def _check_currency_format_issues(
+        format_string: str, y_columns: List[ColumnRef]
+    ) -> List[str]:
+        """Warn for every COUNT-style metric shown with a currency-like format."""
+        if not FormatTypeValidator._is_currency_format(format_string):
+            return []
+        return [
+            f"Currency format '{format_string}' applied to {col.aggregate} "
+            f"of '{col.name}'. COUNT operations return whole numbers, not "
+            f"currency values. Consider using integer format like ',d' instead."
+            for col in y_columns
+            if col.aggregate in FormatTypeValidator._COUNT_AGGREGATES
+        ]
+
+    @staticmethod
+    def _check_percentage_format_issues(
+        format_string: str, y_columns: List[ColumnRef]
+    ) -> List[str]:
+        """Warn for SUM/COUNT metrics that a percent format would scale by 100."""
+        if not FormatTypeValidator._is_percentage_format(format_string):
+            return []
+        warnings = []
+        for col in y_columns:
+            if col.aggregate not in FormatTypeValidator._ABSOLUTE_AGGREGATES:
+                continue
+            label = col.label or f"{col.aggregate}({col.name})"
+            warnings.append(
+                f"Percentage format '{format_string}' applied to "
+                f"{col.aggregate} of '{col.name}'. This will multiply values "
+                f"by 100 and add %. "
+                f"If '{label}' contains absolute values (not ratios 0-1), "
+                f"consider using a numeric format instead."
+            )
+        return warnings
+
+    @staticmethod
+    def _check_decimal_format_issues(
+        format_string: str, y_columns: List[ColumnRef]
+    ) -> List[str]:
+        """Warn for COUNT-style metrics formatted with one or more decimal places."""
+        # None (no fixed-point precision found) and 0 both mean nothing to report
+        if not FormatTypeValidator._get_decimal_places(format_string):
+            return []
+        return [
+            f"Decimal format '{format_string}' applied to "
+            f"{col.aggregate} of '{col.name}'. COUNT operations always return "
+            f"integers. Consider using integer format like ',d' or '.0f' instead."
+            for col in y_columns
+            if col.aggregate in FormatTypeValidator._COUNT_AGGREGATES
+        ]
+
+    @staticmethod
+    def _validate_x_axis_format(format_string: str, x_column: ColumnRef) -> List[str]:
+        """Return at most one X-axis warning; currency is checked before percentage."""
+        # Currency format on X-axis is almost always wrong
+        if FormatTypeValidator._is_currency_format(format_string):
+            return [
+                f"Currency format '{format_string}' applied to X-axis '"
+                f"{x_column.name}'. "
+                f"X-axis typically shows categories, time, or dimensions, "
+                f"not currency. "
+                f"Consider removing the format or using a date/category format."
+            ]
+
+        # Percentage format on X-axis is unusual
+        if FormatTypeValidator._is_percentage_format(format_string):
+            return [
+                f"Percentage format '{format_string}' applied to X-axis '"
+                f"{x_column.name}'. "
+                f"This is unusual for axis labels. Consider if this is intentional."
+            ]
+
+        return []
+
+    @staticmethod
+    def _is_currency_format(format_string: str) -> bool:
+        """Check if format string looks like currency (symbol or two-decimal spec)."""
+        return any(
+            re.search(pattern, format_string, re.IGNORECASE)
+            for pattern in FormatTypeValidator.CURRENCY_PATTERNS
+        )
+
+    @staticmethod
+    def _is_percentage_format(format_string: str) -> bool:
+        """
+        Check if format string represents percentage.
+
+        Time-format directives are stripped first: in "%Y-%m-%d" every "%" opens
+        a date field, while the percent type is a "%" with no directive letter
+        after it (".1%", ",.0%").
+        """
+        remainder = FormatTypeValidator._TIME_DIRECTIVE.sub("", format_string)
+        return any(
+            re.search(pattern, remainder)
+            for pattern in FormatTypeValidator.PERCENTAGE_PATTERNS
+        )
+
+    @staticmethod
+    def _get_decimal_places(format_string: str) -> int | None:
+        """Return the precision of a fixed-point spec (".2f" -> 2), else None."""
+        if match := re.search(r"\.(\d+)f", format_string):
+            return int(match.group(1))
+        return None
+
+    @staticmethod
+    def suggest_format(column: ColumnRef) -> str:
+        """
+        Suggest appropriate format based on column and aggregation.
+
+        Returns:
+            ",d" for counts, ",.2f" for the other known aggregates, "" otherwise.
+        """
+        if column.aggregate in FormatTypeValidator._COUNT_AGGREGATES:
+            return ",d"  # Integer with thousands separator
+        if column.aggregate in ("AVG", "STDDEV", "VAR", "SUM", "MIN", "MAX"):
+            # Two decimals with thousands separator (measures, totals, maybe currency)
+            return ",.2f"
+        return ""  # Let Superset decide