fix(mcp): simplify health_check tool and refactor system utils (#36063)

2026-06-05 23:59:25 +00:00 · 2025-11-13 04:28:09 +10:00
parent bb2e2a5ed6
commit c244e7f847
8 changed files with 314 additions and 177 deletions
--- a/superset/mcp_service/app.py
+++ b/superset/mcp_service/app.py
@@ -68,6 +68,7 @@ Explore & Analysis:

 System Information:
 - get_instance_info: Get instance-wide statistics and metadata
+- health_check: Simple health check tool (takes NO parameters, call without arguments)

 Available Resources:
 - superset://instance/metadata: Access instance configuration and metadata
--- a/superset/mcp_service/system/init.py
+++ b/superset/mcp_service/system/init.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""System-level MCP service tools and utilities."""
--- a/superset/mcp_service/system/resources/instance_metadata.py
+++ b/superset/mcp_service/system/resources/instance_metadata.py
@@ -55,7 +55,7 @@ def get_instance_metadata_resource() -> str:
        from superset.daos.user import UserDAO
        from superset.mcp_service.mcp_core import InstanceInfoCore
        from superset.mcp_service.system.schemas import InstanceInfo
-        from superset.mcp_service.system.tool.get_instance_info import (
+        from superset.mcp_service.system.system_utils import (
            calculate_dashboard_breakdown,
            calculate_database_breakdown,
            calculate_instance_summary,
--- a/superset/mcp_service/system/schemas.py
+++ b/superset/mcp_service/system/schemas.py
@@ -52,7 +52,6 @@ class GetSupersetInstanceInfoRequest(BaseModel):

    model_config = ConfigDict(
        extra="forbid",
-        str_strip_whitespace=True,
    )


--- a/superset/mcp_service/system/system_utils.py
+++ b/superset/mcp_service/system/system_utils.py
@@ -0,0 +1,196 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+System-level utility functions for MCP service.
+
+This module contains helper functions used by system tools for calculating
+instance metrics, dashboard breakdowns, database breakdowns, and activity summaries.
+"""
+
+from typing import Any, Dict
+
+from superset.mcp_service.system.schemas import (
+    DashboardBreakdown,
+    DatabaseBreakdown,
+    InstanceSummary,
+    PopularContent,
+    RecentActivity,
+)
+
+
+def calculate_dashboard_breakdown(
+    base_counts: Dict[str, int],
+    time_metrics: Dict[str, Dict[str, int]],
+    dao_classes: Dict[str, Any],
+) -> DashboardBreakdown:
+    """Calculate detailed dashboard breakdown metrics."""
+    try:
+        from superset.daos.base import ColumnOperator, ColumnOperatorEnum
+        from superset.extensions import db
+        from superset.models.dashboard import Dashboard
+
+        dashboard_dao = dao_classes["dashboards"]
+
+        # Published vs unpublished
+        published_count = dashboard_dao.count(
+            column_operators=[
+                ColumnOperator(col="published", opr=ColumnOperatorEnum.eq, value=True)
+            ]
+        )
+        unpublished_count = base_counts.get("total_dashboards", 0) - published_count
+
+        # Certified dashboards
+        certified_count = dashboard_dao.count(
+            column_operators=[
+                ColumnOperator(
+                    col="certified_by", opr=ColumnOperatorEnum.is_not_null, value=None
+                )
+            ]
+        )
+
+        # Dashboards with/without charts
+        dashboards_with_charts = (
+            db.session.query(Dashboard).join(Dashboard.slices).distinct().count()
+        )
+        dashboards_without_charts = (
+            base_counts.get("total_dashboards", 0) - dashboards_with_charts
+        )
+
+        return DashboardBreakdown(
+            published=published_count,
+            unpublished=unpublished_count,
+            certified=certified_count,
+            with_charts=dashboards_with_charts,
+            without_charts=dashboards_without_charts,
+        )
+    except Exception:
+        # Return empty breakdown on error
+        return DashboardBreakdown(
+            published=0,
+            unpublished=0,
+            certified=0,
+            with_charts=0,
+            without_charts=0,
+        )
+
+
+def calculate_database_breakdown(
+    base_counts: Dict[str, int],
+    time_metrics: Dict[str, Dict[str, int]],
+    dao_classes: Dict[str, Any],
+) -> DatabaseBreakdown:
+    """Calculate database type breakdown."""
+    try:
+        from superset.extensions import db
+        from superset.models.core import Database
+
+        # Get database types distribution
+        db_types = db.session.query(
+            Database.database_name, Database.sqlalchemy_uri
+        ).all()
+
+        type_counts: Dict[str, int] = {}
+        for _name, uri in db_types:
+            if uri:
+                # Extract database type from SQLAlchemy URI
+                db_type = uri.split("://")[0] if "://" in uri else "unknown"
+                type_counts[db_type] = type_counts.get(db_type, 0) + 1
+            else:
+                type_counts["unknown"] = type_counts.get("unknown", 0) + 1
+
+        return DatabaseBreakdown(by_type=type_counts)
+    except Exception:
+        # Return empty breakdown on error
+        return DatabaseBreakdown(by_type={})
+
+
+def calculate_instance_summary(
+    base_counts: Dict[str, int],
+    time_metrics: Dict[str, Dict[str, int]],
+    dao_classes: Dict[str, Any],
+) -> InstanceSummary:
+    """Calculate instance summary with computed metrics."""
+    try:
+        from flask_appbuilder.security.sqla.models import Role
+
+        from superset.extensions import db
+
+        # Add roles count (no DAO available)
+        total_roles = db.session.query(Role).count()
+
+        # Calculate average charts per dashboard
+        total_dashboards = base_counts.get("total_dashboards", 0)
+        total_charts = base_counts.get("total_charts", 0)
+        avg_charts_per_dashboard = (
+            (total_charts / total_dashboards) if total_dashboards > 0 else 0
+        )
+
+        return InstanceSummary(
+            total_dashboards=total_dashboards,
+            total_charts=total_charts,
+            total_datasets=base_counts.get("total_datasets", 0),
+            total_databases=base_counts.get("total_databases", 0),
+            total_users=base_counts.get("total_users", 0),
+            total_roles=total_roles,
+            total_tags=base_counts.get("total_tags", 0),
+            avg_charts_per_dashboard=round(avg_charts_per_dashboard, 2),
+        )
+    except Exception:
+        # Return empty summary on error
+        return InstanceSummary(
+            total_dashboards=0,
+            total_charts=0,
+            total_datasets=0,
+            total_databases=0,
+            total_users=0,
+            total_roles=0,
+            total_tags=0,
+            avg_charts_per_dashboard=0.0,
+        )
+
+
+def calculate_recent_activity(
+    base_counts: Dict[str, int],
+    time_metrics: Dict[str, Dict[str, int]],
+    dao_classes: Dict[str, Any],
+) -> RecentActivity:
+    """Transform time metrics into RecentActivity format."""
+    monthly = time_metrics.get("monthly", {})
+    recent = time_metrics.get("recent", {})
+
+    return RecentActivity(
+        dashboards_created_last_30_days=monthly.get("dashboards_created", 0),
+        charts_created_last_30_days=monthly.get("charts_created", 0),
+        datasets_created_last_30_days=monthly.get("datasets_created", 0),
+        dashboards_modified_last_7_days=recent.get("dashboards_modified", 0),
+        charts_modified_last_7_days=recent.get("charts_modified", 0),
+        datasets_modified_last_7_days=recent.get("datasets_modified", 0),
+    )
+
+
+def calculate_popular_content(
+    base_counts: Dict[str, int],
+    time_metrics: Dict[str, Dict[str, int]],
+    dao_classes: Dict[str, Any],
+) -> PopularContent:
+    """Calculate popular content metrics (placeholder implementation)."""
+
+    return PopularContent(
+        top_tags=[],
+        top_creators=[],
+    )
--- a/superset/mcp_service/system/tool/get_instance_info.py
+++ b/superset/mcp_service/system/tool/get_instance_info.py
@@ -21,7 +21,6 @@ InstanceInfoCore for flexible, extensible metrics calculation.
 """

 import logging
-from typing import Any, Dict

 from fastmcp import Context

@@ -29,181 +28,20 @@ from superset.mcp_service.app import mcp
 from superset.mcp_service.auth import mcp_auth_hook
 from superset.mcp_service.mcp_core import InstanceInfoCore
 from superset.mcp_service.system.schemas import (
-    DashboardBreakdown,
-    DatabaseBreakdown,
    GetSupersetInstanceInfoRequest,
    InstanceInfo,
-    InstanceSummary,
-    PopularContent,
-    RecentActivity,
+)
+from superset.mcp_service.system.system_utils import (
+    calculate_dashboard_breakdown,
+    calculate_database_breakdown,
+    calculate_instance_summary,
+    calculate_popular_content,
+    calculate_recent_activity,
 )

 logger = logging.getLogger(__name__)


-def calculate_dashboard_breakdown(
-    base_counts: Dict[str, int],
-    time_metrics: Dict[str, Dict[str, int]],
-    dao_classes: Dict[str, Any],
-) -> DashboardBreakdown:
-    """Calculate detailed dashboard breakdown metrics."""
-    try:
-        from superset.daos.base import ColumnOperator, ColumnOperatorEnum
-        from superset.extensions import db
-        from superset.models.dashboard import Dashboard
-
-        dashboard_dao = dao_classes["dashboards"]
-
-        # Published vs unpublished
-        published_count = dashboard_dao.count(
-            column_operators=[
-                ColumnOperator(col="published", opr=ColumnOperatorEnum.eq, value=True)
-            ]
-        )
-        unpublished_count = base_counts.get("total_dashboards", 0) - published_count
-
-        # Certified dashboards
-        certified_count = dashboard_dao.count(
-            column_operators=[
-                ColumnOperator(
-                    col="certified_by", opr=ColumnOperatorEnum.is_not_null, value=None
-                )
-            ]
-        )
-
-        # Dashboards with/without charts
-        dashboards_with_charts = (
-            db.session.query(Dashboard).join(Dashboard.slices).distinct().count()
-        )
-        dashboards_without_charts = (
-            base_counts.get("total_dashboards", 0) - dashboards_with_charts
-        )
-
-        return DashboardBreakdown(
-            published=published_count,
-            unpublished=unpublished_count,
-            certified=certified_count,
-            with_charts=dashboards_with_charts,
-            without_charts=dashboards_without_charts,
-        )
-    except Exception:
-        # Return empty breakdown on error
-        return DashboardBreakdown(
-            published=0,
-            unpublished=0,
-            certified=0,
-            with_charts=0,
-            without_charts=0,
-        )
-
-
-def calculate_database_breakdown(
-    base_counts: Dict[str, int],
-    time_metrics: Dict[str, Dict[str, int]],
-    dao_classes: Dict[str, Any],
-) -> DatabaseBreakdown:
-    """Calculate database type breakdown."""
-    try:
-        from superset.extensions import db
-        from superset.models.core import Database
-
-        # Get database types distribution
-        db_types = db.session.query(
-            Database.database_name, Database.sqlalchemy_uri
-        ).all()
-
-        type_counts: Dict[str, int] = {}
-        for _name, uri in db_types:
-            if uri:
-                # Extract database type from SQLAlchemy URI
-                db_type = uri.split("://")[0] if "://" in uri else "unknown"
-                type_counts[db_type] = type_counts.get(db_type, 0) + 1
-            else:
-                type_counts["unknown"] = type_counts.get("unknown", 0) + 1
-
-        return DatabaseBreakdown(by_type=type_counts)
-    except Exception:
-        # Return empty breakdown on error
-        return DatabaseBreakdown(by_type={})
-
-
-def calculate_instance_summary(
-    base_counts: Dict[str, int],
-    time_metrics: Dict[str, Dict[str, int]],
-    dao_classes: Dict[str, Any],
-) -> InstanceSummary:
-    """Calculate instance summary with computed metrics."""
-    try:
-        from flask_appbuilder.security.sqla.models import Role
-
-        from superset.extensions import db
-
-        # Add roles count (no DAO available)
-        total_roles = db.session.query(Role).count()
-
-        # Calculate average charts per dashboard
-        total_dashboards = base_counts.get("total_dashboards", 0)
-        total_charts = base_counts.get("total_charts", 0)
-        avg_charts_per_dashboard = (
-            (total_charts / total_dashboards) if total_dashboards > 0 else 0
-        )
-
-        return InstanceSummary(
-            total_dashboards=total_dashboards,
-            total_charts=total_charts,
-            total_datasets=base_counts.get("total_datasets", 0),
-            total_databases=base_counts.get("total_databases", 0),
-            total_users=base_counts.get("total_users", 0),
-            total_roles=total_roles,
-            total_tags=base_counts.get("total_tags", 0),
-            avg_charts_per_dashboard=round(avg_charts_per_dashboard, 2),
-        )
-    except Exception:
-        # Return empty summary on error
-        return InstanceSummary(
-            total_dashboards=0,
-            total_charts=0,
-            total_datasets=0,
-            total_databases=0,
-            total_users=0,
-            total_roles=0,
-            total_tags=0,
-            avg_charts_per_dashboard=0.0,
-        )
-
-
-def calculate_recent_activity(
-    base_counts: Dict[str, int],
-    time_metrics: Dict[str, Dict[str, int]],
-    dao_classes: Dict[str, Any],
-) -> RecentActivity:
-    """Transform time metrics into RecentActivity format."""
-    monthly = time_metrics.get("monthly", {})
-    recent = time_metrics.get("recent", {})
-
-    return RecentActivity(
-        dashboards_created_last_30_days=monthly.get("dashboards_created", 0),
-        charts_created_last_30_days=monthly.get("charts_created", 0),
-        datasets_created_last_30_days=monthly.get("datasets_created", 0),
-        dashboards_modified_last_7_days=recent.get("dashboards_modified", 0),
-        charts_modified_last_7_days=recent.get("charts_modified", 0),
-        datasets_modified_last_7_days=recent.get("datasets_modified", 0),
-    )
-
-
-def calculate_popular_content(
-    base_counts: Dict[str, int],
-    time_metrics: Dict[str, Dict[str, int]],
-    dao_classes: Dict[str, Any],
-) -> PopularContent:
-    """Calculate popular content metrics (placeholder implementation)."""
-
-    return PopularContent(
-        top_tags=[],
-        top_creators=[],
-    )
-
-
 # Configure the instance info core
 _instance_info_core = InstanceInfoCore(
    dao_classes={
--- a/superset/mcp_service/system/tool/health_check.py
+++ b/superset/mcp_service/system/tool/health_check.py
@@ -21,9 +21,12 @@ import datetime
 import logging
 import platform

+from flask import current_app
+
 from superset.mcp_service.app import mcp
 from superset.mcp_service.auth import mcp_auth_hook
 from superset.mcp_service.system.schemas import HealthCheckResponse
+from superset.utils.version import get_version_metadata

 logger = logging.getLogger(__name__)

@@ -34,18 +37,44 @@ async def health_check() -> HealthCheckResponse:
    """
    Simple health check tool for testing the MCP service.

+    IMPORTANT: This tool takes NO parameters. Call it without any arguments.
+
    Returns basic system information and confirms the service is running.
    This is useful for testing connectivity and basic functionality.

+    Parameters:
+        None - This tool does not accept any parameters
+
    Returns:
-        HealthCheckResponse: Health status and system information
+        HealthCheckResponse: Health status and system information including:
+            - status: "healthy" or "error"
+            - timestamp: ISO format timestamp
+            - service: Service name from APP_NAME config (e.g., "Superset MCP Service")
+            - version: Superset version string
+            - python_version: Python version
+            - platform: Operating system platform
+
+    Example:
+        # Correct - no parameters
+        health_check()
+
+        # Incorrect - do not pass any arguments
+        # health_check(request={})  # This will cause validation errors
    """
+    # Get app name from config (safe to do outside try block)
+    app_name = current_app.config.get("APP_NAME", "Superset")
+    service_name = f"{app_name} MCP Service"
+
    try:
+        # Get version from Superset version metadata
+        version_metadata = get_version_metadata()
+        version = version_metadata.get("version_string", "unknown")
+
        response = HealthCheckResponse(
            status="healthy",
            timestamp=datetime.datetime.now().isoformat(),
-            service="Superset MCP Service",
-            version="1.0.0",
+            service=service_name,
+            version=version,
            python_version=platform.python_version(),
            platform=platform.system(),
        )
@@ -56,11 +85,12 @@ async def health_check() -> HealthCheckResponse:
    except Exception as e:
        logger.error("Health check failed: %s", e)
        # Return error status but don't raise to keep tool working
-        return HealthCheckResponse(
+        response = HealthCheckResponse(
            status="error",
            timestamp=datetime.datetime.now().isoformat(),
-            service="Superset MCP Service",
-            version="1.0.0",
+            service=service_name,
+            version="unknown",
            python_version=platform.python_version(),
            platform=platform.system(),
        )
+        return response
--- a/tests/unit_tests/mcp_service/system/tool/test_health_check.py
+++ b/tests/unit_tests/mcp_service/system/tool/test_health_check.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Tests for health_check MCP tool."""
+
+from superset.mcp_service.system.schemas import HealthCheckResponse
+
+
+def test_health_check_response_schema():
+    """Test that HealthCheckResponse has required fields."""
+    response = HealthCheckResponse(
+        status="healthy",
+        timestamp="2025-11-10T19:00:00",
+        service="Test MCP Service",
+        version="4.0.0",
+        python_version="3.11.0",
+        platform="Darwin",
+    )
+
+    assert response.status == "healthy"
+    assert response.service == "Test MCP Service"
+    assert response.version == "4.0.0"
+    assert response.python_version == "3.11.0"
+    assert response.platform == "Darwin"
+    assert response.timestamp is not None
+    assert response.uptime_seconds is None  # Optional field
+
+
+def test_health_check_response_with_uptime():
+    """Test HealthCheckResponse with optional uptime field."""
+    response = HealthCheckResponse(
+        status="healthy",
+        timestamp="2025-11-10T19:00:00",
+        service="Test MCP Service",
+        version="4.0.0",
+        python_version="3.11.0",
+        platform="Darwin",
+        uptime_seconds=123.45,
+    )
+
+    assert response.uptime_seconds == 123.45