Files
superset2/superset/mcp_service/system/resources/instance_metadata.py

156 lines
6.0 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
System resources for providing instance configuration and stats.
This resource differs from the get_instance_info tool by also including
available dataset IDs and database IDs, so LLMs can immediately call
get_dataset_info or execute_sql without an extra list call.
"""
import logging
from sqlalchemy.exc import SQLAlchemyError
from superset.mcp_service.app import mcp
from superset.mcp_service.auth import mcp_auth_hook
logger = logging.getLogger(__name__)
@mcp.resource("instance://metadata")
@mcp_auth_hook
def get_instance_metadata_resource() -> str:
"""
Provide instance metadata with available dataset and database IDs.
This resource gives LLMs context about:
- Instance summary stats (counts of dashboards, charts, datasets)
- Available database connections with their IDs (for execute_sql)
- Available datasets with IDs and table names (for get_dataset_info)
- Dashboard and chart statistics
"""
try:
from typing import Any, cast, Type
from superset.daos.base import BaseDAO
from superset.daos.chart import ChartDAO
from superset.daos.dashboard import DashboardDAO
from superset.daos.database import DatabaseDAO
from superset.daos.dataset import DatasetDAO
from superset.daos.tag import TagDAO
from superset.daos.user import UserDAO
from superset.mcp_service.mcp_core import InstanceInfoCore
from superset.mcp_service.system.schemas import InstanceInfo
from superset.mcp_service.system.system_utils import (
calculate_dashboard_breakdown,
calculate_database_breakdown,
calculate_instance_summary,
calculate_popular_content,
calculate_recent_activity,
)
from superset.utils import json
instance_info_core = InstanceInfoCore(
dao_classes={
"dashboards": cast(Type[BaseDAO[Any]], DashboardDAO),
"charts": cast(Type[BaseDAO[Any]], ChartDAO),
"datasets": cast(Type[BaseDAO[Any]], DatasetDAO),
"databases": cast(Type[BaseDAO[Any]], DatabaseDAO),
"users": cast(Type[BaseDAO[Any]], UserDAO),
"tags": cast(Type[BaseDAO[Any]], TagDAO),
},
output_schema=InstanceInfo,
metric_calculators={
"instance_summary": calculate_instance_summary,
"recent_activity": calculate_recent_activity,
"dashboard_breakdown": calculate_dashboard_breakdown,
"database_breakdown": calculate_database_breakdown,
"popular_content": calculate_popular_content,
},
time_windows={
"recent": 7,
"monthly": 30,
"quarterly": 90,
},
logger=logger,
)
# Get base instance info
base_result = json.loads(instance_info_core.get_resource())
# Remove empty popular_content if it has no useful data
popular = base_result.get("popular_content", {})
if popular and not any(popular.get(k) for k in popular):
del base_result["popular_content"]
# Add available datasets (top 20 by most recent modification)
dataset_dao = instance_info_core.dao_classes["datasets"]
try:
datasets = dataset_dao.find_all()
# Convert to string to avoid TypeError when comparing datetime with None
sorted_datasets = sorted(
datasets,
key=lambda d: str(getattr(d, "changed_on", "") or ""),
reverse=True,
)[:20]
base_result["available_datasets"] = [
{
"id": ds.id,
"table_name": ds.table_name,
"schema": getattr(ds, "schema", None),
"database_id": getattr(ds, "database_id", None),
}
for ds in sorted_datasets
]
except (SQLAlchemyError, AttributeError) as e:
logger.warning("Could not fetch datasets for metadata: %s", e)
base_result["available_datasets"] = []
# Add available databases (for execute_sql)
database_dao = instance_info_core.dao_classes["databases"]
try:
databases = database_dao.find_all()
base_result["available_databases"] = [
{
"id": db.id,
"database_name": db.database_name,
"backend": getattr(db, "backend", None),
}
for db in databases
]
except (SQLAlchemyError, AttributeError) as e:
logger.warning("Could not fetch databases for metadata: %s", e)
base_result["available_databases"] = []
return json.dumps(base_result, indent=2)
except (SQLAlchemyError, AttributeError, KeyError, ValueError) as e:
logger.error("Error generating instance metadata: %s", e)
from superset.utils import json
return json.dumps(
{
"error": "Unable to fetch complete metadata",
"tips": [
"Use list_datasets to explore available data",
"Use get_instance_info for basic stats",
],
}
)