mirror of
https://github.com/apache/superset.git
synced 2026-05-11 19:05:24 +00:00
210 lines
7.3 KiB
Python
210 lines
7.3 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""Privacy helpers for MCP user-directory and data-model metadata."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from collections.abc import Iterable
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Callable, TypeVar
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
# Callable type variable used by the marker decorator in this module so the
# decorated function keeps its own type.
F = TypeVar("F", bound=Callable[..., Any])

# Keys naming users, owners, or roles. The user-directory filter helpers in
# this module drop these keys from payloads and column lists.
USER_DIRECTORY_FIELDS = frozenset(
    (
        "changed_by",
        "changed_by_fk",
        "changed_by_name",
        "created_by",
        "created_by_fk",
        "created_by_name",
        "last_saved_by",
        "last_saved_by_fk",
        "last_saved_by_name",
        "owner",
        "owners",
        "roles",
    )
)

# Columns that stay usable as filter inputs although they are hidden from
# response payloads and select-column surfaces. Callers never supply a user ID
# for them: the value is filled in server-side with the current user.
SELF_REFERENCING_FILTER_COLUMNS = frozenset(
    ("created_by_fk", "owner", "created_by_fk_or_owner")
)

# Attribute name set on tool functions that need data-model metadata access.
DATA_MODEL_METADATA_ACCESS_ATTR = "_requires_data_model_metadata_access"

# Values that identify a structured data-model privacy denial.
DATA_MODEL_METADATA_ERROR_TYPE = "DataModelMetadataRestricted"
DATA_MODEL_METADATA_PRIVACY_SCOPE = "data_model"
DATA_MODEL_METADATA_ERROR_MESSAGE = (
    "You don't have permission to access underlying dataset or database "
    "details for your role."
)

# Chart columns that reveal dataset/database metadata through chart list and
# schema surfaces. ChartInfo only exposes a subset of these as direct model
# fields.
CHART_DATA_MODEL_COLUMNS = frozenset(
    (
        "catalog_perm",
        "datasource_id",
        "datasource_name",
        "datasource_type",
        "filters",
        "form_data",
        "params",
        "perm",
        "query_context",
        "schema_perm",
    )
)
|
|
|
|
|
|
class PrivacyError(BaseModel):
    """Structured privacy/permission denial returned in MCP tool responses.

    Carries a human-readable message, a machine-readable error type, the
    privacy scope that triggered the denial, and an optional timestamp.
    """

    # NOTE(review): this option configures timedelta serialization, but no
    # timedelta field is declared on this model - confirm it is still needed.
    model_config = ConfigDict(ser_json_timedelta="iso8601")

    error: str = Field(..., description="Error message")
    error_type: str = Field(..., description="Type of error")
    privacy_scope: str = Field(..., description="Privacy scope for the denial")
    timestamp: str | datetime | None = Field(
        default=None, description="Error timestamp"
    )

    @classmethod
    def create_data_model_metadata_denied(cls) -> "PrivacyError":
        """Build the denial used when data-model metadata access is refused.

        Returns:
            A ``PrivacyError`` populated from the module's data-model error
            constants and stamped with the current UTC time.
        """
        denial_fields = {
            "error": DATA_MODEL_METADATA_ERROR_MESSAGE,
            "error_type": DATA_MODEL_METADATA_ERROR_TYPE,
            "privacy_scope": DATA_MODEL_METADATA_PRIVACY_SCOPE,
            "timestamp": datetime.now(timezone.utc),
        }
        return cls(**denial_fields)
|
|
|
|
|
|
def requires_data_model_metadata_access(func: F) -> F:
    """Flag a tool function as needing data-model metadata permission.

    Sets the marker attribute named by ``DATA_MODEL_METADATA_ACCESS_ATTR`` to
    ``True`` on ``func`` and returns the same object, so it can be used as a
    decorator.
    """
    marker_name = DATA_MODEL_METADATA_ACCESS_ATTR
    setattr(func, marker_name, True)
    return func
|
|
|
|
|
|
def tool_requires_data_model_metadata_access(func: Any) -> bool:
    """Report whether ``func`` carries the data-model metadata marker.

    Returns ``True`` when the attribute named by
    ``DATA_MODEL_METADATA_ACCESS_ATTR`` exists on ``func`` and is truthy;
    a missing attribute counts as ``False``.
    """
    marker_value = getattr(func, DATA_MODEL_METADATA_ACCESS_ATTR, False)
    return True if marker_value else False
|
|
|
|
|
|
def user_can_view_data_model_metadata() -> bool:
    """Return whether the current user can inspect data-model metadata.

    Dataset drill/write permissions indicate active data-model introspection
    access. Dashboard-only viewers may have Dataset read access for chart
    rendering, but that should not expose dataset/database metadata through
    MCP tools. These resource-type permissions intentionally gate metadata
    globally rather than per dashboard chart.

    Returns:
        ``True`` when the security manager grants at least one of
        ``can_get_drill_info``, ``can_get_or_create_dataset`` or ``can_write``
        on ``Dataset``. ``False`` otherwise, including when the check itself
        raises (fails closed).
    """
    # Imported inside the function so this block brings its own dependency.
    import logging

    try:
        from superset import security_manager

        return any(
            security_manager.can_access(permission_name, "Dataset")
            for permission_name in (
                "can_get_drill_info",
                "can_get_or_create_dataset",
                "can_write",
            )
        )
    except Exception:  # noqa: BLE001
        # Still deny on any failure, but leave a trace: a silent False makes
        # a broken permission check indistinguishable from a real denial.
        logging.getLogger(__name__).warning(
            "Could not evaluate data-model metadata access; denying by default",
            exc_info=True,
        )
        return False
|
|
|
|
|
|
def inject_current_user_for_self_referencing_filters(filters: Any, user: Any) -> Any:
    """Replace the value of any self-referencing filter with the current user's ID.

    Callers specify the column and operator; the system fills in the value.
    This prevents enumeration of other users' content.

    Args:
        filters: A single filter, or a list/tuple of filters. Each filter is
            either a dict with a ``"col"`` key or an object exposing ``col``
            and ``model_copy``.
        user: The current user; must be truthy and have a truthy
            ``is_authenticated`` when a self-referencing filter is present.

    Returns:
        ``filters`` unchanged when it is falsy; otherwise a new list in which
        every filter on a ``SELF_REFERENCING_FILTER_COLUMNS`` column has its
        ``value`` set to ``user.id``. Input filters are not mutated.

    Raises:
        ValueError: If a self-referencing filter is present and the user is
            missing or not authenticated.
    """
    if not filters:
        return filters

    # A tuple of filters is iterated like a list. Wrapping it as one opaque
    # item would let self-referencing filters inside it skip injection. A
    # tuple that itself exposes ``col`` is kept as a single filter.
    if isinstance(filters, (list, tuple)) and not hasattr(filters, "col"):
        filter_items = list(filters)
    else:
        filter_items = [filters]

    injected = []
    for item in filter_items:
        is_mapping = isinstance(item, dict)
        col = item.get("col") if is_mapping else getattr(item, "col", None)
        if col in SELF_REFERENCING_FILTER_COLUMNS:
            if not user or not getattr(user, "is_authenticated", False):
                raise ValueError("This operation requires an authenticated user")
            # Build a copy so the caller's filter object is left untouched.
            if is_mapping:
                item = {**item, "value": user.id}
            else:
                item = item.model_copy(update={"value": user.id})
        injected.append(item)
    return injected
|
|
|
|
|
|
def filter_user_directory_fields(data: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``data`` without user-directory keys.

    Every key listed in ``USER_DIRECTORY_FIELDS`` (users, roles, owners and
    related access metadata) is left out; remaining items keep their order.
    """
    visible: dict[str, Any] = {}
    for field_name, field_value in data.items():
        if field_name in USER_DIRECTORY_FIELDS:
            continue
        visible[field_name] = field_value
    return visible
|
|
|
|
|
|
def filter_user_directory_columns(columns: Iterable[str]) -> list[str]:
    """Return the columns not listed in ``USER_DIRECTORY_FIELDS``, in order."""
    kept: list[str] = []
    for column_name in columns:
        if column_name not in USER_DIRECTORY_FIELDS:
            kept.append(column_name)
    return kept
|
|
|
|
|
|
def remove_chart_data_model_columns(columns: Iterable[str]) -> list[str]:
    """Return the columns not listed in ``CHART_DATA_MODEL_COLUMNS``, in order."""
    return list(
        filter(
            lambda column_name: column_name not in CHART_DATA_MODEL_COLUMNS,
            columns,
        )
    )
|
|
|
|
|
|
def redact_chart_data_model_fields(chart_info: Any) -> Any:
    """Blank out chart fields that expose dataset or database metadata.

    For a ``ChartInfo`` instance, returns a copy whose ``datasource_name``,
    ``datasource_type``, ``filters`` and ``form_data`` are ``None``. Any other
    value is returned unchanged.

    Fails closed: nothing here catches errors, so if redaction cannot be
    applied the exception propagates rather than unredacted data being
    returned.
    """
    from superset.mcp_service.chart.schemas import ChartInfo

    if not isinstance(chart_info, ChartInfo):
        return chart_info

    redactions = dict.fromkeys(
        ("datasource_name", "datasource_type", "filters", "form_data"),
        None,
    )
    return chart_info.model_copy(update=redactions)
|
|
|
|
|
|
def request_uses_chart_data_model_filter(filters: Iterable[Any]) -> bool:
    """Return whether chart filters target hidden data-model fields.

    Args:
        filters: Filters to inspect. Each is either a dict with a ``"col"``
            key or an object exposing a ``col`` attribute. ``None`` is
            treated as "no filters".

    Returns:
        ``True`` if any filter's column is in ``CHART_DATA_MODEL_COLUMNS``,
        otherwise ``False``.
    """
    if filters is None:
        return False
    for filter_ in filters:
        # Read the column from dicts as well as objects; an attribute-only
        # lookup would let a dict-shaped filter on a hidden column go
        # undetected.
        if isinstance(filter_, dict):
            col = filter_.get("col")
        else:
            col = getattr(filter_, "col", None)
        if col in CHART_DATA_MODEL_COLUMNS:
            return True
    return False
|
|
|
|
|
|
def is_data_model_metadata_error(data: Any) -> bool:
    """Return whether tool output is a structured data-model privacy denial.

    ``data`` qualifies when it is a dict whose ``error_type`` equals
    ``DATA_MODEL_METADATA_ERROR_TYPE`` and whose ``privacy_scope`` equals
    ``DATA_MODEL_METADATA_PRIVACY_SCOPE``. A missing ``privacy_scope`` key is
    treated as matching.
    """
    if not isinstance(data, dict):
        return False
    if data.get("error_type") != DATA_MODEL_METADATA_ERROR_TYPE:
        return False
    scope = data.get("privacy_scope", DATA_MODEL_METADATA_PRIVACY_SCOPE)
    return scope == DATA_MODEL_METADATA_PRIVACY_SCOPE
|