fix(mcp): add description and certification fields to default list tool columns (#39017)

This commit is contained in:
Amin Ghadersohi
2026-04-06 13:37:52 -04:00
committed by GitHub
parent 83ad1eca26
commit 7be2acb2f3
13 changed files with 198 additions and 20 deletions

View File

@@ -74,6 +74,8 @@ class ChartLike(Protocol):
cache_timeout: int | None
form_data: Dict[str, Any] | None
query_context: Any | None
certified_by: str | None
certification_details: str | None
changed_by: Any | None # User object
changed_by_name: str | None
changed_on: str | datetime | None
@@ -113,6 +115,12 @@ class ChartInfo(BaseModel):
created_on_humanized: str | None = Field(
None, description="Humanized creation time"
)
certified_by: str | None = Field(
None, description="Name of the person or team who certified this chart"
)
certification_details: str | None = Field(
None, description="Certification details or reason"
)
uuid: str | None = Field(None, description="Chart UUID")
tags: List[TagInfo] = Field(default_factory=list, description="Chart tags")
owners: List[UserInfo] = Field(default_factory=list, description="Chart owners")
@@ -311,6 +319,8 @@ def serialize_chart_object(chart: ChartLike | None) -> ChartInfo | None:
datasource_type=getattr(chart, "datasource_type", None),
url=chart_url,
description=getattr(chart, "description", None),
certified_by=getattr(chart, "certified_by", None),
certification_details=getattr(chart, "certification_details", None),
cache_timeout=getattr(chart, "cache_timeout", None),
form_data=chart_form_data,
changed_by=getattr(chart, "changed_by_name", None)

View File

@@ -46,6 +46,9 @@ DEFAULT_CHART_COLUMNS = [
"id",
"slice_name",
"viz_type",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",

View File

@@ -238,7 +238,17 @@ def get_columns_from_model(
# - Extra columns (computed/relationship fields not on the model)
# Chart configuration
CHART_DEFAULT_COLUMNS = ["id", "slice_name", "viz_type", "url", "changed_on_humanized"]
CHART_DEFAULT_COLUMNS = [
"id",
"slice_name",
"viz_type",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",
]
CHART_SORTABLE_COLUMNS = [
"id",
"slice_name",
@@ -306,6 +316,18 @@ CHART_EXTRA_COLUMNS: dict[str, ColumnMetadata] = {
type="str",
is_default=False,
),
"certified_by": ColumnMetadata(
name="certified_by",
description="Name of the person who certified this chart",
type="str",
is_default=True,
),
"certification_details": ColumnMetadata(
name="certification_details",
description="Certification details or reason",
type="str",
is_default=True,
),
"tags": ColumnMetadata(
name="tags", description="Chart tags", type="list", is_default=False
),
@@ -315,7 +337,16 @@ CHART_EXTRA_COLUMNS: dict[str, ColumnMetadata] = {
}
# Dataset configuration
DATASET_DEFAULT_COLUMNS = ["id", "table_name", "schema", "changed_on_humanized"]
DATASET_DEFAULT_COLUMNS = [
"id",
"table_name",
"schema",
"description",
"certified_by",
"certification_details",
"changed_on",
"changed_on_humanized",
]
DATASET_SORTABLE_COLUMNS = [
"id",
"table_name",
@@ -367,6 +398,18 @@ DATASET_EXTRA_COLUMNS: dict[str, ColumnMetadata] = {
type="str",
is_default=False,
),
"certified_by": ColumnMetadata(
name="certified_by",
description="Name of the person who certified this dataset",
type="str",
is_default=True,
),
"certification_details": ColumnMetadata(
name="certification_details",
description="Certification details or reason",
type="str",
is_default=True,
),
"metrics": ColumnMetadata(
name="metrics",
description="Dataset metrics definitions",
@@ -392,7 +435,11 @@ DASHBOARD_DEFAULT_COLUMNS = [
"id",
"dashboard_title",
"slug",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",
]
DASHBOARD_SORTABLE_COLUMNS = [

View File

@@ -48,6 +48,9 @@ DEFAULT_DASHBOARD_COLUMNS = [
"id",
"dashboard_title",
"slug",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",

View File

@@ -102,6 +102,12 @@ class DatasetInfo(BaseModel):
schema_name: str | None = Field(None, description="Schema name", alias="schema")
database_name: str | None = Field(None, description="Database name")
description: str | None = Field(None, description="Dataset description")
certified_by: str | None = Field(
None, description="Name of the person or team who certified this dataset"
)
certification_details: str | None = Field(
None, description="Certification details or reason"
)
changed_by: str | None = Field(None, description="Last modifier (username)")
changed_on: str | datetime | None = Field(
None, description="Last modification timestamp"
@@ -363,6 +369,8 @@ def serialize_dataset_object(dataset: Any) -> DatasetInfo | None:
if getattr(dataset, "database", None)
else None,
description=getattr(dataset, "description", None),
certified_by=getattr(dataset, "certified_by", None),
certification_details=getattr(dataset, "certification_details", None),
changed_by=getattr(dataset, "changed_by_name", None)
or (str(dataset.changed_by) if getattr(dataset, "changed_by", None) else None),
changed_on=getattr(dataset, "changed_on", None),

View File

@@ -48,6 +48,9 @@ DEFAULT_DATASET_COLUMNS = [
"id",
"table_name",
"schema",
"description",
"certified_by",
"certification_details",
"changed_on",
"changed_on_humanized",
]

View File

@@ -161,11 +161,23 @@ class ModelListCore(BaseCore, Generic[L]):
select_columns = parse_json_or_list(
select_columns, param_name="select_columns"
)
columns_to_load = select_columns
columns_to_load = list(select_columns)
columns_requested = select_columns
else:
columns_to_load = self.default_columns
columns_to_load = list(self.default_columns)
columns_requested = self.default_columns
# Ensure computed columns have their dependencies loaded.
# Humanized timestamps are derived from their raw counterparts —
# if the raw column isn't loaded, the serializer produces null.
computed_deps: dict[str, str] = {
"changed_on_humanized": "changed_on",
"created_on_humanized": "created_on",
}
for computed, dependency in computed_deps.items():
if computed in columns_to_load and dependency not in columns_to_load:
columns_to_load.append(dependency)
# Query the DAO
items: List[Any]
items, total_count = self.dao_class.list(

View File

@@ -363,6 +363,8 @@ def _make_mock_chart(chart_id: int = 42) -> Mock:
chart.datasource_name = "test_table"
chart.datasource_type = "table"
chart.description = None
chart.certified_by = None
chart.certification_details = None
chart.cache_timeout = None
chart.changed_by = None
chart.changed_by_name = "admin"
@@ -394,6 +396,20 @@ class TestChartSerializationEagerLoading:
assert result.tags == []
assert result.owners == []
def test_serialize_chart_object_with_certification_fields(self):
"""serialize_chart_object correctly serializes non-None certification values."""
from superset.mcp_service.chart.schemas import serialize_chart_object
chart = _make_mock_chart()
chart.certified_by = "Data Team"
chart.certification_details = "Verified Q1 2026 metrics"
result = serialize_chart_object(chart)
assert result is not None
assert result.certified_by == "Data Team"
assert result.certification_details == "Verified Q1 2026 metrics"
def test_serialize_chart_object_fails_on_detached_instance(self):
"""serialize_chart_object raises when accessing lazy attrs on detached
instance — this is the bug scenario that the eager-loading fix prevents."""

View File

@@ -46,6 +46,8 @@ def mock_chart():
chart.datasource_name = "test_dataset"
chart.datasource_type = "table"
chart.description = "Test chart"
chart.certified_by = None
chart.certification_details = None
chart.url = "/chart/1"
chart.changed_by_name = "admin"
chart.changed_on = None
@@ -197,20 +199,21 @@ class TestChartDefaultColumnFiltering:
"""Test that minimal default columns are properly defined."""
from superset.mcp_service.common.schema_discovery import CHART_DEFAULT_COLUMNS
# Required minimal columns must be present
required_columns = {
assert set(CHART_DEFAULT_COLUMNS) == {
"id",
"slice_name",
"viz_type",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",
}
assert required_columns.issubset(set(CHART_DEFAULT_COLUMNS))
# Heavy columns should NOT be in defaults
assert "form_data" not in CHART_DEFAULT_COLUMNS
assert "query_context" not in CHART_DEFAULT_COLUMNS
assert "description" not in CHART_DEFAULT_COLUMNS
assert "datasource_name" not in CHART_DEFAULT_COLUMNS
assert "uuid" not in CHART_DEFAULT_COLUMNS

View File

@@ -87,6 +87,8 @@ def _mock_chart(id: int = 1, slice_name: str = "Test Chart") -> Mock:
chart.datasource_name = None
chart.datasource_type = None
chart.description = None
chart.certified_by = None
chart.certification_details = None
chart.cache_timeout = None
chart.changed_by = None
chart.changed_by_name = None

View File

@@ -529,13 +529,15 @@ class TestDashboardDefaultColumnFiltering:
DASHBOARD_DEFAULT_COLUMNS,
)
# Should have exactly 5 minimal columns
assert len(DASHBOARD_DEFAULT_COLUMNS) == 5
assert set(DASHBOARD_DEFAULT_COLUMNS) == {
"id",
"dashboard_title",
"slug",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",
}

View File

@@ -46,6 +46,8 @@ def create_mock_dataset(
dataset.table_name = table_name
dataset.schema = schema
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -106,6 +108,8 @@ async def test_list_datasets_basic(mock_list, mcp_server):
dataset.table_name = "Test DatasetInfo"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -212,6 +216,8 @@ async def test_list_datasets_custom_uuid_columns(mock_list, mcp_server):
dataset.table_name = "custom_dataset"
dataset.schema = "public"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -290,6 +296,8 @@ async def test_list_datasets_with_filters(mock_list, mcp_server):
dataset.table_name = "Filtered Dataset"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -391,6 +399,8 @@ async def test_list_datasets_with_string_filters(mock_list, mcp_server):
dataset.table_name = "String Filter Dataset"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -468,6 +478,8 @@ async def test_list_datasets_with_search(mock_list, mcp_server):
dataset.database_name = "test_db"
dataset.database = None
dataset.description = "A test dataset"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by = "admin"
dataset.changed_by_name = "admin"
dataset.changed_on = None
@@ -559,6 +571,8 @@ async def test_list_datasets_simple_with_search(mock_list, mcp_server):
dataset.database_name = "analytics_db"
dataset.database = None
dataset.description = "Another test dataset"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by = "user"
dataset.changed_by_name = "user"
dataset.changed_on = None
@@ -648,6 +662,8 @@ async def test_list_datasets_simple_basic(mock_list, mcp_server):
dataset.table_name = "Test DatasetInfo"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -743,6 +759,8 @@ async def test_list_datasets_simple_with_filters(mock_list, mcp_server):
dataset.table_name = "Sales Dataset"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -853,6 +871,8 @@ async def test_get_dataset_info_success(mock_info, mcp_server):
dataset.table_name = "Test DatasetInfo"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -990,6 +1010,8 @@ async def test_get_dataset_info_includes_columns_and_metrics(mock_info, mcp_serv
dataset.database = MagicMock()
dataset.database.database_name = "examples"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -1080,6 +1102,8 @@ async def test_list_datasets_includes_columns_and_metrics(mock_list, mcp_server)
dataset.database = MagicMock()
dataset.database.database_name = "examples"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -1154,6 +1178,8 @@ async def test_get_dataset_info_by_uuid(mock_find_object, mcp_server):
dataset.table_name = "Test Dataset UUID"
dataset.schema = "main"
dataset.description = "desc"
dataset.certified_by = None
dataset.certification_details = None
dataset.changed_by_name = "admin"
dataset.changed_on = None
dataset.changed_on_humanized = None
@@ -1190,6 +1216,36 @@ async def test_get_dataset_info_by_uuid(mock_find_object, mcp_server):
assert data["table_name"] == "Test Dataset UUID"
class TestDatasetCertificationSerialization:
"""Test certification fields flow through dataset serialization."""
def test_serialize_dataset_with_certification_fields(self):
"""Serializes non-None certification values."""
from superset.mcp_service.dataset.schemas import serialize_dataset_object
dataset = create_mock_dataset()
dataset.certified_by = "Analytics Engineering"
dataset.certification_details = "Production-ready, SLA-backed"
result = serialize_dataset_object(dataset)
assert result is not None
assert result.certified_by == "Analytics Engineering"
assert result.certification_details == "Production-ready, SLA-backed"
def test_serialize_dataset_with_none_certification(self):
"""serialize_dataset_object handles None certification fields."""
from superset.mcp_service.dataset.schemas import serialize_dataset_object
dataset = create_mock_dataset()
result = serialize_dataset_object(dataset)
assert result is not None
assert result.certified_by is None
assert result.certification_details is None
class TestDatasetDefaultColumnFiltering:
"""Test default column filtering behavior for datasets."""
@@ -1197,12 +1253,14 @@ class TestDatasetDefaultColumnFiltering:
"""Test that minimal default columns are properly defined."""
from superset.mcp_service.common.schema_discovery import DATASET_DEFAULT_COLUMNS
# Should have exactly 4 minimal columns
assert len(DATASET_DEFAULT_COLUMNS) == 4
assert set(DATASET_DEFAULT_COLUMNS) == {
"id",
"table_name",
"schema",
"description",
"certified_by",
"certification_details",
"changed_on",
"changed_on_humanized",
}
assert "uuid" not in DATASET_DEFAULT_COLUMNS
@@ -1210,7 +1268,6 @@ class TestDatasetDefaultColumnFiltering:
# Heavy columns should NOT be in defaults
assert "columns" not in DATASET_DEFAULT_COLUMNS
assert "metrics" not in DATASET_DEFAULT_COLUMNS
assert "description" not in DATASET_DEFAULT_COLUMNS
assert "database_name" not in DATASET_DEFAULT_COLUMNS
@patch("superset.daos.dataset.DatasetDAO.list")
@@ -1236,6 +1293,9 @@ class TestDatasetDefaultColumnFiltering:
"id",
"table_name",
"schema",
"description",
"certified_by",
"certification_details",
"changed_on",
"changed_on_humanized",
}
@@ -1324,6 +1384,9 @@ class TestDatasetDefaultColumnFiltering:
"id",
"table_name",
"schema",
"description",
"certified_by",
"certification_details",
"changed_on",
"changed_on_humanized",
}
@@ -1339,7 +1402,6 @@ class TestDatasetDefaultColumnFiltering:
# Verify non-default columns are NOT present (not even with null values)
non_default_columns = [
"description",
"database_name",
"changed_by",
"columns",

View File

@@ -153,16 +153,18 @@ class TestModelSchemaInfo:
def test_chart_default_columns(self):
"""Test chart default columns include required minimal set."""
required_columns = {
assert set(CHART_DEFAULT_COLUMNS) == {
"id",
"slice_name",
"viz_type",
"description",
"certified_by",
"certification_details",
"url",
"changed_on",
"changed_on_humanized",
}
assert required_columns.issubset(set(CHART_DEFAULT_COLUMNS))
# These should NOT be in defaults
assert "description" not in CHART_DEFAULT_COLUMNS
# Heavy columns should NOT be in defaults
assert "form_data" not in CHART_DEFAULT_COLUMNS
assert "uuid" not in CHART_DEFAULT_COLUMNS
@@ -291,12 +293,17 @@ class TestGetSchemaToolViaClient:
assert id_col["type"] == "int"
assert id_col["is_default"] is True
# Find a non-default column (description is on the model)
# description is now a default column
desc_col = next(
(c for c in select_cols if c["name"] == "description"), None
)
assert desc_col is not None
assert desc_col["is_default"] is False
assert desc_col["is_default"] is True
# Find a non-default column (uuid is on the model but not default)
uuid_col = next((c for c in select_cols if c["name"] == "uuid"), None)
assert uuid_col is not None
assert uuid_col["is_default"] is False
class TestGetSchemaEdgeCases: