Compare commits

..

2 Commits

Author SHA1 Message Date
Beto Dealmeida
137c9fca6d Improvements 2026-05-14 11:18:33 -04:00
Beto Dealmeida
671eed7863 feat(semantic layers): form for semantic layer with single semantic view 2026-05-14 11:02:33 -04:00
23 changed files with 167 additions and 229 deletions

View File

@@ -8,10 +8,6 @@ on:
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
pull-requests: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -9,10 +9,6 @@ on:
branches:
- "master"
permissions:
contents: read
pull-requests: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
cancel-in-progress: true

View File

@@ -6,9 +6,6 @@ on:
- "master"
- "[0-9].[0-9]*"
permissions:
contents: read
jobs:
config:
runs-on: ubuntu-24.04

View File

@@ -6,9 +6,6 @@ on:
- "superset-embedded-sdk/**"
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -6,9 +6,6 @@ on:
- "master"
- "[0-9].[0-9]*"
permissions:
contents: read
jobs:
config:
runs-on: ubuntu-24.04

View File

@@ -8,9 +8,6 @@ on:
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
jobs:
validate-all-ghas:

View File

@@ -4,9 +4,6 @@ on:
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -8,10 +8,6 @@ on:
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
pull-requests: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -8,10 +8,6 @@ on:
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
pull-requests: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -6,9 +6,6 @@ on:
paths:
- "helm/**"
permissions:
contents: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -8,10 +8,6 @@ on:
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
pull-requests: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -11,9 +11,6 @@ on:
- "superset-websocket/**"
types: [synchronize, opened, reopened, ready_for_review]
permissions:
contents: read
# cancel previous workflow jobs for PRs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}

View File

@@ -118,7 +118,7 @@ const NewChartButtonContainer = styled.div`
${({ theme }) => css`
display: flex;
justify-content: flex-end;
padding: ${theme.sizeUnit * 3}px ${theme.sizeUnit * 2}px 0;
padding-right: ${theme.sizeUnit * 2}px;
`}
`;

View File

@@ -255,22 +255,107 @@ const EnumNamesRenderer = withJsonFormsControlProps(EnumNamesControl);
const enumNamesEntry = {
// Rank 5: higher than the default string renderer (23) so this fires
// whenever x-enumNames is present, regardless of the underlying type.
// Array-of-enum schemas are handled by ``multiEnumEntry`` below — this
// renderer only targets scalar string/number controls.
tester: rankWith(
5,
schemaMatches(s => {
const names = (s as Record<string, unknown>)['x-enumNames'];
return Array.isArray(names) && (names as unknown[]).length > 0;
}),
and(
schemaMatches(s => {
const names = (s as Record<string, unknown>)['x-enumNames'];
return Array.isArray(names) && (names as unknown[]).length > 0;
}),
schemaMatches(
s => (s as Record<string, unknown>)?.type !== 'array',
),
),
),
renderer: EnumNamesRenderer,
};
/**
* Renderer for ``{type: 'array', items: {enum: [...]}}`` schemas. Renders
* a single Antd Select with ``mode="multiple"`` (tag-style multi-select),
* matching the natural expectation of a "pick several from a list" control.
*
* Without this, the default ``PrimitiveArrayControl`` from the upstream
* library renders an "Add …" button that creates one single-select per
* element — visually wrong for an enum multi-select and unable to display
* ``items.x-enumNames`` labels.
*
* The renderer is dynamic-aware: when the host form is refreshing the
* schema (e.g. compatible options narrowing as the user picks), the Select
* shows a loading indicator without becoming disabled, so the user can
* continue editing while options refresh.
*/
function MultiEnumControl(props: ControlProps) {
const { refreshingSchema } = props.config ?? {};
const arraySchema = props.schema as Record<string, unknown>;
const itemsSchema =
(arraySchema.items as Record<string, unknown>) ??
({} as Record<string, unknown>);
const enumValues = (itemsSchema.enum as unknown[]) ?? [];
const enumNames =
(itemsSchema['x-enumNames'] as string[]) ?? enumValues.map(String);
const options = enumValues.map((value, index) => ({
value: value as string | number,
label: enumNames[index] ?? String(value),
}));
const value = Array.isArray(props.data) ? (props.data as unknown[]) : [];
const tooltip = (props.uischema?.options as Record<string, unknown>)
?.tooltip as string | undefined;
return (
<Form.Item label={props.label} tooltip={tooltip}>
<Select
mode="multiple"
value={value as (string | number)[]}
onChange={next => props.handleChange(props.path, next)}
options={options}
style={{ width: '100%' }}
disabled={!props.enabled}
loading={!!refreshingSchema}
allowClear
optionFilterProp="label"
placeholder={
(props.uischema?.options as Record<string, unknown>)
?.placeholderText as string | undefined
}
/>
</Form.Item>
);
}
const MultiEnumRenderer = withJsonFormsControlProps(MultiEnumControl);
const multiEnumEntry = {
// Rank 35: must beat upstream ``PrimitiveArrayRenderer`` (rank 30) so an
// ``array``/``items.enum`` schema renders as one Antd multi-select tag
// box instead of the "Add" repeater pattern that PrimitiveArray uses.
tester: rankWith(
35,
schemaMatches(s => {
const schema = s as Record<string, unknown>;
if (schema?.type !== 'array') return false;
const items = schema.items as Record<string, unknown> | undefined;
return (
!!items &&
Array.isArray(items.enum) &&
(items.enum as unknown[]).length > 0
);
}),
),
renderer: MultiEnumRenderer,
};
export const renderers = [
...rendererRegistryEntries,
passwordEntry,
constEntry,
readOnlyEntry,
enumNamesEntry,
multiEnumEntry,
dynamicFieldEntry,
];

View File

@@ -254,7 +254,9 @@ export default function AddSemanticViewModal({
!schema?.properties ||
Object.keys(schema.properties).length === 0
) {
// No runtime config needed — fetch views right away
// Preserve top-level runtime metadata (e.g. x-singleView) even when
// there are no form fields, then fetch views right away.
applyRuntimeSchema(schema);
fetchViews(uuid, {}, gen);
} else {
applyRuntimeSchema(schema);
@@ -456,6 +458,32 @@ export default function AddSemanticViewModal({
const viewsDisabled =
loadingViews || (!loadingViews && availableViews.length === 0);
// When ``x-singleView: true`` the runtime form fully describes a single
// semantic view (e.g. a MetricFlow cube). Hide the picker and auto-select
// whatever ``get_semantic_views`` returned so the Add button can fire
// without an extra user click.
const singleViewMode =
(runtimeSchema as Record<string, unknown> | null)?.['x-singleView'] ===
true;
useEffect(() => {
if (!singleViewMode) return;
const namesToAdd = availableViews
.filter(v => !v.already_added)
.map(v => v.name)
.sort((a, b) => a.localeCompare(b))
.slice(0, 1);
setSelectedViewNames(prev => {
if (
prev.length === namesToAdd.length &&
prev.every((n, i) => n === namesToAdd[i])
) {
return prev;
}
return namesToAdd;
});
}, [singleViewMode, availableViews]);
return (
<StandardModal
show={show}
@@ -511,8 +539,12 @@ export default function AddSemanticViewModal({
</>
)}
{/* Semantic Views — always visible once a layer is selected */}
{selectedLayerUuid && !loadingRuntime && (
{/* Semantic Views — always visible once a layer is selected, unless
the runtime schema declares ``x-singleView: true``: extensions
(e.g. MetricFlow cubes) whose runtime form fully describes a
single view set that flag so the picker disappears and the
view is auto-selected when ``get_semantic_views`` returns it. */}
{selectedLayerUuid && !loadingRuntime && !singleViewMode && (
<ModalFormField label={t('Semantic Views')}>
<Select
ariaLabel={t('Semantic views')}

View File

@@ -2072,7 +2072,6 @@ class SqlaTable(
self.database,
self.catalog,
self.schema or default_schema or "",
exclude_dataset_id=self.id,
)
# Add each predicate as a separate cache key component
extra_cache_keys.extend(rls_predicates)

View File

@@ -37,7 +37,6 @@ from superset.mcp_service.chart.schemas import (
MixedTimeseriesChartConfig,
PieChartConfig,
PivotTableChartConfig,
SortByConfig,
TableChartConfig,
XYChartConfig,
)
@@ -467,14 +466,7 @@ def map_table_config(config: TableChartConfig) -> Dict[str, Any]:
_add_adhoc_filters(form_data, config.filters)
if config.sort_by:
form_data["order_by_cols"] = [
json.dumps(
[entry.column, entry.ascending]
if isinstance(entry, SortByConfig)
else [entry, False]
)
for entry in config.sort_by
]
form_data["order_by_cols"] = config.sort_by
form_data["row_limit"] = config.row_limit

View File

@@ -22,7 +22,7 @@ Pydantic schemas for chart-related responses
from __future__ import annotations
import difflib
from datetime import datetime
from datetime import datetime, timezone
from typing import Annotated, Any, Dict, List, Literal, Protocol
import humanize
@@ -50,7 +50,7 @@ from superset.mcp_service.common.cache_schemas import (
OwnedByMeMixin,
QueryCacheControl,
)
from superset.mcp_service.common.error_schemas import ChartGenerationError, MCPBaseError
from superset.mcp_service.common.error_schemas import ChartGenerationError
from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
from superset.mcp_service.privacy import filter_user_directory_fields
from superset.mcp_service.system.schemas import (
@@ -183,8 +183,16 @@ class ChartInfo(BaseModel):
return data
class ChartError(MCPBaseError):
@field_validator("message")
class ChartError(BaseModel):
error: str = Field(..., description="Error message")
error_type: str = Field(..., description="Type of error")
timestamp: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc),
description="Error timestamp",
)
model_config = ConfigDict(ser_json_timedelta="iso8601")
@field_validator("error")
@classmethod
def sanitize_error_for_llm_context(cls, value: str) -> str:
"""Wrap error text before it is exposed to LLM context."""
@@ -798,30 +806,6 @@ class FilterConfig(BaseModel):
return self
class SortByConfig(BaseModel):
"""Sort specification with explicit direction.
Accepts either this object or a bare column-name string in `sort_by`
lists. Bare strings default to descending, which matches the
sort-by-metric "top N" pattern most commonly used for tables.
"""
model_config = ConfigDict(populate_by_name=True)
column: str = Field(
...,
min_length=1,
max_length=255,
validation_alias=AliasChoices("column", "col"),
description="Column to sort by",
)
ascending: bool = Field(
False,
description="Sort ascending. Defaults to False (descending) to match "
"the typical sort-by-metric top-N use case.",
)
# Actual chart types
class PieChartConfig(UnknownFieldCheckMixin):
model_config = ConfigDict(extra="ignore", populate_by_name=True)
@@ -1208,12 +1192,8 @@ class TableChartConfig(UnknownFieldCheckMixin):
description="Structured filters (column/op/value). "
"Do NOT use adhoc_filters or raw SQL expressions.",
)
sort_by: List[str | SortByConfig] | None = Field(
sort_by: List[str] | None = Field(
None,
description="Columns to sort by. Each entry is either a column-name "
"string (defaults to descending) or a SortByConfig object with "
"explicit `ascending`. Descending matches the sort-by-metric "
"top-N pattern most common for tables.",
validation_alias=AliasChoices("sort_by", "order_by_cols", "order_by"),
)
row_limit: int = Field(1000, description="Max rows returned", ge=1, le=50000)

View File

@@ -19,53 +19,9 @@
Enhanced error schemas for MCP chart generation with contextual information
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Dict, List
from pydantic import BaseModel, computed_field, ConfigDict, Field, model_validator
class MCPBaseError(BaseModel):
"""Base error shape for all MCP tool responses.
Provides a consistent set of fields that every error response includes,
allowing LLM clients to handle errors uniformly regardless of which tool
produced them.
"""
error_type: str = Field(
..., description="Type of error (validation, execution, etc.)"
)
message: str = Field(..., description="Human-readable error message")
timestamp: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc),
description="Error timestamp",
)
details: str | None = Field(None, description="Detailed error explanation")
suggestions: list[str] = Field(
default_factory=list, description="Actionable suggestions to fix the error"
)
error_code: str | None = Field(
None, description="Unique error code for support reference"
)
model_config = ConfigDict(ser_json_timedelta="iso8601")
@model_validator(mode="before")
@classmethod
def _compat_error_to_message(cls, data: Any) -> Any:
"""Allow construction with error= kwarg for backward compatibility."""
if isinstance(data, dict) and "error" in data and "message" not in data:
data["message"] = data.pop("error")
return data
@computed_field # type: ignore[prop-decorator]
@property
def error(self) -> str:
"""Backward-compatible field: mirrors 'message' in serialized output."""
return self.message
from pydantic import BaseModel, Field
class ColumnSuggestion(BaseModel):
@@ -111,9 +67,13 @@ class DatasetContext(BaseModel):
)
class ChartGenerationError(MCPBaseError):
class ChartGenerationError(BaseModel):
"""Enhanced error response for chart generation failures"""
error_type: str = Field(
..., description="Type of error (validation, execution, etc.)"
)
message: str = Field(..., description="High-level error message")
details: str = Field(..., description="Detailed error explanation")
validation_errors: List[ValidationError] = Field(
default_factory=list, description="Specific field validation errors"
@@ -124,9 +84,15 @@ class ChartGenerationError(MCPBaseError):
query_info: Dict[str, Any] | None = Field(
None, description="Query execution details"
)
suggestions: List[str] = Field(
default_factory=list, description="Actionable suggestions to fix the error"
)
help_url: str | None = Field(
None, description="URL to documentation for this error type"
)
error_code: str | None = Field(
None, description="Unique error code for support reference"
)
class ChartGenerationResponse(BaseModel):

View File

@@ -2060,17 +2060,12 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
default_schema = self.database.get_default_schema(self.catalog)
try:
rls_applied = False
# ``id`` lives on concrete subclasses (e.g. SqlaTable), not on
# ExploreMixin itself. getattr keeps this safe for non-dataset
# subclasses (e.g. SQL Lab Query), which have no RLS to dedupe.
self_id = getattr(self, "id", None)
for statement in parsed_script.statements:
if apply_rls(
self.database,
self.catalog,
self.schema or default_schema or "",
statement,
exclude_dataset_id=self_id,
):
rls_applied = True

View File

@@ -34,16 +34,10 @@ def apply_rls(
catalog: str | None,
schema: str,
parsed_statement: BaseSQLStatement[Any],
exclude_dataset_id: int | None = None,
) -> bool:
"""
Modify statement inplace to ensure RLS rules are applied.
:param exclude_dataset_id: When applying RLS to a virtual dataset's inner SQL,
pass the virtual dataset's id here so its own RLS isn't injected again
on top of the outer-WHERE application (avoids double-apply when the
virtual dataset's table_name collides with a table in its own SQL — for
example, after converting a physical dataset with RLS to virtual).
:returns: True if any RLS predicates were actually applied, False otherwise.
"""
# There are two ways to insert RLS: either replacing the table with a subquery
@@ -61,7 +55,6 @@ def apply_rls(
table,
database,
database.get_default_catalog(),
exclude_dataset_id=exclude_dataset_id,
)
if predicate
]
@@ -75,7 +68,6 @@ def get_predicates_for_table(
table: Table,
database: Database,
default_catalog: str | None,
exclude_dataset_id: int | None = None,
) -> list[str]:
"""
Get the RLS predicates for a table.
@@ -95,21 +87,18 @@ def get_predicates_for_table(
SqlaTable.catalog.is_(None),
)
filters = [
SqlaTable.database_id == database.id,
catalog_predicate,
SqlaTable.schema == table.schema,
SqlaTable.table_name == table.table,
]
# When applying RLS to a virtual dataset's inner SQL, skip a match against
# the dataset itself — its RLS is already applied on the outer WHERE via
# get_sqla_row_level_filters(). Without this, a virtual dataset whose
# table_name happens to equal a table in its own SQL (e.g. after a
# physical→virtual conversion) double-applies its own predicates.
if exclude_dataset_id is not None:
filters.append(SqlaTable.id != exclude_dataset_id)
dataset = db.session.query(SqlaTable).filter(and_(*filters)).one_or_none()
dataset = (
db.session.query(SqlaTable)
.filter(
and_(
SqlaTable.database_id == database.id,
catalog_predicate,
SqlaTable.schema == table.schema,
SqlaTable.table_name == table.table,
)
)
.one_or_none()
)
if not dataset:
return []
@@ -141,7 +130,6 @@ def collect_rls_predicates_for_sql(
database: Database,
catalog: str | None,
schema: str,
exclude_dataset_id: int | None = None,
) -> list[str]:
"""
Collect all RLS predicates that would be applied to tables in the given SQL.
@@ -153,9 +141,6 @@ def collect_rls_predicates_for_sql(
:param database: The database the query runs against
:param catalog: The default catalog for the query
:param schema: The default schema for the query
:param exclude_dataset_id: Mirror of the same parameter on apply_rls — pass
the virtual dataset's id so its self-match is excluded from the cache key
(kept consistent with what's actually applied at query time).
:return: List of RLS predicate strings that would be applied
"""
from superset.sql.parse import SQLScript
@@ -176,7 +161,6 @@ def collect_rls_predicates_for_sql(
table,
database,
default_catalog,
exclude_dataset_id=exclude_dataset_id,
)
}
)

View File

@@ -44,7 +44,6 @@ from superset.mcp_service.chart.schemas import (
ColumnRef,
FilterConfig,
LegendConfig,
SortByConfig,
TableChartConfig,
XYChartConfig,
)
@@ -296,7 +295,7 @@ class TestMapTableConfig:
assert result["adhoc_filters"][2]["comparator"] == ["Sports", "Racing"]
def test_map_table_config_with_sort(self) -> None:
"""Bare strings default to descending."""
"""Test table config mapping with sort"""
config = TableChartConfig(
chart_type="table",
columns=[ColumnRef(name="product")],
@@ -304,26 +303,7 @@ class TestMapTableConfig:
)
result = map_table_config(config)
assert result["order_by_cols"] == ['["product", false]', '["revenue", false]']
def test_map_table_config_with_sort_direction(self) -> None:
"""SortByConfig entries honor the explicit ascending flag."""
config = TableChartConfig(
chart_type="table",
columns=[ColumnRef(name="product")],
sort_by=[
SortByConfig(column="product", ascending=True),
SortByConfig(column="revenue", ascending=False),
"category",
],
)
result = map_table_config(config)
assert result["order_by_cols"] == [
'["product", true]',
'["revenue", false]',
'["category", false]',
]
assert result["order_by_cols"] == ["product", "revenue"]
def test_map_table_config_ag_grid_table(self) -> None:
"""Test table config mapping with AG Grid Interactive Table viz_type"""

View File

@@ -285,18 +285,8 @@ def test_apply_rls(mocker: MockerFixture) -> None:
get_predicates_for_table.assert_has_calls(
[
mocker.call(
Table("t1", "public", "examples"),
database,
"examples",
exclude_dataset_id=None,
),
mocker.call(
Table("t2", "public", "examples"),
database,
"examples",
exclude_dataset_id=None,
),
mocker.call(Table("t1", "public", "examples"), database, "examples"),
mocker.call(Table("t2", "public", "examples"), database, "examples"),
]
)
@@ -339,27 +329,3 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None:
dataset.get_sqla_row_level_filters.assert_called_once_with(
include_global_guest_rls=False
)
def test_get_predicates_for_table_excludes_self(mocker: MockerFixture) -> None:
"""
When ``exclude_dataset_id`` is supplied, the lookup query must add an
``id != exclude_dataset_id`` filter so a virtual dataset whose
``table_name`` matches a table referenced inside its own SQL doesn't get
its own RLS injected into the inner SQL (would double-apply on top of the
outer WHERE). Regression test for the physical→virtual conversion bug.
"""
database = mocker.MagicMock()
db = mocker.patch("superset.utils.rls.db")
db.session.query().filter().one_or_none.return_value = None
table = Table("orders", "public", "examples")
assert (
get_predicates_for_table(table, database, "examples", exclude_dataset_id=42)
== []
)
# The filter call should have received four base filters plus the exclusion
# filter, i.e. five total positional args inside and_().
filter_call = db.session.query().filter.call_args
and_clause = filter_call.args[0]
assert len(and_clause.clauses) == 5