Compare commits

..

2 Commits

Author SHA1 Message Date
Amin Ghadersohi
22d1d3a878 refactor(mcp): extract _add_xy_limits helper and move series_limit tests
Extract row/series limit assignment into a private helper to keep
map_xy_config within the McCabe complexity threshold, removing the
noqa: C901 suppression. Move series_limit schema tests out of
TestRowLimit and into a dedicated TestSeriesLimit class.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-21 01:13:45 +00:00
Amin Ghadersohi
188fea221d feat(mcp): add series_limit to generate_chart XY config
Add series_limit parameter to XYChartConfig so callers can control
how many distinct series (group_by values) the chart renders.  When
set, the value is forwarded as form_data["series_limit"], which
Superset's echarts timeseries plugins interpret to cap the number of
rendered series lines/bars.
2026-05-21 01:13:45 +00:00
16 changed files with 534 additions and 630 deletions

View File

@@ -62,11 +62,6 @@ updates:
- package-ecosystem: "pip"
directory: "/"
open-pull-requests-limit: 10
# Bump the lower bound to the new version, not just widen the upper
# bound. Without this, a `sqlglot>=28.10.0, <29` constraint upgraded
# to `<30` would keep the stale lower bound forever, dragging
# transitively-resolved versions with it. See #40186 (review thread).
versioning-strategy: increase
schedule:
interval: "weekly"
labels:

File diff suppressed because it is too large Load Diff

View File

@@ -191,7 +191,7 @@
"json-stringify-pretty-compact": "^2.0.0",
"lodash": "^4.18.1",
"mapbox-gl": "^3.24.0",
"markdown-to-jsx": "^9.8.1",
"markdown-to-jsx": "^9.8.0",
"match-sorter": "^8.3.0",
"memoize-one": "^5.2.1",
"mousetrap": "^1.6.5",
@@ -350,7 +350,7 @@
"lightningcss": "^1.32.0",
"mini-css-extract-plugin": "^2.10.2",
"open-cli": "^9.0.0",
"oxlint": "^1.66.0",
"oxlint": "^1.65.0",
"po2json": "^0.4.5",
"prettier": "3.8.3",
"prettier-plugin-packagejson": "^3.0.2",

View File

@@ -29,7 +29,7 @@
"acorn": "^8.16.0",
"d3-array": "^3.2.4",
"lodash": "^4.18.1",
"zod": "^4.4.3"
"zod": "^4.4.1"
},
"peerDependencies": {
"@apache-superset/core": "*",

View File

@@ -97,7 +97,7 @@ export function createWrapper(options?: Options) {
}
if (useDnd) {
// @ts-ignore react-dnd's DndProviderProps omits `children` under React 18 types
// @ts-expect-error react-dnd types not updated for React 18
result = <DndProvider backend={HTML5Backend}>{result}</DndProvider>;
}

View File

@@ -117,7 +117,6 @@ type LaunchQueue = {
const pendingTimerIds = new Set<ReturnType<typeof setTimeout>>();
const MAX_CONSUMER_POLL_ATTEMPTS = 50;
const consumerPromises: Promise<void>[] = [];
// Defer the consumer call to a macrotask so it doesn't fire synchronously inside
// the component's useEffect — calling it inline deadlocks Jest because the
@@ -132,11 +131,7 @@ const setupLaunchQueue = (fileHandle: MockFileHandle | null = null) => {
if (fileHandle) {
const id = setTimeout(() => {
pendingTimerIds.delete(id);
consumerPromises.push(
Promise.resolve(consumer({ files: [fileHandle] })).then(
() => undefined,
),
);
consumer({ files: [fileHandle] });
}, 0);
pendingTimerIds.add(id);
}
@@ -170,19 +165,9 @@ beforeEach(() => {
.launchQueue;
});
afterEach(async () => {
afterEach(() => {
pendingTimerIds.forEach(id => clearTimeout(id));
pendingTimerIds.clear();
if (consumerPromises.length > 0) {
const results = await Promise.allSettled(consumerPromises);
results.forEach(r => {
if (r.status === 'rejected') {
// eslint-disable-next-line no-console
console.warn('LaunchQueue consumer rejected:', r.reason);
}
});
consumerPromises.length = 0;
}
delete (window as unknown as Window & { launchQueue?: LaunchQueue })
.launchQueue;
});

View File

@@ -22,15 +22,7 @@ from typing import Any, TYPE_CHECKING
from flask import current_app
from flask_babel import gettext as _
from marshmallow import (
EXCLUDE,
fields,
post_load,
Schema,
validate,
validates_schema,
ValidationError,
)
from marshmallow import EXCLUDE, fields, post_load, Schema, validate
from marshmallow.validate import Length, Range
from marshmallow_union import Union
@@ -945,42 +937,6 @@ class ChartDataPostProcessingOperationSchema(Schema):
},
)
# Map post-processing operation -> its options schema, for operations that
# declare one. Operations without a dedicated schema are not structurally
# validated here.
_OPTIONS_SCHEMAS: dict[str, type[Schema]] = {
"aggregate": ChartDataAggregateOptionsSchema,
"rolling": ChartDataRollingOptionsSchema,
"select": ChartDataSelectOptionsSchema,
"sort": ChartDataSortOptionsSchema,
"contribution": ChartDataContributionOptionsSchema,
"prophet": ChartDataProphetOptionsSchema,
"boxplot": ChartDataBoxplotOptionsSchema,
"pivot": ChartDataPivotOptionsSchema,
"geohash_decode": ChartDataGeohashDecodeOptionsSchema,
"geohash_encode": ChartDataGeohashEncodeOptionsSchema,
"geodetic_parse": ChartDataGeodeticParseOptionsSchema,
}
@validates_schema
def validate_options(self, data: dict[str, Any], **kwargs: Any) -> None:
"""Validate ``options`` against the operation's option schema.
Validation is lenient (unknown keys are ignored) so it surfaces wrong
types / out-of-range values on declared fields without rejecting
payloads that carry extra keys.
"""
operation = data.get("operation")
options = data.get("options")
if not isinstance(operation, str) or not isinstance(options, dict):
return
schema_cls = self._OPTIONS_SCHEMAS.get(operation)
if schema_cls is None:
return
errors = schema_cls(unknown=EXCLUDE).validate(options)
if errors:
raise ValidationError({"options": errors})
class ChartDataFilterSchema(Schema):
col = fields.Raw(

View File

@@ -21,11 +21,10 @@ import logging
import re
from datetime import datetime
from re import Pattern
from typing import Any, Callable, Optional, TYPE_CHECKING
from typing import Any, Optional, TYPE_CHECKING
from flask_babel import gettext as __
from sqlalchemy import types
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION, ENUM, INTERVAL, JSON
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION, ENUM, JSON
from sqlalchemy.dialects.postgresql.base import PGInspector
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.engine.url import URL
@@ -136,34 +135,6 @@ def parse_options(connect_args: dict[str, Any]) -> dict[str, str]:
return {token[0]: token[1] for token in tokens}
def _normalize_interval(v: Any) -> Optional[float]:
"""Convert PostgreSQL INTERVAL values to milliseconds.
psycopg2 and psycopg3 always return INTERVAL values as datetime.timedelta
objects. We convert to milliseconds so users can apply the built-in
"DURATION" number format for human-readable display (e.g.,
"1d 2h 30m 45s") and so the values participate cleanly in numeric
aggregations in bar/pie charts.
Returns None for the NULL case (preserves NULL semantics) and for any
unexpected non-timedelta type (avoids producing a mixed-type column
when an unfamiliar driver surfaces something other than timedelta).
"""
if v is None:
return None
if hasattr(v, "total_seconds"):
return v.total_seconds() * 1000
# Defensive: psycopg2/3 should always hand us a timedelta. If a future
# driver doesn't, surface the surprise in the logs rather than silently
# dropping the value so operators can diagnose it.
logger.warning(
"Cannot normalize PostgreSQL INTERVAL value of type %s to numeric; "
"returning None.",
type(v).__name__,
)
return None
class PostgresBaseEngineSpec(BaseEngineSpec):
"""Abstract class for Postgres 'like' databases"""
@@ -555,17 +526,8 @@ class PostgresEngineSpec(BasicParametersMixin, PostgresBaseEngineSpec):
ENUM(),
GenericDataType.STRING,
),
(
re.compile(r"^interval", re.IGNORECASE),
INTERVAL(),
GenericDataType.NUMERIC,
),
)
column_type_mutators: dict[types.TypeEngine, Callable[[Any], Any]] = {
INTERVAL: _normalize_interval,
}
@classmethod
def get_schema_from_engine_params(
cls,

View File

@@ -648,6 +648,12 @@ def _resolve_default_x_axis(
return config.model_copy(update={"x": ColumnRef(name=dataset.main_dttm_col)})
def _add_xy_limits(form_data: Dict[str, Any], config: XYChartConfig) -> None:
form_data["row_limit"] = config.row_limit
if config.series_limit is not None:
form_data["series_limit"] = config.series_limit
def map_xy_config(
config: XYChartConfig, dataset_id: int | str | None = None
) -> Dict[str, Any]:
@@ -712,7 +718,7 @@ def map_xy_config(
if x_is_temporal:
_ensure_temporal_adhoc_filter(form_data, config.x.name)
form_data["row_limit"] = config.row_limit
_add_xy_limits(form_data, config)
# Add stacking configuration
if getattr(config, "stacked", False):

View File

@@ -1304,6 +1304,16 @@ class XYChartConfig(UnknownFieldCheckMixin):
"Do NOT use adhoc_filters or raw SQL expressions.",
)
row_limit: int = Field(10000, description="Max data points", ge=1, le=50000)
series_limit: int | None = Field(
None,
description=(
"Max number of series to show when group_by is set. "
"Limits the distinct values rendered as separate lines/bars. "
"Only applies when group_by is specified."
),
ge=1,
le=10000,
)
@field_validator("group_by", mode="before")
@classmethod

View File

@@ -53,11 +53,7 @@ from superset_core.queries.models import (
)
from superset import security_manager
from superset.exceptions import (
SupersetException,
SupersetParseError,
SupersetSecurityException,
)
from superset.exceptions import SupersetParseError, SupersetSecurityException
from superset.explorables.base import TimeGrainDict
from superset.jinja_context import BaseTemplateProcessor, get_template_processor
from superset.models.helpers import (
@@ -103,14 +99,6 @@ class SqlTablesMixin: # pylint: disable=too-few-public-methods
)
except (SupersetSecurityException, SupersetParseError, TemplateError):
return []
except SupersetException as ex:
# Jinja macros such as ``{{ dataset(id) }}`` or ``{{ metric(...) }}``
# may reference resources that no longer exist (e.g. a deleted
# dataset). Surfacing the failure here would break list endpoints
# that include ``sql_tables`` in their payload, hiding every saved
# query from the user. Treat it as a parse failure instead.
logger.warning("Unable to extract tables from SQL via Jinja: %s", ex)
return []
class Query(

View File

@@ -152,53 +152,3 @@ def test_time_grain_validation_with_config_addons(app_context: None) -> None:
}
result = schema.load(custom_data)
assert result["time_grain"] == "PT10M"
def test_post_processing_operation_validates_options(app_context: None) -> None:
"""options are validated against the operation's option schema (leniently)."""
from superset.charts.schemas import ChartDataPostProcessingOperationSchema
schema = ChartDataPostProcessingOperationSchema()
# Valid prophet options load.
schema.load(
{
"operation": "prophet",
"options": {
"time_grain": "P1D",
"periods": 7,
"confidence_interval": 0.8,
},
}
)
# Out-of-range confidence_interval (must be 0-1) on a declared field is
# rejected.
with pytest.raises(ValidationError) as exc_info:
schema.load(
{
"operation": "prophet",
"options": {
"time_grain": "P1D",
"periods": 7,
"confidence_interval": 2.0,
},
}
)
assert "options" in exc_info.value.messages
# Extra/unknown keys are tolerated (lenient validation).
schema.load(
{
"operation": "prophet",
"options": {
"time_grain": "P1D",
"periods": 7,
"confidence_interval": 0.8,
"some_future_option": True,
},
}
)
# An operation without a dedicated schema accepts arbitrary options.
schema.load({"operation": "flatten", "options": {"anything": [1, 2, 3]}})

View File

@@ -15,14 +15,14 @@
# specific language governing permissions and limitations
# under the License.
from datetime import datetime, timedelta
from datetime import datetime
from typing import Any, Optional
from unittest.mock import MagicMock
import pytest
from pytest_mock import MockerFixture
from sqlalchemy import column, types
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION, ENUM, INTERVAL, JSON
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION, ENUM, JSON
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.engine.url import make_url
@@ -87,8 +87,6 @@ def test_convert_dttm(
("TIME", types.Time, None, GenericDataType.TEMPORAL, True),
# Boolean
("BOOLEAN", types.Boolean, None, GenericDataType.BOOLEAN, False),
# Interval (mapped to NUMERIC for chart rendering)
("INTERVAL", INTERVAL, None, GenericDataType.NUMERIC, False),
],
)
def test_get_column_spec(
@@ -368,38 +366,3 @@ class TestRedshiftDetection:
spec.update_params_from_encrypted_extra(database, params)
assert "pool_events" not in params
def test_interval_type_mutator() -> None:
"""
DB Eng Specs (postgres): Test INTERVAL type mutator
INTERVAL values are converted to milliseconds so users can apply
the built-in "DURATION" number format for human-readable display.
"""
mutator = spec.column_type_mutators[INTERVAL]
# Timedelta conversion — the only path psycopg2/psycopg3 actually
# exercises. Result is in milliseconds for compatibility with the
# DURATION formatter.
td = timedelta(days=1, hours=2, minutes=30, seconds=45)
assert mutator(td) == 95445000.0 # (1*86400 + 2*3600 + 30*60 + 45) * 1000
# Zero duration
assert mutator(timedelta(0)) == 0.0
# Negative interval
assert mutator(timedelta(days=-1)) == -86400000.0
# None preserves NULL semantics (not converted to 0)
assert mutator(None) is None
# Unexpected non-timedelta types fall through to the defensive
# `return None` (and emit a warning) rather than producing a
# mixed-type column.
assert mutator("1 day 02:30:45") is None
assert mutator("P1DT2H30M45S") is None
assert mutator(12345) is None
assert mutator(True) is None
assert mutator([1, 2, 3]) is None
assert mutator({"days": 1}) is None

View File

@@ -485,6 +485,47 @@ class TestRowLimit:
)
class TestSeriesLimit:
"""Test series_limit field on XYChartConfig."""
def test_xy_chart_series_limit_default_none(self) -> None:
"""Test that XYChartConfig series_limit defaults to None."""
config = XYChartConfig(
chart_type="xy",
x=ColumnRef(name="date"),
y=[ColumnRef(name="revenue", aggregate="SUM")],
)
assert config.series_limit is None
def test_xy_chart_series_limit_custom(self) -> None:
"""Test that XYChartConfig accepts a custom series_limit."""
config = XYChartConfig(
chart_type="xy",
x=ColumnRef(name="date"),
y=[ColumnRef(name="revenue", aggregate="SUM")],
group_by=[ColumnRef(name="region")],
series_limit=5,
)
assert config.series_limit == 5
def test_xy_chart_series_limit_validation(self) -> None:
"""Test that XYChartConfig rejects invalid series_limit values."""
with pytest.raises(ValidationError):
XYChartConfig(
chart_type="xy",
x=ColumnRef(name="date"),
y=[ColumnRef(name="revenue", aggregate="SUM")],
series_limit=0,
)
with pytest.raises(ValidationError):
XYChartConfig(
chart_type="xy",
x=ColumnRef(name="date"),
y=[ColumnRef(name="revenue", aggregate="SUM")],
series_limit=10001,
)
class TestTableChartConfigExtraFields:
"""Test TableChartConfig rejects unknown fields."""

View File

@@ -804,6 +804,38 @@ class TestMapXYConfig:
assert result["row_limit"] == 10000
@patch("superset.mcp_service.chart.chart_utils.is_column_truly_temporal")
def test_map_xy_config_series_limit(self, mock_is_temporal) -> None:
"""Test that series_limit is mapped to form_data when set."""
mock_is_temporal.return_value = True
config = XYChartConfig(
chart_type="xy",
x=ColumnRef(name="date"),
y=[ColumnRef(name="revenue", aggregate="SUM")],
kind="line",
group_by=[ColumnRef(name="region")],
series_limit=10,
)
result = map_xy_config(config)
assert result["series_limit"] == 10
@patch("superset.mcp_service.chart.chart_utils.is_column_truly_temporal")
def test_map_xy_config_no_series_limit_by_default(self, mock_is_temporal) -> None:
"""Test that series_limit is omitted from form_data when not set."""
mock_is_temporal.return_value = True
config = XYChartConfig(
chart_type="xy",
x=ColumnRef(name="date"),
y=[ColumnRef(name="revenue", aggregate="SUM")],
kind="line",
)
result = map_xy_config(config)
assert "series_limit" not in result
@patch("superset.mcp_service.chart.chart_utils.is_column_truly_temporal")
def test_map_xy_config_saved_metric(self, mock_is_temporal: Any) -> None:
"""Test XY config with saved metric emits string in metrics list"""

View File

@@ -21,14 +21,8 @@ from flask_appbuilder import Model
from jinja2.exceptions import TemplateError
from pytest_mock import MockerFixture
from superset.commands.dataset.exceptions import DatasetNotFoundError
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import (
SupersetParseError,
SupersetSecurityException,
SupersetTemplateException,
)
from superset.models import sql_lab as sql_lab_module
from superset.exceptions import SupersetParseError, SupersetSecurityException
from superset.models.sql_lab import Query, SavedQuery
@@ -40,61 +34,34 @@ from superset.models.sql_lab import Query, SavedQuery
],
)
@pytest.mark.parametrize(
("exception", "should_warn"),
"exception",
[
# Original silent handler — security/parse/template errors are
# expected during list rendering and produce no log noise.
(
SupersetSecurityException(
SupersetError(
error_type=SupersetErrorType.QUERY_SECURITY_ACCESS_ERROR,
message="",
level=ErrorLevel.ERROR,
)
),
False,
SupersetSecurityException(
SupersetError(
error_type=SupersetErrorType.QUERY_SECURITY_ACCESS_ERROR,
message="",
level=ErrorLevel.ERROR,
)
),
(
SupersetParseError(
sql="INVALID SQL",
message="Invalid SQL syntax",
),
False,
SupersetParseError(
sql="INVALID SQL",
message="Invalid SQL syntax",
),
(TemplateError, False),
# ``{{ dataset(id) }}`` referencing a deleted dataset previously
# bubbled up through ``sql_tables`` and broke saved-query list
# endpoints (see issue #32771). The new handler swallows it but
# logs a warning so the underlying breakage is still observable —
# pinned here so a future refactor that collapses the case into
# the silent handler fails this test.
(DatasetNotFoundError("Dataset 1 not found!"), True),
(SupersetTemplateException("Template rendering failed"), True),
TemplateError,
],
)
def test_sql_tables_mixin_sql_tables_exception(
klass: type[Model],
exception: Exception,
should_warn: bool,
mocker: MockerFixture,
) -> None:
mocker.patch(
"superset.models.sql_lab.process_jinja_sql",
side_effect=exception,
)
warning_spy = mocker.spy(sql_lab_module.logger, "warning")
assert klass(sql="SELECT 1", database=MagicMock()).sql_tables == []
if should_warn:
assert warning_spy.call_count == 1, (
f"{type(exception).__name__} should hit the warning-logging "
"handler; if this fails, the case was likely collapsed into "
"the silent first-handler clause."
)
else:
warning_spy.assert_not_called()
@pytest.mark.parametrize(
"klass",