Compare commits

..

7 Commits

Author SHA1 Message Date
Evan
6bc90dc8b9 test(email): add return type annotations to setUp/tearDown
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 15:22:39 -07:00
Evan
0b40d8d438 test(email): restore mutated SMTP config keys in TestEmailSmtp teardown
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:06:16 -07:00
Evan
c76606f48a docs(tests): add docstring to _smtp_config test helper
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:06:16 -07:00
Evan
5288083a79 chore(config): correct SMTP_SSL_SERVER_AUTH comment to SERVER_AUTH
ssl.create_default_context() defaults to ssl.Purpose.SERVER_AUTH, which is
correct for Superset acting as an SMTP client verifying the mail server's
certificate. The comment incorrectly referenced CLIENT_AUTH.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:06:16 -07:00
Evan
7fc50614e8 test(config): cover SMTP_SSL_SERVER_AUTH enabled behavior
Add unit tests in config_test.py that exercise send_mime_email and assert
ssl.create_default_context() is called and its context is threaded through
to SMTP_SSL and starttls when SMTP_SSL_SERVER_AUTH=True, plus the opt-out
path passing context=None. Complements the existing module-level default
assertion.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:06:16 -07:00
Evan
1c994194e3 test(email): explicitly opt out of SMTP_SSL_SERVER_AUTH in test_send_mime_ssl
The new default for SMTP_SSL_SERVER_AUTH is True. test_send_mime_ssl
tests the no-server-auth code path and must explicitly set the flag to
False to avoid asserting context=None when the default now produces an
SSL context.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-15 14:06:16 -07:00
Claude Code
6b466225fb chore(config): default SMTP_SSL_SERVER_AUTH to True
Change the shipped default for SMTP_SSL_SERVER_AUTH from False to True so
STARTTLS/SSL connections to the SMTP server validate the server's TLS
certificate against the system CA store out of the box.

The setting remains overridable: operators using a self-signed or otherwise
untrusted certificate can restore the previous behavior by setting
SMTP_SSL_SERVER_AUTH = False in superset_config.py.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:06:12 -07:00
12 changed files with 155 additions and 406 deletions

View File

@@ -135,6 +135,18 @@ Runbook to adopt:
2. Set that value on the tunnel's `server_host_key` (via the database/SSH tunnel API or UI payload).
3. Optionally set `SSH_TUNNEL_STRICT_HOST_KEY_CHECKING = True` in `superset_config.py` to require host-key verification on all tunnels.
### SMTP server certificate validation enabled by default
`SMTP_SSL_SERVER_AUTH` now defaults to `True` (previously `False`). With this default, STARTTLS/SSL connections to the configured SMTP server validate the server's TLS certificate against the system trusted CA store. This makes outbound email (alerts and reports) verify the mail server's identity out of the box.
If your SMTP server presents a self-signed certificate, or a certificate that is not trusted by the system CA store, email delivery may now fail with a certificate verification error. To restore the previous behavior of skipping certificate validation, set the following in `superset_config.py`:
```python
SMTP_SSL_SERVER_AUTH = False
```
The recommended fix is to add the SMTP server's certificate (or its issuing CA) to the system trust store rather than disabling validation.
### Dataset import validates catalog against the target connection
Importing a dataset now validates the `catalog` field against the target database connection. When the connection has multi-catalog disabled (`allow_multi_catalog` off) and the dataset's catalog is not the connection's default catalog, the import fails instead of silently persisting the non-default catalog. This matches the validation already enforced on the dataset update path and prevents imported datasets from querying an unintended database.

View File

@@ -1454,21 +1454,6 @@ class ChartDataQueryObjectSchema(Schema):
allow_none=True,
)
@post_load
def rename_deprecated_fields(
self, data: dict[str, Any], **kwargs: Any
) -> dict[str, Any]:
_renames = (
("groupby", "columns"),
("granularity_sqla", "granularity"),
("timeseries_limit", "series_limit"),
("timeseries_limit_metric", "series_limit_metric"),
)
for old, new in _renames:
if value := data.pop(old, None):
data[new] = value
return data
class ChartDataQueryContextSchema(Schema):
query_context_factory: QueryContextFactory | None = None

View File

@@ -358,11 +358,6 @@ class BaseReportState:
dashboard_id=str(self._report_schedule.dashboard.uuid),
state=dashboard_state,
).run()
# Commit the permalink immediately so Playwright's separate DB connection
# can resolve the URL. CreateDashboardPermalinkCommand only flushes when
# called inside an outer @transaction(), leaving the row invisible to
# other connections until we explicitly commit here.
db.session.commit() # pylint: disable=consider-using-transaction
return get_url_path(
"Superset.dashboard_permalink",

View File

@@ -1731,9 +1731,14 @@ SMTP_USER = "superset"
SMTP_PORT = 25
SMTP_PASSWORD = "superset" # noqa: S105
SMTP_MAIL_FROM = "superset@superset.com"
# If True creates a default SSL context with ssl.Purpose.CLIENT_AUTH using the
# default system root CA certificates.
SMTP_SSL_SERVER_AUTH = False
# If True creates a default SSL context with ssl.Purpose.SERVER_AUTH using the
# default system root CA certificates. This makes STARTTLS/SSL connections to the
# SMTP server validate the server's certificate against the trusted CA store.
# Defaults to True so the mail server identity is verified out of the box. Set to
# False to restore the previous behavior of skipping certificate validation (for
# example, when using a self-signed certificate that is not in the system CA
# store).
SMTP_SSL_SERVER_AUTH = True
ENABLE_CHUNK_ENCODING = False
# Whether to bump the logging level to ERROR on the flask_appbuilder package

View File

@@ -195,7 +195,6 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
allows_hidden_cc_in_orderby = True
supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True
supports_dynamic_schema = True
# when editing the database, mask this field in `encrypted_extra`
# pylint: disable=invalid-name
@@ -741,41 +740,11 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
catalog: str | None = None,
schema: str | None = None,
) -> tuple[URL, dict[str, Any]]:
if not uri.host:
# Triple-slash form (e.g., bigquery:///project): project is in database.
default_catalog = uri.database
default_schema = None
else:
# Standard forms: bigquery://project, bigquery://project/dataset
default_catalog = uri.host
default_schema = uri.database or None # coerce empty string to None
uri = uri.set(
host=catalog or default_catalog,
database=schema or default_schema,
)
if catalog:
uri = uri.set(host=catalog, database="")
return uri, connect_args
@classmethod
def get_schema_from_engine_params(
cls,
sqlalchemy_uri: URL,
connect_args: dict[str, Any],
) -> str | None:
"""
Return the default dataset encoded in a ``bigquery://project/dataset`` URI.
The BigQuery SQLAlchemy driver uses the URL ``database`` component as the
default dataset, but only when ``host`` (the project) is also present.
The triple-slash form ``bigquery:///project`` puts the project in
``database`` with no host, so we guard against misidentifying it as a
dataset.
"""
if sqlalchemy_uri.host and sqlalchemy_uri.database:
return sqlalchemy_uri.database
return None
@classmethod
def get_allow_cost_estimate(cls, extra: dict[str, Any]) -> bool:
return True

View File

@@ -342,43 +342,11 @@ PermissionModelView.include_route_methods = {RouteMethod.LIST}
ViewMenuModelView.include_route_methods = {RouteMethod.LIST}
# Keys on an adhoc column/metric that a guest may legitimately change through a
# supported native filter, and which therefore must not count as payload
# tampering. The time grain of a temporal x-axis is baked into its `BASE_AXIS`
# column by `normalizeTimeColumn` on the frontend (it copies
# `extras.time_grain_sqla` onto the column), so a Time Grain filter alters the
# column payload without changing which data is queried.
GUEST_OVERRIDABLE_VALUE_KEYS = frozenset({"timeGrain"})
def _strip_overridable_keys(value: Any) -> Any:
"""
Recursively drop guest-overridable keys from a value.
Adhoc columns/metrics can be nested inside sequences (e.g. an ``orderby``
entry is a ``(column, bool)`` tuple), so the overridable keys must be
stripped at every level rather than only from a top-level dict.
"""
if isinstance(value, dict):
return {
key: _strip_overridable_keys(val)
for key, val in value.items()
if key not in GUEST_OVERRIDABLE_VALUE_KEYS
}
if isinstance(value, (list, tuple)):
return [_strip_overridable_keys(item) for item in value]
return value
def freeze_value(value: Any) -> str:
"""
Used to compare column and metric sets.
Guest-overridable keys (e.g. the time grain baked into a temporal x-axis
column) are dropped so that legitimate native-filter changes don't read as
payload tampering.
"""
return json.dumps(_strip_overridable_keys(value), sort_keys=True)
return json.dumps(value, sort_keys=True)
def _native_filter_allowed_targets(

View File

@@ -305,13 +305,6 @@ class TestBigQueryDbEngineSpec(SupersetTestCase):
@mock.patch("superset.models.core.Database.db_engine_spec", BigQueryEngineSpec)
@mock.patch("sqlalchemy_bigquery._helpers.create_bigquery_client", mock.Mock)
@mock.patch(
"superset.db_engine_specs.bigquery.BigQueryEngineSpec.adjust_engine_params",
new=lambda uri, connect_args, catalog=None, schema=None, **kw: (
uri,
connect_args,
),
)
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_calculated_column_in_order_by(self):
table = self.get_table(name="birth_names")

View File

@@ -37,9 +37,18 @@ logger = logging.getLogger(__name__)
class TestEmailSmtp(SupersetTestCase):
def setUp(self):
SMTP_CONFIG_KEYS = ("SMTP_SSL", "SMTP_SSL_SERVER_AUTH", "SMTP_STARTTLS")
def setUp(self) -> None:
self._original_smtp_config = {
key: current_app.config[key] for key in self.SMTP_CONFIG_KEYS
}
current_app.config["SMTP_SSL"] = False
def tearDown(self) -> None:
current_app.config.update(self._original_smtp_config)
super().tearDown()
@mock.patch("superset.utils.core.send_mime_email")
def test_send_smtp(self, mock_send_mime):
attachment = tempfile.NamedTemporaryFile()
@@ -208,6 +217,7 @@ class TestEmailSmtp(SupersetTestCase):
@mock.patch("smtplib.SMTP")
def test_send_mime_ssl(self, mock_smtp, mock_smtp_ssl):
current_app.config["SMTP_SSL"] = True
current_app.config["SMTP_SSL_SERVER_AUTH"] = False
mock_smtp.return_value = mock.Mock()
mock_smtp_ssl.return_value = mock.Mock()
utils.send_mime_email(

View File

@@ -213,53 +213,6 @@ def test_chart_data_query_object_schema_time_grain_sqla_validation(
assert result["extras"]["time_grain_sqla"] is None
def test_chart_data_query_object_schema_deprecated_fields_renamed(
app_context: None,
) -> None:
"""Deprecated query object fields are renamed to their canonical names."""
schema = ChartDataQueryObjectSchema()
# groupby alone → becomes columns
result = schema.load({"groupby": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# groupby overwrites columns when both are provided
result = schema.load({"groupby": ["region"], "columns": ["country_name"]})
assert result.get("columns") == ["region"]
assert "groupby" not in result
# empty groupby is discarded; existing columns is preserved
result = schema.load({"groupby": [], "columns": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# null groupby is discarded; existing columns is preserved (allow_none=True)
result = schema.load({"groupby": None, "columns": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# no groupby → columns passes through unchanged
result = schema.load({"columns": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# granularity_sqla → granularity
result = schema.load({"granularity_sqla": "ds"})
assert result.get("granularity") == "ds"
assert "granularity_sqla" not in result
# timeseries_limit → series_limit
result = schema.load({"timeseries_limit": 5})
assert result.get("series_limit") == 5
assert "timeseries_limit" not in result
# timeseries_limit_metric → series_limit_metric
result = schema.load({"timeseries_limit_metric": "count"})
assert result.get("series_limit_metric") == "count"
assert "timeseries_limit_metric" not in result
@pytest.mark.parametrize(
"app",
[{"TIME_GRAIN_ADDONS": {"PT10M": "10 minutes"}}],

View File

@@ -312,3 +312,123 @@ def test_full_setting(
assert dttm_col.is_dttm
assert dttm_col.python_date_format == "epoch_s"
assert dttm_col.expression == "CAST(dttm as INTEGER)"
def test_smtp_ssl_server_auth_defaults_to_true() -> None:
"""
The shipped default for SMTP_SSL_SERVER_AUTH validates the SMTP server's
TLS certificate. Operators can still opt out by overriding it to False.
"""
from superset import config
assert config.SMTP_SSL_SERVER_AUTH is True
def _smtp_config(**overrides: Any) -> dict[str, Any]:
"""
Build a minimal SMTP config dict for ``send_mime_email`` tests, with
plaintext transport defaults; keyword ``overrides`` replace any key.
"""
config = {
"SMTP_HOST": "localhost",
"SMTP_PORT": 25,
"SMTP_USER": "",
"SMTP_PASSWORD": "",
"SMTP_STARTTLS": False,
"SMTP_SSL": False,
"SMTP_SSL_SERVER_AUTH": True,
}
config.update(overrides)
return config
def test_send_mime_email_ssl_server_auth_passes_context(
mocker: MockerFixture,
) -> None:
"""
With SMTP_SSL and SMTP_SSL_SERVER_AUTH enabled, ``send_mime_email`` builds a
default SSL context and threads it through to ``smtplib.SMTP_SSL`` so the
server certificate is validated.
"""
from email.mime.multipart import MIMEMultipart
from superset.utils import core as utils
create_default_context = mocker.patch(
"superset.utils.core.ssl.create_default_context"
)
smtp_ssl = mocker.patch("smtplib.SMTP_SSL")
smtp = mocker.patch("smtplib.SMTP")
utils.send_mime_email(
"from",
["to"],
MIMEMultipart(),
_smtp_config(SMTP_SSL=True, SMTP_SSL_SERVER_AUTH=True),
dryrun=False,
)
create_default_context.assert_called_once_with()
assert not smtp.called
smtp_ssl.assert_called_once_with(
"localhost", 25, context=create_default_context.return_value
)
def test_send_mime_email_starttls_server_auth_passes_context(
mocker: MockerFixture,
) -> None:
"""
With STARTTLS and SMTP_SSL_SERVER_AUTH enabled, ``send_mime_email`` builds a
default SSL context and threads it through to ``starttls`` so the server
certificate is validated.
"""
from email.mime.multipart import MIMEMultipart
from superset.utils import core as utils
create_default_context = mocker.patch(
"superset.utils.core.ssl.create_default_context"
)
smtp = mocker.patch("smtplib.SMTP")
utils.send_mime_email(
"from",
["to"],
MIMEMultipart(),
_smtp_config(SMTP_STARTTLS=True, SMTP_SSL_SERVER_AUTH=True),
dryrun=False,
)
create_default_context.assert_called_once_with()
smtp.return_value.starttls.assert_called_once_with(
context=create_default_context.return_value
)
def test_send_mime_email_server_auth_disabled_skips_context(
mocker: MockerFixture,
) -> None:
"""
When SMTP_SSL_SERVER_AUTH is disabled no SSL context is built and ``None`` is
passed through, preserving the opt-out (certificate validation skipped).
"""
from email.mime.multipart import MIMEMultipart
from superset.utils import core as utils
create_default_context = mocker.patch(
"superset.utils.core.ssl.create_default_context"
)
smtp_ssl = mocker.patch("smtplib.SMTP_SSL")
utils.send_mime_email(
"from",
["to"],
MIMEMultipart(),
_smtp_config(SMTP_SSL=True, SMTP_SSL_SERVER_AUTH=False),
dryrun=False,
)
assert not create_default_context.called
smtp_ssl.assert_called_once_with("localhost", 25, context=None)

View File

@@ -448,92 +448,7 @@ def test_adjust_engine_params_catalog_as_host() -> None:
{},
catalog="other-project",
)[0]
assert uri.host == "other-project"
assert not uri.database # no dataset when only catalog is overridden
def test_adjust_engine_params_schema_as_dataset() -> None:
"""
Test that passing a schema sets it as the BigQuery default dataset.
BigQuery requires table names to be fully qualified (project.dataset.table)
unless a default dataset is set via the URL database component. When schema
is provided, the URL database should be updated so unqualified table names
resolve to schema.table_name.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
url = make_url("bigquery://project")
# Without schema, URL is unchanged
uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
assert str(uri) == "bigquery://project"
# With schema, database component is set to enable default dataset
uri = BigQueryEngineSpec.adjust_engine_params(
url,
{},
schema="my_dataset",
)[0]
assert uri.database == "my_dataset"
# catalog + schema: catalog goes to host, schema goes to database
uri = BigQueryEngineSpec.adjust_engine_params(
url,
{},
catalog="other-project",
schema="my_dataset",
)[0]
assert uri.host == "other-project"
assert uri.database == "my_dataset"
# Triple-slash form (bigquery:///project): project must not be overwritten
triple_slash_url = make_url("bigquery:///my_project")
uri = BigQueryEngineSpec.adjust_engine_params(
triple_slash_url,
{},
schema="my_dataset",
)[0]
assert uri.host == "my_project"
assert uri.database == "my_dataset"
def test_get_schema_from_engine_params() -> None:
"""
Test that get_schema_from_engine_params returns the dataset from
bigquery://project/dataset URIs and None for all other URL forms.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
# Standard form: project in host, dataset in database
assert (
BigQueryEngineSpec.get_schema_from_engine_params(
make_url("bigquery://project/my_dataset"), {}
)
== "my_dataset"
)
# Project-only URI — no default dataset configured
assert (
BigQueryEngineSpec.get_schema_from_engine_params(
make_url("bigquery://project"), {}
)
is None
)
# Triple-slash form — database component is the project, not a dataset
assert (
BigQueryEngineSpec.get_schema_from_engine_params(
make_url("bigquery:///my_project"), {}
)
is None
)
# Bare URI — no project, no dataset
assert (
BigQueryEngineSpec.get_schema_from_engine_params(make_url("bigquery://"), {})
is None
)
assert str(uri) == "bigquery://other-project/"
def test_get_materialized_view_names() -> None:

View File

@@ -1218,182 +1218,6 @@ def test_query_context_modified_orderby(mocker: MockerFixture) -> None:
assert query_context_modified(query_context)
def test_query_context_modified_time_grain_native_filter(
mocker: MockerFixture,
) -> None:
"""
Test `query_context_modified` when a guest applies a Time Grain native filter.
Reproduces https://github.com/apache/superset/issues/32768.
On a chart that uses a generic x-axis, the selected time grain is baked into the
``BASE_AXIS`` adhoc column as a ``timeGrain`` property (see
``normalizeTimeColumn`` on the frontend, which copies ``extras.time_grain_sqla``
onto the column). A Time Grain native filter is a supported, read-only guest
interaction: it only changes the granularity at which the *same* dimension is
bucketed, never which metrics or columns are queried.
Previously, because the changed time grain travels inside the ``columns``
payload, the subset comparison treated the request as tampering and
``query_context_modified`` returned ``True`` -- so guests hit "Guest user cannot
modify chart payload" whenever they picked a grain other than the chart default.
``freeze_value`` now drops the guest-overridable ``timeGrain`` key before
comparing, so a pure time-grain change is no longer flagged as a modification.
This test guards that behavior.
"""
# The chart was saved with a monthly grain on its x-axis column.
stored_axis_column: AdhocColumn = {
"label": "order_date",
"sqlExpression": "order_date",
"columnType": "BASE_AXIS",
"timeGrain": "P1M",
}
# The guest picked a daily grain via the dashboard Time Grain native filter;
# `normalizeTimeColumn` rewrote the otherwise-identical column accordingly.
requested_axis_column: AdhocColumn = {
**stored_axis_column,
"timeGrain": "P1D",
}
query_context = mocker.MagicMock()
query_context.slice_.id = 42
query_context.slice_.params_dict = {
"metrics": ["count"],
}
query_context.slice_.query_context = json.dumps(
{
"queries": [
{
"columns": [stored_axis_column],
"metrics": ["count"],
}
],
}
)
# Native-filter data requests don't carry the mutated columns at the top level;
# the grain change only shows up inside the query's columns.
query_context.form_data = {
"slice_id": 42,
"metrics": ["count"],
}
query_context.queries = [
QueryObject(
columns=[requested_axis_column],
metrics=["count"],
),
]
assert not query_context_modified(query_context)
def test_query_context_modified_time_grain_with_tampered_column(
mocker: MockerFixture,
) -> None:
"""
Test that relaxing the time grain comparison does not open a tamper hole.
Only the ``timeGrain`` key is guest-overridable. A request that changes the
grain *and* also swaps a non-overridable attribute (here ``sqlExpression``,
which selects which column is queried) must still be flagged as tampering --
otherwise a guest could query an arbitrary column under cover of a Time Grain
filter.
"""
stored_axis_column: AdhocColumn = {
"label": "order_date",
"sqlExpression": "order_date",
"columnType": "BASE_AXIS",
"timeGrain": "P1M",
}
# Guest changes the grain (allowed) but also rewrites the SQL expression to a
# different column (not allowed) -- this must still read as a modification.
tampered_axis_column: AdhocColumn = {
**stored_axis_column,
"sqlExpression": "secret_column",
"timeGrain": "P1D",
}
query_context = mocker.MagicMock()
query_context.slice_.id = 42
query_context.slice_.params_dict = {
"metrics": ["count"],
}
query_context.slice_.query_context = json.dumps(
{
"queries": [
{
"columns": [stored_axis_column],
"metrics": ["count"],
}
],
}
)
query_context.form_data = {
"slice_id": 42,
"metrics": ["count"],
}
query_context.queries = [
QueryObject(
columns=[tampered_axis_column],
metrics=["count"],
),
]
assert query_context_modified(query_context)
def test_query_context_modified_time_grain_in_orderby(
mocker: MockerFixture,
) -> None:
"""
Test `query_context_modified` when the time grain travels inside `orderby`.
Each ``orderby`` entry is an ``(column, bool)`` tuple, so a temporal x-axis
adhoc column carrying the guest-overridable ``timeGrain`` is nested one level
deep rather than sitting at the top level. The overridable key must still be
stripped before comparing, otherwise sorting by the temporal axis would make
a pure time-grain change read as tampering.
"""
stored_axis_column: AdhocColumn = {
"label": "order_date",
"sqlExpression": "order_date",
"columnType": "BASE_AXIS",
"timeGrain": "P1M",
}
requested_axis_column: AdhocColumn = {
**stored_axis_column,
"timeGrain": "P1D",
}
query_context = mocker.MagicMock()
query_context.slice_.id = 42
query_context.slice_.params_dict = {
"metrics": ["count"],
}
query_context.slice_.query_context = json.dumps(
{
"queries": [
{
"orderby": [[stored_axis_column, True]],
"metrics": ["count"],
}
],
}
)
query_context.form_data = {
"slice_id": 42,
"metrics": ["count"],
}
query_context.queries = [
QueryObject(
orderby=[(requested_axis_column, True)],
metrics=["count"],
),
]
assert not query_context_modified(query_context)
def test_get_catalog_perm() -> None:
"""
Test the `get_catalog_perm` method.