Compare commits

..

4 Commits

Author SHA1 Message Date
Elizabeth Thompson
9058b49971 fix(reports): commit permalink before Playwright navigates to tab URL
CreateDashboardPermalinkCommand only flushes the INSERT when invoked
inside an outer @transaction() (the nesting guard skips the commit).
The row is therefore not visible to Playwright's separate DB connection,
causing dashboard_permalink to return a 404, the <body class="standalone">
element to never appear, and the screenshot to time out after 600s.

Commit immediately after CreateDashboardPermalinkCommand.run() returns,
matching the pattern already used by create_log() in the same class.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-15 23:50:30 +00:00
Amin Ghadersohi
c1b5d05f83 fix(bigquery): set default dataset from schema in adjust_engine_params (#40776) 2026-06-15 18:37:06 -04:00
Evan Rusackas
e16bb29faf fix(embedded): allow guests to apply a Time Grain native filter (#32768) (#41017)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
2026-06-15 15:22:21 -07:00
Elizabeth Thompson
09b4bc51a3 fix(charts): rename deprecated query object fields in schema before QueryObject construction (#41056) 2026-06-15 14:45:41 -07:00
10 changed files with 416 additions and 21 deletions

View File

@@ -72,23 +72,20 @@ services:
- -c
- |
url="http://host.docker.internal:9000/static/assets/manifest.json"
max_attempts=300 # ~10 minutes at 2s intervals; first build can be slow
echo "Waiting for webpack dev server at $$url..."
max_attempts=150 # ~5 minutes at 2s intervals
echo "Waiting for webpack dev server at $url..."
attempt=0
until curl -sf --max-time 5 -H "Host: localhost" -o /dev/null "$$url"; do
attempt=$$((attempt + 1))
if [ "$$attempt" -ge "$$max_attempts" ]; then
echo "ERROR: webpack dev server did not serve $$url after $$max_attempts attempts." >&2
until curl -sf --max-time 5 -o /dev/null "$url"; do
attempt=$((attempt + 1))
if [ "$attempt" -ge "$max_attempts" ]; then
echo "ERROR: webpack dev server did not serve $url after $max_attempts attempts (~5 minutes)." >&2
echo "Is the dev server running? With BUILD_SUPERSET_FRONTEND_IN_DOCKER=false you must start it on the host (e.g. 'npm run dev' in superset-frontend)." >&2
exit 1
fi
if [ $$((attempt % 15)) -eq 0 ]; then
echo "Still waiting for webpack dev server... ($$attempt/$$max_attempts)"
fi
sleep 2
done
echo "Webpack dev server is ready; starting nginx."
exec /docker-entrypoint.sh nginx -g 'daemon off;'
exec nginx -g 'daemon off;'
redis:
image: redis:7

View File

@@ -81,17 +81,17 @@ case "${1}" in
app)
echo "Starting web app (using development server)..."
# Always run in Flask debug mode here: this is the dev compose entrypoint,
# and Superset's Talisman selector keys off app.debug to serve the dev CSP
# (which permits 'unsafe-eval' required by React Refresh / HMR).
export FLASK_DEBUG=1
# Werkzeug's interactive debugger (/console) is a separate, security-sensitive
# feature and must be opted into explicitly via SUPERSET_DEBUG_ENABLED=true.
# Environment-based debugger control for security
# Only enable Werkzeug interactive debugger when explicitly requested
# Modern Werkzeug (3.0+) includes PIN protection, but defense-in-depth approach
# Override FLASK_DEBUG so the effective state matches SUPERSET_DEBUG_ENABLED even
# when FLASK_DEBUG=true is inherited from docker/.env or .flaskenv
if [[ "${SUPERSET_DEBUG_ENABLED:-}" == "true" ]]; then
export FLASK_DEBUG=1
DEBUGGER_FLAG="--debugger"
echo " ⚠️ Werkzeug debugger enabled (requires PIN for /console access)"
else
export FLASK_DEBUG=0
DEBUGGER_FLAG="--no-debugger"
echo " 🔒 Werkzeug debugger disabled (set SUPERSET_DEBUG_ENABLED=true to enable)"
fi

View File

@@ -1454,6 +1454,21 @@ class ChartDataQueryObjectSchema(Schema):
allow_none=True,
)
@post_load
def rename_deprecated_fields(
self, data: dict[str, Any], **kwargs: Any
) -> dict[str, Any]:
_renames = (
("groupby", "columns"),
("granularity_sqla", "granularity"),
("timeseries_limit", "series_limit"),
("timeseries_limit_metric", "series_limit_metric"),
)
for old, new in _renames:
if value := data.pop(old, None):
data[new] = value
return data
class ChartDataQueryContextSchema(Schema):
query_context_factory: QueryContextFactory | None = None

View File

@@ -358,6 +358,11 @@ class BaseReportState:
dashboard_id=str(self._report_schedule.dashboard.uuid),
state=dashboard_state,
).run()
# Commit the permalink immediately so Playwright's separate DB connection
# can resolve the URL. CreateDashboardPermalinkCommand only flushes when
# called inside an outer @transaction(), leaving the row invisible to
# other connections until we explicitly commit here.
db.session.commit() # pylint: disable=consider-using-transaction
return get_url_path(
"Superset.dashboard_permalink",

View File

@@ -195,6 +195,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
allows_hidden_cc_in_orderby = True
supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True
supports_dynamic_schema = True
# when editing the database, mask this field in `encrypted_extra`
# pylint: disable=invalid-name
@@ -740,11 +741,41 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
catalog: str | None = None,
schema: str | None = None,
) -> tuple[URL, dict[str, Any]]:
if catalog:
uri = uri.set(host=catalog, database="")
if not uri.host:
# Triple-slash form (e.g., bigquery:///project): project is in database.
default_catalog = uri.database
default_schema = None
else:
# Standard forms: bigquery://project, bigquery://project/dataset
default_catalog = uri.host
default_schema = uri.database or None # coerce empty string to None
uri = uri.set(
host=catalog or default_catalog,
database=schema or default_schema,
)
return uri, connect_args
@classmethod
def get_schema_from_engine_params(
cls,
sqlalchemy_uri: URL,
connect_args: dict[str, Any],
) -> str | None:
"""
Return the default dataset encoded in a ``bigquery://project/dataset`` URI.
The BigQuery SQLAlchemy driver uses the URL ``database`` component as the
default dataset, but only when ``host`` (the project) is also present.
The triple-slash form ``bigquery:///project`` puts the project in
``database`` with no host, so we guard against misidentifying it as a
dataset.
"""
if sqlalchemy_uri.host and sqlalchemy_uri.database:
return sqlalchemy_uri.database
return None
@classmethod
def get_allow_cost_estimate(cls, extra: dict[str, Any]) -> bool:
return True

View File

@@ -342,11 +342,43 @@ PermissionModelView.include_route_methods = {RouteMethod.LIST}
ViewMenuModelView.include_route_methods = {RouteMethod.LIST}
# Keys on an adhoc column/metric that a guest may legitimately change through a
# supported native filter, and which therefore must not count as payload
# tampering. The time grain of a temporal x-axis is baked into its `BASE_AXIS`
# column by `normalizeTimeColumn` on the frontend (it copies
# `extras.time_grain_sqla` onto the column), so a Time Grain filter alters the
# column payload without changing which data is queried.
GUEST_OVERRIDABLE_VALUE_KEYS = frozenset({"timeGrain"})
def _strip_overridable_keys(value: Any) -> Any:
"""
Recursively drop guest-overridable keys from a value.
Adhoc columns/metrics can be nested inside sequences (e.g. an ``orderby``
entry is a ``(column, bool)`` tuple), so the overridable keys must be
stripped at every level rather than only from a top-level dict.
"""
if isinstance(value, dict):
return {
key: _strip_overridable_keys(val)
for key, val in value.items()
if key not in GUEST_OVERRIDABLE_VALUE_KEYS
}
if isinstance(value, (list, tuple)):
return [_strip_overridable_keys(item) for item in value]
return value
def freeze_value(value: Any) -> str:
"""
Used to compare column and metric sets.
Guest-overridable keys (e.g. the time grain baked into a temporal x-axis
column) are dropped so that legitimate native-filter changes don't read as
payload tampering.
"""
return json.dumps(value, sort_keys=True)
return json.dumps(_strip_overridable_keys(value), sort_keys=True)
def _native_filter_allowed_targets(

View File

@@ -305,6 +305,13 @@ class TestBigQueryDbEngineSpec(SupersetTestCase):
@mock.patch("superset.models.core.Database.db_engine_spec", BigQueryEngineSpec)
@mock.patch("sqlalchemy_bigquery._helpers.create_bigquery_client", mock.Mock)
@mock.patch(
"superset.db_engine_specs.bigquery.BigQueryEngineSpec.adjust_engine_params",
new=lambda uri, connect_args, catalog=None, schema=None, **kw: (
uri,
connect_args,
),
)
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_calculated_column_in_order_by(self):
table = self.get_table(name="birth_names")

View File

@@ -213,6 +213,53 @@ def test_chart_data_query_object_schema_time_grain_sqla_validation(
assert result["extras"]["time_grain_sqla"] is None
def test_chart_data_query_object_schema_deprecated_fields_renamed(
app_context: None,
) -> None:
"""Deprecated query object fields are renamed to their canonical names."""
schema = ChartDataQueryObjectSchema()
# groupby alone → becomes columns
result = schema.load({"groupby": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# groupby overwrites columns when both are provided
result = schema.load({"groupby": ["region"], "columns": ["country_name"]})
assert result.get("columns") == ["region"]
assert "groupby" not in result
# empty groupby is discarded; existing columns is preserved
result = schema.load({"groupby": [], "columns": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# null groupby is discarded; existing columns is preserved (allow_none=True)
result = schema.load({"groupby": None, "columns": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# no groupby → columns passes through unchanged
result = schema.load({"columns": ["country_name"]})
assert result.get("columns") == ["country_name"]
assert "groupby" not in result
# granularity_sqla → granularity
result = schema.load({"granularity_sqla": "ds"})
assert result.get("granularity") == "ds"
assert "granularity_sqla" not in result
# timeseries_limit → series_limit
result = schema.load({"timeseries_limit": 5})
assert result.get("series_limit") == 5
assert "timeseries_limit" not in result
# timeseries_limit_metric → series_limit_metric
result = schema.load({"timeseries_limit_metric": "count"})
assert result.get("series_limit_metric") == "count"
assert "timeseries_limit_metric" not in result
@pytest.mark.parametrize(
"app",
[{"TIME_GRAIN_ADDONS": {"PT10M": "10 minutes"}}],

View File

@@ -448,7 +448,92 @@ def test_adjust_engine_params_catalog_as_host() -> None:
{},
catalog="other-project",
)[0]
assert str(uri) == "bigquery://other-project/"
assert uri.host == "other-project"
assert not uri.database # no dataset when only catalog is overridden
def test_adjust_engine_params_schema_as_dataset() -> None:
"""
Test that passing a schema sets it as the BigQuery default dataset.
BigQuery requires table names to be fully qualified (project.dataset.table)
unless a default dataset is set via the URL database component. When schema
is provided, the URL database should be updated so unqualified table names
resolve to schema.table_name.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
url = make_url("bigquery://project")
# Without schema, URL is unchanged
uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
assert str(uri) == "bigquery://project"
# With schema, database component is set to enable default dataset
uri = BigQueryEngineSpec.adjust_engine_params(
url,
{},
schema="my_dataset",
)[0]
assert uri.database == "my_dataset"
# catalog + schema: catalog goes to host, schema goes to database
uri = BigQueryEngineSpec.adjust_engine_params(
url,
{},
catalog="other-project",
schema="my_dataset",
)[0]
assert uri.host == "other-project"
assert uri.database == "my_dataset"
# Triple-slash form (bigquery:///project): project must not be overwritten
triple_slash_url = make_url("bigquery:///my_project")
uri = BigQueryEngineSpec.adjust_engine_params(
triple_slash_url,
{},
schema="my_dataset",
)[0]
assert uri.host == "my_project"
assert uri.database == "my_dataset"
def test_get_schema_from_engine_params() -> None:
"""
Test that get_schema_from_engine_params returns the dataset from
bigquery://project/dataset URIs and None for all other URL forms.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
# Standard form: project in host, dataset in database
assert (
BigQueryEngineSpec.get_schema_from_engine_params(
make_url("bigquery://project/my_dataset"), {}
)
== "my_dataset"
)
# Project-only URI — no default dataset configured
assert (
BigQueryEngineSpec.get_schema_from_engine_params(
make_url("bigquery://project"), {}
)
is None
)
# Triple-slash form — database component is the project, not a dataset
assert (
BigQueryEngineSpec.get_schema_from_engine_params(
make_url("bigquery:///my_project"), {}
)
is None
)
# Bare URI — no project, no dataset
assert (
BigQueryEngineSpec.get_schema_from_engine_params(make_url("bigquery://"), {})
is None
)
def test_get_materialized_view_names() -> None:

View File

@@ -1218,6 +1218,182 @@ def test_query_context_modified_orderby(mocker: MockerFixture) -> None:
assert query_context_modified(query_context)
def test_query_context_modified_time_grain_native_filter(
mocker: MockerFixture,
) -> None:
"""
Test `query_context_modified` when a guest applies a Time Grain native filter.
Reproduces https://github.com/apache/superset/issues/32768.
On a chart that uses a generic x-axis, the selected time grain is baked into the
``BASE_AXIS`` adhoc column as a ``timeGrain`` property (see
``normalizeTimeColumn`` on the frontend, which copies ``extras.time_grain_sqla``
onto the column). A Time Grain native filter is a supported, read-only guest
interaction: it only changes the granularity at which the *same* dimension is
bucketed, never which metrics or columns are queried.
Previously, because the changed time grain travels inside the ``columns``
payload, the subset comparison treated the request as tampering and
``query_context_modified`` returned ``True`` -- so guests hit "Guest user cannot
modify chart payload" whenever they picked a grain other than the chart default.
``freeze_value`` now drops the guest-overridable ``timeGrain`` key before
comparing, so a pure time-grain change is no longer flagged as a modification.
This test guards that behavior.
"""
# The chart was saved with a monthly grain on its x-axis column.
stored_axis_column: AdhocColumn = {
"label": "order_date",
"sqlExpression": "order_date",
"columnType": "BASE_AXIS",
"timeGrain": "P1M",
}
# The guest picked a daily grain via the dashboard Time Grain native filter;
# `normalizeTimeColumn` rewrote the otherwise-identical column accordingly.
requested_axis_column: AdhocColumn = {
**stored_axis_column,
"timeGrain": "P1D",
}
query_context = mocker.MagicMock()
query_context.slice_.id = 42
query_context.slice_.params_dict = {
"metrics": ["count"],
}
query_context.slice_.query_context = json.dumps(
{
"queries": [
{
"columns": [stored_axis_column],
"metrics": ["count"],
}
],
}
)
# Native-filter data requests don't carry the mutated columns at the top level;
# the grain change only shows up inside the query's columns.
query_context.form_data = {
"slice_id": 42,
"metrics": ["count"],
}
query_context.queries = [
QueryObject(
columns=[requested_axis_column],
metrics=["count"],
),
]
assert not query_context_modified(query_context)
def test_query_context_modified_time_grain_with_tampered_column(
mocker: MockerFixture,
) -> None:
"""
Test that relaxing the time grain comparison does not open a tamper hole.
Only the ``timeGrain`` key is guest-overridable. A request that changes the
grain *and* also swaps a non-overridable attribute (here ``sqlExpression``,
which selects which column is queried) must still be flagged as tampering --
otherwise a guest could query an arbitrary column under cover of a Time Grain
filter.
"""
stored_axis_column: AdhocColumn = {
"label": "order_date",
"sqlExpression": "order_date",
"columnType": "BASE_AXIS",
"timeGrain": "P1M",
}
# Guest changes the grain (allowed) but also rewrites the SQL expression to a
# different column (not allowed) -- this must still read as a modification.
tampered_axis_column: AdhocColumn = {
**stored_axis_column,
"sqlExpression": "secret_column",
"timeGrain": "P1D",
}
query_context = mocker.MagicMock()
query_context.slice_.id = 42
query_context.slice_.params_dict = {
"metrics": ["count"],
}
query_context.slice_.query_context = json.dumps(
{
"queries": [
{
"columns": [stored_axis_column],
"metrics": ["count"],
}
],
}
)
query_context.form_data = {
"slice_id": 42,
"metrics": ["count"],
}
query_context.queries = [
QueryObject(
columns=[tampered_axis_column],
metrics=["count"],
),
]
assert query_context_modified(query_context)
def test_query_context_modified_time_grain_in_orderby(
mocker: MockerFixture,
) -> None:
"""
Test `query_context_modified` when the time grain travels inside `orderby`.
Each ``orderby`` entry is an ``(column, bool)`` tuple, so a temporal x-axis
adhoc column carrying the guest-overridable ``timeGrain`` is nested one level
deep rather than sitting at the top level. The overridable key must still be
stripped before comparing, otherwise sorting by the temporal axis would make
a pure time-grain change read as tampering.
"""
stored_axis_column: AdhocColumn = {
"label": "order_date",
"sqlExpression": "order_date",
"columnType": "BASE_AXIS",
"timeGrain": "P1M",
}
requested_axis_column: AdhocColumn = {
**stored_axis_column,
"timeGrain": "P1D",
}
query_context = mocker.MagicMock()
query_context.slice_.id = 42
query_context.slice_.params_dict = {
"metrics": ["count"],
}
query_context.slice_.query_context = json.dumps(
{
"queries": [
{
"orderby": [[stored_axis_column, True]],
"metrics": ["count"],
}
],
}
)
query_context.form_data = {
"slice_id": 42,
"metrics": ["count"],
}
query_context.queries = [
QueryObject(
orderby=[(requested_axis_column, True)],
metrics=["count"],
),
]
assert not query_context_modified(query_context)
def test_get_catalog_perm() -> None:
"""
Test the `get_catalog_perm` method.