mirror of
https://github.com/apache/superset.git
synced 2026-06-16 21:19:18 +00:00
Compare commits
4 Commits
fix-docker
...
fix-report
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9058b49971 | ||
|
|
c1b5d05f83 | ||
|
|
e16bb29faf | ||
|
|
09b4bc51a3 |
@@ -72,23 +72,20 @@ services:
|
||||
- -c
|
||||
- |
|
||||
url="http://host.docker.internal:9000/static/assets/manifest.json"
|
||||
max_attempts=300 # ~10 minutes at 2s intervals; first build can be slow
|
||||
echo "Waiting for webpack dev server at $$url..."
|
||||
max_attempts=150 # ~5 minutes at 2s intervals
|
||||
echo "Waiting for webpack dev server at $url..."
|
||||
attempt=0
|
||||
until curl -sf --max-time 5 -H "Host: localhost" -o /dev/null "$$url"; do
|
||||
attempt=$$((attempt + 1))
|
||||
if [ "$$attempt" -ge "$$max_attempts" ]; then
|
||||
echo "ERROR: webpack dev server did not serve $$url after $$max_attempts attempts." >&2
|
||||
until curl -sf --max-time 5 -o /dev/null "$url"; do
|
||||
attempt=$((attempt + 1))
|
||||
if [ "$attempt" -ge "$max_attempts" ]; then
|
||||
echo "ERROR: webpack dev server did not serve $url after $max_attempts attempts (~5 minutes)." >&2
|
||||
echo "Is the dev server running? With BUILD_SUPERSET_FRONTEND_IN_DOCKER=false you must start it on the host (e.g. 'npm run dev' in superset-frontend)." >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ $$((attempt % 15)) -eq 0 ]; then
|
||||
echo "Still waiting for webpack dev server... ($$attempt/$$max_attempts)"
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
echo "Webpack dev server is ready; starting nginx."
|
||||
exec /docker-entrypoint.sh nginx -g 'daemon off;'
|
||||
exec nginx -g 'daemon off;'
|
||||
|
||||
redis:
|
||||
image: redis:7
|
||||
|
||||
@@ -81,17 +81,17 @@ case "${1}" in
|
||||
app)
|
||||
echo "Starting web app (using development server)..."
|
||||
|
||||
# Always run in Flask debug mode here: this is the dev compose entrypoint,
|
||||
# and Superset's Talisman selector keys off app.debug to serve the dev CSP
|
||||
# (which permits 'unsafe-eval' required by React Refresh / HMR).
|
||||
export FLASK_DEBUG=1
|
||||
|
||||
# Werkzeug's interactive debugger (/console) is a separate, security-sensitive
|
||||
# feature and must be opted into explicitly via SUPERSET_DEBUG_ENABLED=true.
|
||||
# Environment-based debugger control for security
|
||||
# Only enable Werkzeug interactive debugger when explicitly requested
|
||||
# Modern Werkzeug (3.0+) includes PIN protection, but defense-in-depth approach
|
||||
# Override FLASK_DEBUG so the effective state matches SUPERSET_DEBUG_ENABLED even
|
||||
# when FLASK_DEBUG=true is inherited from docker/.env or .flaskenv
|
||||
if [[ "${SUPERSET_DEBUG_ENABLED:-}" == "true" ]]; then
|
||||
export FLASK_DEBUG=1
|
||||
DEBUGGER_FLAG="--debugger"
|
||||
echo " ⚠️ Werkzeug debugger enabled (requires PIN for /console access)"
|
||||
else
|
||||
export FLASK_DEBUG=0
|
||||
DEBUGGER_FLAG="--no-debugger"
|
||||
echo " 🔒 Werkzeug debugger disabled (set SUPERSET_DEBUG_ENABLED=true to enable)"
|
||||
fi
|
||||
|
||||
@@ -1454,6 +1454,21 @@ class ChartDataQueryObjectSchema(Schema):
|
||||
allow_none=True,
|
||||
)
|
||||
|
||||
@post_load
|
||||
def rename_deprecated_fields(
|
||||
self, data: dict[str, Any], **kwargs: Any
|
||||
) -> dict[str, Any]:
|
||||
_renames = (
|
||||
("groupby", "columns"),
|
||||
("granularity_sqla", "granularity"),
|
||||
("timeseries_limit", "series_limit"),
|
||||
("timeseries_limit_metric", "series_limit_metric"),
|
||||
)
|
||||
for old, new in _renames:
|
||||
if value := data.pop(old, None):
|
||||
data[new] = value
|
||||
return data
|
||||
|
||||
|
||||
class ChartDataQueryContextSchema(Schema):
|
||||
query_context_factory: QueryContextFactory | None = None
|
||||
|
||||
@@ -358,6 +358,11 @@ class BaseReportState:
|
||||
dashboard_id=str(self._report_schedule.dashboard.uuid),
|
||||
state=dashboard_state,
|
||||
).run()
|
||||
# Commit the permalink immediately so Playwright's separate DB connection
|
||||
# can resolve the URL. CreateDashboardPermalinkCommand only flushes when
|
||||
# called inside an outer @transaction(), leaving the row invisible to
|
||||
# other connections until we explicitly commit here.
|
||||
db.session.commit() # pylint: disable=consider-using-transaction
|
||||
|
||||
return get_url_path(
|
||||
"Superset.dashboard_permalink",
|
||||
|
||||
@@ -195,6 +195,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
allows_hidden_cc_in_orderby = True
|
||||
|
||||
supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True
|
||||
supports_dynamic_schema = True
|
||||
|
||||
# when editing the database, mask this field in `encrypted_extra`
|
||||
# pylint: disable=invalid-name
|
||||
@@ -740,11 +741,41 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
catalog: str | None = None,
|
||||
schema: str | None = None,
|
||||
) -> tuple[URL, dict[str, Any]]:
|
||||
if catalog:
|
||||
uri = uri.set(host=catalog, database="")
|
||||
if not uri.host:
|
||||
# Triple-slash form (e.g., bigquery:///project): project is in database.
|
||||
default_catalog = uri.database
|
||||
default_schema = None
|
||||
else:
|
||||
# Standard forms: bigquery://project, bigquery://project/dataset
|
||||
default_catalog = uri.host
|
||||
default_schema = uri.database or None # coerce empty string to None
|
||||
|
||||
uri = uri.set(
|
||||
host=catalog or default_catalog,
|
||||
database=schema or default_schema,
|
||||
)
|
||||
|
||||
return uri, connect_args
|
||||
|
||||
@classmethod
|
||||
def get_schema_from_engine_params(
|
||||
cls,
|
||||
sqlalchemy_uri: URL,
|
||||
connect_args: dict[str, Any],
|
||||
) -> str | None:
|
||||
"""
|
||||
Return the default dataset encoded in a ``bigquery://project/dataset`` URI.
|
||||
|
||||
The BigQuery SQLAlchemy driver uses the URL ``database`` component as the
|
||||
default dataset, but only when ``host`` (the project) is also present.
|
||||
The triple-slash form ``bigquery:///project`` puts the project in
|
||||
``database`` with no host, so we guard against misidentifying it as a
|
||||
dataset.
|
||||
"""
|
||||
if sqlalchemy_uri.host and sqlalchemy_uri.database:
|
||||
return sqlalchemy_uri.database
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def get_allow_cost_estimate(cls, extra: dict[str, Any]) -> bool:
|
||||
return True
|
||||
|
||||
@@ -342,11 +342,43 @@ PermissionModelView.include_route_methods = {RouteMethod.LIST}
|
||||
ViewMenuModelView.include_route_methods = {RouteMethod.LIST}
|
||||
|
||||
|
||||
# Keys on an adhoc column/metric that a guest may legitimately change through a
|
||||
# supported native filter, and which therefore must not count as payload
|
||||
# tampering. The time grain of a temporal x-axis is baked into its `BASE_AXIS`
|
||||
# column by `normalizeTimeColumn` on the frontend (it copies
|
||||
# `extras.time_grain_sqla` onto the column), so a Time Grain filter alters the
|
||||
# column payload without changing which data is queried.
|
||||
GUEST_OVERRIDABLE_VALUE_KEYS = frozenset({"timeGrain"})
|
||||
|
||||
|
||||
def _strip_overridable_keys(value: Any) -> Any:
|
||||
"""
|
||||
Recursively drop guest-overridable keys from a value.
|
||||
|
||||
Adhoc columns/metrics can be nested inside sequences (e.g. an ``orderby``
|
||||
entry is a ``(column, bool)`` tuple), so the overridable keys must be
|
||||
stripped at every level rather than only from a top-level dict.
|
||||
"""
|
||||
if isinstance(value, dict):
|
||||
return {
|
||||
key: _strip_overridable_keys(val)
|
||||
for key, val in value.items()
|
||||
if key not in GUEST_OVERRIDABLE_VALUE_KEYS
|
||||
}
|
||||
if isinstance(value, (list, tuple)):
|
||||
return [_strip_overridable_keys(item) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
def freeze_value(value: Any) -> str:
|
||||
"""
|
||||
Used to compare column and metric sets.
|
||||
|
||||
Guest-overridable keys (e.g. the time grain baked into a temporal x-axis
|
||||
column) are dropped so that legitimate native-filter changes don't read as
|
||||
payload tampering.
|
||||
"""
|
||||
return json.dumps(value, sort_keys=True)
|
||||
return json.dumps(_strip_overridable_keys(value), sort_keys=True)
|
||||
|
||||
|
||||
def _native_filter_allowed_targets(
|
||||
|
||||
@@ -305,6 +305,13 @@ class TestBigQueryDbEngineSpec(SupersetTestCase):
|
||||
|
||||
@mock.patch("superset.models.core.Database.db_engine_spec", BigQueryEngineSpec)
|
||||
@mock.patch("sqlalchemy_bigquery._helpers.create_bigquery_client", mock.Mock)
|
||||
@mock.patch(
|
||||
"superset.db_engine_specs.bigquery.BigQueryEngineSpec.adjust_engine_params",
|
||||
new=lambda uri, connect_args, catalog=None, schema=None, **kw: (
|
||||
uri,
|
||||
connect_args,
|
||||
),
|
||||
)
|
||||
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
|
||||
def test_calculated_column_in_order_by(self):
|
||||
table = self.get_table(name="birth_names")
|
||||
|
||||
@@ -213,6 +213,53 @@ def test_chart_data_query_object_schema_time_grain_sqla_validation(
|
||||
assert result["extras"]["time_grain_sqla"] is None
|
||||
|
||||
|
||||
def test_chart_data_query_object_schema_deprecated_fields_renamed(
|
||||
app_context: None,
|
||||
) -> None:
|
||||
"""Deprecated query object fields are renamed to their canonical names."""
|
||||
schema = ChartDataQueryObjectSchema()
|
||||
|
||||
# groupby alone → becomes columns
|
||||
result = schema.load({"groupby": ["country_name"]})
|
||||
assert result.get("columns") == ["country_name"]
|
||||
assert "groupby" not in result
|
||||
|
||||
# groupby overwrites columns when both are provided
|
||||
result = schema.load({"groupby": ["region"], "columns": ["country_name"]})
|
||||
assert result.get("columns") == ["region"]
|
||||
assert "groupby" not in result
|
||||
|
||||
# empty groupby is discarded; existing columns is preserved
|
||||
result = schema.load({"groupby": [], "columns": ["country_name"]})
|
||||
assert result.get("columns") == ["country_name"]
|
||||
assert "groupby" not in result
|
||||
|
||||
# null groupby is discarded; existing columns is preserved (allow_none=True)
|
||||
result = schema.load({"groupby": None, "columns": ["country_name"]})
|
||||
assert result.get("columns") == ["country_name"]
|
||||
assert "groupby" not in result
|
||||
|
||||
# no groupby → columns passes through unchanged
|
||||
result = schema.load({"columns": ["country_name"]})
|
||||
assert result.get("columns") == ["country_name"]
|
||||
assert "groupby" not in result
|
||||
|
||||
# granularity_sqla → granularity
|
||||
result = schema.load({"granularity_sqla": "ds"})
|
||||
assert result.get("granularity") == "ds"
|
||||
assert "granularity_sqla" not in result
|
||||
|
||||
# timeseries_limit → series_limit
|
||||
result = schema.load({"timeseries_limit": 5})
|
||||
assert result.get("series_limit") == 5
|
||||
assert "timeseries_limit" not in result
|
||||
|
||||
# timeseries_limit_metric → series_limit_metric
|
||||
result = schema.load({"timeseries_limit_metric": "count"})
|
||||
assert result.get("series_limit_metric") == "count"
|
||||
assert "timeseries_limit_metric" not in result
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"app",
|
||||
[{"TIME_GRAIN_ADDONS": {"PT10M": "10 minutes"}}],
|
||||
|
||||
@@ -448,7 +448,92 @@ def test_adjust_engine_params_catalog_as_host() -> None:
|
||||
{},
|
||||
catalog="other-project",
|
||||
)[0]
|
||||
assert str(uri) == "bigquery://other-project/"
|
||||
assert uri.host == "other-project"
|
||||
assert not uri.database # no dataset when only catalog is overridden
|
||||
|
||||
|
||||
def test_adjust_engine_params_schema_as_dataset() -> None:
|
||||
"""
|
||||
Test that passing a schema sets it as the BigQuery default dataset.
|
||||
|
||||
BigQuery requires table names to be fully qualified (project.dataset.table)
|
||||
unless a default dataset is set via the URL database component. When schema
|
||||
is provided, the URL database should be updated so unqualified table names
|
||||
resolve to schema.table_name.
|
||||
"""
|
||||
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
|
||||
|
||||
url = make_url("bigquery://project")
|
||||
|
||||
# Without schema, URL is unchanged
|
||||
uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
|
||||
assert str(uri) == "bigquery://project"
|
||||
|
||||
# With schema, database component is set to enable default dataset
|
||||
uri = BigQueryEngineSpec.adjust_engine_params(
|
||||
url,
|
||||
{},
|
||||
schema="my_dataset",
|
||||
)[0]
|
||||
assert uri.database == "my_dataset"
|
||||
|
||||
# catalog + schema: catalog goes to host, schema goes to database
|
||||
uri = BigQueryEngineSpec.adjust_engine_params(
|
||||
url,
|
||||
{},
|
||||
catalog="other-project",
|
||||
schema="my_dataset",
|
||||
)[0]
|
||||
assert uri.host == "other-project"
|
||||
assert uri.database == "my_dataset"
|
||||
|
||||
# Triple-slash form (bigquery:///project): project must not be overwritten
|
||||
triple_slash_url = make_url("bigquery:///my_project")
|
||||
uri = BigQueryEngineSpec.adjust_engine_params(
|
||||
triple_slash_url,
|
||||
{},
|
||||
schema="my_dataset",
|
||||
)[0]
|
||||
assert uri.host == "my_project"
|
||||
assert uri.database == "my_dataset"
|
||||
|
||||
|
||||
def test_get_schema_from_engine_params() -> None:
|
||||
"""
|
||||
Test that get_schema_from_engine_params returns the dataset from
|
||||
bigquery://project/dataset URIs and None for all other URL forms.
|
||||
"""
|
||||
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
|
||||
|
||||
# Standard form: project in host, dataset in database
|
||||
assert (
|
||||
BigQueryEngineSpec.get_schema_from_engine_params(
|
||||
make_url("bigquery://project/my_dataset"), {}
|
||||
)
|
||||
== "my_dataset"
|
||||
)
|
||||
|
||||
# Project-only URI — no default dataset configured
|
||||
assert (
|
||||
BigQueryEngineSpec.get_schema_from_engine_params(
|
||||
make_url("bigquery://project"), {}
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
# Triple-slash form — database component is the project, not a dataset
|
||||
assert (
|
||||
BigQueryEngineSpec.get_schema_from_engine_params(
|
||||
make_url("bigquery:///my_project"), {}
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
# Bare URI — no project, no dataset
|
||||
assert (
|
||||
BigQueryEngineSpec.get_schema_from_engine_params(make_url("bigquery://"), {})
|
||||
is None
|
||||
)
|
||||
|
||||
|
||||
def test_get_materialized_view_names() -> None:
|
||||
|
||||
@@ -1218,6 +1218,182 @@ def test_query_context_modified_orderby(mocker: MockerFixture) -> None:
|
||||
assert query_context_modified(query_context)
|
||||
|
||||
|
||||
def test_query_context_modified_time_grain_native_filter(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test `query_context_modified` when a guest applies a Time Grain native filter.
|
||||
|
||||
Reproduces https://github.com/apache/superset/issues/32768.
|
||||
|
||||
On a chart that uses a generic x-axis, the selected time grain is baked into the
|
||||
``BASE_AXIS`` adhoc column as a ``timeGrain`` property (see
|
||||
``normalizeTimeColumn`` on the frontend, which copies ``extras.time_grain_sqla``
|
||||
onto the column). A Time Grain native filter is a supported, read-only guest
|
||||
interaction: it only changes the granularity at which the *same* dimension is
|
||||
bucketed, never which metrics or columns are queried.
|
||||
|
||||
Previously, because the changed time grain travels inside the ``columns``
|
||||
payload, the subset comparison treated the request as tampering and
|
||||
``query_context_modified`` returned ``True`` -- so guests hit "Guest user cannot
|
||||
modify chart payload" whenever they picked a grain other than the chart default.
|
||||
|
||||
``freeze_value`` now drops the guest-overridable ``timeGrain`` key before
|
||||
comparing, so a pure time-grain change is no longer flagged as a modification.
|
||||
This test guards that behavior.
|
||||
"""
|
||||
# The chart was saved with a monthly grain on its x-axis column.
|
||||
stored_axis_column: AdhocColumn = {
|
||||
"label": "order_date",
|
||||
"sqlExpression": "order_date",
|
||||
"columnType": "BASE_AXIS",
|
||||
"timeGrain": "P1M",
|
||||
}
|
||||
# The guest picked a daily grain via the dashboard Time Grain native filter;
|
||||
# `normalizeTimeColumn` rewrote the otherwise-identical column accordingly.
|
||||
requested_axis_column: AdhocColumn = {
|
||||
**stored_axis_column,
|
||||
"timeGrain": "P1D",
|
||||
}
|
||||
|
||||
query_context = mocker.MagicMock()
|
||||
query_context.slice_.id = 42
|
||||
query_context.slice_.params_dict = {
|
||||
"metrics": ["count"],
|
||||
}
|
||||
query_context.slice_.query_context = json.dumps(
|
||||
{
|
||||
"queries": [
|
||||
{
|
||||
"columns": [stored_axis_column],
|
||||
"metrics": ["count"],
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
# Native-filter data requests don't carry the mutated columns at the top level;
|
||||
# the grain change only shows up inside the query's columns.
|
||||
query_context.form_data = {
|
||||
"slice_id": 42,
|
||||
"metrics": ["count"],
|
||||
}
|
||||
query_context.queries = [
|
||||
QueryObject(
|
||||
columns=[requested_axis_column],
|
||||
metrics=["count"],
|
||||
),
|
||||
]
|
||||
|
||||
assert not query_context_modified(query_context)
|
||||
|
||||
|
||||
def test_query_context_modified_time_grain_with_tampered_column(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test that relaxing the time grain comparison does not open a tamper hole.
|
||||
|
||||
Only the ``timeGrain`` key is guest-overridable. A request that changes the
|
||||
grain *and* also swaps a non-overridable attribute (here ``sqlExpression``,
|
||||
which selects which column is queried) must still be flagged as tampering --
|
||||
otherwise a guest could query an arbitrary column under cover of a Time Grain
|
||||
filter.
|
||||
"""
|
||||
stored_axis_column: AdhocColumn = {
|
||||
"label": "order_date",
|
||||
"sqlExpression": "order_date",
|
||||
"columnType": "BASE_AXIS",
|
||||
"timeGrain": "P1M",
|
||||
}
|
||||
# Guest changes the grain (allowed) but also rewrites the SQL expression to a
|
||||
# different column (not allowed) -- this must still read as a modification.
|
||||
tampered_axis_column: AdhocColumn = {
|
||||
**stored_axis_column,
|
||||
"sqlExpression": "secret_column",
|
||||
"timeGrain": "P1D",
|
||||
}
|
||||
|
||||
query_context = mocker.MagicMock()
|
||||
query_context.slice_.id = 42
|
||||
query_context.slice_.params_dict = {
|
||||
"metrics": ["count"],
|
||||
}
|
||||
query_context.slice_.query_context = json.dumps(
|
||||
{
|
||||
"queries": [
|
||||
{
|
||||
"columns": [stored_axis_column],
|
||||
"metrics": ["count"],
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
query_context.form_data = {
|
||||
"slice_id": 42,
|
||||
"metrics": ["count"],
|
||||
}
|
||||
query_context.queries = [
|
||||
QueryObject(
|
||||
columns=[tampered_axis_column],
|
||||
metrics=["count"],
|
||||
),
|
||||
]
|
||||
|
||||
assert query_context_modified(query_context)
|
||||
|
||||
|
||||
def test_query_context_modified_time_grain_in_orderby(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test `query_context_modified` when the time grain travels inside `orderby`.
|
||||
|
||||
Each ``orderby`` entry is an ``(column, bool)`` tuple, so a temporal x-axis
|
||||
adhoc column carrying the guest-overridable ``timeGrain`` is nested one level
|
||||
deep rather than sitting at the top level. The overridable key must still be
|
||||
stripped before comparing, otherwise sorting by the temporal axis would make
|
||||
a pure time-grain change read as tampering.
|
||||
"""
|
||||
stored_axis_column: AdhocColumn = {
|
||||
"label": "order_date",
|
||||
"sqlExpression": "order_date",
|
||||
"columnType": "BASE_AXIS",
|
||||
"timeGrain": "P1M",
|
||||
}
|
||||
requested_axis_column: AdhocColumn = {
|
||||
**stored_axis_column,
|
||||
"timeGrain": "P1D",
|
||||
}
|
||||
|
||||
query_context = mocker.MagicMock()
|
||||
query_context.slice_.id = 42
|
||||
query_context.slice_.params_dict = {
|
||||
"metrics": ["count"],
|
||||
}
|
||||
query_context.slice_.query_context = json.dumps(
|
||||
{
|
||||
"queries": [
|
||||
{
|
||||
"orderby": [[stored_axis_column, True]],
|
||||
"metrics": ["count"],
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
query_context.form_data = {
|
||||
"slice_id": 42,
|
||||
"metrics": ["count"],
|
||||
}
|
||||
query_context.queries = [
|
||||
QueryObject(
|
||||
orderby=[(requested_axis_column, True)],
|
||||
metrics=["count"],
|
||||
),
|
||||
]
|
||||
|
||||
assert not query_context_modified(query_context)
|
||||
|
||||
|
||||
def test_get_catalog_perm() -> None:
|
||||
"""
|
||||
Test the `get_catalog_perm` method.
|
||||
|
||||
Reference in New Issue
Block a user