Compare commits

..

2 Commits

Author SHA1 Message Date
Evan
c8b9e990ae fix(chart): reject query-context datasource missing a type
A datasource object with a matching id but no type was treated as valid,
but query-context loading reads datasource["type"] directly and would
raise KeyError when the saved context is later replayed.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 07:49:04 -07:00
Claude Code
1c438a57d4 fix(chart): keep query-context updates bound to the chart's datasource
On the query-context-only update path UpdateChartCommand intentionally
skips the ownership check so report and alert workers can refresh a
chart's cached payload. Validate that the submitted query context still
targets the chart's own datasource (id and type) before saving, so a
cached payload cannot be repointed at an unrelated datasource. Payloads
without a parseable datasource fall back to the chart's datasource at
execution time and are left unchanged.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 06:40:07 -07:00
36 changed files with 226 additions and 1808 deletions

View File

@@ -24,15 +24,6 @@ assists people when migrating to a new version.
## Next
### Webhook alerts/reports block private/internal hosts by default
Webhook alert/report dispatch (`WebhookNotification.send`) now validates the target URL's host against the same private/internal-IP block applied to dataset import URLs. If the resolved host is in a loopback, link-local, private (RFC-1918), shared-CGNAT, or multicast range, the webhook is rejected with `NotificationParamException`.
Deployments that intentionally point webhooks at internal targets (chatops bridges, internal automation servers, on-premises Mattermost/Rocket.Chat, etc.) can opt out by setting `ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS = True` in `superset_config.py`. This mirrors the existing `DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS` opt-out for dataset imports.
### Impala cancel_query blocks private/internal hosts by default
The Impala engine spec's `cancel_query` issues an HTTP request from the Superset backend to the host configured on the Impala database connection. That host is now validated before the request: if it resolves to a private/internal IP range, the cancel call is refused and a warning is logged. Operators whose Impala cluster runs on an internal network can opt out by setting `IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS = True` in `superset_config.py`. This mirrors the dataset-import and webhook opt-out flags.
### Map chart renderer and OpenStreetMap migration behavior
The MapLibre migration for deck.gl charts preserves saved non-Mapbox styles on

View File

@@ -222,7 +222,7 @@ development = [
"pip",
"polib", # used by scripts/translations/ and their unit tests
"pre-commit",
"progress>=1.6.1,<2",
"progress>=1.5,<2",
"psutil",
"pyfakefs",
"pyinstrument>=5.1.2,<6",

View File

@@ -686,7 +686,7 @@ prison==0.2.1
# via
# -c requirements/base-constraint.txt
# flask-appbuilder
progress==1.6.1
progress==1.6
# via apache-superset
prompt-toolkit==3.0.51
# via

View File

@@ -1208,7 +1208,9 @@ const DatabaseModal: FunctionComponent<DatabaseModalProps> = ({
// For all other options, sort alphabetically
return String(a.label).localeCompare(String(b.label));
}}
getPopupContainer={() => document.body}
getPopupContainer={triggerNode =>
triggerNode.parentElement || document.body
}
dropdownStyle={{ maxHeight: 400, overflow: 'auto' }}
/>
<Alert

View File

@@ -103,6 +103,19 @@ class DatasourceTypeUpdateRequiredValidationError(ValidationError):
)
class ChartQueryContextDatasourceMismatchValidationError(ValidationError):
"""
Raised when a query-context-only update carries a datasource that does not
match the chart's own datasource.
"""
def __init__(self) -> None:
super().__init__(
_("The query context datasource does not match the chart datasource"),
field_name="query_context",
)
class ChartNotFoundError(CommandException):
message = "Chart not found."

View File

@@ -29,6 +29,7 @@ from superset.commands.chart.exceptions import (
ChartForbiddenError,
ChartInvalidError,
ChartNotFoundError,
ChartQueryContextDatasourceMismatchValidationError,
ChartUpdateFailedError,
DashboardsForbiddenError,
DashboardsNotFoundValidationError,
@@ -41,6 +42,7 @@ from superset.exceptions import SupersetSecurityException
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.tags.models import ObjectType
from superset.utils import json
from superset.utils.decorators import on_error, transaction
logger = logging.getLogger(__name__)
@@ -101,6 +103,52 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
if not security_manager.is_owner(dash):
raise DashboardsForbiddenError()
def _validate_query_context_datasource(
self, exceptions: list[ValidationError]
) -> None:
"""
Ensure a query-context-only update keeps the chart's own datasource.
The submitted query context is only verified when it carries a parseable
``datasource`` object; a payload that references a different datasource than
the chart's persisted one is rejected. Payloads without a datasource fall
back to the chart's datasource at execution time and need no check.
"""
if not self._model:
return
raw_query_context = self._properties.get("query_context")
if not raw_query_context:
return
try:
query_context = json.loads(raw_query_context)
except (TypeError, ValueError):
# An unparseable payload cannot be verified or replayed; leave it for
# downstream handling rather than guessing at its intent.
return
datasource = (
query_context.get("datasource") if isinstance(query_context, dict) else None
)
if not isinstance(datasource, dict):
return
try:
ids_match = int(datasource["id"]) == self._model.datasource_id
except (KeyError, TypeError, ValueError):
ids_match = False
# A datasource object must carry a type that matches the chart's own.
# Treating a missing type as valid would let an id-only payload through,
# and query-context loading reads datasource["type"] directly, so that
# payload raises KeyError when the saved context is later replayed.
datasource_type = datasource.get("type")
types_match = str(datasource_type) == self._model.datasource_type
if not ids_match or not types_match:
exceptions.append(ChartQueryContextDatasourceMismatchValidationError())
def validate(self) -> None: # noqa: C901
exceptions: list[ValidationError] = []
dashboard_ids = self._properties.get("dashboards")
@@ -144,6 +192,9 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
security_manager.raise_for_access(chart=self._model)
except SupersetSecurityException as ex:
raise ChartForbiddenError() from ex
# Keep the refreshed payload bound to the chart's own datasource so it
# cannot be repointed at an unrelated one.
self._validate_query_context_datasource(exceptions)
# validate tags
try:

View File

@@ -31,15 +31,12 @@ import yaml
from superset.commands.base import BaseCommand
from superset.commands.dashboard.exceptions import DashboardNotFoundError
from superset.common.db_query_status import QueryStatus
from superset.daos.dashboard import DashboardDAO
from superset.exceptions import SupersetSecurityException
if TYPE_CHECKING:
from superset.connectors.sqla.models import SqlaTable
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.superset_typing import QueryObjectDict
from superset.sql.parse import SQLStatement, Table
@@ -238,6 +235,8 @@ def export_dataset_data(
sample_rows: int | None = None,
) -> bytes | None:
"""Export dataset data to Parquet format. Returns bytes or None on failure."""
import pandas as pd # pylint: disable=import-outside-toplevel
from superset import db # pylint: disable=import-outside-toplevel
# Ensure dataset is attached to session and relationships are loaded
@@ -252,50 +251,35 @@ def export_dataset_data(
logger.warning("Dataset %s has no database", dataset.table_name)
return None
# Only export rows the requester is entitled to read. The dataset's own
# access check is applied here, and the rows below are fetched through the
# dataset's query builder (the same path the chart-data API uses) so that
# per-row filters are applied consistently. A requester without access to
# the dataset yields no data file rather than the raw underlying table.
try:
dataset.raise_for_access()
except SupersetSecurityException:
logger.info(
"Skipping data export for dataset %s: requester not entitled",
dataset.table_name,
)
return None
columns = [col.column_name for col in dataset.columns if not col.expression]
if not columns:
logger.warning("No columns to export for %s", dataset.table_name)
return None
try:
logger.info("Exporting data for %s to Parquet...", dataset.table_name)
# Fetch through the dataset's query builder so that the projection,
# per-row filters, and (for virtual datasets) the wrapping query are
# produced the same way as the chart-data path. The row cap is applied
# at the SQL level via row_limit rather than after a full table read.
query_obj: QueryObjectDict = {
"columns": columns,
"metrics": [],
"orderby": [],
"is_timeseries": False,
"filter": [],
"extras": {},
"row_limit": sample_rows,
}
result = dataset.query(query_obj)
if result.status == QueryStatus.FAILED:
# The query path reports failures via status rather than raising;
# omit the data file instead of writing an empty/partial Parquet.
logger.warning(
"Query failed while exporting data for %s", dataset.table_name
)
return None
df = result.df
# Check if this is a virtual dataset (SQL-based)
if dataset.sql:
sql = dataset.sql
else:
# For physical tables, build SELECT query from columns
columns = [col.column_name for col in dataset.columns if not col.expression]
if not columns:
logger.warning("No columns to export for %s", dataset.table_name)
return None
# Build simple SELECT query (quote identifiers to handle spaces/keywords)
column_list = ", ".join(f'"{c}"' for c in columns)
quoted_table = f'"{dataset.table_name}"'
if dataset.schema:
table_ref = f'"{dataset.schema}".{quoted_table}'
else:
table_ref = quoted_table
sql = f"SELECT {column_list} FROM {table_ref}" # noqa: S608
with dataset.database.get_sqla_engine() as engine:
df = pd.read_sql(sql, engine)
if sample_rows and len(df) > sample_rows:
df = df.head(sample_rows)
logger.info("Sampled to %d rows", sample_rows)
# Write to bytes buffer
buf = BytesIO()

View File

@@ -16,12 +16,9 @@
# under the License.
import gzip
import logging
import os
import re
from typing import Any
from urllib import request
from urllib.parse import urljoin, urlparse
from urllib.request import HTTPRedirectHandler
import pandas as pd
from flask import current_app as app
@@ -43,35 +40,9 @@ from superset.models.core import Database
from superset.sql.parse import Table
from superset.utils import json
from superset.utils.core import get_user
from superset.utils.network import is_safe_host
logger = logging.getLogger(__name__)
class _ValidatingRedirectHandler(HTTPRedirectHandler):
"""Re-validates the redirect target URL before following any HTTP redirect.
Prevents bypasses where an initial URL passes validation but a subsequent
redirect points to a disallowed destination.
"""
def redirect_request(
self,
req: request.Request,
fp: Any,
code: int,
msg: str,
headers: Any,
newurl: str,
) -> request.Request | None:
"""Validate each redirect target before delegating to the parent handler."""
# Resolve relative redirects against the originating request URL so that
# validate_data_uri receives a fully-qualified URL in all cases.
absolute_url = urljoin(req.full_url, newurl)
validate_data_uri(absolute_url)
return super().redirect_request(req, fp, code, msg, headers, newurl)
CHUNKSIZE = 512
VARCHAR = re.compile(r"VARCHAR\((\d+)\)", re.IGNORECASE)
@@ -117,37 +88,12 @@ def get_dtype(df: pd.DataFrame, dataset: SqlaTable) -> dict[str, VisitableType]:
def validate_data_uri(data_uri: str) -> None:
"""
Validate that the data URI is permitted for dataset import.
Validate that the data URI is configured on DATASET_IMPORT_ALLOWED_URLS
has a valid URL.
Local ``file://`` URIs are allowed only when the path is confined to the
bundled examples folder. All other URIs must match a pattern in
``DATASET_IMPORT_ALLOWED_DATA_URLS`` *and* resolve to a publicly-routable host.
:param data_uri: the URI to validate
:raises DatasetForbiddenDataURI: if the URI is not permitted
:param data_uri:
:return:
"""
parsed = urlparse(data_uri)
# ``urlparse`` lower-cases the scheme, so gating on it (rather than a
# case-sensitive ``startswith("file://")``) also rejects mixed-case
# variants like ``FiLe://`` that would otherwise skip the local-file
# sandbox check below.
if parsed.scheme == "file":
from urllib.request import url2pathname
from superset.examples.helpers import get_examples_folder
# Reject non-local authority components (e.g. file://remotehost/path).
if parsed.netloc and parsed.netloc.lower() != "localhost":
raise DatasetForbiddenDataURI()
# url2pathname handles URL-encoded characters and platform path separators.
file_path = url2pathname(parsed.path)
# Resolve symlinks and relative components before comparing.
real_path = os.path.realpath(file_path)
examples_folder = os.path.realpath(get_examples_folder())
if not real_path.startswith(examples_folder + os.sep):
raise DatasetForbiddenDataURI()
return
allowed_urls = app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"]
for allowed_url in allowed_urls:
try:
@@ -158,12 +104,6 @@ def validate_data_uri(data_uri: str) -> None:
)
raise
if match:
if not app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"]:
hostname = parsed.hostname
# Fail-closed: reject URIs that have no parseable hostname as
# well as those that resolve to non-public addresses.
if not hostname or not is_safe_host(hostname):
raise DatasetForbiddenDataURI()
return
raise DatasetForbiddenDataURI()
@@ -342,8 +282,7 @@ def load_data(data_uri: str, dataset: SqlaTable, database: Database) -> None:
validate_data_uri(data_uri)
logger.info("Downloading data from %s", data_uri)
opener = request.build_opener(_ValidatingRedirectHandler)
data = opener.open(data_uri) # pylint: disable=consider-using-with # noqa: S310
data = request.urlopen(data_uri) # pylint: disable=consider-using-with # noqa: S310
if data_uri.endswith(".gz"):
data = gzip.open(data)
df = pd.read_csv(data, encoding="utf-8")

View File

@@ -541,14 +541,10 @@ class QueryContextProcessor:
:raises SupersetSecurityException: If the user cannot access the resource
"""
# Evaluate access before validating the queries: query validation
# renders the request's filter expressions, so the access decision must
# come first to avoid rendering caller-supplied input for a resource the
# caller is not allowed to access.
for query in self._query_context.queries:
query.validate()
if self._qc_datasource.type == DatasourceType.QUERY:
security_manager.raise_for_access(query=self._qc_datasource)
else:
security_manager.raise_for_access(query_context=self._query_context)
for query in self._query_context.queries:
query.validate()

View File

@@ -2058,21 +2058,6 @@ REPORT_MINIMUM_INTERVAL = int(timedelta(minutes=0).total_seconds())
# Enforce HTTPS for webhook alerts/reports
ALERT_REPORTS_WEBHOOK_HTTPS_ONLY = True
# When True, webhook alert/report dispatch is permitted to call private/internal
# IP addresses (RFC-1918, loopback, link-local). Intended for deployments where
# the webhook target is on an internal network (a chatops bridge, an internal
# Mattermost/Rocket.Chat, an automation server, etc.). Leave False (the default)
# in any internet-facing deployment.
ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS: bool = False
# When True, Impala's cancel_query HTTP call is permitted to target hosts in
# private/internal IP ranges (RFC-1918, loopback, link-local). Intended for
# operators whose Impala cluster runs on an internal network. Leave False (the
# default) in any deployment where untrusted users can create Impala database
# connections, so a maliciously-configured impala:// URL cannot be used to
# trigger outbound requests to internal targets via the cancel endpoint.
IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS: bool = False
# A custom prefix to use on all Alerts & Reports emails
EMAIL_REPORTS_SUBJECT_PREFIX = "[Report] "
@@ -2362,21 +2347,13 @@ PREVENT_UNSAFE_DB_CONNECTIONS = True
# If true all default urls on datasets will be handled as relative URLs by the frontend
PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True
# Define a list of allowed URL patterns (regex) for dataset data imports (v1).
# Define a list of allowed URLs for dataset data imports (v1).
# Simple example to only allow URLs that belong to certain domains:
# DATASET_IMPORT_ALLOWED_DATA_URLS = [
# ALLOWED_IMPORT_URL_DOMAINS = [
# r"^https://.+\.domain1\.com\/?.*", r"^https://.+\.domain2\.com\/?.*"
# ]
# Local file:// URIs used for bundled example data are always permitted
# regardless of this setting.
DATASET_IMPORT_ALLOWED_DATA_URLS = [r".*"]
# When True, dataset import is permitted to fetch data from private/internal
# IP addresses (RFC-1918, loopback, link-local). Intended for air-gapped or
# on-premises deployments where the data source is on an internal network.
# Leave False (the default) in any internet-facing deployment.
DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS: bool = False
# Path used to store SSL certificates that are generated when using custom certs.
# Defaults to temporary directory.
# Example: SSL_CERT_PATH = "/certs"

View File

@@ -2477,27 +2477,6 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
return None
@staticmethod
def validate_cancel_query_id(
cancel_query_id: str | None,
pattern: str = r"^\d+$",
) -> bool:
"""
Validate that a cancel_query_id matches expected format.
This is a defense-in-depth measure to prevent SQL injection in cancel_query
implementations that use string interpolation. While cancel_query_id typically
comes from trusted database sources (e.g., CONNECTION_ID()), validation ensures
safety even if the data source is compromised.
:param cancel_query_id: The query identifier to validate
:param pattern: Regex pattern to match (default: numeric only)
:return: True if valid, False otherwise
"""
if cancel_query_id is None:
return False
return bool(re.fullmatch(pattern, str(cancel_query_id)))
@classmethod
def cancel_query( # pylint: disable=unused-argument
cls,

View File

@@ -32,7 +32,6 @@ from superset import db
from superset.constants import QUERY_EARLY_CANCEL_KEY, TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec, DatabaseCategory
from superset.models.sql_lab import Query
from superset.utils.network import is_safe_host
if TYPE_CHECKING:
from superset.models.core import Database
@@ -205,37 +204,13 @@ class ImpalaEngineSpec(BaseEngineSpec):
:param cursor: New cursor instance to the db of the query
:param query: Query instance
:param cancel_query_id: Impala query ID in format "hex:hex"
:param cancel_query_id: impala db not need
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent URL injection
# Impala query IDs are in "hex:hex" form (16 hex chars per side)
if not cls.validate_cancel_query_id(
cancel_query_id, r"^[A-Fa-f0-9]{16}:[A-Fa-f0-9]{16}$"
):
return False
try:
impala_host = query.database.url_object.host
# The cancel call issues an outbound HTTP request from the
# Superset backend to whatever host the DB connection was
# configured with; validate it before the call to keep this
# path consistent with the dataset-import and webhook URL
# checks. Operators with internal Impala targets can opt out
# via IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS.
if not impala_host:
return False
if not app.config[
"IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS"
] and not is_safe_host(impala_host):
logger.warning(
"Impala cancel_query refused: target host is not allowed"
)
return False
url = f"http://{impala_host}:25000/cancel_query?query_id={cancel_query_id}"
# Do not follow redirects: a validated host could otherwise 30x the
# request to an internal target, bypassing the is_safe_host check.
response = requests.post(url, timeout=3, allow_redirects=False)
response = requests.post(url, timeout=3)
except Exception: # pylint: disable=broad-except
return False

View File

@@ -457,11 +457,6 @@ class MySQLEngineSpec(BasicParametersMixin, BaseEngineSpec):
:param cancel_query_id: MySQL Connection ID
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent SQL injection
# MySQL CONNECTION_ID() returns an unsigned integer
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+$"):
return False
try:
cursor.execute(f"KILL CONNECTION {cancel_query_id}")
except Exception: # pylint: disable=broad-except

View File

@@ -392,13 +392,7 @@ class OcientEngineSpec(BaseEngineSpec):
def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool:
with OcientEngineSpec.query_id_mapping_lock:
if query.id in OcientEngineSpec.query_id_mapping:
ocient_query_id = OcientEngineSpec.query_id_mapping[query.id]
# Validate query ID to prevent SQL injection (defense-in-depth)
# Ocient query IDs are alphanumeric with underscores and dashes
if not cls.validate_cancel_query_id(str(ocient_query_id), r"^[\w\-]+$"):
return False
cursor.execute(f"CANCEL {ocient_query_id}")
cursor.execute(f"CANCEL {OcientEngineSpec.query_id_mapping[query.id]}")
# Query has been cancelled, so we can safely remove the cursor from
# the cache
del OcientEngineSpec.query_id_mapping[query.id]

View File

@@ -862,11 +862,6 @@ WHERE datistemplate = false;
:param cancel_query_id: Postgres PID
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent SQL injection
# PostgreSQL pg_backend_pid() returns an integer
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+$"):
return False
try:
cursor.execute(
"SELECT pg_terminate_backend(pid) " # noqa: S608

View File

@@ -346,11 +346,6 @@ class RedshiftEngineSpec(BasicParametersMixin, PostgresBaseEngineSpec):
:param cancel_query_id: Redshift PID
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent SQL injection
# Redshift pg_backend_pid() returns an integer
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+$"):
return False
try:
logger.info("Killing Redshift PID:%s", str(cancel_query_id))
cursor.execute(

View File

@@ -577,11 +577,6 @@ class SingleStoreSpec(BasicParametersMixin, BaseEngineSpec):
:param cancel_query_id: SingleStore connection ID and aggregator ID
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent SQL injection
# SingleStore: "CONNECTION_ID AGGREGATOR_ID" (two space-separated ints)
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+(\s+\d+)?$"):
return False
try:
cursor.execute(f"KILL CONNECTION {cancel_query_id}")
except Exception: # pylint: disable=broad-except

View File

@@ -334,11 +334,6 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
:param cancel_query_id: Snowflake Session ID
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent SQL injection
# Snowflake CURRENT_SESSION() returns an alphanumeric VARCHAR session ID
if not cls.validate_cancel_query_id(cancel_query_id, r"^[a-zA-Z0-9]+$"):
return False
try:
cursor.execute(f"SELECT SYSTEM$CANCEL_ALL_QUERIES({cancel_query_id})")
except Exception: # pylint: disable=broad-except

View File

@@ -429,12 +429,6 @@ class TrinoEngineSpec(PrestoBaseEngineSpec):
:param cancel_query_id: Trino `queryId`
:return: True if query cancelled successfully, False otherwise
"""
# Validate cancel_query_id to prevent SQL injection
# Trino query IDs look like yyyymmdd_hhmmss_nnnnn_xxxxx
# (alphanumeric with underscores)
if not cls.validate_cancel_query_id(cancel_query_id, r"^[a-zA-Z0-9_]+$"):
return False
try:
cursor.execute(
f"CALL system.runtime.kill_query(query_id => '{cancel_query_id}',"

View File

@@ -735,22 +735,6 @@ def to_datetime(
return datetime.strptime(value, format)
class SupersetSandboxedEnvironment(SandboxedEnvironment):
"""
Sandbox that denies attribute access to the base environment/template
classes and to the internals of ``functools.partial`` objects, none of
which templates need. Calling such objects is unaffected; only attribute
access is denied.
"""
def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
if attr in {"environment_class", "template_class"}:
return False
if isinstance(obj, partial):
return False
return super().is_safe_attribute(obj, attr, value)
class BaseTemplateProcessor:
"""
Base class for database-specific jinja context
@@ -781,7 +765,7 @@ class BaseTemplateProcessor:
self._applied_filters = applied_filters
self._removed_filters = removed_filters
self._context: dict[str, Any] = {}
self.env: Environment = SupersetSandboxedEnvironment(undefined=DebugUndefined)
self.env: Environment = SandboxedEnvironment(undefined=DebugUndefined)
self.set_context(**kwargs)
# custom filters
@@ -951,14 +935,13 @@ class JinjaTemplateProcessor(BaseTemplateProcessor):
}
)
# The `metric` filter needs the env and full context to expand other
# filters. Bind them through a closure rather than positional args so the
# template environment is not reachable via the macro's public
# ``partial.args`` from inside a template.
def metric_with_context(metric_key: str, dataset_id: int | None = None) -> str:
return metric_macro(self.env, self._context, metric_key, dataset_id)
self._context["metric"] = partial(safe_proxy, metric_with_context)
# The `metric` filter needs the full context, in order to expand other filters
self._context["metric"] = partial(
safe_proxy,
metric_macro,
self.env,
self._context,
)
class NoOpTemplateProcessor(BaseTemplateProcessor):

View File

@@ -34,19 +34,6 @@ superset mcp run --port 5009
# http://localhost:5009/mcp/
"""
import warnings
# authlib.jose is deprecated in authlib 1.3+ in favour of joserfc, but fastmcp
# still imports it internally. This filter must live here — it runs when any
# superset.mcp_service submodule is first imported, which is before fastmcp
# triggers the authlib.jose import. Placed in __init__.py so both the
# production server path and test imports get it without duplicating the
# filterwarnings call in every affected module.
warnings.filterwarnings(
"ignore",
message=r"authlib\.jose module is deprecated",
)
__version__ = "1.0.0"
# Tools are auto-registered when imported by the MCP service

View File

@@ -75,13 +75,6 @@ def _suppress_third_party_warnings() -> None:
category=FutureWarning,
module=r"google\..*",
)
# authlib.jose deprecation warning is suppressed at package init time
# (superset/mcp_service/__init__.py), but add it here too for any late
# imports that may occur after tool execution begins.
warnings.filterwarnings(
"ignore",
message=r"authlib\.jose module is deprecated",
)
class FastMCPValidationFilter(logging.Filter):

View File

@@ -32,7 +32,6 @@ from superset.reports.notifications.exceptions import (
)
from superset.utils import json
from superset.utils.decorators import statsd_gauge
from superset.utils.network import is_safe_host
logger = logging.getLogger(__name__)
@@ -94,39 +93,6 @@ class WebhookNotification(BaseNotification):
)
return files
def _validate_webhook_url(self, url: str) -> None:
"""
Validate the webhook target URL before dispatch.
Checks that the scheme is HTTP(S) (and HTTPS when required by config),
that a hostname is present, and, unless the operator opts out via
``ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS``, that the host does not
resolve to a private/internal address.
:raises NotificationParamException: if any of the above checks fail.
"""
parsed = urlparse(url)
scheme = parsed.scheme.lower()
if scheme not in ("http", "https"):
raise NotificationParamException(
"Webhook failed: only HTTP and HTTPS webhook URLs are supported."
)
if current_app.config["ALERT_REPORTS_WEBHOOK_HTTPS_ONLY"] and scheme != "https":
raise NotificationParamException(
"Webhook failed: HTTPS is required by config for webhook URLs."
)
if not parsed.hostname:
raise NotificationParamException(
"Webhook failed: URL must include a valid hostname."
)
# Operators with internal webhook targets (chatops bridges, internal
# automation, etc.) can opt out of the private-IP block via
# ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS.
if current_app.config["ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS"]:
return
if not is_safe_host(parsed.hostname):
raise NotificationParamException("Webhook URL target host is not allowed.")
@backoff.on_exception(
backoff.expo, NotificationUnprocessableException, factor=10, base=2, max_tries=5
)
@@ -138,7 +104,11 @@ class WebhookNotification(BaseNotification):
is not enabled."
)
wh_url = self._get_webhook_url()
self._validate_webhook_url(wh_url)
if current_app.config["ALERT_REPORTS_WEBHOOK_HTTPS_ONLY"]:
if urlparse(wh_url).scheme.lower() != "https":
raise NotificationParamException(
"Webhook failed: HTTPS is required by config for webhook URLs."
)
payload = self._get_req_payload()
files = self._get_files()
@@ -151,17 +121,9 @@ class WebhookNotification(BaseNotification):
else:
data[key] = value
response = requests.post(
wh_url,
data=data,
files=files,
timeout=60,
allow_redirects=False,
)
response = requests.post(wh_url, data=data, files=files, timeout=60)
else:
response = requests.post(
wh_url, json=payload, timeout=60, allow_redirects=False
)
response = requests.post(wh_url, json=payload, timeout=60)
logger.info(
"Webhook sent to %s, status code: %s", wh_url, response.status_code
@@ -177,14 +139,6 @@ class WebhookNotification(BaseNotification):
f"Webhook failed with status code {response.status_code}: \
{response.text}"
)
if response.status_code >= 300:
# Redirects are intentionally not followed (allow_redirects=False),
# so a 3xx means the request never reached the final target. Treat
# it as a failure rather than silently reporting success.
raise NotificationParamException(
f"Webhook returned an unfollowed redirect "
f"(status code {response.status_code})"
)
except requests.exceptions.RequestException as ex:
raise NotificationUnprocessableException(str(ex)) from ex

View File

@@ -349,145 +349,6 @@ def freeze_value(value: Any) -> str:
return json.dumps(value, sort_keys=True)
def _native_filter_allowed_targets(
query_context: "QueryContext", form_data: dict[str, Any]
) -> Optional[tuple[set[str], set[str]]]:
"""
Return ``(allowed_columns, allowed_metrics)`` a native-filter data request
may read, or ``None`` when the request cannot be tied to a native filter on
the requesting dashboard (in which case the caller must fail closed).
``allowed_columns`` are the target column(s) of the filter identified by
``native_filter_id`` that point at the request's datasource. ``allowed_metrics``
are the saved-metric name(s) the filter is configured to sort its values by
(``controlValues.sortMetric``), which a legitimate value lookup sends.
"""
# pylint: disable=import-outside-toplevel
from superset import db
from superset.models.dashboard import Dashboard
native_filter_id = form_data.get("native_filter_id")
dashboard_id = form_data.get("dashboardId")
if not native_filter_id or not dashboard_id:
return None
dashboard = (
db.session.query(Dashboard).filter(Dashboard.id == dashboard_id).one_or_none()
)
if dashboard is None or not dashboard.json_metadata:
return None
try:
metadata = json.loads(dashboard.json_metadata)
except (TypeError, ValueError):
return None
datasource = getattr(query_context, "datasource", None)
datasource_id = datasource.data.get("id") if datasource else None
allowed_columns: set[str] = set()
allowed_metrics: set[str] = set()
for fltr in metadata.get("native_filter_configuration", []):
if fltr.get("id") != native_filter_id:
continue
for target in fltr.get("targets", []):
column = target.get("column")
if (
target.get("datasetId") == datasource_id
and isinstance(column, dict)
and column.get("name")
):
allowed_columns.add(column["name"])
# The filter may be configured to sort its values by a saved metric; a
# legitimate value lookup then sends that metric name.
sort_metric = (fltr.get("controlValues") or {}).get("sortMetric") or fltr.get(
"sortMetric"
)
if isinstance(sort_metric, str):
allowed_metrics.add(sort_metric)
# Filter ids are unique, so the matching filter is the only one.
break
return allowed_columns, allowed_metrics
def _native_filter_term_allowed(
term: Any, allowed_columns: set[str], allowed_metrics: set[str]
) -> bool:
"""
Whether a value-returning term (metric or order-by expression) is allowed on
a native-filter request: a plain reference to a target column or the
configured sort metric, or a simple aggregate over a target column. Free-form
SQL terms and other saved metrics cannot be validated and are not allowed.
"""
if isinstance(term, str):
return term in allowed_columns or term in allowed_metrics
if isinstance(term, dict) and term.get("expressionType") == "SIMPLE":
return (term.get("column") or {}).get("column_name") in allowed_columns
return False
def _native_filter_query_modified(
query: Any, allowed_columns: set[str], allowed_metrics: set[str]
) -> bool:
"""Whether a single query in a native-filter request reads beyond its targets."""
# Columns and group-by may only reference target column(s); adhoc (free-form
# SQL) columns cannot be validated, so reject them.
for key in ("columns", "groupby"):
for col in getattr(query, key, None) or []:
if not isinstance(col, str) or col not in allowed_columns:
return True
for metric in getattr(query, "metrics", None) or []:
if not _native_filter_term_allowed(metric, allowed_columns, allowed_metrics):
return True
# order-by entries are ``(expression, asc)`` pairs.
for order in getattr(query, "orderby", None) or []:
expr = order[0] if isinstance(order, (list, tuple)) and order else order
if not _native_filter_term_allowed(expr, allowed_columns, allowed_metrics):
return True
return False
def _native_filter_request_modified(query_context: "QueryContext") -> bool:
"""
Validate a chartless data request that targets a native filter.
Only requests identified as native-filter lookups (by the ``NATIVE_FILTER``
type marker or a ``native_filter_id``) are constrained; other chartless
paths (drill-to-detail, drill-by, samples) carry neither and are validated by
the datasource-access checks in raise_for_access, so they are not treated as
modified here.
A native filter may only read the column(s) it targets on the dashboard it
belongs to. The request is treated as modified (and therefore rejected for
guest users) when it cannot be tied to a native filter on the requesting
dashboard, or when any value-returning term (column, group-by, metric, or
order-by) references something other than a target column, a simple
aggregate over a target column, or the filter's configured sort metric.
Free-form SQL terms and saved metrics other than the configured sort metric
are rejected. Row-restricting clauses (``filter``/``extras``) are not
constrained here: cross-filters legitimately reference other columns and
they do not return column values; that blind-inference surface is a separate
concern shared with the chart path.
"""
form_data = query_context.form_data or {}
if not (
form_data.get("type") == "NATIVE_FILTER" or form_data.get("native_filter_id")
):
return False
targets = _native_filter_allowed_targets(query_context, form_data)
# Fail closed when the request cannot be tied to a native filter.
if targets is None:
return True
# Empty allowed sets (filter resolved but no matching column/metric target)
# intentionally deny every value-returning term below.
allowed_columns, allowed_metrics = targets
return any(
_native_filter_query_modified(query, allowed_columns, allowed_metrics)
for query in query_context.queries
)
def query_context_modified(query_context: "QueryContext") -> bool:
"""
Check if a query context has been modified.
@@ -498,14 +359,8 @@ def query_context_modified(query_context: "QueryContext") -> bool:
form_data = query_context.form_data
stored_chart = query_context.slice_
# Native-filter data requests have no associated chart (no slice_id). Rather
# than accepting any payload, constrain them to the column(s) the dashboard's
# native filter is allowed to target; other chartless paths keep prior
# behavior (see _native_filter_request_modified).
if stored_chart is None:
return _native_filter_request_modified(query_context)
if form_data is None:
# native filter requests
if form_data is None or stored_chart is None:
return False
# cannot request a different chart

View File

@@ -14,65 +14,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import ipaddress
import platform
import socket
import subprocess
# Networks that must never be reached via user-supplied hostnames.
# Includes loopback, RFC-1918 private ranges, link-local (covers cloud
# metadata endpoints such as 169.254.169.254), shared address space
# (RFC 6598, 100.64.0.0/10), multicast (ip.is_global returns True for
# multicast addresses in Python, so explicit blocking is required), and
# IPv6 equivalents.
_SSRF_UNSAFE_NETWORKS = (
ipaddress.ip_network("0.0.0.0/8"),
ipaddress.ip_network("10.0.0.0/8"),
ipaddress.ip_network("100.64.0.0/10"),
ipaddress.ip_network("127.0.0.0/8"),
ipaddress.ip_network("169.254.0.0/16"),
ipaddress.ip_network("172.16.0.0/12"),
ipaddress.ip_network("192.168.0.0/16"),
ipaddress.ip_network("224.0.0.0/4"), # IPv4 multicast — is_global is True in Python
ipaddress.ip_network("::1/128"),
ipaddress.ip_network("fc00::/7"),
ipaddress.ip_network("fe80::/10"),
ipaddress.ip_network("ff00::/8"), # IPv6 multicast
)
PORT_TIMEOUT = 5
PING_TIMEOUT = 5
def is_safe_host(host: str) -> bool:
"""
Return True if ``host`` resolves exclusively to public, globally-routable
IP addresses.
Returns False if any resolved address falls within a private, loopback,
link-local, or otherwise non-routable range. An unresolvable host also
returns False.
"""
try:
results = socket.getaddrinfo(host, None)
except socket.gaierror:
return False
if not results:
return False
for _, _, _, _, sockaddr in results:
try:
ip = ipaddress.ip_address(sockaddr[0])
except ValueError:
return False
# Unwrap IPv4-mapped IPv6 addresses (e.g. ::ffff:127.0.0.1) so they
# are checked against the IPv4 unsafe networks rather than bypassing.
if isinstance(ip, ipaddress.IPv6Address) and ip.ipv4_mapped:
ip = ip.ipv4_mapped
if not ip.is_global or any(ip in net for net in _SSRF_UNSAFE_NETWORKS):
return False
return True
def is_port_open(host: str, port: int) -> bool:
"""
Test if a given port in a host is open.

View File

@@ -2961,70 +2961,6 @@ class TestDashboardApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCas
assert "Content-Disposition" in rv.headers
assert "_example.zip" in rv.headers["Content-Disposition"]
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_export_as_example_data_respects_row_level_filters(self) -> None:
"""
Dashboard API: export_as_example with export_data must apply the
dataset's row-level filters, so a restricted user only receives the
rows they are entitled to (not the full underlying table).
"""
import pandas as pd
from superset.connectors.sqla.models import RowLevelSecurityFilter
table = self.get_table(name="birth_names")
gamma = security_manager.find_role("Gamma")
# birth_names access so the row-level filter (not the access check) is
# what scopes the result, and permission to call the endpoint. Track
# what we grant so the shared test role is restored afterwards.
granted = []
for perm, view in (
("datasource_access", table.perm),
("can_export_as_example", "Dashboard"),
("can_export", "Dashboard"),
):
pvm = security_manager.find_permission_view_menu(perm, view)
if pvm and pvm not in gamma.permissions:
gamma.permissions.append(pvm)
granted.append(pvm)
rls = RowLevelSecurityFilter(
name="export_example_girls_only",
filter_type="Regular",
clause="gender = 'girl'",
group_key="gender",
)
rls.tables.append(table)
rls.roles.append(gamma)
db.session.add(rls)
db.session.commit()
try:
self.login(GAMMA_USERNAME)
dashboard_id = get_dashboards_ids(["births"])[0]
uri = (
f"api/v1/dashboard/{dashboard_id}/export_as_example/"
"?export_data=true&sample_rows=500"
)
rv = self.client.get(uri)
assert rv.status_code == 200
with ZipFile(BytesIO(rv.data)) as zf:
parquet_files = [n for n in zf.namelist() if n.endswith(".parquet")]
assert parquet_files, f"expected a data file: {zf.namelist()}"
for name in parquet_files:
df = pd.read_parquet(BytesIO(zf.read(name)))
if "gender" in df.columns:
unexpected = set(df["gender"].unique()) - {"girl"}
assert not unexpected, (
f"export returned rows outside the configured "
f"row-level filter: {unexpected}"
)
finally:
db.session.delete(rls)
for pvm in granted:
if pvm in gamma.permissions:
gamma.permissions.remove(pvm)
db.session.commit()
@patch("superset.commands.database.importers.v1.utils.add_permissions")
def test_import_dashboard(self, mock_add_permissions):
"""

View File

@@ -17,10 +17,11 @@
import pytest
from pytest_mock import MockerFixture
from superset.commands.chart.exceptions import ChartForbiddenError
from superset.commands.chart.exceptions import ChartForbiddenError, ChartInvalidError
from superset.commands.chart.update import UpdateChartCommand
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.utils import json
def _ownership_exc() -> SupersetSecurityException:
@@ -151,3 +152,78 @@ def test_update_chart_owner_can_perform_regular_update(
find_by_id.assert_called_once_with(1)
raise_for_ownership.assert_called_once()
def _query_context_payload(datasource: object) -> dict[str, object]:
"""Build a query-context-only update payload targeting ``datasource``."""
return {
"query_context": json.dumps({"datasource": datasource, "queries": []}),
"query_context_generation": True,
}
def test_update_chart_query_context_matching_datasource_is_allowed(
mocker: MockerFixture,
) -> None:
"""A query context that targets the chart's own datasource is accepted."""
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
find_by_id.return_value = mocker.MagicMock(
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
)
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
mocker.patch("superset.commands.chart.update.security_manager.raise_for_access")
UpdateChartCommand(
1, _query_context_payload({"id": 42, "type": "table"})
).validate()
@pytest.mark.parametrize(
"datasource",
[
{"id": 99, "type": "table"}, # different id
{"id": 42, "type": "query"}, # different type
{"id": "99", "type": "table"}, # different id as string
{"id": 42}, # matching id but missing type
],
)
def test_update_chart_query_context_mismatched_datasource_is_rejected(
mocker: MockerFixture,
datasource: dict[str, object],
) -> None:
"""A query context pointing at a different datasource is rejected with a 4xx."""
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
find_by_id.return_value = mocker.MagicMock(
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
)
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
mocker.patch("superset.commands.chart.update.security_manager.raise_for_access")
with pytest.raises(ChartInvalidError):
UpdateChartCommand(1, _query_context_payload(datasource)).validate()
@pytest.mark.parametrize(
"query_context",
[
"{}", # no datasource key
'{"datasource": null}', # null datasource
"not-json", # unparseable payload
],
)
def test_update_chart_query_context_without_datasource_is_allowed(
mocker: MockerFixture,
query_context: str,
) -> None:
"""Payloads with no verifiable datasource fall back to the chart's own."""
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
find_by_id.return_value = mocker.MagicMock(
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
)
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
mocker.patch("superset.commands.chart.update.security_manager.raise_for_access")
UpdateChartCommand(
1,
{"query_context": query_context, "query_context_generation": True},
).validate()

View File

@@ -16,9 +16,6 @@
# under the License.
from __future__ import annotations
from collections.abc import Iterator
from io import BytesIO
from typing import Any
from unittest.mock import MagicMock, patch
from uuid import uuid4
@@ -30,14 +27,10 @@ from superset.commands.dashboard.export_example import (
_make_bytes_generator,
_make_yaml_generator,
export_chart,
export_dataset_data,
export_dataset_yaml,
ExportExampleCommand,
sanitize_filename,
)
from superset.common.db_query_status import QueryStatus
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
def test_sanitize_filename_basic():
@@ -328,98 +321,3 @@ def test_export_example_command_no_data():
assert "dataset.yaml" in files
assert "data.parquet" not in files
assert "dashboard.yaml" in files
def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock:
"""A dataset mock whose query() returns ``rows`` as a DataFrame."""
import pandas as pd
dataset = MagicMock()
dataset.table_name = "private_table"
dataset.database = MagicMock() # truthy
dataset.columns = [
MagicMock(column_name=name, expression=None) for name in ("uid", "data")
]
result = MagicMock()
result.status = QueryStatus.SUCCESS
result.df = pd.DataFrame(rows)
dataset.query.return_value = result
return dataset
@pytest.fixture
def patched_db() -> Iterator[MagicMock]:
"""Patch ``superset.db`` so merge() returns the dataset unchanged."""
with patch("superset.db") as mock_db:
mock_db.session.merge.side_effect = lambda d: d
yield mock_db
def test_export_dataset_data_skips_when_access_denied(patched_db: MagicMock) -> None:
"""
A requester without access to the dataset must get no data file, and the
underlying rows must never be fetched (no fallback raw read).
"""
dataset = _make_data_export_dataset([{"uid": 1, "data": "secret"}])
dataset.raise_for_access.side_effect = SupersetSecurityException(
SupersetError(
message="denied",
error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
level=ErrorLevel.ERROR,
)
)
assert export_dataset_data(dataset) is None
dataset.raise_for_access.assert_called_once()
dataset.query.assert_not_called()
def test_export_dataset_data_fetches_through_query_path(patched_db: MagicMock) -> None:
"""
Rows are fetched through the dataset's own query builder (which applies the
per-row filters), and exactly those rows are what gets written to Parquet.
"""
import pandas as pd
own_rows = [{"uid": 6, "data": "row-for-user-6"}]
dataset = _make_data_export_dataset(own_rows)
payload = export_dataset_data(dataset)
assert payload is not None
dataset.raise_for_access.assert_called_once()
dataset.query.assert_called_once()
# The fetch goes through query() with a column projection, not a raw read.
query_obj = dataset.query.call_args.args[0]
assert query_obj["columns"] == ["uid", "data"]
assert query_obj["is_timeseries"] is False
assert "row_limit" in query_obj
# Only the rows query() returned are exported.
exported = pd.read_parquet(BytesIO(payload))
assert exported.to_dict("records") == own_rows
def test_export_dataset_data_applies_row_limit_at_query_level(
patched_db: MagicMock,
) -> None:
"""sample_rows is passed as a SQL-level row_limit, not applied post-fetch."""
dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}])
export_dataset_data(dataset, sample_rows=5)
query_obj = dataset.query.call_args.args[0]
assert query_obj["row_limit"] == 5
def test_export_dataset_data_skips_on_failed_query(patched_db: MagicMock) -> None:
"""
The query path signals failure via status rather than raising, so a failed
query must yield no data file instead of an empty/partial Parquet.
"""
dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}])
dataset.query.return_value.status = QueryStatus.FAILED
assert export_dataset_data(dataset) is None

View File

@@ -1821,37 +1821,3 @@ def test_get_df_payload_no_warning_when_not_memory_limited() -> None:
result = processor.get_df_payload(query_obj, force_cached=False)
assert result["warning"] is None
def test_raise_for_access_evaluates_access_before_validate():
"""
Access must be evaluated before the queries are validated, because query
validation renders the request's filter expressions. When access is denied,
no query is validated (so caller-supplied input is never rendered).
"""
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.utils.core import DatasourceType
query = MagicMock()
query_context = MagicMock()
query_context.queries = [query]
query_context.datasource.type = DatasourceType.TABLE
processor = QueryContextProcessor(query_context)
denied = SupersetSecurityException(
SupersetError(
message="denied",
error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
level=ErrorLevel.ERROR,
)
)
with patch(
"superset.common.query_context_processor.security_manager.raise_for_access",
side_effect=denied,
):
with pytest.raises(SupersetSecurityException):
processor.raise_for_access()
query.validate.assert_not_called()

View File

@@ -22,7 +22,6 @@ import re
import uuid
from typing import Any
from unittest.mock import Mock, patch
from urllib import request
import pytest
from flask import current_app
@@ -810,20 +809,16 @@ def test_import_dataset_extra_empty_string(
assert sqla_table.extra is None # noqa: E711
@patch("superset.commands.dataset.importers.v1.utils.is_safe_host", return_value=True)
@patch("superset.commands.dataset.importers.v1.utils.request.build_opener")
@patch("superset.commands.dataset.importers.v1.utils.request.urlopen")
def test_import_column_allowed_data_url(
mock_build_opener: Mock,
mock_is_safe_host: Mock,
mock_urlopen: Mock,
mocker: MockerFixture,
session: Session,
) -> None:
"""
Test importing a dataset when using data key to fetch data from a URL.
"""
mock_opener = Mock()
mock_opener.open.return_value = io.StringIO("col1\nvalue1\nvalue2\n")
mock_build_opener.return_value = mock_opener
mock_urlopen.return_value = io.StringIO("col1\nvalue1\nvalue2\n")
mocker.patch.object(security_manager, "can_access", return_value=True)
@@ -1051,134 +1046,10 @@ def test_import_dataset_access_check(
(["*"], "https://host1.domain3.com/data.csv", False, re.error),
],
)
def test_validate_data_uri(
allowed_urls: list[str],
data_uri: str,
expected: bool,
exception_class: type[Exception] | None,
) -> None:
"""Tests allowlist pattern matching. is_safe_host is stubbed out so that
fake/unresolvable test hostnames do not interfere with DNS-based checks
(those are covered by the dedicated is_safe_host tests below)."""
def test_validate_data_uri(allowed_urls, data_uri, expected, exception_class):
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = allowed_urls
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
with patch(
"superset.commands.dataset.importers.v1.utils.is_safe_host",
return_value=True,
):
if expected:
validate_data_uri(data_uri)
else:
with pytest.raises(exception_class):
validate_data_uri(data_uri)
def test_validate_data_uri_file_scheme_examples_allowed() -> None:
"""file:// URIs pointing inside the examples folder are permitted."""
import os
from superset.examples.helpers import get_examples_folder
examples_folder = get_examples_folder()
uri_in_examples = (
f"file://{os.path.join(examples_folder, 'birth_names', 'data.parquet')}"
)
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = []
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
# Should not raise
validate_data_uri(uri_in_examples)
def test_validate_data_uri_file_scheme_outside_examples_blocked() -> None:
"""file:// URIs outside the examples folder are blocked."""
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = []
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
with pytest.raises(DatasetForbiddenDataURI):
validate_data_uri("file:///etc/passwd")
@pytest.mark.parametrize(
"data_uri",
["FiLe:///etc/passwd", "FILE:///etc/passwd", "file:/etc/passwd"],
)
def test_validate_data_uri_file_scheme_case_insensitive(data_uri: str) -> None:
"""Mixed-case / single-slash file URIs still go through the sandbox check
and are blocked when outside the examples folder, so they cannot skip the
local-file check via a case-sensitive scheme gate."""
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = []
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
with pytest.raises(DatasetForbiddenDataURI):
if expected:
validate_data_uri(data_uri)
@pytest.mark.parametrize(
"data_uri",
[
# Userinfo-injection: allowlist matches the trusted hostname in the
# authority but urlparse().hostname resolves to the actual target.
"https://allowed.example.com@169.254.169.254/latest/meta-data/",
"https://allowed.example.com@10.0.0.1/internal",
"https://allowed.example.com@127.0.0.1/admin",
],
)
def test_validate_data_uri_blocks_userinfo_ssrf_injection(data_uri: str) -> None:
"""Userinfo-injected private IPs must be rejected even when the leading
hostname matches an allowlist pattern."""
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
with patch(
"superset.commands.dataset.importers.v1.utils.is_safe_host",
return_value=False,
):
with pytest.raises(DatasetForbiddenDataURI):
else:
with pytest.raises(exception_class):
validate_data_uri(data_uri)
def test_validate_data_uri_allow_internal_flag_bypasses_host_check() -> None:
"""When DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS is True, internal hosts
must be permitted to support air-gapped / on-premises deployments."""
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = True
with patch(
"superset.commands.dataset.importers.v1.utils.is_safe_host",
return_value=False,
) as mock_check:
validate_data_uri("http://10.0.0.5/data.csv")
mock_check.assert_not_called()
def test_validate_data_uri_no_hostname_raises() -> None:
"""A URI that produces no parseable hostname (e.g. opaque data: URIs) must
be rejected — fail-closed: no hostname means no safe host confirmation."""
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
# urlparse("data:text/csv,...").hostname is None, which fails the
# "not hostname or not is_safe_host(hostname)" guard.
with pytest.raises(DatasetForbiddenDataURI):
validate_data_uri("data:text/csv,col1,col2")
def test_redirect_handler_blocks_disallowed_redirect_target() -> None:
"""The redirect handler must reject a redirect to a disallowed host by
re-running validate_data_uri() on the new URL before following it."""
from superset.commands.dataset.importers.v1.utils import (
_ValidatingRedirectHandler,
)
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
handler = _ValidatingRedirectHandler()
with patch(
"superset.commands.dataset.importers.v1.utils.is_safe_host",
return_value=False,
):
with pytest.raises(DatasetForbiddenDataURI):
handler.redirect_request(
request.Request("http://public.example.com/data.csv"),
None,
302,
"Found",
{},
"http://169.254.169.254/latest/meta-data/",
)

View File

@@ -1,476 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Tests for cancel_query_id validation to prevent SQL/command injection.
These tests verify that the validate_cancel_query_id method properly
rejects malicious input that could be used for SQL injection attacks.
"""
from unittest.mock import Mock, patch
class TestValidateCancelQueryId:
"""Tests for BaseEngineSpec.validate_cancel_query_id"""
def test_validate_cancel_query_id_valid_numeric(self) -> None:
"""Test that valid numeric IDs pass validation"""
from superset.db_engine_specs.base import BaseEngineSpec
assert BaseEngineSpec.validate_cancel_query_id("12345") is True
assert BaseEngineSpec.validate_cancel_query_id("1") is True
assert BaseEngineSpec.validate_cancel_query_id("999999999") is True
def test_validate_cancel_query_id_invalid_sql_injection(self) -> None:
"""Test that SQL injection payloads are rejected"""
from superset.db_engine_specs.base import BaseEngineSpec
# Common SQL injection payloads
assert (
BaseEngineSpec.validate_cancel_query_id("1; DROP TABLE users; --") is False
)
assert BaseEngineSpec.validate_cancel_query_id("1' OR '1'='1") is False
assert (
BaseEngineSpec.validate_cancel_query_id("1 UNION SELECT * FROM users")
is False
)
assert BaseEngineSpec.validate_cancel_query_id("1; DELETE FROM users;") is False
def test_validate_cancel_query_id_invalid_special_chars(self) -> None:
"""Test that special characters are rejected"""
from superset.db_engine_specs.base import BaseEngineSpec
assert BaseEngineSpec.validate_cancel_query_id("123&admin=true") is False
assert BaseEngineSpec.validate_cancel_query_id("123#fragment") is False
assert BaseEngineSpec.validate_cancel_query_id("123\n456") is False
assert BaseEngineSpec.validate_cancel_query_id("123\r\nHost: evil") is False
def test_validate_cancel_query_id_none(self) -> None:
"""Test that None is rejected"""
from superset.db_engine_specs.base import BaseEngineSpec
assert BaseEngineSpec.validate_cancel_query_id(None) is False
def test_validate_cancel_query_id_empty_string(self) -> None:
"""Test that empty string is rejected"""
from superset.db_engine_specs.base import BaseEngineSpec
assert BaseEngineSpec.validate_cancel_query_id("") is False
def test_validate_cancel_query_id_custom_pattern(self) -> None:
"""Test custom regex patterns"""
from superset.db_engine_specs.base import BaseEngineSpec
# Hex pattern with exact length (for Impala - 16 hex chars per side)
assert (
BaseEngineSpec.validate_cancel_query_id(
"abc123def4567890:789abc123def4567", r"[A-Fa-f0-9]{16}:[A-Fa-f0-9]{16}"
)
is True
)
assert (
BaseEngineSpec.validate_cancel_query_id(
"invalid:pattern!", r"[A-Fa-f0-9]{16}:[A-Fa-f0-9]{16}"
)
is False
)
# Alphanumeric with underscores (for Trino)
assert (
BaseEngineSpec.validate_cancel_query_id(
"20240101_123456_00001_abcde", r"[a-zA-Z0-9_]+"
)
is True
)
assert (
BaseEngineSpec.validate_cancel_query_id("20240101-123456", r"[a-zA-Z0-9_]+")
is False
)
class TestMySQLCancelQueryValidation:
"""Tests for MySQL cancel_query input validation"""
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
"""Test that valid MySQL connection ID works"""
from superset.db_engine_specs.mysql import MySQLEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "12345") is True
cursor_mock.execute.assert_called_once_with("KILL CONNECTION 12345")
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
"""Test that SQL injection is blocked"""
from superset.db_engine_specs.mysql import MySQLEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
# SQL injection payload should be rejected before execute is called
result = MySQLEngineSpec.cancel_query(
cursor_mock, query, "1; DROP TABLE users; --"
)
assert result is False
cursor_mock.execute.assert_not_called()
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_non_numeric_blocked(self, engine_mock: Mock) -> None:
"""Test that non-numeric IDs are blocked"""
from superset.db_engine_specs.mysql import MySQLEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "abc") is False
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "12.34") is False
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "-123") is False
cursor_mock.execute.assert_not_called()
class TestSingleStoreCancelQueryValidation:
"""Tests for SingleStore cancel_query input validation"""
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
"""Test valid SingleStore connection ID format (two space-separated integers)"""
from superset.db_engine_specs.singlestore import SingleStoreSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
# Single integer should work
assert SingleStoreSpec.cancel_query(cursor_mock, query, "12345") is True
# Two space-separated integers should work
cursor_mock.reset_mock()
assert SingleStoreSpec.cancel_query(cursor_mock, query, "12345 67890") is True
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
"""Test that SQL injection is blocked"""
from superset.db_engine_specs.singlestore import SingleStoreSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
result = SingleStoreSpec.cancel_query(
cursor_mock, query, "1; DROP TABLE users; --"
)
assert result is False
cursor_mock.execute.assert_not_called()
class TestPostgresCancelQueryValidation:
"""Tests for PostgreSQL cancel_query input validation"""
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
"""Test that valid PostgreSQL PID works"""
from superset.db_engine_specs.postgres import PostgresEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
assert PostgresEngineSpec.cancel_query(cursor_mock, query, "12345") is True
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
"""Test that SQL injection is blocked"""
from superset.db_engine_specs.postgres import PostgresEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
# SQL injection in WHERE clause
result = PostgresEngineSpec.cancel_query(cursor_mock, query, "1' OR '1'='1")
assert result is False
cursor_mock.execute.assert_not_called()
class TestRedshiftCancelQueryValidation:
"""Tests for Redshift cancel_query input validation"""
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
"""Test that valid Redshift PID works"""
from superset.db_engine_specs.redshift import RedshiftEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
assert RedshiftEngineSpec.cancel_query(cursor_mock, query, "12345") is True
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
"""Test that SQL injection is blocked"""
from superset.db_engine_specs.redshift import RedshiftEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
result = RedshiftEngineSpec.cancel_query(
cursor_mock, query, "1; DROP TABLE users; --"
)
assert result is False
cursor_mock.execute.assert_not_called()
class TestSnowflakeCancelQueryValidation:
"""Tests for Snowflake cancel_query input validation"""
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
"""Test that valid Snowflake session ID works"""
from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
# Snowflake session IDs are alphanumeric (VARCHAR)
assert (
SnowflakeEngineSpec.cancel_query(cursor_mock, query, "34359980038") is True
)
cursor_mock.reset_mock()
# Also test alphanumeric (per Snowflake docs)
assert (
SnowflakeEngineSpec.cancel_query(cursor_mock, query, "ABC123def456") is True
)
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
"""Test that SQL injection is blocked"""
from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
result = SnowflakeEngineSpec.cancel_query(
cursor_mock, query, "1); DROP TABLE users; --"
)
assert result is False
cursor_mock.execute.assert_not_called()
class TestTrinoCancelQueryValidation:
"""Tests for Trino cancel_query input validation"""
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
"""Test that valid Trino query ID works"""
from superset.db_engine_specs.trino import TrinoEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
# Trino query IDs are alphanumeric with underscores
assert (
TrinoEngineSpec.cancel_query(
cursor_mock, query, "20240101_123456_00001_abcde"
)
is True
)
@patch("sqlalchemy.engine.Engine.connect")
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
"""Test that SQL injection is blocked"""
from superset.db_engine_specs.trino import TrinoEngineSpec
from superset.models.sql_lab import Query
query = Query()
cursor_mock = engine_mock.return_value.__enter__.return_value
result = TrinoEngineSpec.cancel_query(
cursor_mock, query, "query_id'); DROP TABLE users; --"
)
assert result is False
cursor_mock.execute.assert_not_called()
class TestImpalaCancelQueryValidation:
"""Tests for Impala cancel_query input validation"""
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
@patch("requests.post")
def test_cancel_query_valid_id(
self, requests_mock: Mock, safe_host_mock: Mock
) -> None:
"""Test that valid Impala query ID works"""
from superset.db_engine_specs.impala import ImpalaEngineSpec
from superset.models.core import Database
from superset.models.sql_lab import Query
# Mock the database and query. The host-safety check (added alongside
# the cancel call) is mocked as allowed here so the test stays focused
# on query-ID validation; host validation is covered separately.
mock_db = Mock(spec=Database)
mock_db.url_object.host = "impala-host"
query = Mock(spec=Query)
query.database = mock_db
requests_mock.return_value.status_code = 200
# Valid Impala query ID format: 16 hex chars per side
result = ImpalaEngineSpec.cancel_query(
None, query, "abc123def4567890:789abc123def4567"
)
assert result is True
requests_mock.assert_called_once()
# Also test uppercase hex (should be valid)
requests_mock.reset_mock()
requests_mock.return_value.status_code = 200
result = ImpalaEngineSpec.cancel_query(
None, query, "ABC123DEF4567890:789ABC123DEF4567"
)
assert result is True
@patch("requests.post")
def test_cancel_query_url_injection_blocked(self, requests_mock: Mock) -> None:
"""Test that URL injection is blocked"""
from superset.db_engine_specs.impala import ImpalaEngineSpec
from superset.models.core import Database
from superset.models.sql_lab import Query
mock_db = Mock(spec=Database)
mock_db.url_object.host = "impala-host"
query = Mock(spec=Query)
query.database = mock_db
# URL injection payloads should be rejected
result = ImpalaEngineSpec.cancel_query(None, query, "abc123&admin=true")
assert result is False
requests_mock.assert_not_called()
result = ImpalaEngineSpec.cancel_query(None, query, "abc123#fragment")
assert result is False
requests_mock.assert_not_called()
@patch("requests.post")
def test_cancel_query_invalid_format_blocked(self, requests_mock: Mock) -> None:
"""Test that invalid format is blocked"""
from superset.db_engine_specs.impala import ImpalaEngineSpec
from superset.models.core import Database
from superset.models.sql_lab import Query
mock_db = Mock(spec=Database)
mock_db.url_object.host = "impala-host"
query = Mock(spec=Query)
query.database = mock_db
# Missing colon
assert ImpalaEngineSpec.cancel_query(None, query, "abc123def4567890") is False
# Wrong length (too short)
assert ImpalaEngineSpec.cancel_query(None, query, "abc:def") is False
# Wrong length (too long)
assert (
ImpalaEngineSpec.cancel_query(
None, query, "abc123def45678901:789abc123def45678"
)
is False
)
# Special characters
assert ImpalaEngineSpec.cancel_query(None, query, "abc!:def@ghijklmn") is False
# Non-hex characters
assert (
ImpalaEngineSpec.cancel_query(
None, query, "ghijklmnopqrstuv:ghijklmnopqrstuv"
)
is False
)
requests_mock.assert_not_called()
@patch("requests.post")
def test_cancel_query_null_host_blocked(self, requests_mock: Mock) -> None:
"""Test that missing host returns False"""
from superset.db_engine_specs.impala import ImpalaEngineSpec
from superset.models.core import Database
from superset.models.sql_lab import Query
mock_db = Mock(spec=Database)
mock_db.url_object.host = None # Null host
query = Mock(spec=Query)
query.database = mock_db
# Valid query ID but null host should fail
result = ImpalaEngineSpec.cancel_query(
None, query, "abc123def4567890:789abc123def4567"
)
assert result is False
requests_mock.assert_not_called()
# Also test empty string host
mock_db.url_object.host = ""
result = ImpalaEngineSpec.cancel_query(
None, query, "abc123def4567890:789abc123def4567"
)
assert result is False
requests_mock.assert_not_called()
class TestOcientCancelQueryValidation:
"""Tests for Ocient cancel_query input validation.
Ocient validates the query ID it looked up from ``query_id_mapping`` (which
originates from the database), so these tests seed that mapping directly.
"""
def test_cancel_query_valid_id(self) -> None:
"""Test that a valid mapped Ocient query ID is cancelled"""
from superset.db_engine_specs.ocient import OcientEngineSpec
from superset.models.sql_lab import Query
query = Mock(spec=Query)
query.id = "ocient-valid"
cursor_mock = Mock()
OcientEngineSpec.query_id_mapping[query.id] = "abc_123-DEF"
try:
assert OcientEngineSpec.cancel_query(cursor_mock, query, "") is True
cursor_mock.execute.assert_called_once_with("CANCEL abc_123-DEF")
finally:
OcientEngineSpec.query_id_mapping.pop(query.id, None)
def test_cancel_query_sql_injection_blocked(self) -> None:
"""Test that a malicious mapped query ID is rejected before execute"""
from superset.db_engine_specs.ocient import OcientEngineSpec
from superset.models.sql_lab import Query
query = Mock(spec=Query)
query.id = "ocient-injection"
cursor_mock = Mock()
OcientEngineSpec.query_id_mapping[query.id] = "1; DROP TABLE users; --"
try:
assert OcientEngineSpec.cancel_query(cursor_mock, query, "") is False
cursor_mock.execute.assert_not_called()
finally:
OcientEngineSpec.query_id_mapping.pop(query.id, None)

View File

@@ -60,13 +60,11 @@ def test_get_cancel_query_id() -> None:
)
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
@patch("requests.post")
def test_cancel_query(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
def test_cancel_query(post_mock: Mock) -> None:
query = Query()
database = Database(
database_name="test_impala",
sqlalchemy_uri="impala://impala.example.com:21050/default",
database_name="test_impala", sqlalchemy_uri="impala://localhost:21050/default"
)
query.database = database
@@ -77,20 +75,17 @@ def test_cancel_query(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
post_mock.assert_called_once_with(
"http://impala.example.com:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
"http://localhost:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
timeout=3,
allow_redirects=False,
)
assert result is True
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
@patch("requests.post")
def test_cancel_query_failed(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
def test_cancel_query_failed(post_mock: Mock) -> None:
query = Query()
database = Database(
database_name="test_impala",
sqlalchemy_uri="impala://impala.example.com:21050/default",
database_name="test_impala", sqlalchemy_uri="impala://localhost:21050/default"
)
query.database = database
@@ -101,20 +96,17 @@ def test_cancel_query_failed(post_mock: Mock, _safe_host: Mock) -> None: # noqa
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
post_mock.assert_called_once_with(
"http://impala.example.com:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
"http://localhost:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
timeout=3,
allow_redirects=False,
)
assert result is False
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
@patch("requests.post")
def test_cancel_query_exception(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
def test_cancel_query_exception(post_mock: Mock) -> None:
query = Query()
database = Database(
database_name="test_impala",
sqlalchemy_uri="impala://impala.example.com:21050/default",
database_name="test_impala", sqlalchemy_uri="impala://localhost:21050/default"
)
query.database = database
@@ -123,52 +115,3 @@ def test_cancel_query_exception(post_mock: Mock, _safe_host: Mock) -> None: # n
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
assert result is False
@patch("requests.post")
def test_cancel_query_blocks_internal_host(post_mock: Mock, app_context: None) -> None:
"""A private/internal Impala host is refused by default (no HTTP call)."""
query = Query()
database = Database(
database_name="test_impala",
sqlalchemy_uri="impala://169.254.169.254:21050/default",
)
query.database = database
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
assert result is False
post_mock.assert_not_called()
@patch("requests.post")
def test_cancel_query_allows_internal_host_with_opt_out(
post_mock: Mock, app_context: None
) -> None:
"""IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS=True permits internal targets."""
from flask import current_app
query = Query()
database = Database(
database_name="test_impala",
sqlalchemy_uri="impala://10.0.0.5:21050/default",
)
query.database = database
response_mock = Mock()
response_mock.status_code = 200
post_mock.return_value = response_mock
original = current_app.config.get("IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS")
current_app.config["IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS"] = True
try:
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
finally:
current_app.config["IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS"] = original
post_mock.assert_called_once_with(
"http://10.0.0.5:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
timeout=3,
allow_redirects=False,
)
assert result is True

View File

@@ -25,7 +25,6 @@ from flask import current_app
from flask_appbuilder.security.sqla.models import Role
from freezegun import freeze_time
from jinja2 import DebugUndefined
from jinja2.exceptions import SecurityError
from jinja2.sandbox import SandboxedEnvironment
from pytest_mock import MockerFixture
from sqlalchemy.dialects import mysql
@@ -993,40 +992,6 @@ def test_metric_macro_expansion(mocker: MockerFixture) -> None:
assert processor.process_template("{{ metric('c') }}") == "42"
def test_metric_macro_does_not_expose_environment(mocker: MockerFixture) -> None:
"""
A template must not be able to read the template environment through the
``metric`` macro's bound arguments.
"""
database = Database(id=1, database_name="my_database", sqlalchemy_uri="sqlite://")
mock_g = mocker.patch("superset.jinja_context.g")
mock_g.form_data = {"datasource": {"id": 1}}
processor = get_template_processor(database=database)
# Attribute access on the macro's partial is denied, so a reference to its
# bound args resolves to an undefined and any further use raises instead of
# yielding the environment object.
with pytest.raises(SecurityError):
processor.process_template("{{ metric.args[1] }}")
with pytest.raises(SecurityError):
processor.process_template(
"{{ metric.args[1].template_class.environment_class() }}"
)
def test_supersetsandboxedenvironment_denies_unsafe_attributes() -> None:
"""is_safe_attribute denies env/template class attrs and all attrs on partials."""
from functools import partial
from superset.jinja_context import SupersetSandboxedEnvironment
env = SupersetSandboxedEnvironment()
assert env.is_safe_attribute(env, "environment_class", None) is False
assert env.is_safe_attribute(env, "template_class", None) is False
macro = partial(lambda value: value, 1)
assert env.is_safe_attribute(macro, "args", macro.args) is False
assert env.is_safe_attribute(macro, "func", macro.func) is False
def test_metric_macro_recursive_compound(mocker: MockerFixture) -> None:
"""
Test the ``metric_macro`` when the definition is compound.

View File

@@ -223,49 +223,3 @@ def test_send_http_only_https_check(monkeypatch, mock_header_data) -> None:
with pytest.raises(NotificationParamException, match="HTTPS is required by config"):
webhook_notification.send()
def test_send_treats_redirect_as_failure(monkeypatch, mock_header_data) -> None:
"""
A 3xx response is a failure: redirects are not followed
(allow_redirects=False), so the request never reached the final target and
must not be reported as success.
"""
from superset.reports.models import ReportRecipients, ReportRecipientType
from superset.reports.notifications.base import NotificationContent
content = NotificationContent(
name="test alert", header_data=mock_header_data, description="Test description"
)
webhook_notification = WebhookNotification(
recipient=ReportRecipients(
type=ReportRecipientType.WEBHOOK,
recipient_config_json='{"target": "https://example.com/webhook"}',
),
content=content,
)
class MockCurrentApp:
config = {
"ALERT_REPORTS_WEBHOOK_HTTPS_ONLY": True,
"ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS": True,
}
class MockResponse:
status_code = 302
text = ""
monkeypatch.setattr(
"superset.reports.notifications.webhook.current_app", MockCurrentApp
)
monkeypatch.setattr(
"superset.reports.notifications.webhook.feature_flag_manager.is_feature_enabled",
lambda flag: True,
)
monkeypatch.setattr(
"superset.reports.notifications.webhook.requests.post",
lambda *args, **kwargs: MockResponse(),
)
with pytest.raises(NotificationParamException, match="redirect"):
webhook_notification.send()

View File

@@ -19,7 +19,6 @@
import json # noqa: TID251
from types import SimpleNamespace
from typing import Any
import pytest
from flask_appbuilder.security.sqla.models import Role, User
@@ -657,188 +656,16 @@ def test_query_context_modified_tampered(
assert query_context_modified(query_context)
def _native_filter_ctx(
mocker: MockerFixture,
queries: list[Any],
*,
native_filter_id: str | None = "F1",
dashboard_id: int | None = 10,
dataset_id: int = 20,
targets: list[Any] | None = None,
control_values: dict[str, Any] | None = None,
) -> Any:
"""Build a native-filter query context (no slice_) + patched dashboard."""
if targets is None:
targets = [{"datasetId": dataset_id, "column": {"name": "region"}}]
qc = mocker.MagicMock()
qc.slice_ = None
qc.form_data = {
"type": "NATIVE_FILTER",
"native_filter_id": native_filter_id,
"dashboardId": dashboard_id,
}
qc.datasource.data = {"id": dataset_id}
qc.queries = queries
dash = mocker.MagicMock()
dash.json_metadata = json.dumps(
{
"native_filter_configuration": [
{
"id": "F1",
"targets": targets,
"controlValues": control_values or {},
}
]
}
)
query_chain = mocker.patch("superset.db.session.query")
query_chain.return_value.filter.return_value.one_or_none.return_value = dash
return qc
def test_query_context_modified_native_filter_target_column_allowed(
mocker: MockerFixture,
) -> None:
"""A native-filter request reading only its target column is allowed."""
query = SimpleNamespace(columns=["region"], metrics=[], groupby=[])
qc = _native_filter_ctx(mocker, [query])
assert not query_context_modified(qc)
def test_query_context_modified_native_filter_arbitrary_column_blocked(
mocker: MockerFixture,
) -> None:
"""A native-filter request reading a non-target column is modified."""
query = SimpleNamespace(columns=["ssn"], metrics=[], groupby=[])
qc = _native_filter_ctx(mocker, [query])
assert query_context_modified(qc)
def test_query_context_modified_native_filter_simple_metric_on_target_allowed(
mocker: MockerFixture,
) -> None:
"""A range-style request (simple aggregate over the target column) is allowed."""
query = SimpleNamespace(
columns=[],
metrics=[
{
"expressionType": "SIMPLE",
"column": {"column_name": "region"},
"aggregate": "MIN",
}
],
groupby=[],
)
qc = _native_filter_ctx(mocker, [query])
assert not query_context_modified(qc)
def test_query_context_modified_native_filter_adhoc_metric_blocked(
mocker: MockerFixture,
) -> None:
"""A free-form SQL metric on the native-filter path is modified."""
query = SimpleNamespace(
columns=[],
metrics=[{"expressionType": "SQL", "sqlExpression": "SUM(salary)"}],
groupby=[],
)
qc = _native_filter_ctx(mocker, [query])
assert query_context_modified(qc)
def test_query_context_modified_native_filter_adhoc_column_blocked(
mocker: MockerFixture,
) -> None:
"""An adhoc (free-form SQL) column on the native-filter path is modified."""
query = SimpleNamespace(
columns=[{"sqlExpression": "ssn", "label": "x"}], metrics=[], groupby=[]
)
qc = _native_filter_ctx(mocker, [query])
assert query_context_modified(qc)
def test_query_context_modified_native_filter_no_filter_context_blocked(
mocker: MockerFixture,
) -> None:
"""Without a native_filter_id / dashboardId the request fails closed."""
query = SimpleNamespace(columns=["region"], metrics=[], groupby=[])
qc = _native_filter_ctx(mocker, [query], native_filter_id=None)
assert query_context_modified(qc)
def test_query_context_modified_native_filter_configured_sort_metric_allowed(
mocker: MockerFixture,
) -> None:
"""A value lookup sorted by the filter's configured saved metric is allowed."""
query = SimpleNamespace(
columns=["region"],
metrics=["total"],
groupby=[],
orderby=[["total", True]],
)
qc = _native_filter_ctx(mocker, [query], control_values={"sortMetric": "total"})
assert not query_context_modified(qc)
def test_query_context_modified_native_filter_arbitrary_saved_metric_blocked(
mocker: MockerFixture,
) -> None:
"""A saved metric other than the filter's configured sort metric is modified."""
query = SimpleNamespace(columns=["region"], metrics=["salary_total"], groupby=[])
qc = _native_filter_ctx(mocker, [query], control_values={"sortMetric": "total"})
assert query_context_modified(qc)
def test_query_context_modified_native_filter_orderby_arbitrary_column_blocked(
mocker: MockerFixture,
) -> None:
"""Ordering by a non-target column on the native-filter path is modified."""
query = SimpleNamespace(
columns=["region"], metrics=[], groupby=[], orderby=[["ssn", True]]
)
qc = _native_filter_ctx(mocker, [query])
assert query_context_modified(qc)
def test_query_context_modified_native_filter_orderby_adhoc_blocked(
mocker: MockerFixture,
) -> None:
"""Ordering by a free-form SQL expression on the native-filter path is modified."""
query = SimpleNamespace(
columns=["region"],
metrics=[],
groupby=[],
orderby=[[{"sqlExpression": "ssn"}, True]],
)
qc = _native_filter_ctx(mocker, [query])
assert query_context_modified(qc)
def test_query_context_modified_chartless_non_native_filter_allowed(
mocker: MockerFixture,
) -> None:
def test_query_context_modified_native_filter(mocker: MockerFixture) -> None:
"""
A chartless request that is not a native filter (drill-to-detail, drill-by,
samples) is validated by the datasource-access checks in raise_for_access and
is not constrained here.
"""
qc = mocker.MagicMock()
qc.slice_ = None
qc.form_data = {"dashboardId": 10, "slice_id": 0, "groupby": ["ssn"]}
assert not query_context_modified(qc)
Test the `query_context_modified` function with a native filter request.
A native filter request has no chart (slice) associated with it.
"""
query_context = mocker.MagicMock()
query_context.slice_ = None
def test_query_context_modified_native_filter_without_type_marker_blocked(
mocker: MockerFixture,
) -> None:
"""
A request identified by native_filter_id is constrained even when the
NATIVE_FILTER type marker is absent.
"""
query = SimpleNamespace(columns=["ssn"], metrics=[], groupby=[])
qc = _native_filter_ctx(mocker, [query])
del qc.form_data["type"]
assert query_context_modified(qc)
assert not query_context_modified(query_context)
def test_query_context_modified_mixed_chart(mocker: MockerFixture) -> None:

View File

@@ -1,133 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import patch
import pytest
from superset.utils.network import is_safe_host
@pytest.mark.parametrize(
("resolved_ip", "expected"),
[
# Public IPs → safe
("93.184.216.34", True), # example.com
("8.8.8.8", True), # Google DNS
("2606:2800:220:1:248:1893:25c8:1946", True), # example.com IPv6
# Loopback → unsafe
("127.0.0.1", False),
("::1", False),
# RFC-1918 private ranges → unsafe
("10.0.0.1", False),
("10.255.255.255", False),
("172.16.0.1", False),
("172.31.255.255", False),
("192.168.0.1", False),
("192.168.255.255", False),
# Link-local / IMDS → unsafe
("169.254.169.254", False), # AWS/GCP/Azure metadata
("169.254.0.1", False),
# 0.0.0.0/8 → unsafe
("0.0.0.1", False),
# IPv4 multicast (224.0.0.0/4) → unsafe; ip.is_global returns True for
# multicast in Python, so the explicit blocklist entry is the only guard.
("224.0.0.1", False),
("239.255.255.255", False),
# IPv6 private → unsafe
("fc00::1", False),
("fe80::1", False),
# IPv6 multicast (ff00::/8) → unsafe
("ff02::1", False),
("ff0e::1", False),
],
)
def test_is_safe_host_ip_classification(resolved_ip: str, expected: bool) -> None:
"""Hosts resolving to private/internal IPs must be rejected."""
with patch(
"superset.utils.network.socket.getaddrinfo",
return_value=[(None, None, None, None, (resolved_ip, 0))],
):
assert is_safe_host("any-hostname") is expected
def test_is_safe_host_unresolvable_returns_false() -> None:
"""Unresolvable hostnames must return False (fail-closed)."""
import socket
with patch(
"superset.utils.network.socket.getaddrinfo",
side_effect=socket.gaierror("Name or service not known"),
):
assert is_safe_host("nonexistent.invalid") is False
def test_is_safe_host_empty_results_returns_false() -> None:
"""An empty getaddrinfo result must return False (fail-closed)."""
with patch(
"superset.utils.network.socket.getaddrinfo",
return_value=[],
):
assert is_safe_host("empty-result.example.com") is False
def test_is_safe_host_malformed_sockaddr_returns_false() -> None:
"""A sockaddr that cannot be parsed as an IP address must return False."""
with patch(
"superset.utils.network.socket.getaddrinfo",
return_value=[(None, None, None, None, ("not-an-ip", 0))],
):
assert is_safe_host("malformed") is False
def test_is_safe_host_rejects_if_any_ip_is_private() -> None:
"""A hostname that resolves to both a public and a private IP (split-DNS
or multi-homed host) must be rejected — all resolved IPs must be safe."""
with patch(
"superset.utils.network.socket.getaddrinfo",
return_value=[
(None, None, None, None, ("8.8.8.8", 0)),
(None, None, None, None, ("10.0.0.1", 0)), # private — must fail
],
):
assert is_safe_host("dual-homed.example.com") is False
@pytest.mark.parametrize(
"mapped_ip",
[
"::ffff:127.0.0.1", # loopback via IPv4-mapped IPv6
"::ffff:10.0.0.1", # RFC-1918 via IPv4-mapped IPv6
"::ffff:169.254.169.254", # link-local via IPv4-mapped IPv6
],
)
def test_is_safe_host_rejects_ipv4_mapped_ipv6(mapped_ip: str) -> None:
"""IPv4-mapped IPv6 addresses must be unwrapped and checked against the
IPv4 unsafe networks — not treated as safe IPv6 addresses."""
with patch(
"superset.utils.network.socket.getaddrinfo",
return_value=[(None, None, None, None, (mapped_ip, 0, 0, 0))],
):
assert is_safe_host("mapped") is False
def test_is_safe_host_rejects_cgnat_range() -> None:
"""100.64.0.0/10 (RFC 6598 shared address space) must be rejected."""
with patch(
"superset.utils.network.socket.getaddrinfo",
return_value=[(None, None, None, None, ("100.100.100.200", 0))],
):
assert is_safe_host("cgnat-host") is False