mirror of
https://github.com/apache/superset.git
synced 2026-06-13 19:49:18 +00:00
Compare commits
2 Commits
master
...
fix/chart-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c8b9e990ae | ||
|
|
1c438a57d4 |
@@ -24,15 +24,6 @@ assists people when migrating to a new version.
|
||||
|
||||
## Next
|
||||
|
||||
### Webhook alerts/reports block private/internal hosts by default
|
||||
|
||||
Webhook alert/report dispatch (`WebhookNotification.send`) now validates the target URL's host against the same private/internal-IP block applied to dataset import URLs. If the resolved host is in a loopback, link-local, private (RFC-1918), shared-CGNAT, or multicast range, the webhook is rejected with `NotificationParamException`.
|
||||
|
||||
Deployments that intentionally point webhooks at internal targets (chatops bridges, internal automation servers, on-premises Mattermost/Rocket.Chat, etc.) can opt out by setting `ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS = True` in `superset_config.py`. This mirrors the existing `DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS` opt-out for dataset imports.
|
||||
|
||||
### Impala cancel_query blocks private/internal hosts by default
|
||||
|
||||
The Impala engine spec's `cancel_query` issues an HTTP request from the Superset backend to the host configured on the Impala database connection. That host is now validated before the request: if it resolves to a private/internal IP range, the cancel call is refused and a warning is logged. Operators whose Impala cluster runs on an internal network can opt out by setting `IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS = True` in `superset_config.py`. This mirrors the dataset-import and webhook opt-out flags.
|
||||
### Map chart renderer and OpenStreetMap migration behavior
|
||||
|
||||
The MapLibre migration for deck.gl charts preserves saved non-Mapbox styles on
|
||||
|
||||
@@ -222,7 +222,7 @@ development = [
|
||||
"pip",
|
||||
"polib", # used by scripts/translations/ and their unit tests
|
||||
"pre-commit",
|
||||
"progress>=1.6.1,<2",
|
||||
"progress>=1.5,<2",
|
||||
"psutil",
|
||||
"pyfakefs",
|
||||
"pyinstrument>=5.1.2,<6",
|
||||
|
||||
@@ -686,7 +686,7 @@ prison==0.2.1
|
||||
# via
|
||||
# -c requirements/base-constraint.txt
|
||||
# flask-appbuilder
|
||||
progress==1.6.1
|
||||
progress==1.6
|
||||
# via apache-superset
|
||||
prompt-toolkit==3.0.51
|
||||
# via
|
||||
|
||||
@@ -1208,7 +1208,9 @@ const DatabaseModal: FunctionComponent<DatabaseModalProps> = ({
|
||||
// For all other options, sort alphabetically
|
||||
return String(a.label).localeCompare(String(b.label));
|
||||
}}
|
||||
getPopupContainer={() => document.body}
|
||||
getPopupContainer={triggerNode =>
|
||||
triggerNode.parentElement || document.body
|
||||
}
|
||||
dropdownStyle={{ maxHeight: 400, overflow: 'auto' }}
|
||||
/>
|
||||
<Alert
|
||||
|
||||
@@ -103,6 +103,19 @@ class DatasourceTypeUpdateRequiredValidationError(ValidationError):
|
||||
)
|
||||
|
||||
|
||||
class ChartQueryContextDatasourceMismatchValidationError(ValidationError):
|
||||
"""
|
||||
Raised when a query-context-only update carries a datasource that does not
|
||||
match the chart's own datasource.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
_("The query context datasource does not match the chart datasource"),
|
||||
field_name="query_context",
|
||||
)
|
||||
|
||||
|
||||
class ChartNotFoundError(CommandException):
|
||||
message = "Chart not found."
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ from superset.commands.chart.exceptions import (
|
||||
ChartForbiddenError,
|
||||
ChartInvalidError,
|
||||
ChartNotFoundError,
|
||||
ChartQueryContextDatasourceMismatchValidationError,
|
||||
ChartUpdateFailedError,
|
||||
DashboardsForbiddenError,
|
||||
DashboardsNotFoundValidationError,
|
||||
@@ -41,6 +42,7 @@ from superset.exceptions import SupersetSecurityException
|
||||
from superset.models.dashboard import Dashboard
|
||||
from superset.models.slice import Slice
|
||||
from superset.tags.models import ObjectType
|
||||
from superset.utils import json
|
||||
from superset.utils.decorators import on_error, transaction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -101,6 +103,52 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
|
||||
if not security_manager.is_owner(dash):
|
||||
raise DashboardsForbiddenError()
|
||||
|
||||
def _validate_query_context_datasource(
|
||||
self, exceptions: list[ValidationError]
|
||||
) -> None:
|
||||
"""
|
||||
Ensure a query-context-only update keeps the chart's own datasource.
|
||||
|
||||
The submitted query context is only verified when it carries a parseable
|
||||
``datasource`` object; a payload that references a different datasource than
|
||||
the chart's persisted one is rejected. Payloads without a datasource fall
|
||||
back to the chart's datasource at execution time and need no check.
|
||||
"""
|
||||
if not self._model:
|
||||
return
|
||||
|
||||
raw_query_context = self._properties.get("query_context")
|
||||
if not raw_query_context:
|
||||
return
|
||||
|
||||
try:
|
||||
query_context = json.loads(raw_query_context)
|
||||
except (TypeError, ValueError):
|
||||
# An unparseable payload cannot be verified or replayed; leave it for
|
||||
# downstream handling rather than guessing at its intent.
|
||||
return
|
||||
|
||||
datasource = (
|
||||
query_context.get("datasource") if isinstance(query_context, dict) else None
|
||||
)
|
||||
if not isinstance(datasource, dict):
|
||||
return
|
||||
|
||||
try:
|
||||
ids_match = int(datasource["id"]) == self._model.datasource_id
|
||||
except (KeyError, TypeError, ValueError):
|
||||
ids_match = False
|
||||
|
||||
# A datasource object must carry a type that matches the chart's own.
|
||||
# Treating a missing type as valid would let an id-only payload through,
|
||||
# and query-context loading reads datasource["type"] directly, so that
|
||||
# payload raises KeyError when the saved context is later replayed.
|
||||
datasource_type = datasource.get("type")
|
||||
types_match = str(datasource_type) == self._model.datasource_type
|
||||
|
||||
if not ids_match or not types_match:
|
||||
exceptions.append(ChartQueryContextDatasourceMismatchValidationError())
|
||||
|
||||
def validate(self) -> None: # noqa: C901
|
||||
exceptions: list[ValidationError] = []
|
||||
dashboard_ids = self._properties.get("dashboards")
|
||||
@@ -144,6 +192,9 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
|
||||
security_manager.raise_for_access(chart=self._model)
|
||||
except SupersetSecurityException as ex:
|
||||
raise ChartForbiddenError() from ex
|
||||
# Keep the refreshed payload bound to the chart's own datasource so it
|
||||
# cannot be repointed at an unrelated one.
|
||||
self._validate_query_context_datasource(exceptions)
|
||||
|
||||
# validate tags
|
||||
try:
|
||||
|
||||
@@ -31,15 +31,12 @@ import yaml
|
||||
|
||||
from superset.commands.base import BaseCommand
|
||||
from superset.commands.dashboard.exceptions import DashboardNotFoundError
|
||||
from superset.common.db_query_status import QueryStatus
|
||||
from superset.daos.dashboard import DashboardDAO
|
||||
from superset.exceptions import SupersetSecurityException
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.connectors.sqla.models import SqlaTable
|
||||
from superset.models.dashboard import Dashboard
|
||||
from superset.models.slice import Slice
|
||||
from superset.superset_typing import QueryObjectDict
|
||||
|
||||
from superset.sql.parse import SQLStatement, Table
|
||||
|
||||
@@ -238,6 +235,8 @@ def export_dataset_data(
|
||||
sample_rows: int | None = None,
|
||||
) -> bytes | None:
|
||||
"""Export dataset data to Parquet format. Returns bytes or None on failure."""
|
||||
import pandas as pd # pylint: disable=import-outside-toplevel
|
||||
|
||||
from superset import db # pylint: disable=import-outside-toplevel
|
||||
|
||||
# Ensure dataset is attached to session and relationships are loaded
|
||||
@@ -252,50 +251,35 @@ def export_dataset_data(
|
||||
logger.warning("Dataset %s has no database", dataset.table_name)
|
||||
return None
|
||||
|
||||
# Only export rows the requester is entitled to read. The dataset's own
|
||||
# access check is applied here, and the rows below are fetched through the
|
||||
# dataset's query builder (the same path the chart-data API uses) so that
|
||||
# per-row filters are applied consistently. A requester without access to
|
||||
# the dataset yields no data file rather than the raw underlying table.
|
||||
try:
|
||||
dataset.raise_for_access()
|
||||
except SupersetSecurityException:
|
||||
logger.info(
|
||||
"Skipping data export for dataset %s: requester not entitled",
|
||||
dataset.table_name,
|
||||
)
|
||||
return None
|
||||
|
||||
columns = [col.column_name for col in dataset.columns if not col.expression]
|
||||
if not columns:
|
||||
logger.warning("No columns to export for %s", dataset.table_name)
|
||||
return None
|
||||
|
||||
try:
|
||||
logger.info("Exporting data for %s to Parquet...", dataset.table_name)
|
||||
|
||||
# Fetch through the dataset's query builder so that the projection,
|
||||
# per-row filters, and (for virtual datasets) the wrapping query are
|
||||
# produced the same way as the chart-data path. The row cap is applied
|
||||
# at the SQL level via row_limit rather than after a full table read.
|
||||
query_obj: QueryObjectDict = {
|
||||
"columns": columns,
|
||||
"metrics": [],
|
||||
"orderby": [],
|
||||
"is_timeseries": False,
|
||||
"filter": [],
|
||||
"extras": {},
|
||||
"row_limit": sample_rows,
|
||||
}
|
||||
result = dataset.query(query_obj)
|
||||
if result.status == QueryStatus.FAILED:
|
||||
# The query path reports failures via status rather than raising;
|
||||
# omit the data file instead of writing an empty/partial Parquet.
|
||||
logger.warning(
|
||||
"Query failed while exporting data for %s", dataset.table_name
|
||||
)
|
||||
return None
|
||||
df = result.df
|
||||
# Check if this is a virtual dataset (SQL-based)
|
||||
if dataset.sql:
|
||||
sql = dataset.sql
|
||||
else:
|
||||
# For physical tables, build SELECT query from columns
|
||||
columns = [col.column_name for col in dataset.columns if not col.expression]
|
||||
|
||||
if not columns:
|
||||
logger.warning("No columns to export for %s", dataset.table_name)
|
||||
return None
|
||||
|
||||
# Build simple SELECT query (quote identifiers to handle spaces/keywords)
|
||||
column_list = ", ".join(f'"{c}"' for c in columns)
|
||||
quoted_table = f'"{dataset.table_name}"'
|
||||
if dataset.schema:
|
||||
table_ref = f'"{dataset.schema}".{quoted_table}'
|
||||
else:
|
||||
table_ref = quoted_table
|
||||
sql = f"SELECT {column_list} FROM {table_ref}" # noqa: S608
|
||||
|
||||
with dataset.database.get_sqla_engine() as engine:
|
||||
df = pd.read_sql(sql, engine)
|
||||
|
||||
if sample_rows and len(df) > sample_rows:
|
||||
df = df.head(sample_rows)
|
||||
logger.info("Sampled to %d rows", sample_rows)
|
||||
|
||||
# Write to bytes buffer
|
||||
buf = BytesIO()
|
||||
|
||||
@@ -16,12 +16,9 @@
|
||||
# under the License.
|
||||
import gzip
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
from urllib import request
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from urllib.request import HTTPRedirectHandler
|
||||
|
||||
import pandas as pd
|
||||
from flask import current_app as app
|
||||
@@ -43,35 +40,9 @@ from superset.models.core import Database
|
||||
from superset.sql.parse import Table
|
||||
from superset.utils import json
|
||||
from superset.utils.core import get_user
|
||||
from superset.utils.network import is_safe_host
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _ValidatingRedirectHandler(HTTPRedirectHandler):
|
||||
"""Re-validates the redirect target URL before following any HTTP redirect.
|
||||
|
||||
Prevents bypasses where an initial URL passes validation but a subsequent
|
||||
redirect points to a disallowed destination.
|
||||
"""
|
||||
|
||||
def redirect_request(
|
||||
self,
|
||||
req: request.Request,
|
||||
fp: Any,
|
||||
code: int,
|
||||
msg: str,
|
||||
headers: Any,
|
||||
newurl: str,
|
||||
) -> request.Request | None:
|
||||
"""Validate each redirect target before delegating to the parent handler."""
|
||||
# Resolve relative redirects against the originating request URL so that
|
||||
# validate_data_uri receives a fully-qualified URL in all cases.
|
||||
absolute_url = urljoin(req.full_url, newurl)
|
||||
validate_data_uri(absolute_url)
|
||||
return super().redirect_request(req, fp, code, msg, headers, newurl)
|
||||
|
||||
|
||||
CHUNKSIZE = 512
|
||||
VARCHAR = re.compile(r"VARCHAR\((\d+)\)", re.IGNORECASE)
|
||||
|
||||
@@ -117,37 +88,12 @@ def get_dtype(df: pd.DataFrame, dataset: SqlaTable) -> dict[str, VisitableType]:
|
||||
|
||||
def validate_data_uri(data_uri: str) -> None:
|
||||
"""
|
||||
Validate that the data URI is permitted for dataset import.
|
||||
Validate that the data URI is configured on DATASET_IMPORT_ALLOWED_URLS
|
||||
has a valid URL.
|
||||
|
||||
Local ``file://`` URIs are allowed only when the path is confined to the
|
||||
bundled examples folder. All other URIs must match a pattern in
|
||||
``DATASET_IMPORT_ALLOWED_DATA_URLS`` *and* resolve to a publicly-routable host.
|
||||
|
||||
:param data_uri: the URI to validate
|
||||
:raises DatasetForbiddenDataURI: if the URI is not permitted
|
||||
:param data_uri:
|
||||
:return:
|
||||
"""
|
||||
parsed = urlparse(data_uri)
|
||||
# ``urlparse`` lower-cases the scheme, so gating on it (rather than a
|
||||
# case-sensitive ``startswith("file://")``) also rejects mixed-case
|
||||
# variants like ``FiLe://`` that would otherwise skip the local-file
|
||||
# sandbox check below.
|
||||
if parsed.scheme == "file":
|
||||
from urllib.request import url2pathname
|
||||
|
||||
from superset.examples.helpers import get_examples_folder
|
||||
|
||||
# Reject non-local authority components (e.g. file://remotehost/path).
|
||||
if parsed.netloc and parsed.netloc.lower() != "localhost":
|
||||
raise DatasetForbiddenDataURI()
|
||||
# url2pathname handles URL-encoded characters and platform path separators.
|
||||
file_path = url2pathname(parsed.path)
|
||||
# Resolve symlinks and relative components before comparing.
|
||||
real_path = os.path.realpath(file_path)
|
||||
examples_folder = os.path.realpath(get_examples_folder())
|
||||
if not real_path.startswith(examples_folder + os.sep):
|
||||
raise DatasetForbiddenDataURI()
|
||||
return
|
||||
|
||||
allowed_urls = app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"]
|
||||
for allowed_url in allowed_urls:
|
||||
try:
|
||||
@@ -158,12 +104,6 @@ def validate_data_uri(data_uri: str) -> None:
|
||||
)
|
||||
raise
|
||||
if match:
|
||||
if not app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"]:
|
||||
hostname = parsed.hostname
|
||||
# Fail-closed: reject URIs that have no parseable hostname as
|
||||
# well as those that resolve to non-public addresses.
|
||||
if not hostname or not is_safe_host(hostname):
|
||||
raise DatasetForbiddenDataURI()
|
||||
return
|
||||
raise DatasetForbiddenDataURI()
|
||||
|
||||
@@ -342,8 +282,7 @@ def load_data(data_uri: str, dataset: SqlaTable, database: Database) -> None:
|
||||
|
||||
validate_data_uri(data_uri)
|
||||
logger.info("Downloading data from %s", data_uri)
|
||||
opener = request.build_opener(_ValidatingRedirectHandler)
|
||||
data = opener.open(data_uri) # pylint: disable=consider-using-with # noqa: S310
|
||||
data = request.urlopen(data_uri) # pylint: disable=consider-using-with # noqa: S310
|
||||
if data_uri.endswith(".gz"):
|
||||
data = gzip.open(data)
|
||||
df = pd.read_csv(data, encoding="utf-8")
|
||||
|
||||
@@ -541,14 +541,10 @@ class QueryContextProcessor:
|
||||
|
||||
:raises SupersetSecurityException: If the user cannot access the resource
|
||||
"""
|
||||
# Evaluate access before validating the queries: query validation
|
||||
# renders the request's filter expressions, so the access decision must
|
||||
# come first to avoid rendering caller-supplied input for a resource the
|
||||
# caller is not allowed to access.
|
||||
for query in self._query_context.queries:
|
||||
query.validate()
|
||||
|
||||
if self._qc_datasource.type == DatasourceType.QUERY:
|
||||
security_manager.raise_for_access(query=self._qc_datasource)
|
||||
else:
|
||||
security_manager.raise_for_access(query_context=self._query_context)
|
||||
|
||||
for query in self._query_context.queries:
|
||||
query.validate()
|
||||
|
||||
@@ -2058,21 +2058,6 @@ REPORT_MINIMUM_INTERVAL = int(timedelta(minutes=0).total_seconds())
|
||||
# Enforce HTTPS for webhook alerts/reports
|
||||
ALERT_REPORTS_WEBHOOK_HTTPS_ONLY = True
|
||||
|
||||
# When True, webhook alert/report dispatch is permitted to call private/internal
|
||||
# IP addresses (RFC-1918, loopback, link-local). Intended for deployments where
|
||||
# the webhook target is on an internal network (a chatops bridge, an internal
|
||||
# Mattermost/Rocket.Chat, an automation server, etc.). Leave False (the default)
|
||||
# in any internet-facing deployment.
|
||||
ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS: bool = False
|
||||
|
||||
# When True, Impala's cancel_query HTTP call is permitted to target hosts in
|
||||
# private/internal IP ranges (RFC-1918, loopback, link-local). Intended for
|
||||
# operators whose Impala cluster runs on an internal network. Leave False (the
|
||||
# default) in any deployment where untrusted users can create Impala database
|
||||
# connections, so a maliciously-configured impala:// URL cannot be used to
|
||||
# trigger outbound requests to internal targets via the cancel endpoint.
|
||||
IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS: bool = False
|
||||
|
||||
# A custom prefix to use on all Alerts & Reports emails
|
||||
EMAIL_REPORTS_SUBJECT_PREFIX = "[Report] "
|
||||
|
||||
@@ -2362,21 +2347,13 @@ PREVENT_UNSAFE_DB_CONNECTIONS = True
|
||||
# If true all default urls on datasets will be handled as relative URLs by the frontend
|
||||
PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True
|
||||
|
||||
# Define a list of allowed URL patterns (regex) for dataset data imports (v1).
|
||||
# Define a list of allowed URLs for dataset data imports (v1).
|
||||
# Simple example to only allow URLs that belong to certain domains:
|
||||
# DATASET_IMPORT_ALLOWED_DATA_URLS = [
|
||||
# ALLOWED_IMPORT_URL_DOMAINS = [
|
||||
# r"^https://.+\.domain1\.com\/?.*", r"^https://.+\.domain2\.com\/?.*"
|
||||
# ]
|
||||
# Local file:// URIs used for bundled example data are always permitted
|
||||
# regardless of this setting.
|
||||
DATASET_IMPORT_ALLOWED_DATA_URLS = [r".*"]
|
||||
|
||||
# When True, dataset import is permitted to fetch data from private/internal
|
||||
# IP addresses (RFC-1918, loopback, link-local). Intended for air-gapped or
|
||||
# on-premises deployments where the data source is on an internal network.
|
||||
# Leave False (the default) in any internet-facing deployment.
|
||||
DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS: bool = False
|
||||
|
||||
# Path used to store SSL certificates that are generated when using custom certs.
|
||||
# Defaults to temporary directory.
|
||||
# Example: SSL_CERT_PATH = "/certs"
|
||||
|
||||
@@ -2477,27 +2477,6 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def validate_cancel_query_id(
|
||||
cancel_query_id: str | None,
|
||||
pattern: str = r"^\d+$",
|
||||
) -> bool:
|
||||
"""
|
||||
Validate that a cancel_query_id matches expected format.
|
||||
|
||||
This is a defense-in-depth measure to prevent SQL injection in cancel_query
|
||||
implementations that use string interpolation. While cancel_query_id typically
|
||||
comes from trusted database sources (e.g., CONNECTION_ID()), validation ensures
|
||||
safety even if the data source is compromised.
|
||||
|
||||
:param cancel_query_id: The query identifier to validate
|
||||
:param pattern: Regex pattern to match (default: numeric only)
|
||||
:return: True if valid, False otherwise
|
||||
"""
|
||||
if cancel_query_id is None:
|
||||
return False
|
||||
return bool(re.fullmatch(pattern, str(cancel_query_id)))
|
||||
|
||||
@classmethod
|
||||
def cancel_query( # pylint: disable=unused-argument
|
||||
cls,
|
||||
|
||||
@@ -32,7 +32,6 @@ from superset import db
|
||||
from superset.constants import QUERY_EARLY_CANCEL_KEY, TimeGrain
|
||||
from superset.db_engine_specs.base import BaseEngineSpec, DatabaseCategory
|
||||
from superset.models.sql_lab import Query
|
||||
from superset.utils.network import is_safe_host
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.models.core import Database
|
||||
@@ -205,37 +204,13 @@ class ImpalaEngineSpec(BaseEngineSpec):
|
||||
|
||||
:param cursor: New cursor instance to the db of the query
|
||||
:param query: Query instance
|
||||
:param cancel_query_id: Impala query ID in format "hex:hex"
|
||||
:param cancel_query_id: impala db not need
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent URL injection
|
||||
# Impala query IDs are in "hex:hex" form (16 hex chars per side)
|
||||
if not cls.validate_cancel_query_id(
|
||||
cancel_query_id, r"^[A-Fa-f0-9]{16}:[A-Fa-f0-9]{16}$"
|
||||
):
|
||||
return False
|
||||
|
||||
try:
|
||||
impala_host = query.database.url_object.host
|
||||
# The cancel call issues an outbound HTTP request from the
|
||||
# Superset backend to whatever host the DB connection was
|
||||
# configured with; validate it before the call to keep this
|
||||
# path consistent with the dataset-import and webhook URL
|
||||
# checks. Operators with internal Impala targets can opt out
|
||||
# via IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS.
|
||||
if not impala_host:
|
||||
return False
|
||||
if not app.config[
|
||||
"IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS"
|
||||
] and not is_safe_host(impala_host):
|
||||
logger.warning(
|
||||
"Impala cancel_query refused: target host is not allowed"
|
||||
)
|
||||
return False
|
||||
url = f"http://{impala_host}:25000/cancel_query?query_id={cancel_query_id}"
|
||||
# Do not follow redirects: a validated host could otherwise 30x the
|
||||
# request to an internal target, bypassing the is_safe_host check.
|
||||
response = requests.post(url, timeout=3, allow_redirects=False)
|
||||
response = requests.post(url, timeout=3)
|
||||
except Exception: # pylint: disable=broad-except
|
||||
return False
|
||||
|
||||
|
||||
@@ -457,11 +457,6 @@ class MySQLEngineSpec(BasicParametersMixin, BaseEngineSpec):
|
||||
:param cancel_query_id: MySQL Connection ID
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent SQL injection
|
||||
# MySQL CONNECTION_ID() returns an unsigned integer
|
||||
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+$"):
|
||||
return False
|
||||
|
||||
try:
|
||||
cursor.execute(f"KILL CONNECTION {cancel_query_id}")
|
||||
except Exception: # pylint: disable=broad-except
|
||||
|
||||
@@ -392,13 +392,7 @@ class OcientEngineSpec(BaseEngineSpec):
|
||||
def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool:
|
||||
with OcientEngineSpec.query_id_mapping_lock:
|
||||
if query.id in OcientEngineSpec.query_id_mapping:
|
||||
ocient_query_id = OcientEngineSpec.query_id_mapping[query.id]
|
||||
# Validate query ID to prevent SQL injection (defense-in-depth)
|
||||
# Ocient query IDs are alphanumeric with underscores and dashes
|
||||
if not cls.validate_cancel_query_id(str(ocient_query_id), r"^[\w\-]+$"):
|
||||
return False
|
||||
|
||||
cursor.execute(f"CANCEL {ocient_query_id}")
|
||||
cursor.execute(f"CANCEL {OcientEngineSpec.query_id_mapping[query.id]}")
|
||||
# Query has been cancelled, so we can safely remove the cursor from
|
||||
# the cache
|
||||
del OcientEngineSpec.query_id_mapping[query.id]
|
||||
|
||||
@@ -862,11 +862,6 @@ WHERE datistemplate = false;
|
||||
:param cancel_query_id: Postgres PID
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent SQL injection
|
||||
# PostgreSQL pg_backend_pid() returns an integer
|
||||
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+$"):
|
||||
return False
|
||||
|
||||
try:
|
||||
cursor.execute(
|
||||
"SELECT pg_terminate_backend(pid) " # noqa: S608
|
||||
|
||||
@@ -346,11 +346,6 @@ class RedshiftEngineSpec(BasicParametersMixin, PostgresBaseEngineSpec):
|
||||
:param cancel_query_id: Redshift PID
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent SQL injection
|
||||
# Redshift pg_backend_pid() returns an integer
|
||||
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+$"):
|
||||
return False
|
||||
|
||||
try:
|
||||
logger.info("Killing Redshift PID:%s", str(cancel_query_id))
|
||||
cursor.execute(
|
||||
|
||||
@@ -577,11 +577,6 @@ class SingleStoreSpec(BasicParametersMixin, BaseEngineSpec):
|
||||
:param cancel_query_id: SingleStore connection ID and aggregator ID
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent SQL injection
|
||||
# SingleStore: "CONNECTION_ID AGGREGATOR_ID" (two space-separated ints)
|
||||
if not cls.validate_cancel_query_id(cancel_query_id, r"^\d+(\s+\d+)?$"):
|
||||
return False
|
||||
|
||||
try:
|
||||
cursor.execute(f"KILL CONNECTION {cancel_query_id}")
|
||||
except Exception: # pylint: disable=broad-except
|
||||
|
||||
@@ -334,11 +334,6 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
|
||||
:param cancel_query_id: Snowflake Session ID
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent SQL injection
|
||||
# Snowflake CURRENT_SESSION() returns an alphanumeric VARCHAR session ID
|
||||
if not cls.validate_cancel_query_id(cancel_query_id, r"^[a-zA-Z0-9]+$"):
|
||||
return False
|
||||
|
||||
try:
|
||||
cursor.execute(f"SELECT SYSTEM$CANCEL_ALL_QUERIES({cancel_query_id})")
|
||||
except Exception: # pylint: disable=broad-except
|
||||
|
||||
@@ -429,12 +429,6 @@ class TrinoEngineSpec(PrestoBaseEngineSpec):
|
||||
:param cancel_query_id: Trino `queryId`
|
||||
:return: True if query cancelled successfully, False otherwise
|
||||
"""
|
||||
# Validate cancel_query_id to prevent SQL injection
|
||||
# Trino query IDs look like yyyymmdd_hhmmss_nnnnn_xxxxx
|
||||
# (alphanumeric with underscores)
|
||||
if not cls.validate_cancel_query_id(cancel_query_id, r"^[a-zA-Z0-9_]+$"):
|
||||
return False
|
||||
|
||||
try:
|
||||
cursor.execute(
|
||||
f"CALL system.runtime.kill_query(query_id => '{cancel_query_id}',"
|
||||
|
||||
@@ -735,22 +735,6 @@ def to_datetime(
|
||||
return datetime.strptime(value, format)
|
||||
|
||||
|
||||
class SupersetSandboxedEnvironment(SandboxedEnvironment):
|
||||
"""
|
||||
Sandbox that denies attribute access to the base environment/template
|
||||
classes and to the internals of ``functools.partial`` objects, none of
|
||||
which templates need. Calling such objects is unaffected; only attribute
|
||||
access is denied.
|
||||
"""
|
||||
|
||||
def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
|
||||
if attr in {"environment_class", "template_class"}:
|
||||
return False
|
||||
if isinstance(obj, partial):
|
||||
return False
|
||||
return super().is_safe_attribute(obj, attr, value)
|
||||
|
||||
|
||||
class BaseTemplateProcessor:
|
||||
"""
|
||||
Base class for database-specific jinja context
|
||||
@@ -781,7 +765,7 @@ class BaseTemplateProcessor:
|
||||
self._applied_filters = applied_filters
|
||||
self._removed_filters = removed_filters
|
||||
self._context: dict[str, Any] = {}
|
||||
self.env: Environment = SupersetSandboxedEnvironment(undefined=DebugUndefined)
|
||||
self.env: Environment = SandboxedEnvironment(undefined=DebugUndefined)
|
||||
self.set_context(**kwargs)
|
||||
|
||||
# custom filters
|
||||
@@ -951,14 +935,13 @@ class JinjaTemplateProcessor(BaseTemplateProcessor):
|
||||
}
|
||||
)
|
||||
|
||||
# The `metric` filter needs the env and full context to expand other
|
||||
# filters. Bind them through a closure rather than positional args so the
|
||||
# template environment is not reachable via the macro's public
|
||||
# ``partial.args`` from inside a template.
|
||||
def metric_with_context(metric_key: str, dataset_id: int | None = None) -> str:
|
||||
return metric_macro(self.env, self._context, metric_key, dataset_id)
|
||||
|
||||
self._context["metric"] = partial(safe_proxy, metric_with_context)
|
||||
# The `metric` filter needs the full context, in order to expand other filters
|
||||
self._context["metric"] = partial(
|
||||
safe_proxy,
|
||||
metric_macro,
|
||||
self.env,
|
||||
self._context,
|
||||
)
|
||||
|
||||
|
||||
class NoOpTemplateProcessor(BaseTemplateProcessor):
|
||||
|
||||
@@ -34,19 +34,6 @@ superset mcp run --port 5009
|
||||
# http://localhost:5009/mcp/
|
||||
"""
|
||||
|
||||
import warnings
|
||||
|
||||
# authlib.jose is deprecated in authlib 1.3+ in favour of joserfc, but fastmcp
|
||||
# still imports it internally. This filter must live here — it runs when any
|
||||
# superset.mcp_service submodule is first imported, which is before fastmcp
|
||||
# triggers the authlib.jose import. Placed in __init__.py so both the
|
||||
# production server path and test imports get it without duplicating the
|
||||
# filterwarnings call in every affected module.
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message=r"authlib\.jose module is deprecated",
|
||||
)
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
# Tools are auto-registered when imported by the MCP service
|
||||
|
||||
@@ -75,13 +75,6 @@ def _suppress_third_party_warnings() -> None:
|
||||
category=FutureWarning,
|
||||
module=r"google\..*",
|
||||
)
|
||||
# authlib.jose deprecation warning is suppressed at package init time
|
||||
# (superset/mcp_service/__init__.py), but add it here too for any late
|
||||
# imports that may occur after tool execution begins.
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message=r"authlib\.jose module is deprecated",
|
||||
)
|
||||
|
||||
|
||||
class FastMCPValidationFilter(logging.Filter):
|
||||
|
||||
@@ -32,7 +32,6 @@ from superset.reports.notifications.exceptions import (
|
||||
)
|
||||
from superset.utils import json
|
||||
from superset.utils.decorators import statsd_gauge
|
||||
from superset.utils.network import is_safe_host
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -94,39 +93,6 @@ class WebhookNotification(BaseNotification):
|
||||
)
|
||||
return files
|
||||
|
||||
def _validate_webhook_url(self, url: str) -> None:
|
||||
"""
|
||||
Validate the webhook target URL before dispatch.
|
||||
|
||||
Checks that the scheme is HTTP(S) (and HTTPS when required by config),
|
||||
that a hostname is present, and, unless the operator opts out via
|
||||
``ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS``, that the host does not
|
||||
resolve to a private/internal address.
|
||||
|
||||
:raises NotificationParamException: if any of the above checks fail.
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
scheme = parsed.scheme.lower()
|
||||
if scheme not in ("http", "https"):
|
||||
raise NotificationParamException(
|
||||
"Webhook failed: only HTTP and HTTPS webhook URLs are supported."
|
||||
)
|
||||
if current_app.config["ALERT_REPORTS_WEBHOOK_HTTPS_ONLY"] and scheme != "https":
|
||||
raise NotificationParamException(
|
||||
"Webhook failed: HTTPS is required by config for webhook URLs."
|
||||
)
|
||||
if not parsed.hostname:
|
||||
raise NotificationParamException(
|
||||
"Webhook failed: URL must include a valid hostname."
|
||||
)
|
||||
# Operators with internal webhook targets (chatops bridges, internal
|
||||
# automation, etc.) can opt out of the private-IP block via
|
||||
# ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS.
|
||||
if current_app.config["ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS"]:
|
||||
return
|
||||
if not is_safe_host(parsed.hostname):
|
||||
raise NotificationParamException("Webhook URL target host is not allowed.")
|
||||
|
||||
@backoff.on_exception(
|
||||
backoff.expo, NotificationUnprocessableException, factor=10, base=2, max_tries=5
|
||||
)
|
||||
@@ -138,7 +104,11 @@ class WebhookNotification(BaseNotification):
|
||||
is not enabled."
|
||||
)
|
||||
wh_url = self._get_webhook_url()
|
||||
self._validate_webhook_url(wh_url)
|
||||
if current_app.config["ALERT_REPORTS_WEBHOOK_HTTPS_ONLY"]:
|
||||
if urlparse(wh_url).scheme.lower() != "https":
|
||||
raise NotificationParamException(
|
||||
"Webhook failed: HTTPS is required by config for webhook URLs."
|
||||
)
|
||||
payload = self._get_req_payload()
|
||||
files = self._get_files()
|
||||
|
||||
@@ -151,17 +121,9 @@ class WebhookNotification(BaseNotification):
|
||||
else:
|
||||
data[key] = value
|
||||
|
||||
response = requests.post(
|
||||
wh_url,
|
||||
data=data,
|
||||
files=files,
|
||||
timeout=60,
|
||||
allow_redirects=False,
|
||||
)
|
||||
response = requests.post(wh_url, data=data, files=files, timeout=60)
|
||||
else:
|
||||
response = requests.post(
|
||||
wh_url, json=payload, timeout=60, allow_redirects=False
|
||||
)
|
||||
response = requests.post(wh_url, json=payload, timeout=60)
|
||||
|
||||
logger.info(
|
||||
"Webhook sent to %s, status code: %s", wh_url, response.status_code
|
||||
@@ -177,14 +139,6 @@ class WebhookNotification(BaseNotification):
|
||||
f"Webhook failed with status code {response.status_code}: \
|
||||
{response.text}"
|
||||
)
|
||||
if response.status_code >= 300:
|
||||
# Redirects are intentionally not followed (allow_redirects=False),
|
||||
# so a 3xx means the request never reached the final target. Treat
|
||||
# it as a failure rather than silently reporting success.
|
||||
raise NotificationParamException(
|
||||
f"Webhook returned an unfollowed redirect "
|
||||
f"(status code {response.status_code})"
|
||||
)
|
||||
|
||||
except requests.exceptions.RequestException as ex:
|
||||
raise NotificationUnprocessableException(str(ex)) from ex
|
||||
|
||||
@@ -349,145 +349,6 @@ def freeze_value(value: Any) -> str:
|
||||
return json.dumps(value, sort_keys=True)
|
||||
|
||||
|
||||
def _native_filter_allowed_targets(
|
||||
query_context: "QueryContext", form_data: dict[str, Any]
|
||||
) -> Optional[tuple[set[str], set[str]]]:
|
||||
"""
|
||||
Return ``(allowed_columns, allowed_metrics)`` a native-filter data request
|
||||
may read, or ``None`` when the request cannot be tied to a native filter on
|
||||
the requesting dashboard (in which case the caller must fail closed).
|
||||
|
||||
``allowed_columns`` are the target column(s) of the filter identified by
|
||||
``native_filter_id`` that point at the request's datasource. ``allowed_metrics``
|
||||
are the saved-metric name(s) the filter is configured to sort its values by
|
||||
(``controlValues.sortMetric``), which a legitimate value lookup sends.
|
||||
"""
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from superset import db
|
||||
from superset.models.dashboard import Dashboard
|
||||
|
||||
native_filter_id = form_data.get("native_filter_id")
|
||||
dashboard_id = form_data.get("dashboardId")
|
||||
if not native_filter_id or not dashboard_id:
|
||||
return None
|
||||
|
||||
dashboard = (
|
||||
db.session.query(Dashboard).filter(Dashboard.id == dashboard_id).one_or_none()
|
||||
)
|
||||
if dashboard is None or not dashboard.json_metadata:
|
||||
return None
|
||||
try:
|
||||
metadata = json.loads(dashboard.json_metadata)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
datasource = getattr(query_context, "datasource", None)
|
||||
datasource_id = datasource.data.get("id") if datasource else None
|
||||
|
||||
allowed_columns: set[str] = set()
|
||||
allowed_metrics: set[str] = set()
|
||||
for fltr in metadata.get("native_filter_configuration", []):
|
||||
if fltr.get("id") != native_filter_id:
|
||||
continue
|
||||
for target in fltr.get("targets", []):
|
||||
column = target.get("column")
|
||||
if (
|
||||
target.get("datasetId") == datasource_id
|
||||
and isinstance(column, dict)
|
||||
and column.get("name")
|
||||
):
|
||||
allowed_columns.add(column["name"])
|
||||
# The filter may be configured to sort its values by a saved metric; a
|
||||
# legitimate value lookup then sends that metric name.
|
||||
sort_metric = (fltr.get("controlValues") or {}).get("sortMetric") or fltr.get(
|
||||
"sortMetric"
|
||||
)
|
||||
if isinstance(sort_metric, str):
|
||||
allowed_metrics.add(sort_metric)
|
||||
# Filter ids are unique, so the matching filter is the only one.
|
||||
break
|
||||
|
||||
return allowed_columns, allowed_metrics
|
||||
|
||||
|
||||
def _native_filter_term_allowed(
|
||||
term: Any, allowed_columns: set[str], allowed_metrics: set[str]
|
||||
) -> bool:
|
||||
"""
|
||||
Whether a value-returning term (metric or order-by expression) is allowed on
|
||||
a native-filter request: a plain reference to a target column or the
|
||||
configured sort metric, or a simple aggregate over a target column. Free-form
|
||||
SQL terms and other saved metrics cannot be validated and are not allowed.
|
||||
"""
|
||||
if isinstance(term, str):
|
||||
return term in allowed_columns or term in allowed_metrics
|
||||
if isinstance(term, dict) and term.get("expressionType") == "SIMPLE":
|
||||
return (term.get("column") or {}).get("column_name") in allowed_columns
|
||||
return False
|
||||
|
||||
|
||||
def _native_filter_query_modified(
|
||||
query: Any, allowed_columns: set[str], allowed_metrics: set[str]
|
||||
) -> bool:
|
||||
"""Whether a single query in a native-filter request reads beyond its targets."""
|
||||
# Columns and group-by may only reference target column(s); adhoc (free-form
|
||||
# SQL) columns cannot be validated, so reject them.
|
||||
for key in ("columns", "groupby"):
|
||||
for col in getattr(query, key, None) or []:
|
||||
if not isinstance(col, str) or col not in allowed_columns:
|
||||
return True
|
||||
for metric in getattr(query, "metrics", None) or []:
|
||||
if not _native_filter_term_allowed(metric, allowed_columns, allowed_metrics):
|
||||
return True
|
||||
# order-by entries are ``(expression, asc)`` pairs.
|
||||
for order in getattr(query, "orderby", None) or []:
|
||||
expr = order[0] if isinstance(order, (list, tuple)) and order else order
|
||||
if not _native_filter_term_allowed(expr, allowed_columns, allowed_metrics):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _native_filter_request_modified(query_context: "QueryContext") -> bool:
|
||||
"""
|
||||
Validate a chartless data request that targets a native filter.
|
||||
|
||||
Only requests identified as native-filter lookups (by the ``NATIVE_FILTER``
|
||||
type marker or a ``native_filter_id``) are constrained; other chartless
|
||||
paths (drill-to-detail, drill-by, samples) carry neither and are validated by
|
||||
the datasource-access checks in raise_for_access, so they are not treated as
|
||||
modified here.
|
||||
|
||||
A native filter may only read the column(s) it targets on the dashboard it
|
||||
belongs to. The request is treated as modified (and therefore rejected for
|
||||
guest users) when it cannot be tied to a native filter on the requesting
|
||||
dashboard, or when any value-returning term (column, group-by, metric, or
|
||||
order-by) references something other than a target column, a simple
|
||||
aggregate over a target column, or the filter's configured sort metric.
|
||||
Free-form SQL terms and saved metrics other than the configured sort metric
|
||||
are rejected. Row-restricting clauses (``filter``/``extras``) are not
|
||||
constrained here: cross-filters legitimately reference other columns and
|
||||
they do not return column values; that blind-inference surface is a separate
|
||||
concern shared with the chart path.
|
||||
"""
|
||||
form_data = query_context.form_data or {}
|
||||
if not (
|
||||
form_data.get("type") == "NATIVE_FILTER" or form_data.get("native_filter_id")
|
||||
):
|
||||
return False
|
||||
targets = _native_filter_allowed_targets(query_context, form_data)
|
||||
# Fail closed when the request cannot be tied to a native filter.
|
||||
if targets is None:
|
||||
return True
|
||||
# Empty allowed sets (filter resolved but no matching column/metric target)
|
||||
# intentionally deny every value-returning term below.
|
||||
allowed_columns, allowed_metrics = targets
|
||||
|
||||
return any(
|
||||
_native_filter_query_modified(query, allowed_columns, allowed_metrics)
|
||||
for query in query_context.queries
|
||||
)
|
||||
|
||||
|
||||
def query_context_modified(query_context: "QueryContext") -> bool:
|
||||
"""
|
||||
Check if a query context has been modified.
|
||||
@@ -498,14 +359,8 @@ def query_context_modified(query_context: "QueryContext") -> bool:
|
||||
form_data = query_context.form_data
|
||||
stored_chart = query_context.slice_
|
||||
|
||||
# Native-filter data requests have no associated chart (no slice_id). Rather
|
||||
# than accepting any payload, constrain them to the column(s) the dashboard's
|
||||
# native filter is allowed to target; other chartless paths keep prior
|
||||
# behavior (see _native_filter_request_modified).
|
||||
if stored_chart is None:
|
||||
return _native_filter_request_modified(query_context)
|
||||
|
||||
if form_data is None:
|
||||
# native filter requests
|
||||
if form_data is None or stored_chart is None:
|
||||
return False
|
||||
|
||||
# cannot request a different chart
|
||||
|
||||
@@ -14,65 +14,14 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import ipaddress
|
||||
import platform
|
||||
import socket
|
||||
import subprocess
|
||||
|
||||
# Networks that must never be reached via user-supplied hostnames.
|
||||
# Includes loopback, RFC-1918 private ranges, link-local (covers cloud
|
||||
# metadata endpoints such as 169.254.169.254), shared address space
|
||||
# (RFC 6598, 100.64.0.0/10), multicast (ip.is_global returns True for
|
||||
# multicast addresses in Python, so explicit blocking is required), and
|
||||
# IPv6 equivalents.
|
||||
_SSRF_UNSAFE_NETWORKS = (
|
||||
ipaddress.ip_network("0.0.0.0/8"),
|
||||
ipaddress.ip_network("10.0.0.0/8"),
|
||||
ipaddress.ip_network("100.64.0.0/10"),
|
||||
ipaddress.ip_network("127.0.0.0/8"),
|
||||
ipaddress.ip_network("169.254.0.0/16"),
|
||||
ipaddress.ip_network("172.16.0.0/12"),
|
||||
ipaddress.ip_network("192.168.0.0/16"),
|
||||
ipaddress.ip_network("224.0.0.0/4"), # IPv4 multicast — is_global is True in Python
|
||||
ipaddress.ip_network("::1/128"),
|
||||
ipaddress.ip_network("fc00::/7"),
|
||||
ipaddress.ip_network("fe80::/10"),
|
||||
ipaddress.ip_network("ff00::/8"), # IPv6 multicast
|
||||
)
|
||||
|
||||
PORT_TIMEOUT = 5
|
||||
PING_TIMEOUT = 5
|
||||
|
||||
|
||||
def is_safe_host(host: str) -> bool:
|
||||
"""
|
||||
Return True if ``host`` resolves exclusively to public, globally-routable
|
||||
IP addresses.
|
||||
|
||||
Returns False if any resolved address falls within a private, loopback,
|
||||
link-local, or otherwise non-routable range. An unresolvable host also
|
||||
returns False.
|
||||
"""
|
||||
try:
|
||||
results = socket.getaddrinfo(host, None)
|
||||
except socket.gaierror:
|
||||
return False
|
||||
if not results:
|
||||
return False
|
||||
for _, _, _, _, sockaddr in results:
|
||||
try:
|
||||
ip = ipaddress.ip_address(sockaddr[0])
|
||||
except ValueError:
|
||||
return False
|
||||
# Unwrap IPv4-mapped IPv6 addresses (e.g. ::ffff:127.0.0.1) so they
|
||||
# are checked against the IPv4 unsafe networks rather than bypassing.
|
||||
if isinstance(ip, ipaddress.IPv6Address) and ip.ipv4_mapped:
|
||||
ip = ip.ipv4_mapped
|
||||
if not ip.is_global or any(ip in net for net in _SSRF_UNSAFE_NETWORKS):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def is_port_open(host: str, port: int) -> bool:
|
||||
"""
|
||||
Test if a given port in a host is open.
|
||||
|
||||
@@ -2961,70 +2961,6 @@ class TestDashboardApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCas
|
||||
assert "Content-Disposition" in rv.headers
|
||||
assert "_example.zip" in rv.headers["Content-Disposition"]
|
||||
|
||||
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
|
||||
def test_export_as_example_data_respects_row_level_filters(self) -> None:
|
||||
"""
|
||||
Dashboard API: export_as_example with export_data must apply the
|
||||
dataset's row-level filters, so a restricted user only receives the
|
||||
rows they are entitled to (not the full underlying table).
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
from superset.connectors.sqla.models import RowLevelSecurityFilter
|
||||
|
||||
table = self.get_table(name="birth_names")
|
||||
gamma = security_manager.find_role("Gamma")
|
||||
# birth_names access so the row-level filter (not the access check) is
|
||||
# what scopes the result, and permission to call the endpoint. Track
|
||||
# what we grant so the shared test role is restored afterwards.
|
||||
granted = []
|
||||
for perm, view in (
|
||||
("datasource_access", table.perm),
|
||||
("can_export_as_example", "Dashboard"),
|
||||
("can_export", "Dashboard"),
|
||||
):
|
||||
pvm = security_manager.find_permission_view_menu(perm, view)
|
||||
if pvm and pvm not in gamma.permissions:
|
||||
gamma.permissions.append(pvm)
|
||||
granted.append(pvm)
|
||||
rls = RowLevelSecurityFilter(
|
||||
name="export_example_girls_only",
|
||||
filter_type="Regular",
|
||||
clause="gender = 'girl'",
|
||||
group_key="gender",
|
||||
)
|
||||
rls.tables.append(table)
|
||||
rls.roles.append(gamma)
|
||||
db.session.add(rls)
|
||||
db.session.commit()
|
||||
try:
|
||||
self.login(GAMMA_USERNAME)
|
||||
dashboard_id = get_dashboards_ids(["births"])[0]
|
||||
uri = (
|
||||
f"api/v1/dashboard/{dashboard_id}/export_as_example/"
|
||||
"?export_data=true&sample_rows=500"
|
||||
)
|
||||
rv = self.client.get(uri)
|
||||
assert rv.status_code == 200
|
||||
|
||||
with ZipFile(BytesIO(rv.data)) as zf:
|
||||
parquet_files = [n for n in zf.namelist() if n.endswith(".parquet")]
|
||||
assert parquet_files, f"expected a data file: {zf.namelist()}"
|
||||
for name in parquet_files:
|
||||
df = pd.read_parquet(BytesIO(zf.read(name)))
|
||||
if "gender" in df.columns:
|
||||
unexpected = set(df["gender"].unique()) - {"girl"}
|
||||
assert not unexpected, (
|
||||
f"export returned rows outside the configured "
|
||||
f"row-level filter: {unexpected}"
|
||||
)
|
||||
finally:
|
||||
db.session.delete(rls)
|
||||
for pvm in granted:
|
||||
if pvm in gamma.permissions:
|
||||
gamma.permissions.remove(pvm)
|
||||
db.session.commit()
|
||||
|
||||
@patch("superset.commands.database.importers.v1.utils.add_permissions")
|
||||
def test_import_dashboard(self, mock_add_permissions):
|
||||
"""
|
||||
|
||||
@@ -17,10 +17,11 @@
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from superset.commands.chart.exceptions import ChartForbiddenError
|
||||
from superset.commands.chart.exceptions import ChartForbiddenError, ChartInvalidError
|
||||
from superset.commands.chart.update import UpdateChartCommand
|
||||
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
|
||||
from superset.exceptions import SupersetSecurityException
|
||||
from superset.utils import json
|
||||
|
||||
|
||||
def _ownership_exc() -> SupersetSecurityException:
|
||||
@@ -151,3 +152,78 @@ def test_update_chart_owner_can_perform_regular_update(
|
||||
|
||||
find_by_id.assert_called_once_with(1)
|
||||
raise_for_ownership.assert_called_once()
|
||||
|
||||
|
||||
def _query_context_payload(datasource: object) -> dict[str, object]:
|
||||
"""Build a query-context-only update payload targeting ``datasource``."""
|
||||
return {
|
||||
"query_context": json.dumps({"datasource": datasource, "queries": []}),
|
||||
"query_context_generation": True,
|
||||
}
|
||||
|
||||
|
||||
def test_update_chart_query_context_matching_datasource_is_allowed(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A query context that targets the chart's own datasource is accepted."""
|
||||
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
|
||||
find_by_id.return_value = mocker.MagicMock(
|
||||
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
|
||||
)
|
||||
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
|
||||
mocker.patch("superset.commands.chart.update.security_manager.raise_for_access")
|
||||
|
||||
UpdateChartCommand(
|
||||
1, _query_context_payload({"id": 42, "type": "table"})
|
||||
).validate()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"datasource",
|
||||
[
|
||||
{"id": 99, "type": "table"}, # different id
|
||||
{"id": 42, "type": "query"}, # different type
|
||||
{"id": "99", "type": "table"}, # different id as string
|
||||
{"id": 42}, # matching id but missing type
|
||||
],
|
||||
)
|
||||
def test_update_chart_query_context_mismatched_datasource_is_rejected(
|
||||
mocker: MockerFixture,
|
||||
datasource: dict[str, object],
|
||||
) -> None:
|
||||
"""A query context pointing at a different datasource is rejected with a 4xx."""
|
||||
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
|
||||
find_by_id.return_value = mocker.MagicMock(
|
||||
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
|
||||
)
|
||||
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
|
||||
mocker.patch("superset.commands.chart.update.security_manager.raise_for_access")
|
||||
|
||||
with pytest.raises(ChartInvalidError):
|
||||
UpdateChartCommand(1, _query_context_payload(datasource)).validate()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"query_context",
|
||||
[
|
||||
"{}", # no datasource key
|
||||
'{"datasource": null}', # null datasource
|
||||
"not-json", # unparseable payload
|
||||
],
|
||||
)
|
||||
def test_update_chart_query_context_without_datasource_is_allowed(
|
||||
mocker: MockerFixture,
|
||||
query_context: str,
|
||||
) -> None:
|
||||
"""Payloads with no verifiable datasource fall back to the chart's own."""
|
||||
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
|
||||
find_by_id.return_value = mocker.MagicMock(
|
||||
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
|
||||
)
|
||||
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
|
||||
mocker.patch("superset.commands.chart.update.security_manager.raise_for_access")
|
||||
|
||||
UpdateChartCommand(
|
||||
1,
|
||||
{"query_context": query_context, "query_context_generation": True},
|
||||
).validate()
|
||||
|
||||
@@ -16,9 +16,6 @@
|
||||
# under the License.
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterator
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock, patch
|
||||
from uuid import uuid4
|
||||
|
||||
@@ -30,14 +27,10 @@ from superset.commands.dashboard.export_example import (
|
||||
_make_bytes_generator,
|
||||
_make_yaml_generator,
|
||||
export_chart,
|
||||
export_dataset_data,
|
||||
export_dataset_yaml,
|
||||
ExportExampleCommand,
|
||||
sanitize_filename,
|
||||
)
|
||||
from superset.common.db_query_status import QueryStatus
|
||||
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
|
||||
from superset.exceptions import SupersetSecurityException
|
||||
|
||||
|
||||
def test_sanitize_filename_basic():
|
||||
@@ -328,98 +321,3 @@ def test_export_example_command_no_data():
|
||||
assert "dataset.yaml" in files
|
||||
assert "data.parquet" not in files
|
||||
assert "dashboard.yaml" in files
|
||||
|
||||
|
||||
def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock:
|
||||
"""A dataset mock whose query() returns ``rows`` as a DataFrame."""
|
||||
import pandas as pd
|
||||
|
||||
dataset = MagicMock()
|
||||
dataset.table_name = "private_table"
|
||||
dataset.database = MagicMock() # truthy
|
||||
dataset.columns = [
|
||||
MagicMock(column_name=name, expression=None) for name in ("uid", "data")
|
||||
]
|
||||
result = MagicMock()
|
||||
result.status = QueryStatus.SUCCESS
|
||||
result.df = pd.DataFrame(rows)
|
||||
dataset.query.return_value = result
|
||||
return dataset
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def patched_db() -> Iterator[MagicMock]:
|
||||
"""Patch ``superset.db`` so merge() returns the dataset unchanged."""
|
||||
with patch("superset.db") as mock_db:
|
||||
mock_db.session.merge.side_effect = lambda d: d
|
||||
yield mock_db
|
||||
|
||||
|
||||
def test_export_dataset_data_skips_when_access_denied(patched_db: MagicMock) -> None:
|
||||
"""
|
||||
A requester without access to the dataset must get no data file, and the
|
||||
underlying rows must never be fetched (no fallback raw read).
|
||||
"""
|
||||
dataset = _make_data_export_dataset([{"uid": 1, "data": "secret"}])
|
||||
dataset.raise_for_access.side_effect = SupersetSecurityException(
|
||||
SupersetError(
|
||||
message="denied",
|
||||
error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
|
||||
level=ErrorLevel.ERROR,
|
||||
)
|
||||
)
|
||||
|
||||
assert export_dataset_data(dataset) is None
|
||||
|
||||
dataset.raise_for_access.assert_called_once()
|
||||
dataset.query.assert_not_called()
|
||||
|
||||
|
||||
def test_export_dataset_data_fetches_through_query_path(patched_db: MagicMock) -> None:
|
||||
"""
|
||||
Rows are fetched through the dataset's own query builder (which applies the
|
||||
per-row filters), and exactly those rows are what gets written to Parquet.
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
own_rows = [{"uid": 6, "data": "row-for-user-6"}]
|
||||
dataset = _make_data_export_dataset(own_rows)
|
||||
|
||||
payload = export_dataset_data(dataset)
|
||||
|
||||
assert payload is not None
|
||||
dataset.raise_for_access.assert_called_once()
|
||||
dataset.query.assert_called_once()
|
||||
|
||||
# The fetch goes through query() with a column projection, not a raw read.
|
||||
query_obj = dataset.query.call_args.args[0]
|
||||
assert query_obj["columns"] == ["uid", "data"]
|
||||
assert query_obj["is_timeseries"] is False
|
||||
assert "row_limit" in query_obj
|
||||
|
||||
# Only the rows query() returned are exported.
|
||||
exported = pd.read_parquet(BytesIO(payload))
|
||||
assert exported.to_dict("records") == own_rows
|
||||
|
||||
|
||||
def test_export_dataset_data_applies_row_limit_at_query_level(
|
||||
patched_db: MagicMock,
|
||||
) -> None:
|
||||
"""sample_rows is passed as a SQL-level row_limit, not applied post-fetch."""
|
||||
dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}])
|
||||
|
||||
export_dataset_data(dataset, sample_rows=5)
|
||||
|
||||
query_obj = dataset.query.call_args.args[0]
|
||||
assert query_obj["row_limit"] == 5
|
||||
|
||||
|
||||
def test_export_dataset_data_skips_on_failed_query(patched_db: MagicMock) -> None:
|
||||
"""
|
||||
The query path signals failure via status rather than raising, so a failed
|
||||
query must yield no data file instead of an empty/partial Parquet.
|
||||
"""
|
||||
dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}])
|
||||
dataset.query.return_value.status = QueryStatus.FAILED
|
||||
|
||||
assert export_dataset_data(dataset) is None
|
||||
|
||||
@@ -1821,37 +1821,3 @@ def test_get_df_payload_no_warning_when_not_memory_limited() -> None:
|
||||
result = processor.get_df_payload(query_obj, force_cached=False)
|
||||
|
||||
assert result["warning"] is None
|
||||
|
||||
|
||||
def test_raise_for_access_evaluates_access_before_validate():
|
||||
"""
|
||||
Access must be evaluated before the queries are validated, because query
|
||||
validation renders the request's filter expressions. When access is denied,
|
||||
no query is validated (so caller-supplied input is never rendered).
|
||||
"""
|
||||
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
|
||||
from superset.exceptions import SupersetSecurityException
|
||||
from superset.utils.core import DatasourceType
|
||||
|
||||
query = MagicMock()
|
||||
query_context = MagicMock()
|
||||
query_context.queries = [query]
|
||||
query_context.datasource.type = DatasourceType.TABLE
|
||||
|
||||
processor = QueryContextProcessor(query_context)
|
||||
|
||||
denied = SupersetSecurityException(
|
||||
SupersetError(
|
||||
message="denied",
|
||||
error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
|
||||
level=ErrorLevel.ERROR,
|
||||
)
|
||||
)
|
||||
with patch(
|
||||
"superset.common.query_context_processor.security_manager.raise_for_access",
|
||||
side_effect=denied,
|
||||
):
|
||||
with pytest.raises(SupersetSecurityException):
|
||||
processor.raise_for_access()
|
||||
|
||||
query.validate.assert_not_called()
|
||||
|
||||
@@ -22,7 +22,6 @@ import re
|
||||
import uuid
|
||||
from typing import Any
|
||||
from unittest.mock import Mock, patch
|
||||
from urllib import request
|
||||
|
||||
import pytest
|
||||
from flask import current_app
|
||||
@@ -810,20 +809,16 @@ def test_import_dataset_extra_empty_string(
|
||||
assert sqla_table.extra is None # noqa: E711
|
||||
|
||||
|
||||
@patch("superset.commands.dataset.importers.v1.utils.is_safe_host", return_value=True)
|
||||
@patch("superset.commands.dataset.importers.v1.utils.request.build_opener")
|
||||
@patch("superset.commands.dataset.importers.v1.utils.request.urlopen")
|
||||
def test_import_column_allowed_data_url(
|
||||
mock_build_opener: Mock,
|
||||
mock_is_safe_host: Mock,
|
||||
mock_urlopen: Mock,
|
||||
mocker: MockerFixture,
|
||||
session: Session,
|
||||
) -> None:
|
||||
"""
|
||||
Test importing a dataset when using data key to fetch data from a URL.
|
||||
"""
|
||||
mock_opener = Mock()
|
||||
mock_opener.open.return_value = io.StringIO("col1\nvalue1\nvalue2\n")
|
||||
mock_build_opener.return_value = mock_opener
|
||||
mock_urlopen.return_value = io.StringIO("col1\nvalue1\nvalue2\n")
|
||||
|
||||
mocker.patch.object(security_manager, "can_access", return_value=True)
|
||||
|
||||
@@ -1051,134 +1046,10 @@ def test_import_dataset_access_check(
|
||||
(["*"], "https://host1.domain3.com/data.csv", False, re.error),
|
||||
],
|
||||
)
|
||||
def test_validate_data_uri(
|
||||
allowed_urls: list[str],
|
||||
data_uri: str,
|
||||
expected: bool,
|
||||
exception_class: type[Exception] | None,
|
||||
) -> None:
|
||||
"""Tests allowlist pattern matching. is_safe_host is stubbed out so that
|
||||
fake/unresolvable test hostnames do not interfere with DNS-based checks
|
||||
(those are covered by the dedicated is_safe_host tests below)."""
|
||||
def test_validate_data_uri(allowed_urls, data_uri, expected, exception_class):
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = allowed_urls
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
with patch(
|
||||
"superset.commands.dataset.importers.v1.utils.is_safe_host",
|
||||
return_value=True,
|
||||
):
|
||||
if expected:
|
||||
validate_data_uri(data_uri)
|
||||
else:
|
||||
with pytest.raises(exception_class):
|
||||
validate_data_uri(data_uri)
|
||||
|
||||
|
||||
def test_validate_data_uri_file_scheme_examples_allowed() -> None:
|
||||
"""file:// URIs pointing inside the examples folder are permitted."""
|
||||
import os
|
||||
|
||||
from superset.examples.helpers import get_examples_folder
|
||||
|
||||
examples_folder = get_examples_folder()
|
||||
uri_in_examples = (
|
||||
f"file://{os.path.join(examples_folder, 'birth_names', 'data.parquet')}"
|
||||
)
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = []
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
# Should not raise
|
||||
validate_data_uri(uri_in_examples)
|
||||
|
||||
|
||||
def test_validate_data_uri_file_scheme_outside_examples_blocked() -> None:
|
||||
"""file:// URIs outside the examples folder are blocked."""
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = []
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
with pytest.raises(DatasetForbiddenDataURI):
|
||||
validate_data_uri("file:///etc/passwd")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data_uri",
|
||||
["FiLe:///etc/passwd", "FILE:///etc/passwd", "file:/etc/passwd"],
|
||||
)
|
||||
def test_validate_data_uri_file_scheme_case_insensitive(data_uri: str) -> None:
|
||||
"""Mixed-case / single-slash file URIs still go through the sandbox check
|
||||
and are blocked when outside the examples folder, so they cannot skip the
|
||||
local-file check via a case-sensitive scheme gate."""
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = []
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
with pytest.raises(DatasetForbiddenDataURI):
|
||||
if expected:
|
||||
validate_data_uri(data_uri)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data_uri",
|
||||
[
|
||||
# Userinfo-injection: allowlist matches the trusted hostname in the
|
||||
# authority but urlparse().hostname resolves to the actual target.
|
||||
"https://allowed.example.com@169.254.169.254/latest/meta-data/",
|
||||
"https://allowed.example.com@10.0.0.1/internal",
|
||||
"https://allowed.example.com@127.0.0.1/admin",
|
||||
],
|
||||
)
|
||||
def test_validate_data_uri_blocks_userinfo_ssrf_injection(data_uri: str) -> None:
|
||||
"""Userinfo-injected private IPs must be rejected even when the leading
|
||||
hostname matches an allowlist pattern."""
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
with patch(
|
||||
"superset.commands.dataset.importers.v1.utils.is_safe_host",
|
||||
return_value=False,
|
||||
):
|
||||
with pytest.raises(DatasetForbiddenDataURI):
|
||||
else:
|
||||
with pytest.raises(exception_class):
|
||||
validate_data_uri(data_uri)
|
||||
|
||||
|
||||
def test_validate_data_uri_allow_internal_flag_bypasses_host_check() -> None:
|
||||
"""When DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS is True, internal hosts
|
||||
must be permitted to support air-gapped / on-premises deployments."""
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = True
|
||||
with patch(
|
||||
"superset.commands.dataset.importers.v1.utils.is_safe_host",
|
||||
return_value=False,
|
||||
) as mock_check:
|
||||
validate_data_uri("http://10.0.0.5/data.csv")
|
||||
mock_check.assert_not_called()
|
||||
|
||||
|
||||
def test_validate_data_uri_no_hostname_raises() -> None:
|
||||
"""A URI that produces no parseable hostname (e.g. opaque data: URIs) must
|
||||
be rejected — fail-closed: no hostname means no safe host confirmation."""
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
# urlparse("data:text/csv,...").hostname is None, which fails the
|
||||
# "not hostname or not is_safe_host(hostname)" guard.
|
||||
with pytest.raises(DatasetForbiddenDataURI):
|
||||
validate_data_uri("data:text/csv,col1,col2")
|
||||
|
||||
|
||||
def test_redirect_handler_blocks_disallowed_redirect_target() -> None:
|
||||
"""The redirect handler must reject a redirect to a disallowed host by
|
||||
re-running validate_data_uri() on the new URL before following it."""
|
||||
from superset.commands.dataset.importers.v1.utils import (
|
||||
_ValidatingRedirectHandler,
|
||||
)
|
||||
|
||||
current_app.config["DATASET_IMPORT_ALLOWED_DATA_URLS"] = [r".*"]
|
||||
current_app.config["DATASET_IMPORT_ALLOW_INTERNAL_DATA_URLS"] = False
|
||||
|
||||
handler = _ValidatingRedirectHandler()
|
||||
with patch(
|
||||
"superset.commands.dataset.importers.v1.utils.is_safe_host",
|
||||
return_value=False,
|
||||
):
|
||||
with pytest.raises(DatasetForbiddenDataURI):
|
||||
handler.redirect_request(
|
||||
request.Request("http://public.example.com/data.csv"),
|
||||
None,
|
||||
302,
|
||||
"Found",
|
||||
{},
|
||||
"http://169.254.169.254/latest/meta-data/",
|
||||
)
|
||||
|
||||
@@ -1,476 +0,0 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Tests for cancel_query_id validation to prevent SQL/command injection.
|
||||
|
||||
These tests verify that the validate_cancel_query_id method properly
|
||||
rejects malicious input that could be used for SQL injection attacks.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
|
||||
class TestValidateCancelQueryId:
|
||||
"""Tests for BaseEngineSpec.validate_cancel_query_id"""
|
||||
|
||||
def test_validate_cancel_query_id_valid_numeric(self) -> None:
|
||||
"""Test that valid numeric IDs pass validation"""
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
assert BaseEngineSpec.validate_cancel_query_id("12345") is True
|
||||
assert BaseEngineSpec.validate_cancel_query_id("1") is True
|
||||
assert BaseEngineSpec.validate_cancel_query_id("999999999") is True
|
||||
|
||||
def test_validate_cancel_query_id_invalid_sql_injection(self) -> None:
|
||||
"""Test that SQL injection payloads are rejected"""
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
# Common SQL injection payloads
|
||||
assert (
|
||||
BaseEngineSpec.validate_cancel_query_id("1; DROP TABLE users; --") is False
|
||||
)
|
||||
assert BaseEngineSpec.validate_cancel_query_id("1' OR '1'='1") is False
|
||||
assert (
|
||||
BaseEngineSpec.validate_cancel_query_id("1 UNION SELECT * FROM users")
|
||||
is False
|
||||
)
|
||||
assert BaseEngineSpec.validate_cancel_query_id("1; DELETE FROM users;") is False
|
||||
|
||||
def test_validate_cancel_query_id_invalid_special_chars(self) -> None:
|
||||
"""Test that special characters are rejected"""
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
assert BaseEngineSpec.validate_cancel_query_id("123&admin=true") is False
|
||||
assert BaseEngineSpec.validate_cancel_query_id("123#fragment") is False
|
||||
assert BaseEngineSpec.validate_cancel_query_id("123\n456") is False
|
||||
assert BaseEngineSpec.validate_cancel_query_id("123\r\nHost: evil") is False
|
||||
|
||||
def test_validate_cancel_query_id_none(self) -> None:
|
||||
"""Test that None is rejected"""
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
assert BaseEngineSpec.validate_cancel_query_id(None) is False
|
||||
|
||||
def test_validate_cancel_query_id_empty_string(self) -> None:
|
||||
"""Test that empty string is rejected"""
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
assert BaseEngineSpec.validate_cancel_query_id("") is False
|
||||
|
||||
def test_validate_cancel_query_id_custom_pattern(self) -> None:
|
||||
"""Test custom regex patterns"""
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
# Hex pattern with exact length (for Impala - 16 hex chars per side)
|
||||
assert (
|
||||
BaseEngineSpec.validate_cancel_query_id(
|
||||
"abc123def4567890:789abc123def4567", r"[A-Fa-f0-9]{16}:[A-Fa-f0-9]{16}"
|
||||
)
|
||||
is True
|
||||
)
|
||||
assert (
|
||||
BaseEngineSpec.validate_cancel_query_id(
|
||||
"invalid:pattern!", r"[A-Fa-f0-9]{16}:[A-Fa-f0-9]{16}"
|
||||
)
|
||||
is False
|
||||
)
|
||||
|
||||
# Alphanumeric with underscores (for Trino)
|
||||
assert (
|
||||
BaseEngineSpec.validate_cancel_query_id(
|
||||
"20240101_123456_00001_abcde", r"[a-zA-Z0-9_]+"
|
||||
)
|
||||
is True
|
||||
)
|
||||
assert (
|
||||
BaseEngineSpec.validate_cancel_query_id("20240101-123456", r"[a-zA-Z0-9_]+")
|
||||
is False
|
||||
)
|
||||
|
||||
|
||||
class TestMySQLCancelQueryValidation:
|
||||
"""Tests for MySQL cancel_query input validation"""
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
|
||||
"""Test that valid MySQL connection ID works"""
|
||||
from superset.db_engine_specs.mysql import MySQLEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "12345") is True
|
||||
cursor_mock.execute.assert_called_once_with("KILL CONNECTION 12345")
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that SQL injection is blocked"""
|
||||
from superset.db_engine_specs.mysql import MySQLEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
# SQL injection payload should be rejected before execute is called
|
||||
result = MySQLEngineSpec.cancel_query(
|
||||
cursor_mock, query, "1; DROP TABLE users; --"
|
||||
)
|
||||
assert result is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_non_numeric_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that non-numeric IDs are blocked"""
|
||||
from superset.db_engine_specs.mysql import MySQLEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "abc") is False
|
||||
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "12.34") is False
|
||||
assert MySQLEngineSpec.cancel_query(cursor_mock, query, "-123") is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
|
||||
class TestSingleStoreCancelQueryValidation:
|
||||
"""Tests for SingleStore cancel_query input validation"""
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
|
||||
"""Test valid SingleStore connection ID format (two space-separated integers)"""
|
||||
from superset.db_engine_specs.singlestore import SingleStoreSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
# Single integer should work
|
||||
assert SingleStoreSpec.cancel_query(cursor_mock, query, "12345") is True
|
||||
|
||||
# Two space-separated integers should work
|
||||
cursor_mock.reset_mock()
|
||||
assert SingleStoreSpec.cancel_query(cursor_mock, query, "12345 67890") is True
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that SQL injection is blocked"""
|
||||
from superset.db_engine_specs.singlestore import SingleStoreSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
result = SingleStoreSpec.cancel_query(
|
||||
cursor_mock, query, "1; DROP TABLE users; --"
|
||||
)
|
||||
assert result is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
|
||||
class TestPostgresCancelQueryValidation:
|
||||
"""Tests for PostgreSQL cancel_query input validation"""
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
|
||||
"""Test that valid PostgreSQL PID works"""
|
||||
from superset.db_engine_specs.postgres import PostgresEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
assert PostgresEngineSpec.cancel_query(cursor_mock, query, "12345") is True
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that SQL injection is blocked"""
|
||||
from superset.db_engine_specs.postgres import PostgresEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
# SQL injection in WHERE clause
|
||||
result = PostgresEngineSpec.cancel_query(cursor_mock, query, "1' OR '1'='1")
|
||||
assert result is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
|
||||
class TestRedshiftCancelQueryValidation:
|
||||
"""Tests for Redshift cancel_query input validation"""
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
|
||||
"""Test that valid Redshift PID works"""
|
||||
from superset.db_engine_specs.redshift import RedshiftEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
assert RedshiftEngineSpec.cancel_query(cursor_mock, query, "12345") is True
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that SQL injection is blocked"""
|
||||
from superset.db_engine_specs.redshift import RedshiftEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
result = RedshiftEngineSpec.cancel_query(
|
||||
cursor_mock, query, "1; DROP TABLE users; --"
|
||||
)
|
||||
assert result is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
|
||||
class TestSnowflakeCancelQueryValidation:
|
||||
"""Tests for Snowflake cancel_query input validation"""
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
|
||||
"""Test that valid Snowflake session ID works"""
|
||||
from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
# Snowflake session IDs are alphanumeric (VARCHAR)
|
||||
assert (
|
||||
SnowflakeEngineSpec.cancel_query(cursor_mock, query, "34359980038") is True
|
||||
)
|
||||
cursor_mock.reset_mock()
|
||||
# Also test alphanumeric (per Snowflake docs)
|
||||
assert (
|
||||
SnowflakeEngineSpec.cancel_query(cursor_mock, query, "ABC123def456") is True
|
||||
)
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that SQL injection is blocked"""
|
||||
from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
result = SnowflakeEngineSpec.cancel_query(
|
||||
cursor_mock, query, "1); DROP TABLE users; --"
|
||||
)
|
||||
assert result is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
|
||||
class TestTrinoCancelQueryValidation:
|
||||
"""Tests for Trino cancel_query input validation"""
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_valid_id(self, engine_mock: Mock) -> None:
|
||||
"""Test that valid Trino query ID works"""
|
||||
from superset.db_engine_specs.trino import TrinoEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
# Trino query IDs are alphanumeric with underscores
|
||||
assert (
|
||||
TrinoEngineSpec.cancel_query(
|
||||
cursor_mock, query, "20240101_123456_00001_abcde"
|
||||
)
|
||||
is True
|
||||
)
|
||||
|
||||
@patch("sqlalchemy.engine.Engine.connect")
|
||||
def test_cancel_query_sql_injection_blocked(self, engine_mock: Mock) -> None:
|
||||
"""Test that SQL injection is blocked"""
|
||||
from superset.db_engine_specs.trino import TrinoEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Query()
|
||||
cursor_mock = engine_mock.return_value.__enter__.return_value
|
||||
|
||||
result = TrinoEngineSpec.cancel_query(
|
||||
cursor_mock, query, "query_id'); DROP TABLE users; --"
|
||||
)
|
||||
assert result is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
|
||||
|
||||
class TestImpalaCancelQueryValidation:
|
||||
"""Tests for Impala cancel_query input validation"""
|
||||
|
||||
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_valid_id(
|
||||
self, requests_mock: Mock, safe_host_mock: Mock
|
||||
) -> None:
|
||||
"""Test that valid Impala query ID works"""
|
||||
from superset.db_engine_specs.impala import ImpalaEngineSpec
|
||||
from superset.models.core import Database
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
# Mock the database and query. The host-safety check (added alongside
|
||||
# the cancel call) is mocked as allowed here so the test stays focused
|
||||
# on query-ID validation; host validation is covered separately.
|
||||
mock_db = Mock(spec=Database)
|
||||
mock_db.url_object.host = "impala-host"
|
||||
|
||||
query = Mock(spec=Query)
|
||||
query.database = mock_db
|
||||
|
||||
requests_mock.return_value.status_code = 200
|
||||
|
||||
# Valid Impala query ID format: 16 hex chars per side
|
||||
result = ImpalaEngineSpec.cancel_query(
|
||||
None, query, "abc123def4567890:789abc123def4567"
|
||||
)
|
||||
assert result is True
|
||||
requests_mock.assert_called_once()
|
||||
|
||||
# Also test uppercase hex (should be valid)
|
||||
requests_mock.reset_mock()
|
||||
requests_mock.return_value.status_code = 200
|
||||
result = ImpalaEngineSpec.cancel_query(
|
||||
None, query, "ABC123DEF4567890:789ABC123DEF4567"
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_url_injection_blocked(self, requests_mock: Mock) -> None:
|
||||
"""Test that URL injection is blocked"""
|
||||
from superset.db_engine_specs.impala import ImpalaEngineSpec
|
||||
from superset.models.core import Database
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
mock_db = Mock(spec=Database)
|
||||
mock_db.url_object.host = "impala-host"
|
||||
|
||||
query = Mock(spec=Query)
|
||||
query.database = mock_db
|
||||
|
||||
# URL injection payloads should be rejected
|
||||
result = ImpalaEngineSpec.cancel_query(None, query, "abc123&admin=true")
|
||||
assert result is False
|
||||
requests_mock.assert_not_called()
|
||||
|
||||
result = ImpalaEngineSpec.cancel_query(None, query, "abc123#fragment")
|
||||
assert result is False
|
||||
requests_mock.assert_not_called()
|
||||
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_invalid_format_blocked(self, requests_mock: Mock) -> None:
|
||||
"""Test that invalid format is blocked"""
|
||||
from superset.db_engine_specs.impala import ImpalaEngineSpec
|
||||
from superset.models.core import Database
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
mock_db = Mock(spec=Database)
|
||||
mock_db.url_object.host = "impala-host"
|
||||
|
||||
query = Mock(spec=Query)
|
||||
query.database = mock_db
|
||||
|
||||
# Missing colon
|
||||
assert ImpalaEngineSpec.cancel_query(None, query, "abc123def4567890") is False
|
||||
# Wrong length (too short)
|
||||
assert ImpalaEngineSpec.cancel_query(None, query, "abc:def") is False
|
||||
# Wrong length (too long)
|
||||
assert (
|
||||
ImpalaEngineSpec.cancel_query(
|
||||
None, query, "abc123def45678901:789abc123def45678"
|
||||
)
|
||||
is False
|
||||
)
|
||||
# Special characters
|
||||
assert ImpalaEngineSpec.cancel_query(None, query, "abc!:def@ghijklmn") is False
|
||||
# Non-hex characters
|
||||
assert (
|
||||
ImpalaEngineSpec.cancel_query(
|
||||
None, query, "ghijklmnopqrstuv:ghijklmnopqrstuv"
|
||||
)
|
||||
is False
|
||||
)
|
||||
requests_mock.assert_not_called()
|
||||
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_null_host_blocked(self, requests_mock: Mock) -> None:
|
||||
"""Test that missing host returns False"""
|
||||
from superset.db_engine_specs.impala import ImpalaEngineSpec
|
||||
from superset.models.core import Database
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
mock_db = Mock(spec=Database)
|
||||
mock_db.url_object.host = None # Null host
|
||||
|
||||
query = Mock(spec=Query)
|
||||
query.database = mock_db
|
||||
|
||||
# Valid query ID but null host should fail
|
||||
result = ImpalaEngineSpec.cancel_query(
|
||||
None, query, "abc123def4567890:789abc123def4567"
|
||||
)
|
||||
assert result is False
|
||||
requests_mock.assert_not_called()
|
||||
|
||||
# Also test empty string host
|
||||
mock_db.url_object.host = ""
|
||||
result = ImpalaEngineSpec.cancel_query(
|
||||
None, query, "abc123def4567890:789abc123def4567"
|
||||
)
|
||||
assert result is False
|
||||
requests_mock.assert_not_called()
|
||||
|
||||
|
||||
class TestOcientCancelQueryValidation:
|
||||
"""Tests for Ocient cancel_query input validation.
|
||||
|
||||
Ocient validates the query ID it looked up from ``query_id_mapping`` (which
|
||||
originates from the database), so these tests seed that mapping directly.
|
||||
"""
|
||||
|
||||
def test_cancel_query_valid_id(self) -> None:
|
||||
"""Test that a valid mapped Ocient query ID is cancelled"""
|
||||
from superset.db_engine_specs.ocient import OcientEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Mock(spec=Query)
|
||||
query.id = "ocient-valid"
|
||||
cursor_mock = Mock()
|
||||
OcientEngineSpec.query_id_mapping[query.id] = "abc_123-DEF"
|
||||
try:
|
||||
assert OcientEngineSpec.cancel_query(cursor_mock, query, "") is True
|
||||
cursor_mock.execute.assert_called_once_with("CANCEL abc_123-DEF")
|
||||
finally:
|
||||
OcientEngineSpec.query_id_mapping.pop(query.id, None)
|
||||
|
||||
def test_cancel_query_sql_injection_blocked(self) -> None:
|
||||
"""Test that a malicious mapped query ID is rejected before execute"""
|
||||
from superset.db_engine_specs.ocient import OcientEngineSpec
|
||||
from superset.models.sql_lab import Query
|
||||
|
||||
query = Mock(spec=Query)
|
||||
query.id = "ocient-injection"
|
||||
cursor_mock = Mock()
|
||||
OcientEngineSpec.query_id_mapping[query.id] = "1; DROP TABLE users; --"
|
||||
try:
|
||||
assert OcientEngineSpec.cancel_query(cursor_mock, query, "") is False
|
||||
cursor_mock.execute.assert_not_called()
|
||||
finally:
|
||||
OcientEngineSpec.query_id_mapping.pop(query.id, None)
|
||||
@@ -60,13 +60,11 @@ def test_get_cancel_query_id() -> None:
|
||||
)
|
||||
|
||||
|
||||
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
|
||||
@patch("requests.post")
|
||||
def test_cancel_query(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
|
||||
def test_cancel_query(post_mock: Mock) -> None:
|
||||
query = Query()
|
||||
database = Database(
|
||||
database_name="test_impala",
|
||||
sqlalchemy_uri="impala://impala.example.com:21050/default",
|
||||
database_name="test_impala", sqlalchemy_uri="impala://localhost:21050/default"
|
||||
)
|
||||
query.database = database
|
||||
|
||||
@@ -77,20 +75,17 @@ def test_cancel_query(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
|
||||
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
|
||||
|
||||
post_mock.assert_called_once_with(
|
||||
"http://impala.example.com:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
|
||||
"http://localhost:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
|
||||
timeout=3,
|
||||
allow_redirects=False,
|
||||
)
|
||||
assert result is True
|
||||
|
||||
|
||||
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_failed(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
|
||||
def test_cancel_query_failed(post_mock: Mock) -> None:
|
||||
query = Query()
|
||||
database = Database(
|
||||
database_name="test_impala",
|
||||
sqlalchemy_uri="impala://impala.example.com:21050/default",
|
||||
database_name="test_impala", sqlalchemy_uri="impala://localhost:21050/default"
|
||||
)
|
||||
query.database = database
|
||||
|
||||
@@ -101,20 +96,17 @@ def test_cancel_query_failed(post_mock: Mock, _safe_host: Mock) -> None: # noqa
|
||||
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
|
||||
|
||||
post_mock.assert_called_once_with(
|
||||
"http://impala.example.com:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
|
||||
"http://localhost:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
|
||||
timeout=3,
|
||||
allow_redirects=False,
|
||||
)
|
||||
assert result is False
|
||||
|
||||
|
||||
@patch("superset.db_engine_specs.impala.is_safe_host", return_value=True)
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_exception(post_mock: Mock, _safe_host: Mock) -> None: # noqa: PT019
|
||||
def test_cancel_query_exception(post_mock: Mock) -> None:
|
||||
query = Query()
|
||||
database = Database(
|
||||
database_name="test_impala",
|
||||
sqlalchemy_uri="impala://impala.example.com:21050/default",
|
||||
database_name="test_impala", sqlalchemy_uri="impala://localhost:21050/default"
|
||||
)
|
||||
query.database = database
|
||||
|
||||
@@ -123,52 +115,3 @@ def test_cancel_query_exception(post_mock: Mock, _safe_host: Mock) -> None: # n
|
||||
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_blocks_internal_host(post_mock: Mock, app_context: None) -> None:
|
||||
"""A private/internal Impala host is refused by default (no HTTP call)."""
|
||||
query = Query()
|
||||
database = Database(
|
||||
database_name="test_impala",
|
||||
sqlalchemy_uri="impala://169.254.169.254:21050/default",
|
||||
)
|
||||
query.database = database
|
||||
|
||||
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
|
||||
|
||||
assert result is False
|
||||
post_mock.assert_not_called()
|
||||
|
||||
|
||||
@patch("requests.post")
|
||||
def test_cancel_query_allows_internal_host_with_opt_out(
|
||||
post_mock: Mock, app_context: None
|
||||
) -> None:
|
||||
"""IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS=True permits internal targets."""
|
||||
from flask import current_app
|
||||
|
||||
query = Query()
|
||||
database = Database(
|
||||
database_name="test_impala",
|
||||
sqlalchemy_uri="impala://10.0.0.5:21050/default",
|
||||
)
|
||||
query.database = database
|
||||
|
||||
response_mock = Mock()
|
||||
response_mock.status_code = 200
|
||||
post_mock.return_value = response_mock
|
||||
|
||||
original = current_app.config.get("IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS")
|
||||
current_app.config["IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS"] = True
|
||||
try:
|
||||
result = spec.cancel_query(None, query, "6940643a2731718b:9fbdba2000000000")
|
||||
finally:
|
||||
current_app.config["IMPALA_CANCEL_QUERY_ALLOW_INTERNAL_HOSTS"] = original
|
||||
|
||||
post_mock.assert_called_once_with(
|
||||
"http://10.0.0.5:25000/cancel_query?query_id=6940643a2731718b:9fbdba2000000000",
|
||||
timeout=3,
|
||||
allow_redirects=False,
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@@ -25,7 +25,6 @@ from flask import current_app
|
||||
from flask_appbuilder.security.sqla.models import Role
|
||||
from freezegun import freeze_time
|
||||
from jinja2 import DebugUndefined
|
||||
from jinja2.exceptions import SecurityError
|
||||
from jinja2.sandbox import SandboxedEnvironment
|
||||
from pytest_mock import MockerFixture
|
||||
from sqlalchemy.dialects import mysql
|
||||
@@ -993,40 +992,6 @@ def test_metric_macro_expansion(mocker: MockerFixture) -> None:
|
||||
assert processor.process_template("{{ metric('c') }}") == "42"
|
||||
|
||||
|
||||
def test_metric_macro_does_not_expose_environment(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
A template must not be able to read the template environment through the
|
||||
``metric`` macro's bound arguments.
|
||||
"""
|
||||
database = Database(id=1, database_name="my_database", sqlalchemy_uri="sqlite://")
|
||||
mock_g = mocker.patch("superset.jinja_context.g")
|
||||
mock_g.form_data = {"datasource": {"id": 1}}
|
||||
processor = get_template_processor(database=database)
|
||||
# Attribute access on the macro's partial is denied, so a reference to its
|
||||
# bound args resolves to an undefined and any further use raises instead of
|
||||
# yielding the environment object.
|
||||
with pytest.raises(SecurityError):
|
||||
processor.process_template("{{ metric.args[1] }}")
|
||||
with pytest.raises(SecurityError):
|
||||
processor.process_template(
|
||||
"{{ metric.args[1].template_class.environment_class() }}"
|
||||
)
|
||||
|
||||
|
||||
def test_supersetsandboxedenvironment_denies_unsafe_attributes() -> None:
|
||||
"""is_safe_attribute denies env/template class attrs and all attrs on partials."""
|
||||
from functools import partial
|
||||
|
||||
from superset.jinja_context import SupersetSandboxedEnvironment
|
||||
|
||||
env = SupersetSandboxedEnvironment()
|
||||
assert env.is_safe_attribute(env, "environment_class", None) is False
|
||||
assert env.is_safe_attribute(env, "template_class", None) is False
|
||||
macro = partial(lambda value: value, 1)
|
||||
assert env.is_safe_attribute(macro, "args", macro.args) is False
|
||||
assert env.is_safe_attribute(macro, "func", macro.func) is False
|
||||
|
||||
|
||||
def test_metric_macro_recursive_compound(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test the ``metric_macro`` when the definition is compound.
|
||||
|
||||
@@ -223,49 +223,3 @@ def test_send_http_only_https_check(monkeypatch, mock_header_data) -> None:
|
||||
|
||||
with pytest.raises(NotificationParamException, match="HTTPS is required by config"):
|
||||
webhook_notification.send()
|
||||
|
||||
|
||||
def test_send_treats_redirect_as_failure(monkeypatch, mock_header_data) -> None:
|
||||
"""
|
||||
A 3xx response is a failure: redirects are not followed
|
||||
(allow_redirects=False), so the request never reached the final target and
|
||||
must not be reported as success.
|
||||
"""
|
||||
from superset.reports.models import ReportRecipients, ReportRecipientType
|
||||
from superset.reports.notifications.base import NotificationContent
|
||||
|
||||
content = NotificationContent(
|
||||
name="test alert", header_data=mock_header_data, description="Test description"
|
||||
)
|
||||
webhook_notification = WebhookNotification(
|
||||
recipient=ReportRecipients(
|
||||
type=ReportRecipientType.WEBHOOK,
|
||||
recipient_config_json='{"target": "https://example.com/webhook"}',
|
||||
),
|
||||
content=content,
|
||||
)
|
||||
|
||||
class MockCurrentApp:
|
||||
config = {
|
||||
"ALERT_REPORTS_WEBHOOK_HTTPS_ONLY": True,
|
||||
"ALERT_REPORTS_WEBHOOK_ALLOW_INTERNAL_HOSTS": True,
|
||||
}
|
||||
|
||||
class MockResponse:
|
||||
status_code = 302
|
||||
text = ""
|
||||
|
||||
monkeypatch.setattr(
|
||||
"superset.reports.notifications.webhook.current_app", MockCurrentApp
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"superset.reports.notifications.webhook.feature_flag_manager.is_feature_enabled",
|
||||
lambda flag: True,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"superset.reports.notifications.webhook.requests.post",
|
||||
lambda *args, **kwargs: MockResponse(),
|
||||
)
|
||||
|
||||
with pytest.raises(NotificationParamException, match="redirect"):
|
||||
webhook_notification.send()
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
|
||||
import json # noqa: TID251
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from flask_appbuilder.security.sqla.models import Role, User
|
||||
@@ -657,188 +656,16 @@ def test_query_context_modified_tampered(
|
||||
assert query_context_modified(query_context)
|
||||
|
||||
|
||||
def _native_filter_ctx(
|
||||
mocker: MockerFixture,
|
||||
queries: list[Any],
|
||||
*,
|
||||
native_filter_id: str | None = "F1",
|
||||
dashboard_id: int | None = 10,
|
||||
dataset_id: int = 20,
|
||||
targets: list[Any] | None = None,
|
||||
control_values: dict[str, Any] | None = None,
|
||||
) -> Any:
|
||||
"""Build a native-filter query context (no slice_) + patched dashboard."""
|
||||
if targets is None:
|
||||
targets = [{"datasetId": dataset_id, "column": {"name": "region"}}]
|
||||
qc = mocker.MagicMock()
|
||||
qc.slice_ = None
|
||||
qc.form_data = {
|
||||
"type": "NATIVE_FILTER",
|
||||
"native_filter_id": native_filter_id,
|
||||
"dashboardId": dashboard_id,
|
||||
}
|
||||
qc.datasource.data = {"id": dataset_id}
|
||||
qc.queries = queries
|
||||
dash = mocker.MagicMock()
|
||||
dash.json_metadata = json.dumps(
|
||||
{
|
||||
"native_filter_configuration": [
|
||||
{
|
||||
"id": "F1",
|
||||
"targets": targets,
|
||||
"controlValues": control_values or {},
|
||||
}
|
||||
]
|
||||
}
|
||||
)
|
||||
query_chain = mocker.patch("superset.db.session.query")
|
||||
query_chain.return_value.filter.return_value.one_or_none.return_value = dash
|
||||
return qc
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_target_column_allowed(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A native-filter request reading only its target column is allowed."""
|
||||
query = SimpleNamespace(columns=["region"], metrics=[], groupby=[])
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert not query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_arbitrary_column_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A native-filter request reading a non-target column is modified."""
|
||||
query = SimpleNamespace(columns=["ssn"], metrics=[], groupby=[])
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_simple_metric_on_target_allowed(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A range-style request (simple aggregate over the target column) is allowed."""
|
||||
query = SimpleNamespace(
|
||||
columns=[],
|
||||
metrics=[
|
||||
{
|
||||
"expressionType": "SIMPLE",
|
||||
"column": {"column_name": "region"},
|
||||
"aggregate": "MIN",
|
||||
}
|
||||
],
|
||||
groupby=[],
|
||||
)
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert not query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_adhoc_metric_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A free-form SQL metric on the native-filter path is modified."""
|
||||
query = SimpleNamespace(
|
||||
columns=[],
|
||||
metrics=[{"expressionType": "SQL", "sqlExpression": "SUM(salary)"}],
|
||||
groupby=[],
|
||||
)
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_adhoc_column_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""An adhoc (free-form SQL) column on the native-filter path is modified."""
|
||||
query = SimpleNamespace(
|
||||
columns=[{"sqlExpression": "ssn", "label": "x"}], metrics=[], groupby=[]
|
||||
)
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_no_filter_context_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Without a native_filter_id / dashboardId the request fails closed."""
|
||||
query = SimpleNamespace(columns=["region"], metrics=[], groupby=[])
|
||||
qc = _native_filter_ctx(mocker, [query], native_filter_id=None)
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_configured_sort_metric_allowed(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A value lookup sorted by the filter's configured saved metric is allowed."""
|
||||
query = SimpleNamespace(
|
||||
columns=["region"],
|
||||
metrics=["total"],
|
||||
groupby=[],
|
||||
orderby=[["total", True]],
|
||||
)
|
||||
qc = _native_filter_ctx(mocker, [query], control_values={"sortMetric": "total"})
|
||||
assert not query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_arbitrary_saved_metric_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""A saved metric other than the filter's configured sort metric is modified."""
|
||||
query = SimpleNamespace(columns=["region"], metrics=["salary_total"], groupby=[])
|
||||
qc = _native_filter_ctx(mocker, [query], control_values={"sortMetric": "total"})
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_orderby_arbitrary_column_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Ordering by a non-target column on the native-filter path is modified."""
|
||||
query = SimpleNamespace(
|
||||
columns=["region"], metrics=[], groupby=[], orderby=[["ssn", True]]
|
||||
)
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_native_filter_orderby_adhoc_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Ordering by a free-form SQL expression on the native-filter path is modified."""
|
||||
query = SimpleNamespace(
|
||||
columns=["region"],
|
||||
metrics=[],
|
||||
groupby=[],
|
||||
orderby=[[{"sqlExpression": "ssn"}, True]],
|
||||
)
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
assert query_context_modified(qc)
|
||||
|
||||
|
||||
def test_query_context_modified_chartless_non_native_filter_allowed(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
def test_query_context_modified_native_filter(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
A chartless request that is not a native filter (drill-to-detail, drill-by,
|
||||
samples) is validated by the datasource-access checks in raise_for_access and
|
||||
is not constrained here.
|
||||
"""
|
||||
qc = mocker.MagicMock()
|
||||
qc.slice_ = None
|
||||
qc.form_data = {"dashboardId": 10, "slice_id": 0, "groupby": ["ssn"]}
|
||||
assert not query_context_modified(qc)
|
||||
Test the `query_context_modified` function with a native filter request.
|
||||
|
||||
A native filter request has no chart (slice) associated with it.
|
||||
"""
|
||||
query_context = mocker.MagicMock()
|
||||
query_context.slice_ = None
|
||||
|
||||
def test_query_context_modified_native_filter_without_type_marker_blocked(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
A request identified by native_filter_id is constrained even when the
|
||||
NATIVE_FILTER type marker is absent.
|
||||
"""
|
||||
query = SimpleNamespace(columns=["ssn"], metrics=[], groupby=[])
|
||||
qc = _native_filter_ctx(mocker, [query])
|
||||
del qc.form_data["type"]
|
||||
assert query_context_modified(qc)
|
||||
assert not query_context_modified(query_context)
|
||||
|
||||
|
||||
def test_query_context_modified_mixed_chart(mocker: MockerFixture) -> None:
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from superset.utils.network import is_safe_host
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("resolved_ip", "expected"),
|
||||
[
|
||||
# Public IPs → safe
|
||||
("93.184.216.34", True), # example.com
|
||||
("8.8.8.8", True), # Google DNS
|
||||
("2606:2800:220:1:248:1893:25c8:1946", True), # example.com IPv6
|
||||
# Loopback → unsafe
|
||||
("127.0.0.1", False),
|
||||
("::1", False),
|
||||
# RFC-1918 private ranges → unsafe
|
||||
("10.0.0.1", False),
|
||||
("10.255.255.255", False),
|
||||
("172.16.0.1", False),
|
||||
("172.31.255.255", False),
|
||||
("192.168.0.1", False),
|
||||
("192.168.255.255", False),
|
||||
# Link-local / IMDS → unsafe
|
||||
("169.254.169.254", False), # AWS/GCP/Azure metadata
|
||||
("169.254.0.1", False),
|
||||
# 0.0.0.0/8 → unsafe
|
||||
("0.0.0.1", False),
|
||||
# IPv4 multicast (224.0.0.0/4) → unsafe; ip.is_global returns True for
|
||||
# multicast in Python, so the explicit blocklist entry is the only guard.
|
||||
("224.0.0.1", False),
|
||||
("239.255.255.255", False),
|
||||
# IPv6 private → unsafe
|
||||
("fc00::1", False),
|
||||
("fe80::1", False),
|
||||
# IPv6 multicast (ff00::/8) → unsafe
|
||||
("ff02::1", False),
|
||||
("ff0e::1", False),
|
||||
],
|
||||
)
|
||||
def test_is_safe_host_ip_classification(resolved_ip: str, expected: bool) -> None:
|
||||
"""Hosts resolving to private/internal IPs must be rejected."""
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
return_value=[(None, None, None, None, (resolved_ip, 0))],
|
||||
):
|
||||
assert is_safe_host("any-hostname") is expected
|
||||
|
||||
|
||||
def test_is_safe_host_unresolvable_returns_false() -> None:
|
||||
"""Unresolvable hostnames must return False (fail-closed)."""
|
||||
import socket
|
||||
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
side_effect=socket.gaierror("Name or service not known"),
|
||||
):
|
||||
assert is_safe_host("nonexistent.invalid") is False
|
||||
|
||||
|
||||
def test_is_safe_host_empty_results_returns_false() -> None:
|
||||
"""An empty getaddrinfo result must return False (fail-closed)."""
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
return_value=[],
|
||||
):
|
||||
assert is_safe_host("empty-result.example.com") is False
|
||||
|
||||
|
||||
def test_is_safe_host_malformed_sockaddr_returns_false() -> None:
|
||||
"""A sockaddr that cannot be parsed as an IP address must return False."""
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
return_value=[(None, None, None, None, ("not-an-ip", 0))],
|
||||
):
|
||||
assert is_safe_host("malformed") is False
|
||||
|
||||
|
||||
def test_is_safe_host_rejects_if_any_ip_is_private() -> None:
|
||||
"""A hostname that resolves to both a public and a private IP (split-DNS
|
||||
or multi-homed host) must be rejected — all resolved IPs must be safe."""
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
return_value=[
|
||||
(None, None, None, None, ("8.8.8.8", 0)),
|
||||
(None, None, None, None, ("10.0.0.1", 0)), # private — must fail
|
||||
],
|
||||
):
|
||||
assert is_safe_host("dual-homed.example.com") is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"mapped_ip",
|
||||
[
|
||||
"::ffff:127.0.0.1", # loopback via IPv4-mapped IPv6
|
||||
"::ffff:10.0.0.1", # RFC-1918 via IPv4-mapped IPv6
|
||||
"::ffff:169.254.169.254", # link-local via IPv4-mapped IPv6
|
||||
],
|
||||
)
|
||||
def test_is_safe_host_rejects_ipv4_mapped_ipv6(mapped_ip: str) -> None:
|
||||
"""IPv4-mapped IPv6 addresses must be unwrapped and checked against the
|
||||
IPv4 unsafe networks — not treated as safe IPv6 addresses."""
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
return_value=[(None, None, None, None, (mapped_ip, 0, 0, 0))],
|
||||
):
|
||||
assert is_safe_host("mapped") is False
|
||||
|
||||
|
||||
def test_is_safe_host_rejects_cgnat_range() -> None:
|
||||
"""100.64.0.0/10 (RFC 6598 shared address space) must be rejected."""
|
||||
with patch(
|
||||
"superset.utils.network.socket.getaddrinfo",
|
||||
return_value=[(None, None, None, None, ("100.100.100.200", 0))],
|
||||
):
|
||||
assert is_safe_host("cgnat-host") is False
|
||||
Reference in New Issue
Block a user