Compare commits

..

2 Commits

Author SHA1 Message Date
Claude Code
d544bff071 fix(chart): keep query-context updates bound to the chart's datasource
On the query-context-only update path UpdateChartCommand intentionally
skips the ownership check so report and alert workers can refresh a
chart's cached payload. Validate that the submitted query context still
targets the chart's own datasource (id and type) before saving, so a
cached payload cannot be repointed at an unrelated datasource. Payloads
without a parseable datasource fall back to the chart's datasource at
execution time and are left unchanged.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 16:29:39 -07:00
Dylan Cavalcante
f79a88c685 test(core): add unit tests for split function (#40819)
Co-authored-by: Đỗ Trọng Hải <41283691+hainenber@users.noreply.github.com>
Co-authored-by: Evan <evan@preset.io>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 16:12:35 -07:00
6 changed files with 226 additions and 231 deletions

View File

@@ -103,6 +103,19 @@ class DatasourceTypeUpdateRequiredValidationError(ValidationError):
)
class ChartQueryContextDatasourceMismatchValidationError(ValidationError):
"""
Raised when a query-context-only update carries a datasource that does not
match the chart's own datasource.
"""
def __init__(self) -> None:
super().__init__(
_("The query context datasource does not match the chart datasource"),
field_name="query_context",
)
class ChartNotFoundError(CommandException):
message = "Chart not found."

View File

@@ -29,6 +29,7 @@ from superset.commands.chart.exceptions import (
ChartForbiddenError,
ChartInvalidError,
ChartNotFoundError,
ChartQueryContextDatasourceMismatchValidationError,
ChartUpdateFailedError,
DashboardsForbiddenError,
DashboardsNotFoundValidationError,
@@ -41,6 +42,7 @@ from superset.exceptions import SupersetSecurityException
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.tags.models import ObjectType
from superset.utils import json
from superset.utils.decorators import on_error, transaction
logger = logging.getLogger(__name__)
@@ -101,6 +103,51 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
if not security_manager.is_owner(dash):
raise DashboardsForbiddenError()
def _validate_query_context_datasource(
self, exceptions: list[ValidationError]
) -> None:
"""
Ensure a query-context-only update keeps the chart's own datasource.
The submitted query context is only verified when it carries a parseable
``datasource`` object; a payload that references a different datasource than
the chart's persisted one is rejected. Payloads without a datasource fall
back to the chart's datasource at execution time and need no check.
"""
if not self._model:
return
raw_query_context = self._properties.get("query_context")
if not raw_query_context:
return
try:
query_context = json.loads(raw_query_context)
except (TypeError, ValueError):
# An unparseable payload cannot be verified or replayed; leave it for
# downstream handling rather than guessing at its intent.
return
datasource = (
query_context.get("datasource") if isinstance(query_context, dict) else None
)
if not isinstance(datasource, dict):
return
try:
ids_match = int(datasource["id"]) == self._model.datasource_id
except (KeyError, TypeError, ValueError):
ids_match = False
datasource_type = datasource.get("type")
types_match = (
datasource_type is None
or str(datasource_type) == self._model.datasource_type
)
if not ids_match or not types_match:
exceptions.append(ChartQueryContextDatasourceMismatchValidationError())
def validate(self) -> None: # noqa: C901
exceptions: list[ValidationError] = []
dashboard_ids = self._properties.get("dashboards")
@@ -134,6 +181,12 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
raise ChartForbiddenError() from ex
except ValidationError as ex:
exceptions.append(ex)
else:
# The query-context-only path skips the ownership check so report and
# alert workers can refresh a chart's cached payload. Keep that payload
# bound to the chart's own datasource so it cannot be repointed at an
# unrelated one.
self._validate_query_context_datasource(exceptions)
# validate tags
try:

View File

@@ -21,14 +21,7 @@ from typing import TYPE_CHECKING
import sshtunnel
from flask import Flask
from paramiko import (
ECDSAKey,
Ed25519Key,
PasswordRequiredException,
PKey,
RSAKey,
SSHException,
)
from paramiko import RSAKey
from superset.commands.database.ssh_tunnel.exceptions import SSHTunnelDatabasePortError
from superset.databases.utils import make_url_safe
@@ -37,39 +30,6 @@ from superset.utils.class_utils import load_class_from_name
if TYPE_CHECKING:
from superset.databases.ssh_tunnel.models import SSHTunnel
# Order matters: paramiko's per-class loaders raise SSHException with vague
# "unpack requires 4 bytes" messages on type mismatches, so we try the more
# modern key types first (ed25519, ECDSA) and fall back to RSA, which is the
# most permissive parser and the historical default in this codebase.
_SSH_KEY_TYPES: tuple[type[PKey], ...] = (Ed25519Key, ECDSAKey, RSAKey)
def _load_private_key(pem: str, password: str | None) -> PKey:
"""Load a private key PEM regardless of algorithm (ed25519, ECDSA, RSA).
paramiko 3.2+ has ``PKey.from_path()`` for polymorphic loading, but it
requires a filesystem path; writing private key material to disk would be a
security regression. Each per-class loader only accepts its own format, so
iterate over the supported types on the in-memory ``StringIO`` and return
the first that parses cleanly.
"""
last_exc: SSHException | None = None
for key_class in _SSH_KEY_TYPES:
try:
return key_class.from_private_key(StringIO(pem), password=password)
except PasswordRequiredException:
raise
except SSHException as exc:
last_exc = exc
# NOTE: last_exc holds the error from the final attempt (RSAKey), not the
# closest-matching type. For a corrupted ed25519 key, the appended message
# reflects RSAKey's parse error; the full type list above still identifies
# all types attempted.
raise SSHException(
"Unable to parse SSH private key as any of "
f"{', '.join(k.__name__ for k in _SSH_KEY_TYPES)}: {last_exc}"
) from last_exc
class SSHManager:
def __init__(self, app: Flask) -> None:
@@ -93,9 +53,6 @@ class SSHManager:
ssh_tunnel: "SSHTunnel",
sqlalchemy_database_uri: str,
) -> sshtunnel.SSHTunnelForwarder:
# Deferred import to break a circular import:
# superset.utils.ssh_tunnel -> superset.databases.ssh_tunnel.models
# -> superset.extensions -> superset.extensions.ssh (this module).
from superset.utils.ssh_tunnel import get_default_port
url = make_url_safe(sqlalchemy_database_uri)
@@ -114,9 +71,11 @@ class SSHManager:
if ssh_tunnel.password:
params["ssh_password"] = ssh_tunnel.password
elif ssh_tunnel.private_key:
params["ssh_pkey"] = _load_private_key(
ssh_tunnel.private_key, ssh_tunnel.private_key_password
private_key_file = StringIO(ssh_tunnel.private_key)
private_key = RSAKey.from_private_key(
private_key_file, ssh_tunnel.private_key_password
)
params["ssh_pkey"] = private_key
return sshtunnel.open_tunnel(**params)

View File

@@ -17,10 +17,11 @@
import pytest
from pytest_mock import MockerFixture
from superset.commands.chart.exceptions import ChartForbiddenError
from superset.commands.chart.exceptions import ChartForbiddenError, ChartInvalidError
from superset.commands.chart.update import UpdateChartCommand
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.utils import json
def _ownership_exc() -> SupersetSecurityException:
@@ -91,3 +92,73 @@ def test_update_chart_owner_can_perform_regular_update(
find_by_id.assert_called_once_with(1)
raise_for_ownership.assert_called_once()
def _query_context_payload(datasource: object) -> dict[str, object]:
return {
"query_context": json.dumps({"datasource": datasource, "queries": []}),
"query_context_generation": True,
}
def test_update_chart_query_context_matching_datasource_is_allowed(
mocker: MockerFixture,
) -> None:
"""A query context that targets the chart's own datasource is accepted."""
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
find_by_id.return_value = mocker.MagicMock(
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
)
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
UpdateChartCommand(
1, _query_context_payload({"id": 42, "type": "table"})
).validate()
@pytest.mark.parametrize(
"datasource",
[
{"id": 99, "type": "table"}, # different id
{"id": 42, "type": "query"}, # different type
{"id": "99", "type": "table"}, # different id as string
],
)
def test_update_chart_query_context_mismatched_datasource_is_rejected(
mocker: MockerFixture,
datasource: dict[str, object],
) -> None:
"""A query context pointing at a different datasource is rejected with a 4xx."""
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
find_by_id.return_value = mocker.MagicMock(
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
)
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
with pytest.raises(ChartInvalidError):
UpdateChartCommand(1, _query_context_payload(datasource)).validate()
@pytest.mark.parametrize(
"query_context",
[
"{}", # no datasource key
'{"datasource": null}', # null datasource
"not-json", # unparseable payload
],
)
def test_update_chart_query_context_without_datasource_is_allowed(
mocker: MockerFixture,
query_context: str,
) -> None:
"""Payloads with no verifiable datasource fall back to the chart's own."""
find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
find_by_id.return_value = mocker.MagicMock(
id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
)
mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
UpdateChartCommand(
1,
{"query_context": query_context, "query_context_generation": True},
).validate()

View File

@@ -14,45 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import Mock, patch
from unittest.mock import Mock
import pytest
import sshtunnel
from cryptography.hazmat.primitives.asymmetric import ec
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
from cryptography.hazmat.primitives.asymmetric.rsa import (
generate_private_key as generate_rsa_key,
)
from cryptography.hazmat.primitives.serialization import (
BestAvailableEncryption,
Encoding,
NoEncryption,
PrivateFormat,
)
from paramiko import Ed25519Key, PasswordRequiredException, RSAKey, SSHException
from superset.extensions.ssh import SSHManager, SSHManagerFactory
def _make_manager() -> SSHManager:
app = Mock()
app.config = {
"SSH_TUNNEL_LOCAL_BIND_ADDRESS": "127.0.0.1",
"SSH_TUNNEL_TIMEOUT_SEC": 10.0,
"SSH_TUNNEL_PACKET_TIMEOUT_SEC": 10.0,
}
return SSHManager(app)
def _make_ssh_tunnel(private_key: str, private_key_password: str | None = None) -> Mock:
ssh_tunnel = Mock()
ssh_tunnel.server_address = "ssh.example.com"
ssh_tunnel.server_port = 22
ssh_tunnel.username = "tunneluser"
ssh_tunnel.password = None
ssh_tunnel.private_key = private_key
ssh_tunnel.private_key_password = private_key_password
return ssh_tunnel
from superset.extensions.ssh import SSHManagerFactory
def test_ssh_tunnel_timeout_setting() -> None:
@@ -68,151 +34,3 @@ def test_ssh_tunnel_timeout_setting() -> None:
factory.init_app(app)
assert sshtunnel.TUNNEL_TIMEOUT == 123.0
assert sshtunnel.SSH_TIMEOUT == 321.0
def _make_ed25519_pem() -> str:
"""Generate a fresh OpenSSH-format ed25519 private key PEM."""
key = Ed25519PrivateKey.generate()
return key.private_bytes(
encoding=Encoding.PEM,
format=PrivateFormat.OpenSSH,
encryption_algorithm=NoEncryption(),
).decode()
def test_create_tunnel_accepts_ed25519_private_key() -> None:
"""
Regression for #24180: ed25519 SSH keys must be loadable for tunnel
setup. The bug surfaces as ``unpack requires 4 bytes`` because the
code unconditionally calls ``RSAKey.from_private_key`` regardless of
key type, which mis-parses the ed25519 byte stream.
The Superset UI accepts any key the user pastes, so the fix is to
detect the key type (or use ``paramiko.PKey.from_private_key`` once
paramiko exposes a polymorphic loader) rather than hard-coding RSA.
This test exercises ``create_tunnel`` end-to-end with a freshly
generated ed25519 key. It mocks ``sshtunnel.open_tunnel`` so the
test does not actually open a network connection — only the key
parsing path is exercised.
"""
manager = _make_manager()
ssh_tunnel = _make_ssh_tunnel(_make_ed25519_pem())
with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
manager.create_tunnel(
ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
)
# Key-type-agnostic loader must produce a paramiko PKey usable as ssh_pkey.
assert mock_open.called, "open_tunnel was never invoked — key parsing aborted"
forwarded_pkey = mock_open.call_args.kwargs["ssh_pkey"]
assert isinstance(forwarded_pkey, Ed25519Key)
def test_create_tunnel_accepts_rsa_private_key_unchanged() -> None:
"""
Companion to test_create_tunnel_accepts_ed25519_private_key: pin the
backward-compatible RSA path so a fix for ed25519 doesn't regress the
historically-supported RSA case. Uses a freshly generated RSA key in
OpenSSH format.
"""
rsa_pem = (
generate_rsa_key(public_exponent=65537, key_size=2048)
.private_bytes(
encoding=Encoding.PEM,
format=PrivateFormat.OpenSSH,
encryption_algorithm=NoEncryption(),
)
.decode()
)
manager = _make_manager()
ssh_tunnel = _make_ssh_tunnel(rsa_pem)
with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
manager.create_tunnel(
ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
)
assert mock_open.called, "open_tunnel was never invoked — RSA key parsing aborted"
assert isinstance(mock_open.call_args.kwargs["ssh_pkey"], RSAKey)
def test_create_tunnel_accepts_ecdsa_private_key() -> None:
"""
Companion to the ed25519 and RSA tests: verify ECDSAKey (the third type
in _SSH_KEY_TYPES) is reachable by _load_private_key. Uses NIST P-256,
the most common ECDSA curve in practice.
"""
ecdsa_pem = (
ec.generate_private_key(ec.SECP256R1())
.private_bytes(
encoding=Encoding.PEM,
format=PrivateFormat.OpenSSH,
encryption_algorithm=NoEncryption(),
)
.decode()
)
manager = _make_manager()
ssh_tunnel = _make_ssh_tunnel(ecdsa_pem)
with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
manager.create_tunnel(
ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
)
assert mock_open.called, "open_tunnel was never invoked — ECDSA key parsing aborted"
assert mock_open.call_args.kwargs["ssh_pkey"] is not None
def test_create_tunnel_passphrase_protected_key_without_password() -> None:
"""
A passphrase-protected key supplied without a passphrase must surface as
``PasswordRequiredException`` (an actionable "key requires passphrase"
signal) rather than being absorbed by the per-type loop and reported as a
generic "Unable to parse" error.
"""
encrypted_pem = (
Ed25519PrivateKey.generate()
.private_bytes(
encoding=Encoding.PEM,
format=PrivateFormat.OpenSSH,
encryption_algorithm=BestAvailableEncryption(b"correct horse"),
)
.decode()
)
manager = _make_manager()
ssh_tunnel = _make_ssh_tunnel(encrypted_pem, private_key_password=None)
with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
with pytest.raises(PasswordRequiredException):
manager.create_tunnel(
ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
)
assert not mock_open.called
def test_create_tunnel_invalid_key_raises_combined_error() -> None:
"""
When a key parses as none of the supported types, ``_load_private_key``
raises ``SSHException`` whose message lists every type that was attempted,
so the failure clearly communicates that all loaders were tried.
"""
manager = _make_manager()
ssh_tunnel = _make_ssh_tunnel("not a valid private key")
with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
with pytest.raises(SSHException) as exc_info:
manager.create_tunnel(
ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
)
message = str(exc_info.value)
assert "Ed25519Key" in message
assert "ECDSAKey" in message
assert "RSAKey" in message
assert not mock_open.called

View File

@@ -0,0 +1,81 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from superset.utils.core import split
def test_split_empty_string():
assert list(split("")) == [""]
def test_split_leading_delimiter():
assert list(split(" a")) == [
"",
"a",
]
def test_split_trailing_delimiter():
assert list(split("a ")) == [
"a",
"",
]
def test_split_only_delimiter():
assert list(split(" ")) == [
"",
"",
]
def test_split_nested_parentheses():
assert list(
split(
"a,(b,(c,d))",
delimiter=",",
)
) == [
"a",
"(b,(c,d))",
]
def test_branch_separator_found():
assert list(split("a b")) == [
"a",
"b",
]
def test_branch_separator_not_found():
assert list(split("ab")) == [
"ab",
]
def test_branch_parentheses():
assert list(split("(a b)")) == [
"(a b)",
]
def test_branch_escaped_quote():
assert list(split(r'"a\"b c" d')) == [
r'"a\"b c"',
"d",
]