fix(chart): keep query-context updates bound to the chart's datasource

On the query-context-only update path UpdateChartCommand intentionally skips the ownership check so report and alert workers can refresh a chart's cached payload. Validate that the submitted query context still targets the chart's own datasource (id and type) before saving, so a cached payload cannot be repointed at an unrelated datasource. Payloads without a parseable datasource fall back to the chart's datasource at execution time and are left unchanged. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
test(core): add unit tests for split function (#40819 )
2026-06-11 18:49:15 +00:00 · 2026-06-10 16:29:39 -07:00 · 2026-06-10 16:12:35 -07:00
6 changed files with 226 additions and 231 deletions
--- a/superset/commands/chart/exceptions.py
+++ b/superset/commands/chart/exceptions.py
@@ -103,6 +103,19 @@ class DatasourceTypeUpdateRequiredValidationError(ValidationError):
        )


+class ChartQueryContextDatasourceMismatchValidationError(ValidationError):
+    """
+    Raised when a query-context-only update carries a datasource that does not
+    match the chart's own datasource.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(
+            _("The query context datasource does not match the chart datasource"),
+            field_name="query_context",
+        )
+
+
 class ChartNotFoundError(CommandException):
    message = "Chart not found."

--- a/superset/commands/chart/update.py
+++ b/superset/commands/chart/update.py
@@ -29,6 +29,7 @@ from superset.commands.chart.exceptions import (
    ChartForbiddenError,
    ChartInvalidError,
    ChartNotFoundError,
+    ChartQueryContextDatasourceMismatchValidationError,
    ChartUpdateFailedError,
    DashboardsForbiddenError,
    DashboardsNotFoundValidationError,
@@ -41,6 +42,7 @@ from superset.exceptions import SupersetSecurityException
 from superset.models.dashboard import Dashboard
 from superset.models.slice import Slice
 from superset.tags.models import ObjectType
+from superset.utils import json
 from superset.utils.decorators import on_error, transaction

 logger = logging.getLogger(__name__)
@@ -101,6 +103,51 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
                if not security_manager.is_owner(dash):
                    raise DashboardsForbiddenError()

+    def _validate_query_context_datasource(
+        self, exceptions: list[ValidationError]
+    ) -> None:
+        """
+        Ensure a query-context-only update keeps the chart's own datasource.
+
+        The submitted query context is only verified when it carries a parseable
+        ``datasource`` object; a payload that references a different datasource than
+        the chart's persisted one is rejected. Payloads without a datasource fall
+        back to the chart's datasource at execution time and need no check.
+        """
+        if not self._model:
+            return
+
+        raw_query_context = self._properties.get("query_context")
+        if not raw_query_context:
+            return
+
+        try:
+            query_context = json.loads(raw_query_context)
+        except (TypeError, ValueError):
+            # An unparseable payload cannot be verified or replayed; leave it for
+            # downstream handling rather than guessing at its intent.
+            return
+
+        datasource = (
+            query_context.get("datasource") if isinstance(query_context, dict) else None
+        )
+        if not isinstance(datasource, dict):
+            return
+
+        try:
+            ids_match = int(datasource["id"]) == self._model.datasource_id
+        except (KeyError, TypeError, ValueError):
+            ids_match = False
+
+        datasource_type = datasource.get("type")
+        types_match = (
+            datasource_type is None
+            or str(datasource_type) == self._model.datasource_type
+        )
+
+        if not ids_match or not types_match:
+            exceptions.append(ChartQueryContextDatasourceMismatchValidationError())
+
    def validate(self) -> None:  # noqa: C901
        exceptions: list[ValidationError] = []
        dashboard_ids = self._properties.get("dashboards")
@@ -134,6 +181,12 @@ class UpdateChartCommand(UpdateMixin, BaseCommand):
                raise ChartForbiddenError() from ex
            except ValidationError as ex:
                exceptions.append(ex)
+        else:
+            # The query-context-only path skips the ownership check so report and
+            # alert workers can refresh a chart's cached payload. Keep that payload
+            # bound to the chart's own datasource so it cannot be repointed at an
+            # unrelated one.
+            self._validate_query_context_datasource(exceptions)

        # validate tags
        try:
--- a/superset/extensions/ssh.py
+++ b/superset/extensions/ssh.py
@@ -21,14 +21,7 @@ from typing import TYPE_CHECKING

 import sshtunnel
 from flask import Flask
-from paramiko import (
-    ECDSAKey,
-    Ed25519Key,
-    PasswordRequiredException,
-    PKey,
-    RSAKey,
-    SSHException,
-)
+from paramiko import RSAKey

 from superset.commands.database.ssh_tunnel.exceptions import SSHTunnelDatabasePortError
 from superset.databases.utils import make_url_safe
@@ -37,39 +30,6 @@ from superset.utils.class_utils import load_class_from_name
 if TYPE_CHECKING:
    from superset.databases.ssh_tunnel.models import SSHTunnel

-# Order matters: paramiko's per-class loaders raise SSHException with vague
-# "unpack requires 4 bytes" messages on type mismatches, so we try the more
-# modern key types first (ed25519, ECDSA) and fall back to RSA, which is the
-# most permissive parser and the historical default in this codebase.
-_SSH_KEY_TYPES: tuple[type[PKey], ...] = (Ed25519Key, ECDSAKey, RSAKey)
-
-
-def _load_private_key(pem: str, password: str | None) -> PKey:
-    """Load a private key PEM regardless of algorithm (ed25519, ECDSA, RSA).
-
-    paramiko 3.2+ has ``PKey.from_path()`` for polymorphic loading, but it
-    requires a filesystem path; writing private key material to disk would be a
-    security regression. Each per-class loader only accepts its own format, so
-    iterate over the supported types on the in-memory ``StringIO`` and return
-    the first that parses cleanly.
-    """
-    last_exc: SSHException | None = None
-    for key_class in _SSH_KEY_TYPES:
-        try:
-            return key_class.from_private_key(StringIO(pem), password=password)
-        except PasswordRequiredException:
-            raise
-        except SSHException as exc:
-            last_exc = exc
-    # NOTE: last_exc holds the error from the final attempt (RSAKey), not the
-    # closest-matching type. For a corrupted ed25519 key, the appended message
-    # reflects RSAKey's parse error; the full type list above still identifies
-    # all types attempted.
-    raise SSHException(
-        "Unable to parse SSH private key as any of "
-        f"{', '.join(k.__name__ for k in _SSH_KEY_TYPES)}: {last_exc}"
-    ) from last_exc
-

 class SSHManager:
    def __init__(self, app: Flask) -> None:
@@ -93,9 +53,6 @@ class SSHManager:
        ssh_tunnel: "SSHTunnel",
        sqlalchemy_database_uri: str,
    ) -> sshtunnel.SSHTunnelForwarder:
-        # Deferred import to break a circular import:
-        # superset.utils.ssh_tunnel -> superset.databases.ssh_tunnel.models
-        # -> superset.extensions -> superset.extensions.ssh (this module).
        from superset.utils.ssh_tunnel import get_default_port

        url = make_url_safe(sqlalchemy_database_uri)
@@ -114,9 +71,11 @@ class SSHManager:
        if ssh_tunnel.password:
            params["ssh_password"] = ssh_tunnel.password
        elif ssh_tunnel.private_key:
-            params["ssh_pkey"] = _load_private_key(
-                ssh_tunnel.private_key, ssh_tunnel.private_key_password
+            private_key_file = StringIO(ssh_tunnel.private_key)
+            private_key = RSAKey.from_private_key(
+                private_key_file, ssh_tunnel.private_key_password
            )
+            params["ssh_pkey"] = private_key

        return sshtunnel.open_tunnel(**params)

--- a/tests/unit_tests/commands/chart/update_test.py
+++ b/tests/unit_tests/commands/chart/update_test.py
@@ -17,10 +17,11 @@
 import pytest
 from pytest_mock import MockerFixture

-from superset.commands.chart.exceptions import ChartForbiddenError
+from superset.commands.chart.exceptions import ChartForbiddenError, ChartInvalidError
 from superset.commands.chart.update import UpdateChartCommand
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
 from superset.exceptions import SupersetSecurityException
+from superset.utils import json


 def _ownership_exc() -> SupersetSecurityException:
@@ -91,3 +92,73 @@ def test_update_chart_owner_can_perform_regular_update(

    find_by_id.assert_called_once_with(1)
    raise_for_ownership.assert_called_once()
+
+
+def _query_context_payload(datasource: object) -> dict[str, object]:
+    return {
+        "query_context": json.dumps({"datasource": datasource, "queries": []}),
+        "query_context_generation": True,
+    }
+
+
+def test_update_chart_query_context_matching_datasource_is_allowed(
+    mocker: MockerFixture,
+) -> None:
+    """A query context that targets the chart's own datasource is accepted."""
+    find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
+    find_by_id.return_value = mocker.MagicMock(
+        id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
+    )
+    mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
+
+    UpdateChartCommand(
+        1, _query_context_payload({"id": 42, "type": "table"})
+    ).validate()
+
+
+@pytest.mark.parametrize(
+    "datasource",
+    [
+        {"id": 99, "type": "table"},  # different id
+        {"id": 42, "type": "query"},  # different type
+        {"id": "99", "type": "table"},  # different id as string
+    ],
+)
+def test_update_chart_query_context_mismatched_datasource_is_rejected(
+    mocker: MockerFixture,
+    datasource: dict[str, object],
+) -> None:
+    """A query context pointing at a different datasource is rejected with a 4xx."""
+    find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
+    find_by_id.return_value = mocker.MagicMock(
+        id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
+    )
+    mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
+
+    with pytest.raises(ChartInvalidError):
+        UpdateChartCommand(1, _query_context_payload(datasource)).validate()
+
+
+@pytest.mark.parametrize(
+    "query_context",
+    [
+        "{}",  # no datasource key
+        '{"datasource": null}',  # null datasource
+        "not-json",  # unparseable payload
+    ],
+)
+def test_update_chart_query_context_without_datasource_is_allowed(
+    mocker: MockerFixture,
+    query_context: str,
+) -> None:
+    """Payloads with no verifiable datasource fall back to the chart's own."""
+    find_by_id = mocker.patch("superset.commands.chart.update.ChartDAO.find_by_id")
+    find_by_id.return_value = mocker.MagicMock(
+        id=1, tags=[], dashboards=[], datasource_id=42, datasource_type="table"
+    )
+    mocker.patch("superset.commands.chart.update.security_manager.raise_for_ownership")
+
+    UpdateChartCommand(
+        1,
+        {"query_context": query_context, "query_context_generation": True},
+    ).validate()
--- a/tests/unit_tests/extensions/ssh_test.py
+++ b/tests/unit_tests/extensions/ssh_test.py
@@ -14,45 +14,11 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from unittest.mock import Mock, patch
+from unittest.mock import Mock

-import pytest
 import sshtunnel
-from cryptography.hazmat.primitives.asymmetric import ec
-from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
-from cryptography.hazmat.primitives.asymmetric.rsa import (
-    generate_private_key as generate_rsa_key,
-)
-from cryptography.hazmat.primitives.serialization import (
-    BestAvailableEncryption,
-    Encoding,
-    NoEncryption,
-    PrivateFormat,
-)
-from paramiko import Ed25519Key, PasswordRequiredException, RSAKey, SSHException

-from superset.extensions.ssh import SSHManager, SSHManagerFactory
-
-
-def _make_manager() -> SSHManager:
-    app = Mock()
-    app.config = {
-        "SSH_TUNNEL_LOCAL_BIND_ADDRESS": "127.0.0.1",
-        "SSH_TUNNEL_TIMEOUT_SEC": 10.0,
-        "SSH_TUNNEL_PACKET_TIMEOUT_SEC": 10.0,
-    }
-    return SSHManager(app)
-
-
-def _make_ssh_tunnel(private_key: str, private_key_password: str | None = None) -> Mock:
-    ssh_tunnel = Mock()
-    ssh_tunnel.server_address = "ssh.example.com"
-    ssh_tunnel.server_port = 22
-    ssh_tunnel.username = "tunneluser"
-    ssh_tunnel.password = None
-    ssh_tunnel.private_key = private_key
-    ssh_tunnel.private_key_password = private_key_password
-    return ssh_tunnel
+from superset.extensions.ssh import SSHManagerFactory


 def test_ssh_tunnel_timeout_setting() -> None:
@@ -68,151 +34,3 @@ def test_ssh_tunnel_timeout_setting() -> None:
    factory.init_app(app)
    assert sshtunnel.TUNNEL_TIMEOUT == 123.0
    assert sshtunnel.SSH_TIMEOUT == 321.0
-
-
-def _make_ed25519_pem() -> str:
-    """Generate a fresh OpenSSH-format ed25519 private key PEM."""
-    key = Ed25519PrivateKey.generate()
-    return key.private_bytes(
-        encoding=Encoding.PEM,
-        format=PrivateFormat.OpenSSH,
-        encryption_algorithm=NoEncryption(),
-    ).decode()
-
-
-def test_create_tunnel_accepts_ed25519_private_key() -> None:
-    """
-    Regression for #24180: ed25519 SSH keys must be loadable for tunnel
-    setup. The bug surfaces as ``unpack requires 4 bytes`` because the
-    code unconditionally calls ``RSAKey.from_private_key`` regardless of
-    key type, which mis-parses the ed25519 byte stream.
-
-    The Superset UI accepts any key the user pastes, so the fix is to
-    detect the key type (or use ``paramiko.PKey.from_private_key`` once
-    paramiko exposes a polymorphic loader) rather than hard-coding RSA.
-
-    This test exercises ``create_tunnel`` end-to-end with a freshly
-    generated ed25519 key. It mocks ``sshtunnel.open_tunnel`` so the
-    test does not actually open a network connection — only the key
-    parsing path is exercised.
-    """
-    manager = _make_manager()
-    ssh_tunnel = _make_ssh_tunnel(_make_ed25519_pem())
-
-    with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
-        manager.create_tunnel(
-            ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
-        )
-
-    # Key-type-agnostic loader must produce a paramiko PKey usable as ssh_pkey.
-    assert mock_open.called, "open_tunnel was never invoked — key parsing aborted"
-    forwarded_pkey = mock_open.call_args.kwargs["ssh_pkey"]
-    assert isinstance(forwarded_pkey, Ed25519Key)
-
-
-def test_create_tunnel_accepts_rsa_private_key_unchanged() -> None:
-    """
-    Companion to test_create_tunnel_accepts_ed25519_private_key: pin the
-    backward-compatible RSA path so a fix for ed25519 doesn't regress the
-    historically-supported RSA case. Uses a freshly generated RSA key in
-    OpenSSH format.
-    """
-    rsa_pem = (
-        generate_rsa_key(public_exponent=65537, key_size=2048)
-        .private_bytes(
-            encoding=Encoding.PEM,
-            format=PrivateFormat.OpenSSH,
-            encryption_algorithm=NoEncryption(),
-        )
-        .decode()
-    )
-
-    manager = _make_manager()
-    ssh_tunnel = _make_ssh_tunnel(rsa_pem)
-
-    with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
-        manager.create_tunnel(
-            ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
-        )
-
-    assert mock_open.called, "open_tunnel was never invoked — RSA key parsing aborted"
-    assert isinstance(mock_open.call_args.kwargs["ssh_pkey"], RSAKey)
-
-
-def test_create_tunnel_accepts_ecdsa_private_key() -> None:
-    """
-    Companion to the ed25519 and RSA tests: verify ECDSAKey (the third type
-    in _SSH_KEY_TYPES) is reachable by _load_private_key. Uses NIST P-256,
-    the most common ECDSA curve in practice.
-    """
-    ecdsa_pem = (
-        ec.generate_private_key(ec.SECP256R1())
-        .private_bytes(
-            encoding=Encoding.PEM,
-            format=PrivateFormat.OpenSSH,
-            encryption_algorithm=NoEncryption(),
-        )
-        .decode()
-    )
-
-    manager = _make_manager()
-    ssh_tunnel = _make_ssh_tunnel(ecdsa_pem)
-
-    with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
-        manager.create_tunnel(
-            ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
-        )
-
-    assert mock_open.called, "open_tunnel was never invoked — ECDSA key parsing aborted"
-    assert mock_open.call_args.kwargs["ssh_pkey"] is not None
-
-
-def test_create_tunnel_passphrase_protected_key_without_password() -> None:
-    """
-    A passphrase-protected key supplied without a passphrase must surface as
-    ``PasswordRequiredException`` (an actionable "key requires passphrase"
-    signal) rather than being absorbed by the per-type loop and reported as a
-    generic "Unable to parse" error.
-    """
-    encrypted_pem = (
-        Ed25519PrivateKey.generate()
-        .private_bytes(
-            encoding=Encoding.PEM,
-            format=PrivateFormat.OpenSSH,
-            encryption_algorithm=BestAvailableEncryption(b"correct horse"),
-        )
-        .decode()
-    )
-
-    manager = _make_manager()
-    ssh_tunnel = _make_ssh_tunnel(encrypted_pem, private_key_password=None)
-
-    with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
-        with pytest.raises(PasswordRequiredException):
-            manager.create_tunnel(
-                ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
-            )
-
-    assert not mock_open.called
-
-
-def test_create_tunnel_invalid_key_raises_combined_error() -> None:
-    """
-    When a key parses as none of the supported types, ``_load_private_key``
-    raises ``SSHException`` whose message lists every type that was attempted,
-    so the failure clearly communicates that all loaders were tried.
-    """
-    manager = _make_manager()
-    ssh_tunnel = _make_ssh_tunnel("not a valid private key")
-
-    with patch("superset.extensions.ssh.sshtunnel.open_tunnel") as mock_open:
-        with pytest.raises(SSHException) as exc_info:
-            manager.create_tunnel(
-                ssh_tunnel, "postgresql://user:pass@db.example.com:5432/x"
-            )
-
-    message = str(exc_info.value)
-    assert "Ed25519Key" in message
-    assert "ECDSAKey" in message
-    assert "RSAKey" in message
-    assert not mock_open.called
--- a/tests/unit_tests/utils/test_split.py
+++ b/tests/unit_tests/utils/test_split.py
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from superset.utils.core import split
+
+
+def test_split_empty_string():
+    assert list(split("")) == [""]
+
+
+def test_split_leading_delimiter():
+    assert list(split(" a")) == [
+        "",
+        "a",
+    ]
+
+
+def test_split_trailing_delimiter():
+    assert list(split("a ")) == [
+        "a",
+        "",
+    ]
+
+
+def test_split_only_delimiter():
+    assert list(split(" ")) == [
+        "",
+        "",
+    ]
+
+
+def test_split_nested_parentheses():
+    assert list(
+        split(
+            "a,(b,(c,d))",
+            delimiter=",",
+        )
+    ) == [
+        "a",
+        "(b,(c,d))",
+    ]
+
+
+def test_branch_separator_found():
+    assert list(split("a b")) == [
+        "a",
+        "b",
+    ]
+
+
+def test_branch_separator_not_found():
+    assert list(split("ab")) == [
+        "ab",
+    ]
+
+
+def test_branch_parentheses():
+    assert list(split("(a b)")) == [
+        "(a b)",
+    ]
+
+
+def test_branch_escaped_quote():
+    assert list(split(r'"a\"b c" d')) == [
+        r'"a\"b c"',
+        "d",
+    ]