fix: return 422 on invalid SQL (#34303)

2026-04-19 16:14:52 +00:00 · 2025-07-24 16:40:56 -04:00
parent 3177131d52
commit b794b192d1
6 changed files with 340 additions and 4 deletions
--- a/superset/commands/dataset/create.py
+++ b/superset/commands/dataset/create.py
@@ -31,7 +31,7 @@ from superset.commands.dataset.exceptions import (
    TableNotFoundValidationError,
 )
 from superset.daos.dataset import DatasetDAO
-from superset.exceptions import SupersetSecurityException
+from superset.exceptions import SupersetParseError, SupersetSecurityException
 from superset.extensions import security_manager
 from superset.sql.parse import Table
 from superset.utils.decorators import on_error, transaction
@@ -51,7 +51,7 @@ class CreateDatasetCommand(CreateMixin, BaseCommand):
        dataset.fetch_metadata()
        return dataset
-    def validate(self) -> None:
+    def validate(self) -> None:  # noqa: C901
        exceptions: list[ValidationError] = []
        database_id = self._properties["database"]
        catalog = self._properties.get("catalog")
@@ -95,6 +95,13 @@ class CreateDatasetCommand(CreateMixin, BaseCommand):
                )
            except SupersetSecurityException as ex:
                exceptions.append(DatasetDataAccessIsNotAllowed(ex.error.message))
            except SupersetParseError as ex:
                exceptions.append(
                    ValidationError(
                        f"Invalid SQL: {ex.error.message}",
                        field_name="sql",
                    )
                )
        try:
            owners = self.populate_owners(owner_ids)
            self._properties["owners"] = owners
--- a/superset/connectors/sqla/utils.py
+++ b/superset/connectors/sqla/utils.py
@@ -34,6 +34,7 @@ from superset.constants import LRU_CACHE_MAX_SIZE
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
 from superset.exceptions import (
    SupersetGenericDBErrorException,
    SupersetParseError,
    SupersetSecurityException,
 )
 from superset.models.core import Database
@@ -105,7 +106,12 @@ def get_virtual_table_metadata(dataset: SqlaTable) -> list[ResultSetColumnType]:
    sql = dataset.get_template_processor().process_template(
        dataset.sql, **dataset.template_params_dict
    )
-    parsed_script = SQLScript(sql, engine=db_engine_spec.engine)
+    try:
        parsed_script = SQLScript(sql, engine=db_engine_spec.engine)
    except SupersetParseError as ex:
        raise SupersetGenericDBErrorException(
            message=_("Invalid SQL: %(error)s", error=ex.error.message),
        ) from ex
    if parsed_script.has_mutation():
        raise SupersetSecurityException(
            SupersetError(
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -644,7 +644,7 @@ class SQLStatement(BaseSQLStatement[exp.Expression]):
        # depending on the dialect (Oracle, MS SQL) the `ALTER` is parsed as a
        # command, not an expression - check at root level
        if isinstance(self._parsed, exp.Command) and self._parsed.name == "ALTER":
-            return True
+            return True  #  pragma: no cover
        # Postgres runs DMLs prefixed by `EXPLAIN ANALYZE`, see
        # https://www.postgresql.org/docs/current/sql-explain.html
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -974,6 +974,36 @@ class TestDatasetApi(SupersetTestCase):
        assert rv.status_code == 422
        assert data == {"message": "Dataset could not be created."}
    @patch("superset.commands.dataset.create.security_manager.raise_for_access")
    def test_create_dataset_with_invalid_sql_validation(self, mock_raise_for_access):
        """
        Dataset API: Test create dataset with invalid SQL during validation returns 422
        """
        from superset.exceptions import SupersetParseError
        # Mock raise_for_access to throw SupersetParseError during validation
        mock_raise_for_access.side_effect = SupersetParseError(
            sql="SELECT FROM WHERE AND",
            engine="postgresql",
            message="Invalid SQL syntax",
        )
        self.login(ADMIN_USERNAME)
        examples_db = get_example_database()
        dataset_data = {
            "database": examples_db.id,
            "schema": "",
            "table_name": "invalid_sql_table",
            "sql": "SELECT FROM WHERE AND",
        }
        uri = "api/v1/dataset/"
        rv = self.client.post(uri, json=dataset_data)
        data = json.loads(rv.data.decode("utf-8"))
        # The error is caught during validation and returns 422
        assert rv.status_code == 422
        assert "sql" in data["message"]
        assert "Invalid SQL:" in data["message"]["sql"][0]
    def test_update_dataset_preserve_ownership(self):
        """
        Dataset API: Test update dataset preserves owner list (if un-changed)
--- a/tests/unit_tests/commands/dataset/test_create.py
+++ b/tests/unit_tests/commands/dataset/test_create.py
@@ -0,0 +1,170 @@
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 from unittest.mock import Mock, patch
 import pytest
 from marshmallow import ValidationError
 from superset.commands.dataset.create import CreateDatasetCommand
 from superset.commands.dataset.exceptions import DatasetInvalidError
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
 from superset.exceptions import SupersetParseError
 from superset.models.core import Database
 def test_create_dataset_invalid_sql_parse_error():
    """Test that invalid SQL returns a 4xx error when caught as SupersetParseError."""
    mock_database = Mock(spec=Database)
    mock_database.id = 1
    mock_database.db_engine_spec.engine = "postgresql"
    mock_database.get_default_catalog.return_value = None
    with patch(
        "superset.commands.dataset.create.DatasetDAO.get_database_by_id",
        return_value=mock_database,
    ):
        with patch(
            "superset.commands.dataset.create.DatasetDAO.validate_uniqueness",
            return_value=True,
        ):
            with patch(
                "superset.commands.dataset.create.security_manager.raise_for_access",
                side_effect=SupersetParseError(
                    sql="SELECT INVALID SQL SYNTAX",
                    engine="postgresql",
                    message="Invalid SQL syntax: unexpected token 'INVALID'",
                ),
            ):
                with patch(
                    "superset.commands.dataset.create.CreateDatasetCommand.populate_owners",
                    return_value=[],
                ):
                    command = CreateDatasetCommand(
                        {
                            "database": 1,
                            "table_name": "test_virtual_dataset",
                            "sql": "SELECT INVALID SQL SYNTAX",
                        }
                    )
                    with pytest.raises(DatasetInvalidError) as exc_info:
                        command.validate()
                    # Verify the exception contains the correct validation error
                    validation_errors = exc_info.value._exceptions
                    assert len(validation_errors) == 1
                    assert isinstance(validation_errors[0], ValidationError)
                    assert validation_errors[0].field_name == "sql"
                    assert "Invalid SQL:" in str(validation_errors[0].messages[0])
                    assert "unexpected token 'INVALID'" in str(
                        validation_errors[0].messages[0]
                    )
 def test_create_dataset_valid_sql_with_access_error():
    """
    Test that security exceptions work correctly
    """
    mock_database = Mock(spec=Database)
    mock_database.id = 1
    mock_database.db_engine_spec.engine = "postgresql"
    mock_database.get_default_catalog.return_value = None
    from superset.exceptions import SupersetSecurityException
    with patch(
        "superset.commands.dataset.create.DatasetDAO.get_database_by_id",
        return_value=mock_database,
    ):
        with patch(
            "superset.commands.dataset.create.DatasetDAO.validate_uniqueness",
            return_value=True,
        ):
            with patch(
                "superset.commands.dataset.create.security_manager.raise_for_access",
                side_effect=SupersetSecurityException(
                    SupersetError(
                        error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
                        message="User does not have access to table 'secret_table'",
                        level=ErrorLevel.ERROR,
                    )
                ),
            ):
                with patch(
                    "superset.commands.dataset.create.CreateDatasetCommand.populate_owners",
                    return_value=[],
                ):
                    command = CreateDatasetCommand(
                        {
                            "database": 1,
                            "table_name": "test_virtual_dataset",
                            "sql": "SELECT * FROM secret_table",
                        }
                    )
                    with pytest.raises(DatasetInvalidError) as exc_info:
                        command.validate()
                    # Verify the security error is handled correctly (existing behavior)
                    validation_errors = exc_info.value._exceptions
                    assert len(validation_errors) == 1
                    # This should be a DatasetDataAccessIsNotAllowed error
                    from superset.commands.dataset.exceptions import (
                        DatasetDataAccessIsNotAllowed,
                    )
                    assert isinstance(
                        validation_errors[0], DatasetDataAccessIsNotAllowed
                    )
                    assert validation_errors[0].field_name == "sql"
                    assert "User does not have access to table 'secret_table'" in str(
                        validation_errors[0].messages[0]
                    )
 def test_create_dataset_physical_table_no_parse_error():
    """Test that physical tables (no SQL) don't trigger parsing."""
    mock_database = Mock(spec=Database)
    mock_database.id = 1
    mock_database.get_default_catalog.return_value = None
    with patch(
        "superset.commands.dataset.create.DatasetDAO.get_database_by_id",
        return_value=mock_database,
    ):
        with patch(
            "superset.commands.dataset.create.DatasetDAO.validate_uniqueness",
            return_value=True,
        ):
            with patch(
                "superset.commands.dataset.create.DatasetDAO.validate_table_exists",
                return_value=True,
            ):
                with patch(
                    "superset.commands.dataset.create.CreateDatasetCommand.populate_owners",
                    return_value=[],
                ):
                    command = CreateDatasetCommand(
                        {
                            "database": 1,
                            "table_name": "physical_table",
                            # No SQL provided - this is a physical table
                        }
                    )
                    # Should not raise any parsing errors
                    command.validate()
--- a/tests/unit_tests/connectors/sqla/test_utils.py
+++ b/tests/unit_tests/connectors/sqla/test_utils.py
@@ -0,0 +1,123 @@
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 from unittest.mock import Mock, patch
 import pytest
 from superset.connectors.sqla.models import SqlaTable
 from superset.connectors.sqla.utils import get_virtual_table_metadata
 from superset.errors import SupersetErrorType
 from superset.exceptions import (
    SupersetGenericDBErrorException,
    SupersetSecurityException,
 )
 from superset.models.core import Database
 def test_get_virtual_table_metadata_invalid_sql():
    """Test that invalid SQL in virtual table raises proper exception."""
    mock_dataset = Mock(spec=SqlaTable)
    mock_database = Mock(spec=Database)
    mock_dataset.database = mock_database
    mock_dataset.sql = "SELECT INVALID SYNTAX FROM"
    mock_database.db_engine_spec.engine = "postgresql"
    # Mock template processor
    mock_template_processor = Mock()
    mock_template_processor.process_template.return_value = "SELECT INVALID SYNTAX FROM"
    mock_dataset.get_template_processor.return_value = mock_template_processor
    mock_dataset.template_params_dict = {}
    with pytest.raises(SupersetGenericDBErrorException) as exc_info:
        get_virtual_table_metadata(mock_dataset)
    # Check that the error message includes the parsing error
    assert "Invalid SQL:" in str(exc_info.value.message)
 def test_get_virtual_table_metadata_empty_sql():
    """Test that empty SQL raises appropriate error."""
    mock_dataset = Mock(spec=SqlaTable)
    mock_dataset.sql = None
    with pytest.raises(SupersetGenericDBErrorException) as exc_info:
        get_virtual_table_metadata(mock_dataset)
    assert "Virtual dataset query cannot be empty" in str(exc_info.value.message)
 def test_get_virtual_table_metadata_mutation_not_allowed():
    """Test that SQL with mutations raises security error."""
    mock_dataset = Mock(spec=SqlaTable)
    mock_database = Mock(spec=Database)
    mock_dataset.database = mock_database
    mock_dataset.sql = "DELETE FROM users"
    mock_database.db_engine_spec.engine = "postgresql"
    # Mock template processor
    mock_template_processor = Mock()
    mock_template_processor.process_template.return_value = "DELETE FROM users"
    mock_dataset.get_template_processor.return_value = mock_template_processor
    mock_dataset.template_params_dict = {}
    # Mock SQLScript to simulate mutation detection
    with patch("superset.connectors.sqla.utils.SQLScript") as mock_sqlscript_class:
        mock_script = Mock()
        mock_script.has_mutation.return_value = True
        mock_sqlscript_class.return_value = mock_script
        with pytest.raises(SupersetSecurityException) as exc_info:
            get_virtual_table_metadata(mock_dataset)
        assert (
            exc_info.value.error.error_type
            == SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR
        )
        assert "Only `SELECT` statements are allowed" in exc_info.value.error.message
 def test_get_virtual_table_metadata_multiple_statements_not_allowed():
    """Test that multiple SQL statements raise security error."""
    mock_dataset = Mock(spec=SqlaTable)
    mock_database = Mock(spec=Database)
    mock_dataset.database = mock_database
    mock_dataset.sql = "SELECT * FROM table1; SELECT * FROM table2"
    mock_database.db_engine_spec.engine = "postgresql"
    # Mock template processor
    mock_template_processor = Mock()
    mock_template_processor.process_template.return_value = (
        "SELECT * FROM table1; SELECT * FROM table2"
    )
    mock_dataset.get_template_processor.return_value = mock_template_processor
    mock_dataset.template_params_dict = {}
    # Mock SQLScript to simulate multiple statements
    with patch("superset.connectors.sqla.utils.SQLScript") as mock_sqlscript_class:
        mock_script = Mock()
        mock_script.has_mutation.return_value = False
        mock_script.statements = [Mock(), Mock()]  # Two statements
        mock_sqlscript_class.return_value = mock_script
        with pytest.raises(SupersetSecurityException) as exc_info:
            get_virtual_table_metadata(mock_dataset)
        assert (
            exc_info.value.error.error_type
            == SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR
        )
        assert "Only single queries supported" in exc_info.value.error.message