diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py index 0446e213b33..7924fe0ad4d 100644 --- a/superset/commands/dashboard/export_example.py +++ b/superset/commands/dashboard/export_example.py @@ -175,6 +175,10 @@ def export_dataset_yaml( "schema": None, # Don't export - use target database's default schema # Preserve SQL for virtual datasets, None for physical (data is in parquet) "sql": dataset.sql if is_preserved_virtual else None, + # Track source database engine for SQL transpilation during import + "source_db_engine": ( + dataset.database.db_engine_spec.engine if is_preserved_virtual else None + ), "params": None, # Don't export - contains stale import metadata "template_params": dataset.template_params, "filter_select_enabled": dataset.filter_select_enabled, diff --git a/superset/commands/importers/v1/examples.py b/superset/commands/importers/v1/examples.py index 99ecab79555..19fe811044a 100644 --- a/superset/commands/importers/v1/examples.py +++ b/superset/commands/importers/v1/examples.py @@ -14,11 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from typing import Any, Optional from marshmallow import Schema from sqlalchemy.exc import MultipleResultsFound +from superset import db from superset.charts.schemas import ImportV1ChartSchema from superset.commands.chart.importers.v1 import ImportChartsCommand from superset.commands.chart.importers.v1.utils import import_chart @@ -41,9 +43,62 @@ from superset.daos.base import BaseDAO from superset.dashboards.schemas import ImportV1DashboardSchema from superset.databases.schemas import ImportV1DatabaseSchema from superset.datasets.schemas import ImportV1DatasetSchema +from superset.exceptions import QueryClauseValidationException +from superset.models.core import Database +from superset.sql.parse import transpile_to_dialect from superset.utils.core import get_example_default_schema from superset.utils.decorators import transaction +logger = logging.getLogger(__name__) + + +def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None: + """ + Transpile virtual dataset SQL to the target database dialect. + + This ensures that virtual datasets exported from one database type + (e.g., PostgreSQL) can be loaded into a different database type + (e.g., MySQL, DuckDB, SQLite). + + Args: + config: Dataset configuration dict (modified in place) + database_id: ID of the target database + """ + sql = config.get("sql") + if not sql: + return + + database = db.session.query(Database).get(database_id) + if not database: + logger.warning("Database %s not found, skipping SQL transpilation", database_id) + return + + target_engine = database.db_engine_spec.engine + source_engine = config.get("source_db_engine") + if target_engine == source_engine: + logger.info("Source and target dialects are identical, skipping transpilation") + return + + try: + transpiled_sql = transpile_to_dialect(sql, target_engine, source_engine) + if transpiled_sql != sql: + logger.info( + "Transpiled virtual dataset SQL for '%s' from %s to %s dialect", + config.get("table_name", "unknown"), + source_engine or "generic", + target_engine, + ) + config["sql"] = transpiled_sql + except QueryClauseValidationException as ex: + logger.warning( + "Could not transpile SQL for dataset '%s' from %s to %s: %s. " + "Using original SQL which may not be compatible.", + config.get("table_name", "unknown"), + source_engine or "generic", + target_engine, + ex, + ) + class ImportExamplesCommand(ImportModelsCommand): """Import examples""" @@ -119,6 +174,9 @@ class ImportExamplesCommand(ImportModelsCommand): if config["schema"] is None: config["schema"] = get_example_default_schema() + # transpile virtual dataset SQL to target database dialect + transpile_virtual_dataset_sql(config, config["database_id"]) + try: dataset = import_dataset( config, diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py index 96bc44a9d3c..1506ef45d16 100644 --- a/superset/datasets/schemas.py +++ b/superset/datasets/schemas.py @@ -322,6 +322,8 @@ class ImportV1DatasetSchema(Schema): schema = fields.String(allow_none=True) catalog = fields.String(allow_none=True) sql = fields.String(allow_none=True) + # Source database engine for SQL transpilation (virtual datasets only) + source_db_engine = fields.String(allow_none=True, load_default=None) params = fields.Dict(allow_none=True) template_params = fields.Dict(allow_none=True) filter_select_enabled = fields.Boolean() diff --git a/superset/sql/parse.py b/superset/sql/parse.py index af72f72e952..af9a740ec75 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -1522,9 +1522,21 @@ def sanitize_clause(clause: str, engine: str) -> str: raise QueryClauseValidationException(f"Invalid SQL clause: {clause}") from ex -def transpile_to_dialect(sql: str, target_engine: str) -> str: +def transpile_to_dialect( + sql: str, + target_engine: str, + source_engine: str | None = None, +) -> str: """ - Transpile SQL from "generic SQL" to the target database dialect using SQLGlot. + Transpile SQL from one database dialect to another using SQLGlot. + + Args: + sql: The SQL query to transpile + target_engine: The target database engine (e.g., "mysql", "postgresql") + source_engine: The source database engine. If None, uses generic SQL dialect. + + Returns: + The transpiled SQL string If the target engine is not in SQLGLOT_DIALECTS, returns the SQL as-is. """ @@ -1534,8 +1546,11 @@ def transpile_to_dialect(sql: str, target_engine: str) -> str: if target_dialect is None: return sql + # Get source dialect (default to generic if not specified) + source_dialect = SQLGLOT_DIALECTS.get(source_engine) if source_engine else Dialect + try: - parsed = sqlglot.parse_one(sql, dialect=Dialect) + parsed = sqlglot.parse_one(sql, dialect=source_dialect) return Dialect.get_or_raise(target_dialect).generate( parsed, copy=True, diff --git a/tests/unit_tests/commands/importers/v1/examples_test.py b/tests/unit_tests/commands/importers/v1/examples_test.py new file mode 100644 index 00000000000..1ad6176dc1c --- /dev/null +++ b/tests/unit_tests/commands/importers/v1/examples_test.py @@ -0,0 +1,244 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for the examples importer, specifically SQL transpilation.""" + +from unittest.mock import MagicMock, patch + +from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql + + +def test_transpile_virtual_dataset_sql_no_sql(): + """Test that configs without SQL are unchanged.""" + config = {"table_name": "my_table", "sql": None} + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] is None + + +def test_transpile_virtual_dataset_sql_empty_sql(): + """Test that configs with empty SQL are unchanged.""" + config = {"table_name": "my_table", "sql": ""} + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] == "" + + +@patch("superset.commands.importers.v1.examples.db") +def test_transpile_virtual_dataset_sql_database_not_found(mock_db): + """Test graceful handling when database is not found.""" + mock_db.session.query.return_value.get.return_value = None + + config = {"table_name": "my_table", "sql": "SELECT * FROM foo"} + original_sql = config["sql"] + + transpile_virtual_dataset_sql(config, 999) + + # SQL should remain unchanged + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db): + """Test successful SQL transpilation with source engine.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.return_value = "SELECT * FROM `foo`" + + config = { + "table_name": "my_table", + "sql": "SELECT * FROM foo", + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == "SELECT * FROM `foo`" + mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db): + """Test transpilation when source_db_engine is not specified (legacy).""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.return_value = "SELECT * FROM `foo`" + + # No source_db_engine - should default to None (generic dialect) + config = {"table_name": "my_table", "sql": "SELECT * FROM foo"} + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == "SELECT * FROM `foo`" + mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", None) + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db): + """Test when transpilation returns same SQL (no dialect differences).""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "postgresql" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = "SELECT * FROM foo" + mock_transpile.return_value = original_sql + + config = { + "table_name": "my_table", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db): + """Test graceful fallback when transpilation fails.""" + from superset.exceptions import QueryClauseValidationException + + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + mock_transpile.side_effect = QueryClauseValidationException("Parse error") + + original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo" + config = { + "table_name": "my_table", + "sql": original_sql, + "source_db_engine": "postgresql", + } + + # Should not raise, should keep original SQL + transpile_virtual_dataset_sql(config, 1) + assert config["sql"] == original_sql + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to DuckDB.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "duckdb" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = """ + SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt + FROM orders WHERE status = 'completed' GROUP BY 1 + """ + transpiled_sql = """ + SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt + FROM orders WHERE status = 'completed' GROUP BY 1 + """ + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "monthly_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "duckdb", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to ClickHouse. + + ClickHouse has different syntax for date functions, so this tests + real dialect differences. + """ + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "clickhouse" + mock_db.session.query.return_value.get.return_value = mock_database + + # PostgreSQL syntax + original_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders" + # ClickHouse uses toStartOfMonth instead + transpiled_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders" + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "monthly_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "clickhouse", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to MySQL. + + MySQL uses backticks for identifiers and has different casting syntax. + """ + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "mysql" + mock_db.session.query.return_value.get.return_value = mock_database + + # PostgreSQL syntax with :: casting + original_sql = "SELECT created_at::DATE AS date_only FROM orders" + # MySQL syntax with CAST + transpiled_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`" + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "orders_dates", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "mysql", "postgresql") + + +@patch("superset.commands.importers.v1.examples.db") +@patch("superset.commands.importers.v1.examples.transpile_to_dialect") +def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db): + """Test transpilation from PostgreSQL to SQLite.""" + mock_database = MagicMock() + mock_database.db_engine_spec.engine = "sqlite" + mock_db.session.query.return_value.get.return_value = mock_database + + original_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'" + transpiled_sql = ( + "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')" + ) + mock_transpile.return_value = transpiled_sql + + config = { + "table_name": "recent_orders", + "sql": original_sql, + "source_db_engine": "postgresql", + } + transpile_virtual_dataset_sql(config, 1) + + assert config["sql"] == transpiled_sql + mock_transpile.assert_called_once_with(original_sql, "sqlite", "postgresql") diff --git a/tests/unit_tests/sql/transpile_to_dialect_test.py b/tests/unit_tests/sql/transpile_to_dialect_test.py index 1327b09009c..5a11e501fad 100644 --- a/tests/unit_tests/sql/transpile_to_dialect_test.py +++ b/tests/unit_tests/sql/transpile_to_dialect_test.py @@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> None: match="Cannot transpile SQL to postgresql", ): transpile_to_dialect("name = 'test'", "postgresql") + + +# Tests for source_engine parameter +@pytest.mark.parametrize( + ("sql", "source_engine", "target_engine", "expected"), + [ + # PostgreSQL to MySQL - should convert :: casting to CAST() + ( + "SELECT created_at::DATE FROM orders", + "postgresql", + "mysql", + "SELECT CAST(created_at AS DATE) FROM orders", + ), + # Same dialect - should preserve SQL + ( + "SELECT * FROM orders", + "postgresql", + "postgresql", + "SELECT * FROM orders", + ), + # PostgreSQL to DuckDB - DuckDB supports similar syntax (uppercases date part) + ( + "SELECT DATE_TRUNC('month', ts) FROM orders", + "postgresql", + "duckdb", + "SELECT DATE_TRUNC('MONTH', ts) FROM orders", + ), + ], +) +def test_transpile_with_source_engine( + sql: str, source_engine: str, target_engine: str, expected: str +) -> None: + """Test transpilation with explicit source engine.""" + result = transpile_to_dialect(sql, target_engine, source_engine) + assert result == expected + + +def test_transpile_source_engine_none_uses_generic() -> None: + """Test that source_engine=None uses generic dialect (backward compatible).""" + # Simple SQL that doesn't require dialect-specific parsing + result = transpile_to_dialect("SELECT * FROM orders", "postgresql", None) + assert result == "SELECT * FROM orders" + + +def test_transpile_unknown_source_engine_uses_generic() -> None: + """Test that unknown source_engine falls back to generic dialect.""" + # Unknown engine should be treated as None (generic) + result = transpile_to_dialect( + "SELECT * FROM orders", "postgresql", "unknown_engine" + ) + assert result == "SELECT * FROM orders"