feat(examples): Transpile virtual dataset SQL on import (#37311)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
Co-authored-by: bito-code-review[bot] <188872107+bito-code-review[bot]@users.noreply.github.com>
This commit is contained in:
Evan Rusackas
2026-01-22 09:50:05 -08:00
committed by GitHub
parent b630830841
commit 87bbd54d0a
6 changed files with 377 additions and 3 deletions

View File

@@ -0,0 +1,244 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for the examples importer, specifically SQL transpilation."""
from unittest.mock import MagicMock, patch
from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql
def test_transpile_virtual_dataset_sql_no_sql():
    """A dataset config whose ``sql`` is ``None`` is left as ``None``."""
    dataset_config = dict(table_name="my_table", sql=None)

    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] is None
def test_transpile_virtual_dataset_sql_empty_sql():
    """A dataset config whose ``sql`` is the empty string stays empty."""
    dataset_config = dict(table_name="my_table", sql="")

    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == ""
@patch("superset.commands.importers.v1.examples.db")
def test_transpile_virtual_dataset_sql_database_not_found(mock_db):
    """When the database lookup yields nothing, the SQL must not be modified."""
    # Make the mocked ``session.query(...).get(...)`` chain return no database.
    lookup = mock_db.session.query.return_value
    lookup.get.return_value = None

    sql_before = "SELECT * FROM foo"
    dataset_config = {"table_name": "my_table", "sql": sql_before}

    transpile_virtual_dataset_sql(dataset_config, 999)

    # The config keeps exactly the SQL it started with.
    assert dataset_config["sql"] == sql_before
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db):
    """The transpiled SQL replaces the original when a source engine is given."""
    source_sql = "SELECT * FROM foo"
    rewritten_sql = "SELECT * FROM `foo`"

    # Target database reported by the mocked session lookup.
    target_db = MagicMock()
    target_db.db_engine_spec.engine = "mysql"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = rewritten_sql

    dataset_config = {
        "table_name": "my_table",
        "sql": source_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    # The transpiler receives (sql, target engine, source engine) and its
    # return value is written back into the config.
    mock_transpile.assert_called_once_with(source_sql, "mysql", "postgresql")
    assert dataset_config["sql"] == rewritten_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db):
    """A config without ``source_db_engine`` passes ``None`` as source engine."""
    source_sql = "SELECT * FROM foo"
    rewritten_sql = "SELECT * FROM `foo`"

    target_db = MagicMock()
    target_db.db_engine_spec.engine = "mysql"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = rewritten_sql

    # Legacy-style config: the "source_db_engine" key is deliberately absent.
    dataset_config = {"table_name": "my_table", "sql": source_sql}
    transpile_virtual_dataset_sql(dataset_config, 1)

    mock_transpile.assert_called_once_with(source_sql, "mysql", None)
    assert dataset_config["sql"] == rewritten_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db):
    """The SQL is unchanged when the transpiler hands back identical SQL."""
    same_sql = "SELECT * FROM foo"

    # Source and target engine are both PostgreSQL in this scenario.
    target_db = MagicMock()
    target_db.db_engine_spec.engine = "postgresql"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = same_sql

    dataset_config = {
        "table_name": "my_table",
        "sql": same_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == same_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db):
    """A ``QueryClauseValidationException`` from the transpiler is tolerated.

    The call must not raise, and the config keeps its original SQL.
    """
    from superset.exceptions import QueryClauseValidationException

    untranslatable_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo"

    target_db = MagicMock()
    target_db.db_engine_spec.engine = "mysql"
    mock_db.session.query.return_value.get.return_value = target_db

    # Every transpile attempt fails with a validation error.
    mock_transpile.side_effect = QueryClauseValidationException("Parse error")

    dataset_config = {
        "table_name": "my_table",
        "sql": untranslatable_sql,
        "source_db_engine": "postgresql",
    }

    # No exception may escape; the SQL must survive untouched.
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == untranslatable_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db):
    """PostgreSQL-sourced SQL is routed to the transpiler with a DuckDB target.

    The transpiler is mocked, so this checks the arguments it receives and
    that its return value ends up in the config.
    """
    source_sql = (
        "\n"
        "SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt\n"
        "FROM orders WHERE status = 'completed' GROUP BY 1\n"
    )
    # The mocked DuckDB output has the same text as the input in this case.
    duckdb_sql = (
        "\n"
        "SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt\n"
        "FROM orders WHERE status = 'completed' GROUP BY 1\n"
    )

    target_db = MagicMock()
    target_db.db_engine_spec.engine = "duckdb"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = duckdb_sql

    dataset_config = {
        "table_name": "monthly_orders",
        "sql": source_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    mock_transpile.assert_called_once_with(source_sql, "duckdb", "postgresql")
    assert dataset_config["sql"] == duckdb_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db):
    """PostgreSQL-sourced SQL is routed to the transpiler with a ClickHouse target.

    The transpiler itself is mocked; the canned result uses ClickHouse-style
    date truncation so the stored SQL visibly differs from the input.
    """
    # Input written with PostgreSQL's DATE_TRUNC.
    source_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders"
    # Canned transpiler output using toStartOfMonth.
    clickhouse_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders"

    target_db = MagicMock()
    target_db.db_engine_spec.engine = "clickhouse"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = clickhouse_sql

    dataset_config = {
        "table_name": "monthly_orders",
        "sql": source_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    mock_transpile.assert_called_once_with(source_sql, "clickhouse", "postgresql")
    assert dataset_config["sql"] == clickhouse_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db):
    """PostgreSQL-sourced SQL is routed to the transpiler with a MySQL target.

    The transpiler is mocked; its canned output swaps ``::`` casting for
    ``CAST(...)`` and backtick-quotes the table name.
    """
    # Input using PostgreSQL's ``::`` cast operator.
    source_sql = "SELECT created_at::DATE AS date_only FROM orders"
    # Canned transpiler output in MySQL form.
    mysql_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`"

    target_db = MagicMock()
    target_db.db_engine_spec.engine = "mysql"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = mysql_sql

    dataset_config = {
        "table_name": "orders_dates",
        "sql": source_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    mock_transpile.assert_called_once_with(source_sql, "mysql", "postgresql")
    assert dataset_config["sql"] == mysql_sql
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db):
    """PostgreSQL-sourced SQL is routed to the transpiler with a SQLite target.

    The transpiler is mocked; the test verifies the engines passed to it and
    that the config ends up holding its return value.
    """
    source_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'"
    sqlite_sql = "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')"

    target_db = MagicMock()
    target_db.db_engine_spec.engine = "sqlite"
    mock_db.session.query.return_value.get.return_value = target_db

    mock_transpile.return_value = sqlite_sql

    dataset_config = {
        "table_name": "recent_orders",
        "sql": source_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    mock_transpile.assert_called_once_with(source_sql, "sqlite", "postgresql")
    assert dataset_config["sql"] == sqlite_sql

View File

@@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> None:
match="Cannot transpile SQL to postgresql",
):
transpile_to_dialect("name = 'test'", "postgresql")
# Tests for source_engine parameter
@pytest.mark.parametrize(
    ("sql", "source_engine", "target_engine", "expected"),
    [
        # PostgreSQL -> MySQL: the ``::`` cast is expected to become CAST().
        pytest.param(
            "SELECT created_at::DATE FROM orders",
            "postgresql",
            "mysql",
            "SELECT CAST(created_at AS DATE) FROM orders",
        ),
        # Identical source and target: the SQL is expected to come back as-is.
        pytest.param(
            "SELECT * FROM orders",
            "postgresql",
            "postgresql",
            "SELECT * FROM orders",
        ),
        # PostgreSQL -> DuckDB: only the date-part literal is upper-cased.
        pytest.param(
            "SELECT DATE_TRUNC('month', ts) FROM orders",
            "postgresql",
            "duckdb",
            "SELECT DATE_TRUNC('MONTH', ts) FROM orders",
        ),
    ],
)
def test_transpile_with_source_engine(
    sql: str, source_engine: str, target_engine: str, expected: str
) -> None:
    """Transpiling with an explicit source engine yields the expected SQL."""
    # Note the argument order: the target engine precedes the source engine.
    assert transpile_to_dialect(sql, target_engine, source_engine) == expected
def test_transpile_source_engine_none_uses_generic() -> None:
    """Passing ``None`` as the source engine leaves simple SQL unchanged."""
    # Plain SELECT with nothing dialect-specific to parse.
    plain_sql = "SELECT * FROM orders"

    assert transpile_to_dialect(plain_sql, "postgresql", None) == plain_sql
def test_transpile_unknown_source_engine_uses_generic() -> None:
    """An unrecognised source engine name still transpiles simple SQL as-is."""
    plain_sql = "SELECT * FROM orders"

    # "unknown_engine" is not a real dialect name; the call must still succeed.
    transpiled = transpile_to_dialect(plain_sql, "postgresql", "unknown_engine")

    assert transpiled == plain_sql