mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
feat(examples): Transpile virtual dataset SQL on import (#37311)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: Beto Dealmeida <roberto@dealmeida.net> Co-authored-by: bito-code-review[bot] <188872107+bito-code-review[bot]@users.noreply.github.com>
This commit is contained in:
244
tests/unit_tests/commands/importers/v1/examples_test.py
Normal file
244
tests/unit_tests/commands/importers/v1/examples_test.py
Normal file
@@ -0,0 +1,244 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""Tests for the examples importer, specifically SQL transpilation."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql
|
||||
|
||||
|
||||
def test_transpile_virtual_dataset_sql_no_sql():
    """Leave a config untouched when its ``sql`` value is ``None``."""
    dataset_config = dict(table_name="my_table", sql=None)

    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] is None
|
||||
|
||||
|
||||
def test_transpile_virtual_dataset_sql_empty_sql():
    """Leave a config untouched when its ``sql`` value is an empty string."""
    dataset_config = dict(table_name="my_table", sql="")

    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == ""
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
def test_transpile_virtual_dataset_sql_database_not_found(mock_db):
    """Keep the SQL as-is when the database lookup returns ``None``."""
    # Make the mocked ``db.session.query(...).get(...)`` chain find nothing.
    lookup = mock_db.session.query.return_value.get
    lookup.return_value = None

    sql_before = "SELECT * FROM foo"
    dataset_config = {"table_name": "my_table", "sql": sql_before}

    transpile_virtual_dataset_sql(dataset_config, 999)

    # The query must not have been rewritten.
    assert dataset_config["sql"] == sql_before
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db):
    """Store the transpiled SQL and pass both engines to the transpiler."""
    source_sql = "SELECT * FROM foo"
    target_sql = "SELECT * FROM `foo`"

    # The looked-up database reports MySQL as its engine.
    target_database = MagicMock(**{"db_engine_spec.engine": "mysql"})
    mock_db.session.query.return_value.get.return_value = target_database
    mock_transpile.return_value = target_sql

    dataset_config = {
        "table_name": "my_table",
        "sql": source_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == target_sql
    mock_transpile.assert_called_once_with(source_sql, "mysql", "postgresql")
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db):
    """Pass ``None`` as source engine when the config has no such key (legacy)."""
    source_sql = "SELECT * FROM foo"
    target_sql = "SELECT * FROM `foo`"

    target_database = MagicMock(**{"db_engine_spec.engine": "mysql"})
    mock_db.session.query.return_value.get.return_value = target_database
    mock_transpile.return_value = target_sql

    # Deliberately omit "source_db_engine" from the config.
    dataset_config = {"table_name": "my_table", "sql": source_sql}
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == target_sql
    mock_transpile.assert_called_once_with(source_sql, "mysql", None)
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db):
    """Keep the SQL when source and target engines are both PostgreSQL."""
    unchanged_sql = "SELECT * FROM foo"

    target_database = MagicMock(**{"db_engine_spec.engine": "postgresql"})
    mock_db.session.query.return_value.get.return_value = target_database
    # The mocked transpiler echoes its input back.
    mock_transpile.return_value = unchanged_sql

    dataset_config = {
        "table_name": "my_table",
        "sql": unchanged_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == unchanged_sql
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db):
    """Absorb a transpiler failure and leave the config's SQL untouched."""
    from superset.exceptions import QueryClauseValidationException

    mock_db.session.query.return_value.get.return_value = MagicMock(
        **{"db_engine_spec.engine": "mysql"}
    )
    # Every call to the mocked transpiler raises.
    mock_transpile.side_effect = QueryClauseValidationException("Parse error")

    untranspilable_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo"
    dataset_config = {
        "table_name": "my_table",
        "sql": untranspilable_sql,
        "source_db_engine": "postgresql",
    }

    # Must return normally instead of propagating the exception.
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == untranspilable_sql
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db):
    """Test transpilation from PostgreSQL to DuckDB.

    The mocked transpiler output differs from the input (upper-cased date
    part), so the final equality check fails if the transpiled SQL is not
    written back into the config.
    """
    mock_database = MagicMock()
    mock_database.db_engine_spec.engine = "duckdb"
    mock_db.session.query.return_value.get.return_value = mock_database

    original_sql = """
        SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
        FROM orders WHERE status = 'completed' GROUP BY 1
    """
    # Must not be identical to ``original_sql``; otherwise the assertion on
    # ``config["sql"]`` below would pass even when nothing was written back.
    transpiled_sql = """
        SELECT DATE_TRUNC('MONTH', created_at) AS month, COUNT(*) AS cnt
        FROM orders WHERE status = 'completed' GROUP BY 1
    """
    mock_transpile.return_value = transpiled_sql

    config = {
        "table_name": "monthly_orders",
        "sql": original_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(config, 1)

    assert config["sql"] == transpiled_sql
    mock_transpile.assert_called_once_with(original_sql, "duckdb", "postgresql")
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db):
    """Store ClickHouse SQL produced from a PostgreSQL source query.

    The mocked result swaps ``DATE_TRUNC`` for ``toStartOfMonth``, so input
    and output differ.
    """
    # PostgreSQL flavour going in, ClickHouse flavour coming out of the mock.
    postgres_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders"
    clickhouse_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders"

    target_database = MagicMock(**{"db_engine_spec.engine": "clickhouse"})
    mock_db.session.query.return_value.get.return_value = target_database
    mock_transpile.return_value = clickhouse_sql

    dataset_config = {
        "table_name": "monthly_orders",
        "sql": postgres_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == clickhouse_sql
    mock_transpile.assert_called_once_with(postgres_sql, "clickhouse", "postgresql")
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db):
    """Store MySQL SQL produced from a PostgreSQL source query.

    The mocked result replaces ``::DATE`` with ``CAST(... AS DATE)`` and
    backtick-quotes the table name.
    """
    # ``::`` cast going in, ``CAST`` plus backticks coming out of the mock.
    postgres_sql = "SELECT created_at::DATE AS date_only FROM orders"
    mysql_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`"

    target_database = MagicMock(**{"db_engine_spec.engine": "mysql"})
    mock_db.session.query.return_value.get.return_value = target_database
    mock_transpile.return_value = mysql_sql

    dataset_config = {
        "table_name": "orders_dates",
        "sql": postgres_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == mysql_sql
    mock_transpile.assert_called_once_with(postgres_sql, "mysql", "postgresql")
|
||||
|
||||
|
||||
@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db):
    """Store SQLite SQL produced from a PostgreSQL source query."""
    postgres_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'"
    sqlite_sql = "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')"

    target_database = MagicMock(**{"db_engine_spec.engine": "sqlite"})
    mock_db.session.query.return_value.get.return_value = target_database
    mock_transpile.return_value = sqlite_sql

    dataset_config = {
        "table_name": "recent_orders",
        "sql": postgres_sql,
        "source_db_engine": "postgresql",
    }
    transpile_virtual_dataset_sql(dataset_config, 1)

    assert dataset_config["sql"] == sqlite_sql
    mock_transpile.assert_called_once_with(postgres_sql, "sqlite", "postgresql")
|
||||
@@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> None:
|
||||
match="Cannot transpile SQL to postgresql",
|
||||
):
|
||||
transpile_to_dialect("name = 'test'", "postgresql")
|
||||
|
||||
|
||||
# Tests for source_engine parameter
|
||||
@pytest.mark.parametrize(
    ("sql", "source_engine", "target_engine", "expected"),
    [
        # ``::`` cast (PostgreSQL) becomes ``CAST(...)`` for MySQL.
        (
            "SELECT created_at::DATE FROM orders",
            "postgresql",
            "mysql",
            "SELECT CAST(created_at AS DATE) FROM orders",
        ),
        # Identical source and target engines: SQL comes back unchanged.
        (
            "SELECT * FROM orders",
            "postgresql",
            "postgresql",
            "SELECT * FROM orders",
        ),
        # PostgreSQL to DuckDB: same function, date part is upper-cased.
        (
            "SELECT DATE_TRUNC('month', ts) FROM orders",
            "postgresql",
            "duckdb",
            "SELECT DATE_TRUNC('MONTH', ts) FROM orders",
        ),
    ],
)
def test_transpile_with_source_engine(
    sql: str, source_engine: str, target_engine: str, expected: str
) -> None:
    """Check the transpiled output for each explicit source/target engine pair."""
    # Note the argument order: target engine comes before source engine.
    assert transpile_to_dialect(sql, target_engine, source_engine) == expected
|
||||
|
||||
|
||||
def test_transpile_source_engine_none_uses_generic() -> None:
    """Accept ``source_engine=None`` and return plain SQL unchanged."""
    # Plain statement with no dialect-specific syntax.
    plain_sql = "SELECT * FROM orders"

    assert transpile_to_dialect(plain_sql, "postgresql", None) == "SELECT * FROM orders"
|
||||
|
||||
|
||||
def test_transpile_unknown_source_engine_uses_generic() -> None:
    """Accept an unrecognised source engine name and return plain SQL unchanged."""
    plain_sql = "SELECT * FROM orders"

    # "unknown_engine" is expected to be handled like a missing source engine.
    transpiled = transpile_to_dialect(plain_sql, "postgresql", "unknown_engine")

    assert transpiled == "SELECT * FROM orders"
|
||||
|
||||
Reference in New Issue
Block a user