feat: push predicates into virtual datasets (#31486)

This commit is contained in:
Beto Dealmeida
2025-01-08 22:11:28 -05:00
committed by GitHub
parent f29eafd044
commit e4b3ecd372
10 changed files with 191 additions and 5 deletions

View File

@@ -226,7 +226,7 @@ def test_select_star(mocker: MockerFixture) -> None:
# mock the database so we can compile the query
database = mocker.MagicMock()
database.compile_sqla_query = lambda query: str(
database.compile_sqla_query = lambda query, catalog, schema: str(
query.compile(dialect=sqlite.dialect())
)

View File

@@ -149,7 +149,7 @@ def test_select_star(mocker: MockerFixture) -> None:
# mock the database so we can compile the query
database = mocker.MagicMock()
database.compile_sqla_query = lambda query: str(
database.compile_sqla_query = lambda query, catalog, schema: str(
query.compile(dialect=BigQueryDialect(), compile_kwargs={"literal_binds": True})
)

View File

@@ -21,9 +21,17 @@ from datetime import datetime
import pytest
from pytest_mock import MockerFixture
from sqlalchemy import (
Column,
Integer,
MetaData,
select,
Table as SqlalchemyTable,
)
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.engine.url import make_url
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import Select
from superset.connectors.sqla.models import SqlaTable, TableColumn
from superset.errors import SupersetErrorType
@@ -45,6 +53,29 @@ oauth2_client_info = {
}
@pytest.fixture
def query() -> Select:
    """
    Build a nested ``SELECT`` used by the query-compilation tests.

    The inner statement selects columns ``a``, ``b`` and ``c`` from
    ``some_table``. The outer statement selects ``a`` and ``b`` from that
    subquery and filters on ``a > 1`` and ``b == 2``, i.e. the predicates are
    attached to the outer query only.

    :returns: the outer ``Select`` statement
    """
    some_table = SqlalchemyTable(
        "some_table",
        MetaData(),
        Column("a", Integer),
        Column("b", Integer),
        Column("c", Integer),
    )

    # Create the subquery explicitly: reading ``.c`` directly off a ``Select``
    # is deprecated since SQLAlchemy 1.4 because it builds this same subquery
    # implicitly behind the scenes.
    inner = select(some_table.c.a, some_table.c.b, some_table.c.c).subquery()

    return select(inner.c.a, inner.c.b).where(
        inner.c.a > 1,
        inner.c.b == 2,
    )
def test_get_metrics(mocker: MockerFixture) -> None:
"""
Tests for ``get_metrics``.
@@ -683,3 +714,56 @@ def test_purge_oauth2_tokens(session: Session) -> None:
# make sure database was not deleted... just in case
database = session.query(Database).filter_by(id=database1.id).one()
assert database.name == "my_oauth2_db"
def test_compile_sqla_query_no_optimization(query: Select) -> None:
    """
    Check ``compile_sqla_query`` when this test overrides no feature flag.

    The expected SQL keeps both predicates in the outer ``WHERE`` clause and
    leaves the inner query unfiltered.
    """
    from superset.models.core import Database

    # Several expected lines end with a space; it is interpolated rather than
    # typed literally (presumably so tooling does not strip it -- confirm).
    space = " "
    expected = f"""SELECT anon_1.a, anon_1.b{space}
FROM (SELECT some_table.a AS a, some_table.b AS b, some_table.c AS c{space}
FROM some_table) AS anon_1{space}
WHERE anon_1.a > 1 AND anon_1.b = 2"""

    database = Database(database_name="db", sqlalchemy_uri="sqlite://")
    compiled = database.compile_sqla_query(query, is_virtual=True)

    assert compiled == expected
@with_feature_flags(OPTIMIZE_SQL=True)
def test_compile_sqla_query(query: Select) -> None:
    """
    Check ``compile_sqla_query`` with the ``OPTIMIZE_SQL`` flag turned on.

    The expected SQL carries the ``a > 1 AND b = 2`` predicates inside the
    inner query, while the outer ``WHERE`` is reduced to ``TRUE AND TRUE``.
    """
    from superset.models.core import Database

    expected = """SELECT
anon_1.a,
anon_1.b
FROM (
SELECT
some_table.a AS a,
some_table.b AS b,
some_table.c AS c
FROM some_table
WHERE
some_table.a > 1 AND some_table.b = 2
) AS anon_1
WHERE
TRUE AND TRUE"""

    database = Database(database_name="db", sqlalchemy_uri="sqlite://")
    compiled = database.compile_sqla_query(query, is_virtual=True)

    assert compiled == expected

View File

@@ -1070,3 +1070,46 @@ def test_is_mutating(engine: str) -> None:
"with source as ( select 1 as one ) select * from source",
engine=engine,
).is_mutating()
def test_optimize() -> None:
    """
    Check the formatted output of ``SQLStatement.optimize`` for two engines.

    The SQL optimization only works with engines that have a corresponding
    dialect. The same nested query is run through ``sqlite``, where the
    predicates are expected to move into the inner query, and ``firebolt``,
    where the statement is expected to come back merely reformatted.
    """
    nested_sql = """
SELECT anon_1.a, anon_1.b
FROM (SELECT some_table.a AS a, some_table.b AS b, some_table.c AS c
FROM some_table) AS anon_1
WHERE anon_1.a > 1 AND anon_1.b = 2
"""

    # (engine, expected formatted SQL), checked in this order
    expectations = (
        (
            "sqlite",
            """SELECT
anon_1.a,
anon_1.b
FROM (
SELECT
some_table.a AS a,
some_table.b AS b,
some_table.c AS c
FROM some_table
WHERE
some_table.a > 1 AND some_table.b = 2
) AS anon_1
WHERE
TRUE AND TRUE""",
        ),
        (
            "firebolt",
            """
SELECT anon_1.a,
anon_1.b
FROM
(SELECT some_table.a AS a,
some_table.b AS b,
some_table.c AS c
FROM some_table) AS anon_1
WHERE anon_1.a > 1
AND anon_1.b = 2""",
        ),
    )

    for engine, expected in expectations:
        assert SQLStatement(nested_sql, engine).optimize().format() == expected