feat: push predicates into virtual datasets (#31486)

This commit is contained in:
Beto Dealmeida
2025-01-08 22:11:28 -05:00
committed by GitHub
parent f29eafd044
commit e4b3ecd372
10 changed files with 191 additions and 5 deletions

View File

@@ -17,6 +17,7 @@
from __future__ import annotations
import copy
import enum
import logging
import re
@@ -31,6 +32,7 @@ from deprecation import deprecated
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, Dialects
from sqlglot.errors import ParseError
from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope
from superset.exceptions import SupersetParseError
@@ -227,6 +229,12 @@ class BaseSQLStatement(Generic[InternalRepresentation]):
"""
raise NotImplementedError()
def optimize(self) -> BaseSQLStatement[InternalRepresentation]:
"""
Return optimized statement.
"""
raise NotImplementedError()
def __str__(self) -> str:
return self.format()
@@ -431,6 +439,19 @@ class SQLStatement(BaseSQLStatement[exp.Expression]):
for eq in set_item.find_all(exp.EQ)
}
def optimize(self) -> SQLStatement:
"""
Return optimized statement.
"""
# only optimize statements that have a custom dialect
if not self._dialect:
return SQLStatement(self._sql, self.engine, self._parsed.copy())
optimized = pushdown_predicates(self._parsed, dialect=self._dialect)
sql = optimized.sql(dialect=self._dialect)
return SQLStatement(sql, self.engine, optimized)
class KQLSplitState(enum.Enum):
"""
@@ -589,6 +610,14 @@ class KustoKQLStatement(BaseSQLStatement[str]):
"""
return self._parsed.startswith(".") and not self._parsed.startswith(".show")
def optimize(self) -> KustoKQLStatement:
"""
Return optimized statement.
Kusto KQL doesn't support optimization, so this method is a no-op.
"""
return KustoKQLStatement(self._sql, self.engine, self._parsed)
class SQLScript:
"""
@@ -643,6 +672,17 @@ class SQLScript:
"""
return any(statement.is_mutating() for statement in self.statements)
def optimize(self) -> SQLScript:
"""
Return optimized script.
"""
script = copy.deepcopy(self)
script.statements = [ # type: ignore
statement.optimize() for statement in self.statements
]
return script
def extract_tables_from_statement(
statement: exp.Expression,