fix(mssql): support cte in virtual tables (#18567)

* Fix for handling regular CTE queries with MSSQL,#8074

* Moved the get_cte_query function from mssql.py to base.py so it can be used irrespective of the db engine

* Fix for handling regular CTE queries with MSSQL,#8074

* Moved the get_cte_query function from mssql.py to base.py so it can be used irrespective of the db engine

* Unit test added for the db engine CTE SQL parsing.

Unit test added for the db engine CTE SQL parsing.  Removed additional spaces from the CTE parsing SQL generation.

* implement in sqla model

* lint + cleanup

Co-authored-by: Ville Brofeldt <ville.v.brofeldt@gmail.com>
This commit is contained in:
Sujith Kumar S
2022-02-10 13:58:05 +05:30
committed by GitHub
parent 00eb6b1f57
commit b8aef10098
6 changed files with 165 additions and 13 deletions

View File

@@ -54,6 +54,7 @@ from sqlalchemy.orm import Session
from sqlalchemy.sql import quoted_name, text
from sqlalchemy.sql.expression import ColumnClause, Select, TextAsFrom, TextClause
from sqlalchemy.types import TypeEngine
from sqlparse.tokens import CTE
from typing_extensions import TypedDict
from superset import security_manager, sql_parse
@@ -80,6 +81,9 @@ ColumnTypeMapping = Tuple[
# getLogger() is called without a name, so this is the root logger
logger = logging.getLogger()
# Alias under which the main query is wrapped when it is appended as an extra
# CTE (see BaseEngineSpec.get_cte_query)
CTE_ALIAS = "__cte"
class TimeGrain(NamedTuple):
name: str # TODO: redundant field, remove
label: str
@@ -292,6 +296,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
# But for backward compatibility, False by default
allows_hidden_cc_in_orderby = False
# Whether the engine allows a CTE inside a subquery.
# If True, the CTE may be used within a subquery;
# if False, it must be emitted as a regular CTE.
allows_cte_in_subquery = True
force_column_alias_quotes = False
arraysize = 0
max_column_name_length = 0
@@ -663,6 +672,31 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
parsed_query = sql_parse.ParsedQuery(sql)
return parsed_query.set_or_update_query_limit(limit)
@classmethod
def get_cte_query(cls, sql: str) -> Optional[str]:
    """
    Convert CTE based SQL into a form usable for virtual table conversion.

    Only applies to engine specs where ``allows_cte_in_subquery`` is False.
    The statement following the CTE definition(s) is wrapped as an
    additional CTE aliased as `__cte` and appended to the existing ones.

    :param sql: SQL query
    :return: CTE with the main select query aliased as `__cte`, or None if
        the engine allows CTEs in subqueries, the SQL yields no statement,
        the SQL does not start with a CTE, or nothing follows ``WITH``
    """
    if cls.allows_cte_in_subquery:
        return None

    statements = sqlparse.parse(sql)
    if not statements:
        # Nothing was parsed (e.g. empty SQL); indexing [0] would raise
        return None
    stmt = statements[0]

    # The first meaningful token of a CTE query is the WITH keyword
    idx, with_token = stmt.token_next(-1, skip_ws=True, skip_cm=True)
    if not (with_token and with_token.ttype == CTE):
        return None

    # The token after WITH carries the CTE definition(s)
    idx, cte_token = stmt.token_next(idx)
    if cte_token is None:
        # WITH is not followed by anything, so there is nothing to rewrite
        return None
    idx = stmt.token_index(cte_token) + 1

    # Everything after the CTE definition(s) is the main query
    remainder = "".join(str(tok) for tok in stmt.tokens[idx:]).strip()
    return f"WITH {cte_token.value},\n{CTE_ALIAS} AS (\n{remainder}\n)"
@classmethod
def df_to_sql(
cls,