mirror of
https://github.com/apache/superset.git
synced 2026-04-20 16:44:46 +00:00
Add option to specify type specific date truncation functions (#9238)
This commit is contained in:
@@ -23,7 +23,7 @@ from superset.db_engine_specs.base import BaseEngineSpec
|
||||
class AthenaEngineSpec(BaseEngineSpec):
|
||||
engine = "awsathena"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))",
|
||||
"PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))",
|
||||
|
||||
@@ -132,7 +132,8 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||
"""Abstract class for database engine specific configurations"""
|
||||
|
||||
engine = "base" # str as defined in sqlalchemy.engine.engine
|
||||
_time_grain_functions: Dict[Optional[str], str] = {}
|
||||
_date_trunc_functions: Dict[str, str] = {}
|
||||
_time_grain_expressions: Dict[Optional[str], str] = {}
|
||||
time_groupby_inline = False
|
||||
limit_method = LimitMethod.FORCE_LIMIT
|
||||
time_secondary_columns = False
|
||||
@@ -204,7 +205,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||
|
||||
@classmethod
|
||||
def get_timestamp_expr(
|
||||
cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
|
||||
cls,
|
||||
col: ColumnClause,
|
||||
pdf: Optional[str],
|
||||
time_grain: Optional[str],
|
||||
type_: Optional[str] = None,
|
||||
) -> TimestampExpression:
|
||||
"""
|
||||
Construct a TimestampExpression to be used in a SQLAlchemy query.
|
||||
@@ -212,14 +217,19 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||
:param col: Target column for the TimestampExpression
|
||||
:param pdf: date format (seconds or milliseconds)
|
||||
:param time_grain: time grain, e.g. P1Y for 1 year
|
||||
:param type_: the source column type
|
||||
:return: TimestampExpression object
|
||||
"""
|
||||
if time_grain:
|
||||
time_expr = cls.get_time_grain_functions().get(time_grain)
|
||||
time_expr = cls.get_time_grain_expressions().get(time_grain)
|
||||
if not time_expr:
|
||||
raise NotImplementedError(
|
||||
f"No grain spec for {time_grain} for database {cls.engine}"
|
||||
)
|
||||
if type_ and "{func}" in time_expr:
|
||||
date_trunc_function = cls._date_trunc_functions.get(type_)
|
||||
if date_trunc_function:
|
||||
time_expr = time_expr.replace("{func}", date_trunc_function)
|
||||
else:
|
||||
time_expr = "{col}"
|
||||
|
||||
@@ -240,31 +250,30 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||
"""
|
||||
|
||||
ret_list = []
|
||||
time_grain_functions = cls.get_time_grain_functions()
|
||||
time_grains = builtin_time_grains.copy()
|
||||
time_grains.update(config["TIME_GRAIN_ADDONS"])
|
||||
for duration, func in time_grain_functions.items():
|
||||
for duration, func in cls.get_time_grain_expressions().items():
|
||||
if duration in time_grains:
|
||||
name = time_grains[duration]
|
||||
ret_list.append(TimeGrain(name, _(name), func, duration))
|
||||
return tuple(ret_list)
|
||||
|
||||
@classmethod
|
||||
def get_time_grain_functions(cls) -> Dict[Optional[str], str]:
|
||||
def get_time_grain_expressions(cls) -> Dict[Optional[str], str]:
|
||||
"""
|
||||
Return a dict of all supported time grains including any potential added grains
|
||||
but excluding any potentially blacklisted grains in the config file.
|
||||
|
||||
:return: All time grain functions supported by the engine
|
||||
:return: All time grain expressions supported by the engine
|
||||
"""
|
||||
# TODO: use @memoize decorator or similar to avoid recomputation on every call
|
||||
time_grain_functions = cls._time_grain_functions.copy()
|
||||
grain_addon_functions = config["TIME_GRAIN_ADDON_FUNCTIONS"]
|
||||
time_grain_functions.update(grain_addon_functions.get(cls.engine, {}))
|
||||
time_grain_expressions = cls._time_grain_expressions.copy()
|
||||
grain_addon_expressions = config["TIME_GRAIN_ADDON_EXPRESSIONS"]
|
||||
time_grain_expressions.update(grain_addon_expressions.get(cls.engine, {}))
|
||||
blacklist: List[str] = config["TIME_GRAIN_BLACKLIST"]
|
||||
for key in blacklist:
|
||||
time_grain_functions.pop(key)
|
||||
return time_grain_functions
|
||||
time_grain_expressions.pop(key)
|
||||
return time_grain_expressions
|
||||
|
||||
@classmethod
|
||||
def make_select_compatible(
|
||||
|
||||
@@ -49,16 +49,23 @@ class BigQueryEngineSpec(BaseEngineSpec):
|
||||
"""
|
||||
arraysize = 5000
|
||||
|
||||
_time_grain_functions = {
|
||||
_date_trunc_functions = {
|
||||
"DATE": "DATE_TRUNC",
|
||||
"DATETIME": "DATETIME_TRUNC",
|
||||
"TIME": "TIME_TRUNC",
|
||||
"TIMESTAMP": "TIMESTAMP_TRUNC",
|
||||
}
|
||||
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "TIMESTAMP_TRUNC({col}, SECOND)",
|
||||
"PT1M": "TIMESTAMP_TRUNC({col}, MINUTE)",
|
||||
"PT1H": "TIMESTAMP_TRUNC({col}, HOUR)",
|
||||
"P1D": "TIMESTAMP_TRUNC({col}, DAY)",
|
||||
"P1W": "TIMESTAMP_TRUNC({col}, WEEK)",
|
||||
"P1M": "TIMESTAMP_TRUNC({col}, MONTH)",
|
||||
"P0.25Y": "TIMESTAMP_TRUNC({col}, QUARTER)",
|
||||
"P1Y": "TIMESTAMP_TRUNC({col}, YEAR)",
|
||||
"PT1S": "{func}({col}, SECOND)",
|
||||
"PT1M": "{func}({col}, MINUTE)",
|
||||
"PT1H": "{func}({col}, HOUR)",
|
||||
"P1D": "{func}({col}, DAY)",
|
||||
"P1W": "{func}({col}, WEEK)",
|
||||
"P1M": "{func}({col}, MONTH)",
|
||||
"P0.25Y": "{func}({col}, QUARTER)",
|
||||
"P1Y": "{func}({col}, YEAR)",
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -68,13 +75,15 @@ class BigQueryEngineSpec(BaseEngineSpec):
|
||||
return f"CAST('{dttm.date().isoformat()}' AS DATE)"
|
||||
if tt == "DATETIME":
|
||||
return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS DATETIME)"""
|
||||
if tt == "TIME":
|
||||
return f"""CAST('{dttm.strftime("%H:%M:%S.%f")}' AS TIME)"""
|
||||
if tt == "TIMESTAMP":
|
||||
return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS TIMESTAMP)"""
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple]:
|
||||
data = super(BigQueryEngineSpec, cls).fetch_data(cursor, limit)
|
||||
data = super().fetch_data(cursor, limit)
|
||||
if data and type(data[0]).__name__ == "Row":
|
||||
data = [r.values() for r in data] # type: ignore
|
||||
return data
|
||||
|
||||
@@ -28,7 +28,7 @@ class ClickHouseEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
time_secondary_columns = True
|
||||
time_groupby_inline = True
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1M": "toStartOfMinute(toDateTime({col}))",
|
||||
"PT5M": "toDateTime(intDiv(toUInt32(toDateTime({col})), 300)*300)",
|
||||
|
||||
@@ -23,7 +23,7 @@ class Db2EngineSpec(BaseEngineSpec):
|
||||
force_column_alias_quotes = True
|
||||
max_column_name_length = 30
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "CAST({col} as TIMESTAMP)" " - MICROSECOND({col}) MICROSECONDS",
|
||||
"PT1M": "CAST({col} as TIMESTAMP)"
|
||||
|
||||
@@ -21,7 +21,7 @@ class DremioBaseEngineSpec(BaseEngineSpec):
|
||||
|
||||
engine = "dremio"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATE_TRUNC('second', {col})",
|
||||
"PT1M": "DATE_TRUNC('minute', {col})",
|
||||
|
||||
@@ -28,7 +28,7 @@ class DrillEngineSpec(BaseEngineSpec):
|
||||
|
||||
engine = "drill"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "NEARESTDATE({col}, 'SECOND')",
|
||||
"PT1M": "NEARESTDATE({col}, 'MINUTE')",
|
||||
|
||||
@@ -31,7 +31,7 @@ class DruidEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
allows_joins = False
|
||||
allows_subqueries = True
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "FLOOR({col} TO SECOND)",
|
||||
"PT1M": "FLOOR({col} TO MINUTE)",
|
||||
|
||||
@@ -27,7 +27,7 @@ class ElasticSearchEngineSpec(BaseEngineSpec): # pylint: disable=abstract-metho
|
||||
allows_joins = False
|
||||
allows_subqueries = True
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)",
|
||||
"PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
|
||||
|
||||
@@ -26,7 +26,7 @@ class ExasolEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
max_column_name_length = 128
|
||||
|
||||
# Exasol's DATE_TRUNC function is PostgresSQL compatible
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATE_TRUNC('second', {col})",
|
||||
"PT1M": "DATE_TRUNC('minute', {col})",
|
||||
|
||||
@@ -27,7 +27,7 @@ class HanaEngineSpec(PostgresBaseEngineSpec):
|
||||
force_column_alias_quotes = True
|
||||
max_column_name_length = 30
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,20))",
|
||||
"PT1M": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,17) || '00')",
|
||||
|
||||
@@ -27,7 +27,7 @@ class ImpalaEngineSpec(BaseEngineSpec):
|
||||
|
||||
engine = "impala"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1M": "TRUNC({col}, 'MI')",
|
||||
"PT1H": "TRUNC({col}, 'HH')",
|
||||
|
||||
@@ -25,7 +25,7 @@ class KylinEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
|
||||
engine = "kylin"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO SECOND) AS TIMESTAMP)",
|
||||
"PT1M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MINUTE) AS TIMESTAMP)",
|
||||
|
||||
@@ -29,7 +29,7 @@ class MssqlEngineSpec(BaseEngineSpec):
|
||||
limit_method = LimitMethod.WRAP_SQL
|
||||
max_column_name_length = 128
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATEADD(second, DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')",
|
||||
"PT1M": "DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)",
|
||||
|
||||
@@ -29,7 +29,7 @@ class MySQLEngineSpec(BaseEngineSpec):
|
||||
engine = "mysql"
|
||||
max_column_name_length = 64
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATE_ADD(DATE({col}), "
|
||||
"INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60"
|
||||
|
||||
@@ -26,7 +26,7 @@ class OracleEngineSpec(BaseEngineSpec):
|
||||
force_column_alias_quotes = True
|
||||
max_column_name_length = 30
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "CAST({col} as DATE)",
|
||||
"PT1M": "TRUNC(CAST({col} as DATE), 'MI')",
|
||||
|
||||
@@ -29,7 +29,7 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
allows_column_aliases = False
|
||||
|
||||
# Pinot does its own conversion below
|
||||
_time_grain_functions: Dict[Optional[str], str] = {
|
||||
_time_grain_expressions: Dict[Optional[str], str] = {
|
||||
"PT1S": "1:SECONDS",
|
||||
"PT1M": "1:MINUTES",
|
||||
"PT1H": "1:HOURS",
|
||||
@@ -51,7 +51,11 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
|
||||
@classmethod
|
||||
def get_timestamp_expr(
|
||||
cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
|
||||
cls,
|
||||
col: ColumnClause,
|
||||
pdf: Optional[str],
|
||||
time_grain: Optional[str],
|
||||
type_: Optional[str] = None,
|
||||
) -> TimestampExpression:
|
||||
is_epoch = pdf in ("epoch_s", "epoch_ms")
|
||||
|
||||
@@ -75,7 +79,7 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
else:
|
||||
seconds_or_ms = "MILLISECONDS" if pdf == "epoch_ms" else "SECONDS"
|
||||
tf = f"1:{seconds_or_ms}:EPOCH"
|
||||
granularity = cls.get_time_grain_functions().get(time_grain)
|
||||
granularity = cls.get_time_grain_expressions().get(time_grain)
|
||||
if not granularity:
|
||||
raise NotImplementedError("No pinot grain spec for " + str(time_grain))
|
||||
# In pinot the output is a string since there is no timestamp column like pg
|
||||
|
||||
@@ -38,7 +38,7 @@ class PostgresBaseEngineSpec(BaseEngineSpec):
|
||||
|
||||
engine = ""
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATE_TRUNC('second', {col})",
|
||||
"PT1M": "DATE_TRUNC('minute', {col})",
|
||||
|
||||
@@ -100,7 +100,7 @@ def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
|
||||
class PrestoEngineSpec(BaseEngineSpec):
|
||||
engine = "presto"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))",
|
||||
"PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))",
|
||||
|
||||
@@ -28,7 +28,7 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
|
||||
force_column_alias_quotes = True
|
||||
max_column_name_length = 256
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATE_TRUNC('SECOND', {col})",
|
||||
"PT1M": "DATE_TRUNC('MINUTE', {col})",
|
||||
|
||||
@@ -30,7 +30,7 @@ if TYPE_CHECKING:
|
||||
class SqliteEngineSpec(BaseEngineSpec):
|
||||
engine = "sqlite"
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1S": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
|
||||
"PT1M": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
|
||||
|
||||
@@ -24,7 +24,7 @@ class TeradataEngineSpec(BaseEngineSpec):
|
||||
limit_method = LimitMethod.WRAP_SQL
|
||||
max_column_name_length = 30 # since 14.10 this is 128
|
||||
|
||||
_time_grain_functions = {
|
||||
_time_grain_expressions = {
|
||||
None: "{col}",
|
||||
"PT1M": "TRUNC(CAST({col} as DATE), 'MI')",
|
||||
"PT1H": "TRUNC(CAST({col} as DATE), 'HH')",
|
||||
|
||||
Reference in New Issue
Block a user