# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import annotations

import logging
from datetime import datetime
from typing import Any, TYPE_CHECKING

from sqlalchemy import types

from superset import is_feature_enabled
from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec, DatabaseCategory
from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError
from superset.exceptions import SupersetException
from superset.utils import core as utils, json
from superset.utils.core import QuerySource

if TYPE_CHECKING:
    from superset.connectors.sqla.models import TableColumn
    from superset.models.core import Database

logger = logging.getLogger()


class DruidEngineSpec(BaseEngineSpec):
    """Engine spec for Druid.io"""

    engine = "druid"
    engine_name = "Apache Druid"
    allows_joins = is_feature_enabled("DRUID_JOINS")
    allows_subqueries = True
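
    # Static, descriptive metadata for the Druid connector: description, logo,
    # connection-string template, SSL notes, and compatible distributions.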
    metadata = {
        "description": (
            "Apache Druid is a high performance real-time analytics database."
        ),
        "logo": "druid.png",
        "homepage_url": "https://druid.apache.org/",
        "categories": [
            DatabaseCategory.APACHE_PROJECTS,
            DatabaseCategory.TIME_SERIES,
            DatabaseCategory.OPEN_SOURCE,
        ],
        "pypi_packages": ["pydruid"],
        "connection_string": (
            "druid://{username}:{password}@{host}:{port}/druid/v2/sql"
        ),
        "default_port": 9088,
        "parameters": {
            "username": "Database username",
            "password": "Database password",
            "host": "IP address or URL of the host",
            "port": "Default 9088",
        },
        "ssl_configuration": {
            "custom_certificate": (
                "Add the certificate in the Root Certificate field. "
                "pydruid will automatically use HTTPS."
            ),
            "disable_ssl_verification": {
                "engine_params": {
                    "connect_args": {"scheme": "https", "ssl_verify_cert": False}
                }
            },
        },
        "advanced_features": {
            "aggregations": (
                "Define common aggregations in the datasource edit view "
                "under the List Druid Column tab."
            ),
            "post_aggregations": (
                "Create metrics with postagg as the Metric Type and provide "
                "a valid JSON post-aggregation definition."
            ),
        },
        "notes": (
            "A native Druid connector ships with Superset "
            "(behind the DRUID_IS_ACTIVE flag), but the SQLAlchemy connector "
            "via pydruid is preferred."
        ),
        "compatible_databases": [
            {
                "name": "Imply",
                "description": (
                    "Imply is a fully managed cloud platform and enterprise "
                    "distribution built on Apache Druid. It provides real-time "
                    "analytics with enterprise security and support."
                ),
                "logo": "imply.png",
                "homepage_url": "https://imply.io/",
                "categories": [
                    DatabaseCategory.TIME_SERIES,
                    DatabaseCategory.CLOUD_DATA_WAREHOUSES,
                    DatabaseCategory.HOSTED_OPEN_SOURCE,
                ],
                "pypi_packages": ["pydruid"],
                "connection_string": (
                    "druid://{username}:{password}@{host}/druid/v2/sql"
                ),
                "docs_url": "https://docs.imply.io/",
            },
        ],
    }
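
    # Map Superset time grains to Druid SQL TIME_FLOOR expressions using
    # ISO 8601 durations (e.g. PT1M, P1D, P1W).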
    _time_grain_expressions = {
        None: "{col}",
        TimeGrain.SECOND: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1S')",
        TimeGrain.FIVE_SECONDS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5S')",
        TimeGrain.THIRTY_SECONDS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30S')",
        TimeGrain.MINUTE: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1M')",
        TimeGrain.FIVE_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5M')",
        TimeGrain.TEN_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT10M')",
        TimeGrain.FIFTEEN_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT15M')",
        TimeGrain.THIRTY_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30M')",
        TimeGrain.HOUR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1H')",
        TimeGrain.SIX_HOURS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT6H')",
        TimeGrain.DAY: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1D')",
        TimeGrain.WEEK: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1W')",
        TimeGrain.MONTH: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1M')",
        TimeGrain.QUARTER: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P3M')",
        TimeGrain.YEAR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1Y')",
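        # TIME_FLOOR with 'P1W' yields weeks starting on Monday; the TIME_SHIFT
        # calls shift the timestamp before and after flooring so buckets land on
        # Saturday-ending / Sunday-starting weeks instead.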
        TimeGrain.WEEK_ENDING_SATURDAY: (
            "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), "
            "'P1D', 1), 'P1W'), 'P1D', 5)"
        ),
        TimeGrain.WEEK_STARTING_SUNDAY: (
            "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), "
            "'P1D', 1), 'P1W'), 'P1D', -1)"
        ),
    }

    @classmethod
    def alter_new_orm_column(cls, orm_col: TableColumn) -> None:
        """Flag Druid's built-in __time column as a datetime column."""
        if orm_col.column_name == "__time":
            orm_col.is_dttm = True

    @staticmethod
    def get_extra_params(
        database: Database, source: QuerySource | None = None
    ) -> dict[str, Any]:
        """
        For Druid, the path to an SSL certificate is placed in `connect_args`.

        :param database: database instance from which to extract extras
        :param source: in which context is the connection needed
        :raises CertificateException: If certificate is not valid/unparseable
        :raises SupersetException: If database extra json payload is unparseable
        """
        try:
            extra = json.loads(database.extra or "{}")
        except json.JSONDecodeError as ex:
            raise SupersetException("Unable to parse database extras") from ex

        if database.server_cert:
            engine_params = extra.get("engine_params", {})
            connect_args = engine_params.get("connect_args", {})
            connect_args["scheme"] = "https"
            # Write the configured certificate to a file and point pydruid at
            # its path for certificate verification.
            path = utils.create_ssl_cert_file(database.server_cert)
            connect_args["ssl_verify_cert"] = path
            engine_params["connect_args"] = connect_args
            extra["engine_params"] = engine_params
        return extra

    @classmethod
    def convert_dttm(
        cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
    ) -> str | None:
        sqla_type = cls.get_sqla_column_type(target_type)

        if isinstance(sqla_type, types.Date):
            return f"CAST(TIME_PARSE('{dttm.date().isoformat()}') AS DATE)"
        if isinstance(sqla_type, (types.DateTime, types.TIMESTAMP)):
            return f"""TIME_PARSE('{dttm.isoformat(timespec="seconds")}')"""
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        """
        Convert from number of seconds since the epoch to a timestamp.
        """
        return "MILLIS_TO_TIMESTAMP({col} * 1000)"

    @classmethod
    def epoch_ms_to_dttm(cls) -> str:
        """
        Convert from number of milliseconds since the epoch to a timestamp.
        """
        return "MILLIS_TO_TIMESTAMP({col})"

    @classmethod
    def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
        """Map driver-level exceptions to Superset DB-API exceptions."""
        # pylint: disable=import-outside-toplevel
        from requests import exceptions as requests_exceptions

        return {
            requests_exceptions.ConnectionError: SupersetDBAPIConnectionError,
        }