Files
superset2/superset/db_engine_specs/elasticsearch.py
2026-01-21 10:54:01 -08:00

220 lines
8.5 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from datetime import datetime
from typing import Any, Optional
from packaging.version import Version
from sqlalchemy import types
from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec, DatabaseCategory
from superset.db_engine_specs.exceptions import (
SupersetDBAPIDatabaseError,
SupersetDBAPIOperationalError,
SupersetDBAPIProgrammingError,
)
# Module-scoped logger: naming it after the module (instead of using the root
# logger) lets log records from this engine spec be attributed and filtered.
logger = logging.getLogger(__name__)
class ElasticSearchEngineSpec(BaseEngineSpec):  # pylint: disable=abstract-method
    """Engine spec for the ``elasticsearch`` SQLAlchemy dialect.

    Declares the engine's capabilities (no joins, no SQL comments, subqueries
    allowed), its time-grain SQL templates, descriptive metadata, and how
    Python datetimes are rendered as Elasticsearch SQL literals.
    """

    engine = "elasticsearch"
    engine_name = "Elasticsearch"
    time_groupby_inline = True
    allows_joins = False
    allows_subqueries = True
    allows_sql_comments = False

    # Descriptive information about the engine, its drivers and compatible
    # hosted offerings.
    # NOTE(review): presumably consumed by documentation/UI tooling — confirm.
    metadata = {
        "description": (
            "Elasticsearch is a distributed search and analytics engine. "
            "Query data using Elasticsearch SQL or OpenSearch SQL syntax."
        ),
        "logo": "elasticsearch.png",
        "homepage_url": "https://www.elastic.co/elasticsearch/",
        "categories": [DatabaseCategory.SEARCH_NOSQL, DatabaseCategory.OPEN_SOURCE],
        "pypi_packages": ["elasticsearch-dbapi"],
        "connection_string": "elasticsearch+https://{user}:{password}@{host}:9243/",
        "default_port": 9243,
        "parameters": {
            "user": "Elasticsearch username",
            "password": "Elasticsearch password",
            "host": "Elasticsearch host",
        },
        "drivers": [
            {
                "name": "Elasticsearch SQL API (Recommended)",
                "pypi_package": "elasticsearch-dbapi",
                "connection_string": (
                    "elasticsearch+https://{user}:{password}@{host}:9243/"
                ),
                "is_recommended": True,
                "notes": (
                    "For Elastic Cloud and self-hosted Elasticsearch with SQL enabled."
                ),
            },
            {
                "name": "OpenDistro / OpenSearch SQL",
                "pypi_package": "elasticsearch-dbapi",
                "connection_string": (
                    "odelasticsearch+https://{user}:{password}@{host}:9200/"
                ),
                "is_recommended": False,
                "notes": "For OpenDistro Elasticsearch or Amazon OpenSearch Service.",
            },
        ],
        "compatible_databases": [
            {
                "name": "Elastic Cloud",
                "description": (
                    "Elastic Cloud is the official managed Elasticsearch service "
                    "from Elastic. It includes Elasticsearch, Kibana, and "
                    "enterprise features with automatic scaling."
                ),
                "logo": "elasticsearch.png",
                "homepage_url": "https://www.elastic.co/cloud/",
                "categories": [
                    DatabaseCategory.SEARCH_NOSQL,
                    DatabaseCategory.HOSTED_OPEN_SOURCE,
                ],
                "pypi_packages": ["elasticsearch-dbapi"],
                "connection_string": (
                    "elasticsearch+https://{user}:{password}@{deployment}.{region}"
                    ".cloud.es.io:9243/"
                ),
                "docs_url": "https://www.elastic.co/guide/en/cloud/current/",
            },
            {
                "name": "Amazon OpenSearch Service",
                "description": (
                    "Amazon OpenSearch Service (successor to Amazon Elasticsearch "
                    "Service) is a managed search and analytics service on AWS."
                ),
                "logo": "elasticsearch.png",
                "homepage_url": "https://aws.amazon.com/opensearch-service/",
                "categories": [
                    DatabaseCategory.SEARCH_NOSQL,
                    DatabaseCategory.CLOUD_AWS,
                    DatabaseCategory.HOSTED_OPEN_SOURCE,
                ],
                "pypi_packages": ["elasticsearch-dbapi"],
                "connection_string": (
                    "odelasticsearch+https://{user}:{password}@{host}:443/"
                ),
                "docs_url": (
                    "https://docs.aws.amazon.com/opensearch-service/latest/developerguide/"
                ),
            },
        ],
    }

    # Truncation function name per column type.
    # NOTE(review): presumably substituted for ``{func}`` in the templates
    # below by the base class — confirm.
    _date_trunc_functions = {
        "DATETIME": "DATE_TRUNC",
    }

    # SQL templates used to bucket a temporal column by time grain.
    _time_grain_expressions = {
        None: "{col}",
        TimeGrain.SECOND: "{func}('second', {col})",
        TimeGrain.MINUTE: "{func}('minute', {col})",
        TimeGrain.HOUR: "{func}('hour', {col})",
        TimeGrain.DAY: "{func}('day', {col})",
        TimeGrain.WEEK: "{func}('week', {col})",
        TimeGrain.MONTH: "{func}('month', {col})",
        TimeGrain.YEAR: "{func}('year', {col})",
    }

    type_code_map: dict[int, str] = {}  # loaded from get_datatype only if needed

    @classmethod
    def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
        """Return the mapping from ``es`` driver exceptions to Superset ones.

        :return: dict keyed by the driver's DB-API exception classes, with the
            corresponding Superset DB-API exception class as value
        """
        # Imported inside the method so that this module can be imported
        # without the driver package being installed.
        # pylint: disable=import-error,import-outside-toplevel
        import es.exceptions as es_exceptions

        return {
            es_exceptions.DatabaseError: SupersetDBAPIDatabaseError,
            es_exceptions.OperationalError: SupersetDBAPIOperationalError,
            es_exceptions.ProgrammingError: SupersetDBAPIProgrammingError,
        }

    @classmethod
    def convert_dttm(
        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
    ) -> Optional[str]:
        """Render ``dttm`` as an Elasticsearch SQL datetime expression.

        :param target_type: name of the column type the value is compared with
        :param dttm: the datetime to render
        :param db_extra: optional extra settings; the ``"version"`` key, when
            present, selects between ``DATETIME_PARSE`` and ``CAST``
        :return: a SQL expression string for datetime column types, otherwise
            ``None``
        """
        db_extra = db_extra or {}

        sqla_type = cls.get_sqla_column_type(target_type)
        if not isinstance(sqla_type, types.DateTime):
            return None

        es_version = db_extra.get("version")

        # The Elasticsearch CAST function does not honour the time zone
        # setting. From Elasticsearch 7.8 onwards the DATETIME_PARSE function
        # can be used instead to work around this.
        supports_dttm_parse = False
        try:
            if es_version:
                supports_dttm_parse = Version(es_version) >= Version("7.8")
        except Exception:  # pylint: disable=broad-except
            # Best effort: an unparsable version falls back to CAST below.
            # ``logger.exception`` records the message and the traceback once.
            logger.exception("Unexpected error while convert es_version")

        if supports_dttm_parse:
            datetime_formatted = dttm.isoformat(sep=" ", timespec="seconds")
            return (
                f"""DATETIME_PARSE('{datetime_formatted}', 'yyyy-MM-dd HH:mm:ss')"""
            )

        return f"""CAST('{dttm.isoformat(timespec="seconds")}' AS DATETIME)"""
class OpenDistroEngineSpec(BaseEngineSpec):  # pylint: disable=abstract-method
    """Engine spec for the ``odelasticsearch`` (OpenDistro/OpenSearch SQL) driver.

    Exists so the ``odelasticsearch`` driver is supported at runtime; the
    descriptive documentation is kept on ElasticSearchEngineSpec.
    """

    engine = "odelasticsearch"
    engine_name = "OpenSearch (OpenDistro)"

    time_groupby_inline = True
    allows_joins = False
    allows_subqueries = True
    allows_sql_comments = False

    # Each grain is expressed as a ``date_format`` pattern in which the
    # components finer than the grain are written as fixed literals.
    _time_grain_expressions = {
        None: "{col}",
        TimeGrain.SECOND: "date_format({col}, 'yyyy-MM-dd HH:mm:ss.000')",
        TimeGrain.MINUTE: "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')",
        TimeGrain.HOUR: "date_format({col}, 'yyyy-MM-dd HH:00:00.000')",
        TimeGrain.DAY: "date_format({col}, 'yyyy-MM-dd 00:00:00.000')",
        TimeGrain.MONTH: "date_format({col}, 'yyyy-MM-01 00:00:00.000')",
        TimeGrain.YEAR: "date_format({col}, 'yyyy-01-01 00:00:00.000')",
    }

    @classmethod
    def convert_dttm(
        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
    ) -> Optional[str]:
        """Render ``dttm`` as a quoted ISO-8601 literal for datetime columns.

        :param target_type: name of the column type the value is compared with
        :param dttm: the datetime to render
        :param db_extra: accepted for interface compatibility; not used here
        :return: the single-quoted timestamp (seconds precision) for datetime
            column types, otherwise ``None``
        """
        column_type = cls.get_sqla_column_type(target_type)
        if not isinstance(column_type, types.DateTime):
            return None

        iso_timestamp = dttm.isoformat(timespec="seconds")
        return f"'{iso_timestamp}'"

    @staticmethod
    def _mutate_label(label: str) -> str:
        """Return ``label`` with every ``.`` replaced by ``_``."""
        return "_".join(label.split("."))