# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import logging from datetime import datetime from typing import Any, Optional from packaging.version import Version from sqlalchemy import types from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, DatabaseCategory from superset.db_engine_specs.exceptions import ( SupersetDBAPIDatabaseError, SupersetDBAPIOperationalError, SupersetDBAPIProgrammingError, ) logger = logging.getLogger() class ElasticSearchEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method engine = "elasticsearch" engine_name = "Elasticsearch" time_groupby_inline = True allows_joins = False allows_subqueries = True allows_sql_comments = False metadata = { "description": ( "Elasticsearch is a distributed search and analytics engine. " "Query data using Elasticsearch SQL or OpenSearch SQL syntax." ), "logo": "elasticsearch.png", "homepage_url": "https://www.elastic.co/elasticsearch/", "categories": [DatabaseCategory.SEARCH_NOSQL, DatabaseCategory.OPEN_SOURCE], "pypi_packages": ["elasticsearch-dbapi"], "connection_string": "elasticsearch+https://{user}:{password}@{host}:9243/", "default_port": 9243, "parameters": { "user": "Elasticsearch username", "password": "Elasticsearch password", "host": "Elasticsearch host", }, "drivers": [ { "name": "Elasticsearch SQL API (Recommended)", "pypi_package": "elasticsearch-dbapi", "connection_string": "elasticsearch+https://{user}:{password}@{host}:9243/", "is_recommended": True, "notes": ( "For Elastic Cloud and self-hosted Elasticsearch with SQL enabled." ), }, { "name": "OpenDistro / OpenSearch SQL", "pypi_package": "elasticsearch-dbapi", "connection_string": "odelasticsearch+https://{user}:{password}@{host}:9200/", "is_recommended": False, "notes": "For OpenDistro Elasticsearch or Amazon OpenSearch Service.", }, ], "compatible_databases": [ { "name": "Elastic Cloud", "description": ( "Elastic Cloud is the official managed Elasticsearch service " "from Elastic. It includes Elasticsearch, Kibana, and " "enterprise features with automatic scaling." ), "logo": "elasticsearch.png", "homepage_url": "https://www.elastic.co/cloud/", "categories": [ DatabaseCategory.SEARCH_NOSQL, DatabaseCategory.HOSTED_OPEN_SOURCE, ], "pypi_packages": ["elasticsearch-dbapi"], "connection_string": ( "elasticsearch+https://{user}:{password}@{deployment}.{region}" ".cloud.es.io:9243/" ), "docs_url": "https://www.elastic.co/guide/en/cloud/current/", }, { "name": "Amazon OpenSearch Service", "description": ( "Amazon OpenSearch Service (successor to Amazon Elasticsearch " "Service) is a managed search and analytics service on AWS." ), "logo": "elasticsearch.png", "homepage_url": "https://aws.amazon.com/opensearch-service/", "categories": [ DatabaseCategory.SEARCH_NOSQL, DatabaseCategory.CLOUD_AWS, DatabaseCategory.HOSTED_OPEN_SOURCE, ], "pypi_packages": ["elasticsearch-dbapi"], "connection_string": ( "odelasticsearch+https://{user}:{password}@{host}:443/" ), "docs_url": ( "https://docs.aws.amazon.com/opensearch-service/latest/developerguide/" ), }, ], } _date_trunc_functions = { "DATETIME": "DATE_TRUNC", } _time_grain_expressions = { None: "{col}", TimeGrain.SECOND: "{func}('second', {col})", TimeGrain.MINUTE: "{func}('minute', {col})", TimeGrain.HOUR: "{func}('hour', {col})", TimeGrain.DAY: "{func}('day', {col})", TimeGrain.WEEK: "{func}('week', {col})", TimeGrain.MONTH: "{func}('month', {col})", TimeGrain.YEAR: "{func}('year', {col})", } type_code_map: dict[int, str] = {} # loaded from get_datatype only if needed @classmethod def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]: # pylint: disable=import-error,import-outside-toplevel import es.exceptions as es_exceptions return { es_exceptions.DatabaseError: SupersetDBAPIDatabaseError, es_exceptions.OperationalError: SupersetDBAPIOperationalError, es_exceptions.ProgrammingError: SupersetDBAPIProgrammingError, } @classmethod def convert_dttm( cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None ) -> Optional[str]: db_extra = db_extra or {} sqla_type = cls.get_sqla_column_type(target_type) if isinstance(sqla_type, types.DateTime): es_version = db_extra.get("version") # The elasticsearch CAST function does not take effect for the time zone # setting. In elasticsearch7.8 and above, we can use the DATETIME_PARSE # function to solve this problem. supports_dttm_parse = False try: if es_version: supports_dttm_parse = Version(es_version) >= Version("7.8") except Exception as ex: # pylint: disable=broad-except logger.error("Unexpected error while convert es_version", exc_info=True) logger.exception(ex) if supports_dttm_parse: datetime_formatted = dttm.isoformat(sep=" ", timespec="seconds") return ( f"""DATETIME_PARSE('{datetime_formatted}', 'yyyy-MM-dd HH:mm:ss')""" ) return f"""CAST('{dttm.isoformat(timespec="seconds")}' AS DATETIME)""" return None class OpenDistroEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method """OpenDistro/OpenSearch SQL engine spec. Note: Documentation is consolidated in ElasticSearchEngineSpec. This spec exists for runtime support of the odelasticsearch driver. """ time_groupby_inline = True allows_joins = False allows_subqueries = True allows_sql_comments = False _time_grain_expressions = { None: "{col}", TimeGrain.SECOND: "date_format({col}, 'yyyy-MM-dd HH:mm:ss.000')", TimeGrain.MINUTE: "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')", TimeGrain.HOUR: "date_format({col}, 'yyyy-MM-dd HH:00:00.000')", TimeGrain.DAY: "date_format({col}, 'yyyy-MM-dd 00:00:00.000')", TimeGrain.MONTH: "date_format({col}, 'yyyy-MM-01 00:00:00.000')", TimeGrain.YEAR: "date_format({col}, 'yyyy-01-01 00:00:00.000')", } engine = "odelasticsearch" engine_name = "OpenSearch (OpenDistro)" @classmethod def convert_dttm( cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None ) -> Optional[str]: sqla_type = cls.get_sqla_column_type(target_type) if isinstance(sqla_type, types.DateTime): return f"""'{dttm.isoformat(timespec="seconds")}'""" return None @staticmethod def _mutate_label(label: str) -> str: return label.replace(".", "_")