# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import re
from datetime import datetime
from re import Pattern
from typing import Any, Optional

from flask_babel import gettext as __
from sqlalchemy import types
from sqlalchemy.engine.url import URL

from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec, DatabaseCategory
from superset.errors import SupersetErrorType

SYNTAX_ERROR_REGEX = re.compile(
    ": mismatched input '(?P<syntax_error>.*?)'. Expecting: "
)


class AthenaEngineSpec(BaseEngineSpec):
    engine = "awsathena"
    engine_name = "Amazon Athena"
    allows_escaped_colons = False
    disable_ssh_tunneling = True
    # Athena doesn't support IS true/false syntax, use = true/false instead
    use_equality_for_boolean_filters = True
    supports_dynamic_schema = True

    metadata = {
        "description": (
            "Amazon Athena is an interactive query service for "
            "analyzing data in S3 using SQL."
        ),
        "logo": "amazon-athena.jpg",
        "homepage_url": "https://aws.amazon.com/athena/",
        "categories": [
            DatabaseCategory.CLOUD_AWS,
            DatabaseCategory.QUERY_ENGINES,
            DatabaseCategory.PROPRIETARY,
        ],
        "pypi_packages": ["pyathena[pandas]"],
        "connection_string": (
            "awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}"
            "@athena.{region_name}.amazonaws.com/{schema_name}"
            "?s3_staging_dir={s3_staging_dir}"
        ),
        "drivers": [
            {
                "name": "PyAthena (REST)",
                "pypi_package": "pyathena[pandas]",
                "connection_string": (
                    "awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}"
                    "@athena.{region_name}.amazonaws.com/{schema_name}"
                    "?s3_staging_dir={s3_staging_dir}"
                ),
                "is_recommended": True,
                "notes": (
                    "No Java required. URL-encode special characters "
                    "(e.g., s3:// -> s3%3A//)."
                ),
            },
            {
                "name": "PyAthenaJDBC",
                "pypi_package": "PyAthenaJDBC",
                "connection_string": (
                    "awsathena+jdbc://{aws_access_key_id}:{aws_secret_access_key}"
                    "@athena.{region_name}.amazonaws.com/{schema_name}"
                    "?s3_staging_dir={s3_staging_dir}"
                ),
                "is_recommended": False,
                "notes": "Requires Amazon Athena JDBC driver.",
            },
        ],
        "engine_parameters": [
            {
                "name": "IAM Role Assumption",
                "description": "Assume a specific IAM role for queries",
                "json": {"connect_args": {"role_arn": ""}},
            },
        ],
        "notes": (
            "URL-encode special characters in s3_staging_dir "
            "(e.g., s3:// becomes s3%3A//)."
        ),
    }

    _time_grain_expressions = {
        None: "{col}",
        TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))",
        TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))",
        TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))",
        TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))",
        TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))",
        TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))",
        TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))",
        TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))",
        TimeGrain.WEEK_ENDING_SATURDAY: "date_add('day', 5, date_trunc('week', \
date_add('day', 1, CAST({col} AS TIMESTAMP))))",
        TimeGrain.WEEK_STARTING_SUNDAY: "date_add('day', -1, date_trunc('week', \
date_add('day', 1, CAST({col} AS TIMESTAMP))))",
    }

    custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
        SYNTAX_ERROR_REGEX: (
            __(
                "Please check your query for syntax errors at or "
                'near "%(syntax_error)s". Then, try running your query again.'
            ),
            SupersetErrorType.SYNTAX_ERROR,
            {},
        ),
    }

    @classmethod
    def convert_dttm(
        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
    ) -> Optional[str]:
        sqla_type = cls.get_sqla_column_type(target_type)

        if isinstance(sqla_type, types.Date):
            return f"DATE '{dttm.date().isoformat()}'"
        if isinstance(sqla_type, types.TIMESTAMP):
            datetime_formatted = dttm.isoformat(sep=" ", timespec="milliseconds")
            return f"""TIMESTAMP '{datetime_formatted}'"""
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        return "from_unixtime({col})"

    @staticmethod
    def _mutate_label(label: str) -> str:
        """
        Athena only supports lowercase column names and aliases.

        :param label: Expected expression label
        :return: Conditionally mutated label
        """
        return label.lower()

    @classmethod
    def adjust_engine_params(
        cls,
        uri: URL,
        connect_args: dict[str, Any],
        catalog: str | None = None,
        schema: str | None = None,
    ) -> tuple[URL, dict[str, Any]]:
        """
        Adjust the SQLAlchemy URI for Athena with a provided catalog and schema.

        For AWS Athena the SQLAlchemy URI looks like this:

            awsathena+rest://athena.{region_name}.amazonaws.com:443/{schema_name}?catalog_name={catalog_name}&s3_staging_dir={s3_staging_dir}

        """
        if catalog:
            uri = uri.update_query_dict({"catalog_name": catalog})
        if schema:
            uri = uri.set(database=schema)

        return uri, connect_args

    @classmethod
    def get_schema_from_engine_params(
        cls,
        sqlalchemy_uri: URL,
        connect_args: dict[str, Any],
    ) -> str | None:
        """
        Return the configured schema.

        For AWS Athena the SQLAlchemy URI looks like this:

            awsathena+rest://athena.{region_name}.amazonaws.com:443/{schema_name}?catalog_name={catalog_name}&s3_staging_dir={s3_staging_dir}

        """
        return sqlalchemy_uri.database
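

# ---------------------------------------------------------------------------
# Illustrative sketch only, not part of the engine spec above: a rough look at
# the SQL literals and URI adjustments this spec produces. The datetime,
# region, catalog, schema, and staging-dir values below are made-up examples,
# and running this assumes the module can be imported outside a running
# Superset app (which may require additional setup).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    example_dttm = datetime(2024, 1, 2, 3, 4, 5, 678000)

    # convert_dttm renders Python datetimes as Athena literals, e.g.
    # DATE '2024-01-02' and TIMESTAMP '2024-01-02 03:04:05.678'.
    print(AthenaEngineSpec.convert_dttm("DATE", example_dttm))
    print(AthenaEngineSpec.convert_dttm("TIMESTAMP", example_dttm))

    # adjust_engine_params injects the catalog as a query parameter and the
    # schema as the URI database component.
    example_uri = URL.create(
        "awsathena+rest",
        host="athena.us-east-1.amazonaws.com",
        port=443,
        database="default",
        query={"s3_staging_dir": "s3%3A//my-bucket/results/"},
    )
    adjusted_uri, _ = AthenaEngineSpec.adjust_engine_params(
        example_uri, {}, catalog="awsdatacatalog", schema="my_schema"
    )
    print(adjusted_uri)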