# Mirror of https://github.com/apache/superset.git (synced 2026-04-07 18:35:15 +00:00)
# File stats: 781 lines, 30 KiB, Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from datetime import datetime
|
|
from re import Pattern
|
|
from typing import Any, Optional, TYPE_CHECKING
|
|
|
|
from flask_babel import gettext as __
|
|
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION, ENUM, JSON
|
|
from sqlalchemy.dialects.postgresql.base import PGInspector
|
|
from sqlalchemy.engine.reflection import Inspector
|
|
from sqlalchemy.engine.url import URL
|
|
from sqlalchemy.types import Date, DateTime, String
|
|
|
|
from superset.constants import TimeGrain
|
|
from superset.db_engine_specs.base import (
|
|
BaseEngineSpec,
|
|
BasicParametersMixin,
|
|
DatabaseCategory,
|
|
)
|
|
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
|
|
from superset.exceptions import SupersetException, SupersetSecurityException
|
|
from superset.models.sql_lab import Query
|
|
from superset.sql.parse import process_jinja_sql
|
|
from superset.utils import core as utils, json
|
|
from superset.utils.core import GenericDataType, QuerySource
|
|
|
|
if TYPE_CHECKING:
|
|
from superset.models.core import Database # pragma: no cover
|
|
|
|
logger = logging.getLogger()
|
|
|
|
|
|
# Regular expressions to catch custom errors
|
|
CONNECTION_INVALID_USERNAME_REGEX = re.compile(
|
|
'role "(?P<username>.*?)" does not exist'
|
|
)
|
|
CONNECTION_INVALID_PASSWORD_REGEX = re.compile(
|
|
'password authentication failed for user "(?P<username>.*?)"'
|
|
)
|
|
CONNECTION_INVALID_PASSWORD_NEEDED_REGEX = re.compile("no password supplied")
|
|
CONNECTION_INVALID_HOSTNAME_REGEX = re.compile(
|
|
'could not translate host name "(?P<hostname>.*?)" to address: '
|
|
"nodename nor servname provided, or not known"
|
|
)
|
|
CONNECTION_PORT_CLOSED_REGEX = re.compile(
|
|
r"could not connect to server: Connection refused\s+Is the server "
|
|
r'running on host "(?P<hostname>.*?)" (\(.*?\) )?and accepting\s+TCP/IP '
|
|
r"connections on port (?P<port>.*?)\?"
|
|
)
|
|
CONNECTION_HOST_DOWN_REGEX = re.compile(
|
|
r"could not connect to server: (?P<reason>.*?)\s+Is the server running on "
|
|
r'host "(?P<hostname>.*?)" (\(.*?\) )?and accepting\s+TCP/IP '
|
|
r"connections on port (?P<port>.*?)\?"
|
|
)
|
|
CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile(
|
|
'database "(?P<database>.*?)" does not exist'
|
|
)
|
|
COLUMN_DOES_NOT_EXIST_REGEX = re.compile(
|
|
r'postgresql error: column "(?P<column_name>.+?)" '
|
|
r"does not exist\s+LINE (?P<location>\d+?)"
|
|
)
|
|
|
|
SYNTAX_ERROR_REGEX = re.compile('syntax error at or near "(?P<syntax_error>.*?)"')
|
|
|
|
|
|
def parse_options(connect_args: dict[str, Any]) -> dict[str, str]:
    """
    Parse the ``options`` entry of ``connect_args`` into a dictionary.

    The value is a libpq-style option string such as
    ``"-csearch_path=public -c statement_timeout=0"``; every ``-c``-prefixed
    ``key=value`` pair becomes one dictionary entry, with surrounding
    whitespace stripped from both key and value. A missing or non-string
    ``options`` value yields an empty dictionary.
    """
    raw = connect_args.get("options")
    if not isinstance(raw, str):
        return {}

    parsed: dict[str, str] = {}
    for chunk in re.split(r"-c\s?", raw):
        if "=" not in chunk:
            continue
        key, value = chunk.strip().split("=", 1)
        parsed[key.strip()] = value.strip()

    return parsed
|
|
|
|
|
|
class PostgresBaseEngineSpec(BaseEngineSpec):
|
|
"""Abstract class for Postgres 'like' databases"""
|
|
|
|
engine = ""
|
|
engine_name = "PostgreSQL"
|
|
supports_multivalues_insert = True
|
|
|
|
_time_grain_expressions = {
|
|
None: "{col}",
|
|
TimeGrain.SECOND: "DATE_TRUNC('second', {col})",
|
|
TimeGrain.FIVE_SECONDS: "DATE_TRUNC('minute', {col}) + INTERVAL '5 seconds' * FLOOR(EXTRACT(SECOND FROM {col}) / 5)", # noqa: E501
|
|
TimeGrain.THIRTY_SECONDS: "DATE_TRUNC('minute', {col}) + INTERVAL '30 seconds' * FLOOR(EXTRACT(SECOND FROM {col}) / 30)", # noqa: E501
|
|
TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})",
|
|
TimeGrain.FIVE_MINUTES: "DATE_TRUNC('hour', {col}) + INTERVAL '5 minutes' * FLOOR(EXTRACT(MINUTE FROM {col}) / 5)", # noqa: E501
|
|
TimeGrain.TEN_MINUTES: "DATE_TRUNC('hour', {col}) + INTERVAL '10 minutes' * FLOOR(EXTRACT(MINUTE FROM {col}) / 10)", # noqa: E501
|
|
TimeGrain.FIFTEEN_MINUTES: "DATE_TRUNC('hour', {col}) + INTERVAL '15 minutes' * FLOOR(EXTRACT(MINUTE FROM {col}) / 15)", # noqa: E501
|
|
TimeGrain.THIRTY_MINUTES: "DATE_TRUNC('hour', {col}) + INTERVAL '30 minutes' * FLOOR(EXTRACT(MINUTE FROM {col}) / 30)", # noqa: E501
|
|
TimeGrain.HOUR: "DATE_TRUNC('hour', {col})",
|
|
TimeGrain.DAY: "DATE_TRUNC('day', {col})",
|
|
TimeGrain.WEEK: "DATE_TRUNC('week', {col})",
|
|
TimeGrain.MONTH: "DATE_TRUNC('month', {col})",
|
|
TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})",
|
|
TimeGrain.YEAR: "DATE_TRUNC('year', {col})",
|
|
}
|
|
|
|
custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
|
|
CONNECTION_INVALID_USERNAME_REGEX: (
|
|
__('The username "%(username)s" does not exist.'),
|
|
SupersetErrorType.CONNECTION_INVALID_USERNAME_ERROR,
|
|
{"invalid": ["username"]},
|
|
),
|
|
CONNECTION_INVALID_PASSWORD_REGEX: (
|
|
__('The password provided for username "%(username)s" is incorrect.'),
|
|
SupersetErrorType.CONNECTION_INVALID_PASSWORD_ERROR,
|
|
{"invalid": ["username", "password"]},
|
|
),
|
|
CONNECTION_INVALID_PASSWORD_NEEDED_REGEX: (
|
|
__("Please re-enter the password."),
|
|
SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
|
|
{"invalid": ["password"]},
|
|
),
|
|
CONNECTION_INVALID_HOSTNAME_REGEX: (
|
|
__('The hostname "%(hostname)s" cannot be resolved.'),
|
|
SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
|
|
{"invalid": ["host"]},
|
|
),
|
|
CONNECTION_PORT_CLOSED_REGEX: (
|
|
__('Port %(port)s on hostname "%(hostname)s" refused the connection.'),
|
|
SupersetErrorType.CONNECTION_PORT_CLOSED_ERROR,
|
|
{"invalid": ["host", "port"]},
|
|
),
|
|
CONNECTION_HOST_DOWN_REGEX: (
|
|
__(
|
|
'The host "%(hostname)s" might be down, and can\'t be '
|
|
"reached on port %(port)s."
|
|
),
|
|
SupersetErrorType.CONNECTION_HOST_DOWN_ERROR,
|
|
{"invalid": ["host", "port"]},
|
|
),
|
|
CONNECTION_UNKNOWN_DATABASE_REGEX: (
|
|
__('Unable to connect to database "%(database)s".'),
|
|
SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
|
|
{"invalid": ["database"]},
|
|
),
|
|
COLUMN_DOES_NOT_EXIST_REGEX: (
|
|
__(
|
|
'We can\'t seem to resolve the column "%(column_name)s" at '
|
|
"line %(location)s.",
|
|
),
|
|
SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
|
|
{},
|
|
),
|
|
SYNTAX_ERROR_REGEX: (
|
|
__(
|
|
"Please check your query for syntax errors at or "
|
|
'near "%(syntax_error)s". Then, try running your query again.'
|
|
),
|
|
SupersetErrorType.SYNTAX_ERROR,
|
|
{},
|
|
),
|
|
}
|
|
|
|
@classmethod
|
|
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]:
|
|
if not cursor.description:
|
|
return []
|
|
return super().fetch_data(cursor, limit)
|
|
|
|
    @classmethod
    def epoch_to_dttm(cls) -> str:
        """
        Return a SQL expression template that converts an epoch-seconds
        column (``{col}``) into a Postgres timestamp via interval arithmetic.
        """
        return "(timestamp 'epoch' + {col} * interval '1 second')"
|
|
|
|
@classmethod
|
|
def convert_dttm(
|
|
cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
|
|
) -> str | None:
|
|
sqla_type = cls.get_sqla_column_type(target_type)
|
|
|
|
if isinstance(sqla_type, Date):
|
|
return f"TO_DATE('{dttm.date().isoformat()}', 'YYYY-MM-DD')"
|
|
if isinstance(sqla_type, DateTime):
|
|
dttm_formatted = dttm.isoformat(sep=" ", timespec="microseconds")
|
|
return f"""TO_TIMESTAMP('{dttm_formatted}', 'YYYY-MM-DD HH24:MI:SS.US')"""
|
|
return None
|
|
|
|
|
|
class PostgresEngineSpec(BasicParametersMixin, PostgresBaseEngineSpec):
|
|
engine = "postgresql"
|
|
engine_name = "PostgreSQL"
|
|
engine_aliases = {"postgres"}
|
|
|
|
supports_dynamic_schema = True
|
|
supports_catalog = True
|
|
supports_dynamic_catalog = True
|
|
|
|
default_driver = "psycopg2"
|
|
sqlalchemy_uri_placeholder = (
|
|
"postgresql://user:password@host:port/dbname[?key=value&key=value...]"
|
|
)
|
|
|
|
metadata = {
|
|
"description": "PostgreSQL is an advanced open-source relational database.",
|
|
"logo": "postgresql.svg",
|
|
"homepage_url": "https://www.postgresql.org/",
|
|
"categories": [
|
|
DatabaseCategory.TRADITIONAL_RDBMS,
|
|
DatabaseCategory.OPEN_SOURCE,
|
|
],
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
),
|
|
"default_port": 5432,
|
|
"parameters": {
|
|
"username": "Database username",
|
|
"password": "Database password",
|
|
"host": "For localhost: localhost or 127.0.0.1. For AWS: endpoint URL",
|
|
"port": "Default 5432",
|
|
"database": "Database name",
|
|
},
|
|
"notes": "The psycopg2 library comes bundled with Superset Docker images.",
|
|
"connection_examples": [
|
|
{
|
|
"description": "Basic connection",
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
),
|
|
},
|
|
{
|
|
"description": "With SSL required",
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
"?sslmode=require"
|
|
),
|
|
},
|
|
],
|
|
"docs_url": "https://www.postgresql.org/docs/",
|
|
"sqlalchemy_docs_url": (
|
|
"https://docs.sqlalchemy.org/en/13/dialects/postgresql.html"
|
|
),
|
|
"compatible_databases": [
|
|
{
|
|
"name": "Hologres",
|
|
"description": (
|
|
"Alibaba Cloud real-time interactive analytics service, "
|
|
"fully compatible with PostgreSQL 11."
|
|
),
|
|
"logo": "hologres.png",
|
|
"homepage_url": "https://www.alibabacloud.com/product/hologres",
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql+psycopg2://{username}:{password}"
|
|
"@{host}:{port}/{database}"
|
|
),
|
|
"parameters": {
|
|
"username": "AccessKey ID of your Alibaba Cloud account",
|
|
"password": "AccessKey secret of your Alibaba Cloud account",
|
|
"host": "Public endpoint of the Hologres instance",
|
|
"port": "Port number of the Hologres instance",
|
|
"database": "Name of the Hologres database",
|
|
},
|
|
"categories": [DatabaseCategory.PROPRIETARY],
|
|
},
|
|
{
|
|
"name": "TimescaleDB",
|
|
"description": (
|
|
"Open-source relational database for time-series and analytics, "
|
|
"built on PostgreSQL."
|
|
),
|
|
"logo": "timescale.png",
|
|
"homepage_url": "https://www.timescale.com/",
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
),
|
|
"connection_examples": [
|
|
{
|
|
"description": "Timescale Cloud (SSL required)",
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}"
|
|
"@{host}:{port}/{database}?sslmode=require"
|
|
),
|
|
},
|
|
],
|
|
"notes": "psycopg2 comes bundled with Superset Docker images.",
|
|
"docs_url": "https://docs.timescale.com/",
|
|
"categories": [DatabaseCategory.OPEN_SOURCE],
|
|
},
|
|
{
|
|
"name": "YugabyteDB",
|
|
"description": ("Distributed SQL database built on top of PostgreSQL."),
|
|
"logo": "yugabyte.png",
|
|
"homepage_url": "https://www.yugabyte.com/",
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
),
|
|
"notes": "psycopg2 comes bundled with Superset Docker images.",
|
|
"docs_url": "https://www.yugabyte.com/",
|
|
"categories": [DatabaseCategory.OPEN_SOURCE],
|
|
},
|
|
{
|
|
"name": "Supabase",
|
|
"description": (
|
|
"Open-source Firebase alternative built on top of PostgreSQL, "
|
|
"providing a full backend-as-a-service with a hosted Postgres "
|
|
"database."
|
|
),
|
|
"logo": "supabase.svg",
|
|
"homepage_url": "https://supabase.com/",
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
),
|
|
"connection_examples": [
|
|
{
|
|
"description": "Supabase project (connection pooler)",
|
|
"connection_string": (
|
|
"postgresql://{username}.{project_ref}:{password}"
|
|
"@aws-0-{region}.pooler.supabase.com:6543/{database}"
|
|
),
|
|
},
|
|
],
|
|
"parameters": {
|
|
"username": "Database user (default: postgres)",
|
|
"password": "Database password",
|
|
"host": "Supabase project host (from project settings)",
|
|
"port": "Default 5432 (direct) or 6543 (pooler)",
|
|
"database": "Database name (default: postgres)",
|
|
"project_ref": "Supabase project reference (from project settings)",
|
|
"region": "Supabase project region (e.g., us-east-1)",
|
|
},
|
|
"notes": (
|
|
"Find connection details in your Supabase project dashboard under "
|
|
"Settings > Database. Use the connection pooler (port 6543) for "
|
|
"better connection management."
|
|
),
|
|
"docs_url": "https://supabase.com/docs/guides/database/connecting-to-postgres",
|
|
"categories": [
|
|
DatabaseCategory.HOSTED_OPEN_SOURCE,
|
|
],
|
|
},
|
|
{
|
|
"name": "Google AlloyDB",
|
|
"description": (
|
|
"Google Cloud's PostgreSQL-compatible database service "
|
|
"for demanding transactional and analytical workloads."
|
|
),
|
|
"logo": "alloydb.png",
|
|
"homepage_url": "https://cloud.google.com/alloydb",
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}@{host}:{port}/{database}"
|
|
),
|
|
"parameters": {
|
|
"username": "Database user (default: postgres)",
|
|
"password": "Database password",
|
|
"host": "AlloyDB instance IP or Auth Proxy address",
|
|
"port": "Default 5432",
|
|
"database": "Database name",
|
|
},
|
|
"notes": (
|
|
"For public IP connections, use the AlloyDB Auth Proxy for "
|
|
"secure access. Private IP connections can connect directly."
|
|
),
|
|
"docs_url": "https://cloud.google.com/alloydb/docs",
|
|
"categories": [
|
|
DatabaseCategory.CLOUD_GCP,
|
|
DatabaseCategory.HOSTED_OPEN_SOURCE,
|
|
],
|
|
},
|
|
{
|
|
"name": "Neon",
|
|
"description": (
|
|
"Serverless PostgreSQL with branching, scale-to-zero, "
|
|
"and bottomless storage."
|
|
),
|
|
"logo": "neon.png",
|
|
"homepage_url": "https://neon.tech/",
|
|
"pypi_packages": ["psycopg2"],
|
|
"connection_string": (
|
|
"postgresql://{username}:{password}"
|
|
"@{host}/{database}?sslmode=require"
|
|
),
|
|
"parameters": {
|
|
"username": "Neon role name",
|
|
"password": "Neon role password",
|
|
"host": (
|
|
"Neon hostname (e.g., "
|
|
"ep-cool-name-123456.us-east-2.aws.neon.tech)"
|
|
),
|
|
"database": "Database name (default: neondb)",
|
|
},
|
|
"notes": (
|
|
"SSL is required for all connections. Find connection "
|
|
"details in the Neon console under Connection Details."
|
|
),
|
|
"docs_url": "https://neon.tech/docs/connect/connect-from-any-app",
|
|
"categories": [
|
|
DatabaseCategory.HOSTED_OPEN_SOURCE,
|
|
],
|
|
},
|
|
{
|
|
"name": "Amazon Aurora PostgreSQL",
|
|
"description": (
|
|
"Amazon Aurora PostgreSQL is a fully managed, "
|
|
"PostgreSQL-compatible relational database with up to 5x "
|
|
"the throughput of standard PostgreSQL."
|
|
),
|
|
"logo": "aws-aurora.jpg",
|
|
"homepage_url": "https://aws.amazon.com/rds/aurora/",
|
|
"pypi_packages": ["sqlalchemy-aurora-data-api"],
|
|
"connection_string": (
|
|
"postgresql+auroradataapi://{aws_access_id}:{aws_secret_access_key}@/"
|
|
"{database_name}?aurora_cluster_arn={aurora_cluster_arn}&"
|
|
"secret_arn={secret_arn}®ion_name={region_name}"
|
|
),
|
|
"parameters": {
|
|
"aws_access_id": "AWS Access Key ID",
|
|
"aws_secret_access_key": "AWS Secret Access Key",
|
|
"database_name": "Database name",
|
|
"aurora_cluster_arn": "Aurora cluster ARN",
|
|
"secret_arn": "Secrets Manager ARN for credentials",
|
|
"region_name": "AWS region (e.g., us-east-1)",
|
|
},
|
|
"notes": (
|
|
"Uses the Data API for serverless access. "
|
|
"Standard PostgreSQL connections also work with psycopg2."
|
|
),
|
|
"categories": [
|
|
DatabaseCategory.CLOUD_AWS,
|
|
DatabaseCategory.HOSTED_OPEN_SOURCE,
|
|
],
|
|
},
|
|
],
|
|
}
|
|
# https://www.postgresql.org/docs/9.1/libpq-ssl.html#LIBQ-SSL-CERTIFICATES
|
|
encryption_parameters = {"sslmode": "require"}
|
|
|
|
max_column_name_length = 63
|
|
try_remove_schema_from_table_name = False # pylint: disable=invalid-name
|
|
|
|
# Sensitive fields that should be masked in encrypted_extra.
|
|
# This follows the pattern used by other engine specs (bigquery, snowflake, etc.)
|
|
# that specify exact paths rather than using the base class's catch-all "$.*".
|
|
encrypted_extra_sensitive_fields = {
|
|
"$.aws_iam.external_id",
|
|
"$.aws_iam.role_arn",
|
|
}
|
|
|
|
column_type_mappings = (
|
|
(
|
|
re.compile(r"^double precision", re.IGNORECASE),
|
|
DOUBLE_PRECISION(),
|
|
GenericDataType.NUMERIC,
|
|
),
|
|
(
|
|
re.compile(r"^array.*", re.IGNORECASE),
|
|
String(),
|
|
GenericDataType.STRING,
|
|
),
|
|
(
|
|
re.compile(r"^json.*", re.IGNORECASE),
|
|
JSON(),
|
|
GenericDataType.STRING,
|
|
),
|
|
(
|
|
re.compile(r"^enum.*", re.IGNORECASE),
|
|
ENUM(),
|
|
GenericDataType.STRING,
|
|
),
|
|
)
|
|
|
|
@classmethod
|
|
def get_schema_from_engine_params(
|
|
cls,
|
|
sqlalchemy_uri: URL,
|
|
connect_args: dict[str, Any],
|
|
) -> str | None:
|
|
"""
|
|
Return the configured schema.
|
|
|
|
While Postgres doesn't support connecting directly to a given schema, it allows
|
|
users to specify a "search path" that is used to resolve non-qualified table
|
|
names; this can be specified in the database ``connect_args``.
|
|
|
|
One important detail is that the search path can be a comma separated list of
|
|
schemas. While this is supported by the SQLAlchemy dialect, it shouldn't be used
|
|
in Superset because it breaks schema-level permissions, since it's impossible
|
|
to determine the schema for a non-qualified table in a query. In cases like
|
|
that we raise an exception.
|
|
|
|
Note that because the DB engine supports dynamic schema this method is never
|
|
called. It's left here as an implementation reference.
|
|
"""
|
|
options = parse_options(connect_args)
|
|
if search_path := options.get("search_path"):
|
|
schemas = search_path.split(",")
|
|
if len(schemas) > 1:
|
|
raise Exception( # pylint: disable=broad-exception-raised
|
|
"Multiple schemas are configured in the search path, which means "
|
|
"Superset is unable to determine the schema of unqualified table "
|
|
"names and enforce permissions."
|
|
)
|
|
return schemas[0]
|
|
|
|
return None
|
|
|
|
    @classmethod
    def get_default_schema_for_query(
        cls,
        database: Database,
        query: Query,
        template_params: Optional[dict[str, Any]] = None,
    ) -> str | None:
        """
        Return the default schema for a given query.

        Delegates to the parent implementation after verifying that the query
        does not try to set the session ``search_path``, which would let a
        user redirect unqualified table names to an arbitrary schema and
        bypass schema-level permission checks.

        :param database: Database the query runs against
        :param query: Query instance whose SQL is inspected
        :param template_params: Optional Jinja template parameters
        :return: The default schema, as determined by the parent class
        :raises SupersetSecurityException: If the query sets ``search_path``
        """
        # Render Jinja first so that settings introduced via templating are
        # also inspected.
        script = process_jinja_sql(query.sql, database, template_params).script
        settings = script.get_settings()
        if "search_path" in settings:
            raise SupersetSecurityException(
                SupersetError(
                    error_type=SupersetErrorType.QUERY_SECURITY_ACCESS_ERROR,
                    message=__(
                        "Users are not allowed to set a search path for security reasons."  # noqa: E501
                    ),
                    level=ErrorLevel.ERROR,
                )
            )

        return super().get_default_schema_for_query(database, query, template_params)
|
|
|
|
@classmethod
|
|
def adjust_engine_params(
|
|
cls,
|
|
uri: URL,
|
|
connect_args: dict[str, Any],
|
|
catalog: str | None = None,
|
|
schema: str | None = None,
|
|
) -> tuple[URL, dict[str, Any]]:
|
|
"""
|
|
Set the catalog (database).
|
|
"""
|
|
if catalog:
|
|
uri = uri.set(database=catalog)
|
|
|
|
return uri, connect_args
|
|
|
|
    @staticmethod
    def update_params_from_encrypted_extra(
        database: Database,
        params: dict[str, Any],
    ) -> None:
        """
        Extract sensitive parameters from encrypted_extra.

        Handles AWS IAM authentication if configured, then merges any
        remaining encrypted_extra keys into params (standard behavior).

        :param database: Database whose ``encrypted_extra`` payload is read
        :param params: Engine parameters, updated in place
        :raises json.JSONDecodeError: If ``encrypted_extra`` is not valid JSON
        """
        if not database.encrypted_extra:
            return

        try:
            encrypted_extra = json.loads(database.encrypted_extra)
        except json.JSONDecodeError as ex:
            # Re-raise after logging: a malformed payload must not be
            # silently ignored.
            logger.error(ex, exc_info=True)
            raise

        # Handle AWS IAM auth: pop the key so it doesn't reach create_engine()
        iam_config = encrypted_extra.pop("aws_iam", None)
        if iam_config and iam_config.get("enabled"):
            # Local import: only needed when IAM auth is actually enabled.
            from superset.db_engine_specs.aws_iam import AWSIAMAuthMixin

            # Preserve a stricter existing sslmode (e.g. verify-full) if present
            connect_args = params.get("connect_args") or {}
            previous_sslmode = connect_args.get("sslmode")

            AWSIAMAuthMixin._apply_iam_authentication(
                database,
                params,
                iam_config,
                ssl_args={"sslmode": "require"},
                default_port=5432,
            )

            # Restore stricter sslmode if it was previously configured
            if previous_sslmode in ("verify-ca", "verify-full"):
                params.setdefault("connect_args", {})["sslmode"] = previous_sslmode

        # Standard behavior: merge remaining keys into params
        if encrypted_extra:
            params.update(encrypted_extra)
|
|
|
|
@classmethod
|
|
def get_default_catalog(cls, database: Database) -> str:
|
|
"""
|
|
Return the default catalog for a given database.
|
|
"""
|
|
return database.url_object.database
|
|
|
|
@classmethod
|
|
def get_prequeries(
|
|
cls,
|
|
database: Database,
|
|
catalog: str | None = None,
|
|
schema: str | None = None,
|
|
) -> list[str]:
|
|
"""
|
|
Set the search path to the specified schema.
|
|
|
|
This is important for two reasons: in SQL Lab it will allow queries to run in
|
|
the schema selected in the dropdown, resolving unqualified table names to the
|
|
expected schema.
|
|
|
|
But more importantly, in SQL Lab this is used to check if the user has access to
|
|
any tables with unqualified names. If the schema is not set by SQL Lab it could
|
|
be anything, and we would have to block users from running any queries
|
|
referencing tables without an explicit schema.
|
|
"""
|
|
return [f'set search_path = "{schema}"'] if schema else []
|
|
|
|
@classmethod
|
|
def get_allow_cost_estimate(cls, extra: dict[str, Any]) -> bool:
|
|
return True
|
|
|
|
@classmethod
|
|
def estimate_statement_cost(
|
|
cls, database: Database, statement: str, cursor: Any
|
|
) -> dict[str, Any]:
|
|
"""
|
|
Run a SQL query that estimates the cost of a given statement.
|
|
:param database: A Database object
|
|
:param statement: A single SQL statement
|
|
:param cursor: Cursor instance
|
|
:return: JSON response from Trino
|
|
"""
|
|
sql = f"EXPLAIN {statement}"
|
|
cursor.execute(sql)
|
|
|
|
result = cursor.fetchone()[0]
|
|
match = re.search(r"cost=([\d\.]+)\.\.([\d\.]+)", result)
|
|
if match:
|
|
return {
|
|
"Start-up cost": float(match.group(1)),
|
|
"Total cost": float(match.group(2)),
|
|
}
|
|
|
|
return {}
|
|
|
|
@classmethod
|
|
def query_cost_formatter(
|
|
cls, raw_cost: list[dict[str, Any]]
|
|
) -> list[dict[str, str]]:
|
|
return [{k: str(v) for k, v in row.items()} for row in raw_cost]
|
|
|
|
    @classmethod
    def get_catalog_names(
        cls,
        database: Database,
        inspector: Inspector,
    ) -> set[str]:
        """
        Return all catalogs.

        In Postgres, a catalog is called a "database".

        :param database: Database instance (not used here)
        :param inspector: SQLAlchemy inspector bound to a live connection
        :return: Names of all non-template databases on the server
        """
        # Template databases (template0/template1) are excluded via
        # ``datistemplate`` since they only serve as CREATE DATABASE sources.
        return {
            catalog
            for (catalog,) in inspector.bind.execute(
                """
                SELECT datname FROM pg_database
                WHERE datistemplate = false;
                """
            )
        }
|
|
|
|
@classmethod
|
|
def get_table_names(
|
|
cls, database: Database, inspector: PGInspector, schema: str | None
|
|
) -> set[str]:
|
|
"""Need to consider foreign tables for PostgreSQL"""
|
|
return set(inspector.get_table_names(schema)) | set(
|
|
inspector.get_foreign_table_names(schema)
|
|
)
|
|
|
|
@staticmethod
|
|
def get_extra_params(
|
|
database: Database, source: QuerySource | None = None
|
|
) -> dict[str, Any]:
|
|
"""
|
|
For Postgres, the path to a SSL certificate is placed in `connect_args`.
|
|
|
|
:param database: database instance from which to extract extras
|
|
:raises CertificateException: If certificate is not valid/unparseable
|
|
:raises SupersetException: If database extra json payload is unparseable
|
|
"""
|
|
try:
|
|
extra = json.loads(database.extra or "{}")
|
|
except json.JSONDecodeError as ex:
|
|
raise SupersetException("Unable to parse database extras") from ex
|
|
|
|
if database.server_cert:
|
|
engine_params = extra.get("engine_params", {})
|
|
connect_args = engine_params.get("connect_args", {})
|
|
connect_args["sslmode"] = connect_args.get("sslmode", "verify-full")
|
|
path = utils.create_ssl_cert_file(database.server_cert)
|
|
connect_args["sslrootcert"] = path
|
|
engine_params["connect_args"] = connect_args
|
|
extra["engine_params"] = engine_params
|
|
return extra
|
|
|
|
@classmethod
|
|
def get_datatype(cls, type_code: Any) -> str | None:
|
|
# pylint: disable=import-outside-toplevel
|
|
from psycopg2.extensions import binary_types, string_types
|
|
|
|
types = binary_types.copy()
|
|
types.update(string_types)
|
|
if type_code in types:
|
|
return types[type_code].name
|
|
return None
|
|
|
|
@classmethod
|
|
def get_cancel_query_id(cls, cursor: Any, query: Query) -> str | None:
|
|
"""
|
|
Get Postgres PID that will be used to cancel all other running
|
|
queries in the same session.
|
|
|
|
:param cursor: Cursor instance in which the query will be executed
|
|
:param query: Query instance
|
|
:return: Postgres PID
|
|
"""
|
|
cursor.execute("SELECT pg_backend_pid()")
|
|
row = cursor.fetchone()
|
|
return row[0]
|
|
|
|
@classmethod
|
|
def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool:
|
|
"""
|
|
Cancel query in the underlying database.
|
|
|
|
:param cursor: New cursor instance to the db of the query
|
|
:param query: Query instance
|
|
:param cancel_query_id: Postgres PID
|
|
:return: True if query cancelled successfully, False otherwise
|
|
"""
|
|
try:
|
|
cursor.execute(
|
|
"SELECT pg_terminate_backend(pid) " # noqa: S608
|
|
"FROM pg_stat_activity "
|
|
f"WHERE pid='{cancel_query_id}'"
|
|
)
|
|
except Exception: # pylint: disable=broad-except
|
|
return False
|
|
|
|
return True
|