mirror of
https://github.com/apache/superset.git
synced 2026-04-16 14:45:21 +00:00
184 lines
6.6 KiB
Python
184 lines
6.6 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
from urllib import parse
|
|
|
|
import simplejson as json
|
|
from sqlalchemy.engine.url import make_url, URL
|
|
|
|
from superset.db_engine_specs.base import BaseEngineSpec
|
|
from superset.utils import core as utils
|
|
|
|
|
|
class TrinoEngineSpec(BaseEngineSpec):
    """Engine spec describing how to talk to a Trino backend.

    Provides time-grain SQL templates, datetime literal conversion, schema
    selection on the connection URI, user impersonation through connection
    arguments, and query cost estimation via ``EXPLAIN (TYPE IO)``.
    """

    engine = "trino"
    engine_name = "Trino"

    # SQL templates keyed by ISO 8601 duration strings (``None`` means no
    # truncation). ``{col}`` is a placeholder for the column expression --
    # presumably filled in by the base spec; confirm against BaseEngineSpec.
    _time_grain_expressions = {
        None: "{col}",
        "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))",
        "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))",
        "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))",
        "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))",
        "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))",
        "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))",
        "P0.25Y": "date_trunc('quarter', CAST({col} AS TIMESTAMP))",
        "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))",
        # Week grains anchored on a specific day have no template here:
        # "1969-12-28T00:00:00Z/P1W",  # Week starting Sunday
        # "1969-12-29T00:00:00Z/P1W",  # Week starting Monday
        # "P1W/1970-01-03T00:00:00Z",  # Week ending Saturday
        # "P1W/1970-01-04T00:00:00Z",  # Week ending Sunday
    }

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
        """
        Convert a Python datetime into a SQL expression for the target type.

        :param target_type: Column type name; compared case-insensitively
        :param dttm: Datetime value to render
        :return: A ``from_iso8601_date(...)`` or ``from_iso8601_timestamp(...)``
            expression, or None when the target type is neither DATE nor
            TIMESTAMP
        """
        tt = target_type.upper()
        if tt == utils.TemporalType.DATE:
            value = dttm.date().isoformat()
            return f"from_iso8601_date('{value}')"
        if tt == utils.TemporalType.TIMESTAMP:
            value = dttm.isoformat(timespec="microseconds")
            return f"from_iso8601_timestamp('{value}')"
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        """
        Return the SQL template that turns an epoch column into a timestamp.

        :return: Template string containing the ``{col}`` placeholder
        """
        return "from_unixtime({col})"

    @classmethod
    def adjust_database_uri(
        cls, uri: URL, selected_schema: Optional[str] = None
    ) -> None:
        """
        Point the URI at the selected schema, mutating ``uri`` in place.

        The database part of the URI is treated as ``<first>/<second>``; only
        the first segment is kept and the URL-quoted schema is appended to it.
        Nothing is changed in effect when no schema is selected or the URI has
        no database part.

        :param uri: SQLAlchemy URL object to update
        :param selected_schema: Schema name to select, if any
        :return: None
        """
        database = uri.database
        if selected_schema and database:
            # Quote every reserved character (including "/") so the schema
            # cannot introduce extra path segments.
            selected_schema = parse.quote(selected_schema, safe="")
            database = database.split("/")[0] + "/" + selected_schema
        # NOTE(review): relies on URL attributes being assignable -- confirm
        # against the SQLAlchemy version in use.
        uri.database = database

    @classmethod
    def update_impersonation_config(
        cls, connect_args: Dict[str, Any], uri: str, username: Optional[str],
    ) -> None:
        """
        Update a configuration dictionary
        that can set the correct properties for impersonating users

        :param connect_args: config to be updated
        :param uri: URI string
        :param username: Effective username
        :return: None
        """
        url = make_url(uri)
        backend_name = url.get_backend_name()

        # Only for a Trino connection with a known effective user: run the
        # connection as that user by overriding the "user" connect argument.
        if backend_name == "trino" and username is not None:
            connect_args["user"] = username

    @classmethod
    def modify_url_for_impersonation(
        cls, url: URL, impersonate_user: bool, username: Optional[str]
    ) -> None:
        """
        Modify the SQL Alchemy URL object with the user to impersonate if applicable.

        This implementation intentionally leaves the URL untouched.

        :param url: SQLAlchemy URL object
        :param impersonate_user: Flag indicating if impersonation is enabled
        :param username: Effective username
        """
        # Do nothing and let update_impersonation_config take care of impersonation

    @classmethod
    def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
        """
        Report whether query cost estimation is available.

        :param extra: Extra configuration; not consulted by this spec
        :return: Always True
        """
        return True

    @classmethod
    def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]:
        """
        Run a SQL query that estimates the cost of a given statement.

        :param statement: A single SQL statement
        :param cursor: Cursor instance
        :return: JSON response from Trino
        """
        sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {statement}"
        cursor.execute(sql)

        # the output from Trino is a single column and a single row containing
        # JSON:
        #
        #   {
        #     ...
        #     "estimate" : {
        #       "outputRowCount" : 8.73265878E8,
        #       "outputSizeInBytes" : 3.41425774958E11,
        #       "cpuCost" : 3.41425774958E11,
        #       "maxMemory" : 0.0,
        #       "networkCost" : 3.41425774958E11
        #     }
        #   }
        result = json.loads(cursor.fetchone()[0])
        return result

    @classmethod
    def query_cost_formatter(
        cls, raw_cost: List[Dict[str, Any]]
    ) -> List[Dict[str, str]]:
        """
        Format cost estimate.

        :param raw_cost: JSON estimate from Trino
        :return: Human readable cost estimate
        """

        def humanize(value: Any, suffix: str) -> str:
            """Render ``value`` as an integer scaled to a K/M/G/... prefix."""
            try:
                number = int(value)
            except (TypeError, ValueError, OverflowError):
                # None, NaN, infinities and non-numeric strings cannot be
                # scaled; show them verbatim instead of failing the request.
                return str(value)

            step = 1000
            prefix = ""
            for candidate in ("K", "M", "G", "T", "P", "E", "Z", "Y"):
                # ">=" semantics: exactly 1000 becomes "1 K", not "1000 ".
                if number < step:
                    break
                prefix = candidate
                number //= step

            return f"{number} {prefix}{suffix}"

        # (key in Trino's "estimate" object, display label, unit suffix)
        columns = [
            ("outputRowCount", "Output count", " rows"),
            ("outputSizeInBytes", "Output size", "B"),
            ("cpuCost", "CPU cost", ""),
            ("maxMemory", "Max memory", "B"),
            ("networkCost", "Network cost", ""),
        ]

        cost = []
        for row in raw_cost:
            estimate: Dict[str, float] = row.get("estimate", {})
            # Only metrics present in the estimate are reported; strip()
            # removes the trailing space left when prefix and suffix are empty.
            cost.append(
                {
                    label: humanize(estimate[key], suffix).strip()
                    for key, label, suffix in columns
                    if key in estimate
                }
            )

        return cost
|