# Source: superset/db_engine_specs/singlestore.py
# (snapshot 2025-07-31 19:27:42 -07:00; 550 lines, 16 KiB, Python)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
import re
from datetime import datetime
from typing import Any, Optional
from urllib import parse
from flask import current_app as app
from sqlalchemy import types
from sqlalchemy.engine import URL
from superset.constants import TimeGrain
from superset.db_engine_specs.base import (
BaseEngineSpec,
BasicParametersMixin,
ColumnTypeMapping,
LimitMethod,
)
from superset.models.core import Database
from superset.models.sql_lab import Query
from superset.utils.core import GenericDataType
# Module-level logger, named after this module per Superset convention.
logger = logging.getLogger(__name__)
class SingleStoreSpec(BasicParametersMixin, BaseEngineSpec):
    """
    Database engine spec for SingleStore, using the ``singlestoredb`` driver.

    Declares the engine's SQL capabilities, the time-grain SQL templates used
    by charts, and mappings for SingleStore-specific column types that
    SQLAlchemy does not resolve on its own.
    """

    engine_name = "SingleStore"  # name shown in the Superset UI
    engine = "singlestoredb"  # SQLAlchemy dialect name
    drivers = {"singlestoredb": "SingleStore Python client"}
    default_driver = "singlestoredb"

    # Enforce row limits by rewriting queries with a LIMIT clause.
    limit_method = LimitMethod.FORCE_LIMIT

    # SQL capability flags consumed by BaseEngineSpec's query generation.
    allows_joins = True
    allows_subqueries = True
    allows_alias_in_select = True
    allows_alias_in_orderby = True
    time_groupby_inline = False
    allows_alias_to_source_column = True
    allows_hidden_orderby_agg = True
    allows_hidden_cc_in_orderby = False
    allows_cte_in_subquery = True
    allow_limit_clause = True
    max_column_name_length = 256
    allows_sql_comments = True
    allows_escaped_colons = True

    # Feature toggles for Superset integrations.
    supports_file_upload = True
    supports_dynamic_schema = True
    disable_ssh_tunneling = False

    # Template shown when users enter a SQLAlchemy URI by hand.
    sqlalchemy_uri_placeholder = (
        "singlestoredb://{username}:{password}@{host}:{port}/{database}"
    )

    # Time-grain SQL templates: every supported grain maps onto DATE_TRUNC.
    _time_grain_expressions = {
        None: "{col}",
        TimeGrain.SECOND: "DATE_TRUNC('second', {col})",
        TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})",
        TimeGrain.HOUR: "DATE_TRUNC('hour', {col})",
        TimeGrain.DAY: "DATE_TRUNC('day', {col})",
        TimeGrain.WEEK: "DATE_TRUNC('week', {col})",
        TimeGrain.MONTH: "DATE_TRUNC('month', {col})",
        TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})",
        TimeGrain.YEAR: "DATE_TRUNC('year', {col})",
    }

    # Regex prefix-match → (SQLAlchemy type, generic type) for column types
    # the default resolution does not cover.
    # NOTE: patterns are tried in order and match on a prefix, so the more
    # specific "geographypoint" entry must come before "geography".
    column_type_mappings: tuple[ColumnTypeMapping, ...] = (
        (
            re.compile(r"^tinyint", re.IGNORECASE),
            types.SmallInteger(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^mediumint", re.IGNORECASE),
            types.Integer(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^year", re.IGNORECASE),
            types.Integer(),
            GenericDataType.NUMERIC,
        ),
        (
            re.compile(r"^bit", re.IGNORECASE),
            types.LargeBinary(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^(var)?binary", re.IGNORECASE),
            types.LargeBinary(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^(tiny|medium|long)?blob", re.IGNORECASE),
            types.LargeBinary(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^json", re.IGNORECASE),
            types.String(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^bson", re.IGNORECASE),
            types.LargeBinary(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^geographypoint", re.IGNORECASE),
            types.String(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^geography", re.IGNORECASE),
            types.String(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^vector", re.IGNORECASE),
            types.String(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^enum", re.IGNORECASE),
            types.String(),
            GenericDataType.STRING,
        ),
        (
            re.compile(r"^set", re.IGNORECASE),
            types.String(),
            GenericDataType.STRING,
        ),
    )
@classmethod
def get_function_names(
    cls,
    database: Database,
) -> list[str]:
    """
    Get a list of function names that are able to be called on the database.

    Used for SQL Lab autocomplete.

    :param database: The database to get functions for
    :return: A list of function names usable in the database
    """
    # Static catalog of SingleStore built-ins; user-defined functions from
    # the default schema are merged in below.
    functions: set[str] = {
        "ABS",
        "ACOS",
        "ADDTIME",
        "AES_DECRYPT",
        "AES_ENCRYPT",
        "AGGREGATOR_ID",
        "ANY_VALUE",
        "APPROX_COUNT_DISTINCT",
        "APPROX_COUNT_DISTINCT_ACCUMULATE",
        "APPROX_COUNT_DISTINCT_COMBINE",
        "APPROX_COUNT_DISTINCT_ESTIMATE",
        "APPROX_GEOGRAPHY_INTERSECTS",
        "APPROX_PERCENTILE",
        "ASCII",
        "ASIN",
        "ATAN",
        "ATAN2",
        "AVG",
        "BETWEEN",
        "NOT",
        "BIN",
        "BIN_TO_UUID",
        "BINARY",
        "BIT_AND",
        "BIT_COUNT",
        "BIT_OR",
        "BIT_XOR",
        "BM25",
        "BSON_ARRAY_CONTAINS_BSON",
        "BSON_ARRAY_PUSH",
        "BSON_ARRAY_SLICE",
        "BSON_BUILD_ARRAY",
        "BSON_BUILD_OBJECT",
        "BSON_COMPARE",
        "BSON_EXTRACT_BIGINT",
        "BSON_EXTRACT_BOOL",
        "BSON_EXTRACT_BSON",
        "BSON_EXTRACT_DATETIME",
        "BSON_EXTRACT_DOUBLE",
        "BSON_EXTRACT_STRING",
        "BSON_GET_TYPE",
        "BSON_INCLUDE_MASK",
        "BSON_EXCLUDE_MASK",
        "BSON_LENGTH",
        "BSON_MATCH_ANY",
        "BSON_MATCH_ANY_EXISTS",
        "BSON_MERGE",
        "BSON_NORMALIZE",
        "BSON_NORMALIZE_ASC",
        "BSON_NORMALIZE_DESC",
        # Fixed: these two names were accidentally fused into the single
        # (nonexistent) entry "BSON_NORMALIZE_NO_ARRAYBSON_SET_BSON".
        "BSON_NORMALIZE_NO_ARRAY",
        "BSON_SET_BSON",
        "BSON_UNWIND",
        "CASE",
        "CEIL",
        "CHAR",
        "CHARACTER_LENGTH",
        "CHARSET",
        "COALESCE",
        "CONCAT",
        "CONCAT_WS",
        "CONNECTION_ID",
        "CONV",
        "CAST",
        "CONVERT",
        "CONVERT_TZ",
        "COS",
        "COT",
        "COUNT",
        "CRC32",
        "CURRENT_DATE",
        "CURDATE",
        "CURRENT_TIME",
        "CURTIME",
        "CURRENT_TIMESTAMP",
        "DATABASE",
        "DATE",
        "DATE_ADD",
        "DATE_FORMAT",
        "DATE_SUB",
        "DATE_TRUNC",
        "DATEDIFF",
        "DAY",
        "DAYNAME",
        "DAYOFWEEK",
        "DAYOFYEAR",
        "DECODE",
        "DEGREES",
        "DENSE_RANK",
        "DOT_PRODUCT",
        "ELT",
        "ESTIMATED_QUERY_LEAF_MEMORY",
        "ESTIMATED_QUERY_RUNTIME",
        "EUCLIDEAN_DISTANCE",
        "EXP",
        "EXTRACT",
        "FIELD",
        "FIRST",
        "FIRST_VALUE",
        "FLOOR",
        "FORMAT",
        "FOUND_ROWS",
        "FROM_BASE64",
        "FROM_DAYS",
        "FROM_UNIXTIME",
        "GEOGRAPHY_AREA",
        "GEOGRAPHY_CONTAINS",
        "GEOGRAPHY_DISTANCE",
        "GEOGRAPHY_INTERSECTS",
        "GEOGRAPHY_LATITUDE",
        "GEOGRAPHY_LENGTH",
        "GEOGRAPHY_LONGITUDE",
        "GEOGRAPHY_POINT",
        "GEOGRAPHY_WITHIN_DISTANCE",
        "GET_FORMAT",
        "GREATEST",
        "GROUP_CONCAT",
        "HEX",
        "HIGHLIGHT",
        "HOUR",
        "IF",
        "IN",
        "INET_ATON",
        "INET_NTOA",
        "INET6_ATON",
        "INET6_NTOA",
        "INITCAP",
        "INSTR",
        "IS_BSON_NULL",
        "IS_UUID",
        "ISNULL",
        "ISNUMERIC",
        "JSON_AGG",
        "JSON_ARRAY_CONTAINS_DOUBLE",
        "JSON_ARRAY_CONTAINS_STRING",
        "JSON_ARRAY_CONTAINS_JSON",
        "JSON_ARRAY_PACK",
        "JSON_ARRAY_PUSH_DOUBLE",
        "JSON_ARRAY_PUSH_STRING",
        "JSON_ARRAY_PUSH_JSON",
        "JSON_ARRAY_UNPACK",
        "JSON_BUILD_ARRAY",
        "JSON_BUILD_OBJECT",
        "JSON_DELETE_KEY",
        "JSON_EXTRACT_DOUBLE",
        "JSON_EXTRACT_STRING",
        "JSON_EXTRACT_JSON",
        "JSON_EXTRACT_BIGINT",
        "JSON_GET_TYPE",
        "JSON_KEYS",
        "JSON_LENGTH",
        "JSON_MATCH_ANY",
        "JSON_MERGE_PATCH",
        "JSON_PRETTY",
        "JSON_SET_DOUBLE",
        "JSON_SET_STRING",
        "JSON_SET_JSON",
        "JSON_SPLICE_DOUBLE",
        "JSON_SPLICE_STRING",
        "JSON_SPLICE_JSON",
        "JSON_TO_ARRAY",
        "LAG",
        "LAST",
        "LAST_DAY",
        "LAST_INSERT_ID",
        "LAST_VALUE",
        "LCASE",
        "LEAD",
        "LEAST",
        "LEFT",
        "LENGTH",
        "LIKE",
        "LN",
        "LOCALTIMESTAMP",
        "LOCATE",
        "LOG",
        "LOG10",
        "LOG2",
        "LPAD",
        "LTRIM",
        "MATCH",
        "MAX",
        "MD5",
        "MEDIAN",
        "MICROSECOND",
        "MIN",
        "MINUTE",
        "MOD",
        "MONTH",
        "MONTHNAME",
        "MONTHS_BETWEEN",
        "NOPARAM",
        "NOW",
        "NTH_VALUE",
        "NTILE",
        "NULLIF",
        "NVL",
        "IFNULL",
        "PERCENT_RANK",
        "PERCENTILE_CONT",
        "PERCENTILE_DISC",
        "PI",
        "POW",
        "QUARTER",
        "QUOTE",
        "RADIANS",
        "RAND",
        "RANK",
        "REDUCE",
        "REGEXP_INSTR",
        "REGEXP_MATCH",
        "REGEXP_REPLACE",
        "REGEXP_SUBSTR",
        "REPLACE",
        "REVERSE",
        "RIGHT",
        "RLIKE",
        "REGEXP",
        "ROUND",
        "ROW_COUNT",
        "ROW_NUMBER",
        "RPAD",
        "RTRIM",
        "SCALAR_VECTOR_MUL",
        "SEC_TO_TIME",
        "SECOND",
        "SECRET",
        "SET",
        "SHA1",
        "SHA2",
        "SIGMOID",
        "SIGN",
        "SIN",
        "SLEEP",
        "SPLIT",
        "SQRT",
        "STD",
        "STDDEV",
        "STDDEV_POP",
        "STDDEV_SAMP",
        "STR_TO_DATE",
        "STRCMP",  # fixed: was lowercase "strcmp", inconsistent with the list
        "STRING_BYTES",
        "SUBSTRING",
        "SUBSTRING_INDEX",
        "SUM",
        "SYS_GUID",
        "UUID",
        "TAN",
        "TIME",
        "TIME_BUCKET",
        "TIME_FORMAT",
        "TIME_TO_SEC",
        "TIMEDIFF",
        "TIMESTAMP",
        "TIMESTAMPADD",
        "TIMESTAMPDIFF",
        "TO_BASE64",
        "TO_CHAR",
        "TO_DATE",
        "TO_DAYS",
        "TO_JSON",
        "TO_NUMBER",
        "TO_SECONDS",
        "TO_TIMESTAMP",
        "TRIM",
        "TRUNC",
        "TRUNCATE",
        "UCASE",
        "UNHEX",
        "UNIX_TIMESTAMP",
        "USER",
        "UTC_DATE",
        "UTC_TIME",
        "UTC_TIMESTAMP",
        "UUID_TO_BIN",
        "VARIANCE",
        "VAR_SAMP",
        "VECTOR_ADD",
        "VECTOR_ELEMENTS_SUM",
        "VECTOR_KTH_ELEMENT",
        "VECTOR_MUL",
        "VECTOR_NUM_ELEMENTS",
        "VECTOR_SORT",
        "VECTOR_SUB",
        "VECTOR_SUBVECTOR",
        "VECTOR_SUM",
        "WEEK",
        "WEEKDAY",
        "YEAR",
    }
    if (database_name := cls.get_default_schema(database, None)) is not None:
        # Double any backticks in the schema name to keep the identifier
        # quoting intact.
        df = database.get_df(
            f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
        )
        functions.update(df.iloc[:, 0].tolist())
    return list(functions)
@classmethod
def epoch_to_dttm(cls) -> str:
return "from_unixtime({col})"
@classmethod
def convert_dttm(
    cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
) -> Optional[str]:
    """
    Render ``dttm`` as a SingleStore SQL literal for the given column type.

    :param target_type: Target column type name (e.g. ``DATETIME``)
    :param dttm: The datetime value to convert
    :param db_extra: Database extra params (unused here)
    :return: A SQL literal expression, or None for unsupported types
    """
    column_type = cls.get_sqla_column_type(target_type)
    if isinstance(column_type, types.Date):
        date_value = dttm.date().isoformat()
        return f"CAST('{date_value}' AS DATE)"
    datetime_value = dttm.isoformat(sep=" ", timespec="microseconds")
    # TIMESTAMP subclasses DateTime in SQLAlchemy, so it must be tested first;
    # SingleStore's ``:>`` cast operator is used for TIMESTAMP.
    if isinstance(column_type, types.TIMESTAMP):
        return f"('{datetime_value}' :> TIMESTAMP(6))"
    if isinstance(column_type, types.DateTime):
        return f"CAST('{datetime_value}' AS DATETIME(6))"
    if isinstance(column_type, types.Time):
        time_value = dttm.strftime("%H:%M:%S.%f")
        return f"CAST('{time_value}' AS TIME(6))"
    return None
@classmethod
def adjust_engine_params(
    cls,
    uri: URL,
    connect_args: dict[str, Any],
    catalog: Optional[str] = None,
    schema: Optional[str] = None,
) -> tuple[URL, dict[str, Any]]:
    """
    Point the URI at the requested schema and tag the connection.

    :param uri: Original SQLAlchemy URI
    :param connect_args: Driver connection arguments (mutated in place)
    :param catalog: Unused for SingleStore
    :param schema: Schema to connect to, if any
    :return: The adjusted URI and connection arguments
    """
    if schema:
        # A SingleStore "database" plays the role of a SQLAlchemy schema.
        uri = uri.set(database=parse.quote(schema, safe=""))
    superset_version = app.config.get("VERSION_STRING", "dev")
    # Identify Superset to the server, unless the caller already set attrs.
    connect_args.setdefault(
        "conn_attrs",
        {
            "_connector_name": "SingleStore Superset Database Engine",
            "_connector_version": superset_version,
            "_product_version": superset_version,
        },
    )
    return uri, connect_args
@classmethod
def get_schema_from_engine_params(
cls,
sqlalchemy_uri: URL,
connect_args: dict[str, Any],
) -> Optional[str]:
"""
Return the configured schema.
A MySQL database is a SQLAlchemy schema.
"""
return parse.unquote(sqlalchemy_uri.database)
@classmethod
def get_cancel_query_id(cls, cursor: Any, query: Query) -> Optional[str]:
"""
Get SingleStore connection ID and aggregator ID that will be used to cancel all
other running queries in the same connection.
:param cursor: Cursor instance in which the query will be executed
:param query: Query instance
:return: SingleStore connection ID and aggregator ID
"""
cursor.execute("SELECT CONNECTION_ID(), AGGREGATOR_ID()")
row = cursor.fetchone()
return " ".join(str(item) for item in row)
@classmethod
def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool:
"""
Cancel query in the underlying database.
:param cursor: New cursor instance to the db of the query
:param query: Query instance
:param cancel_query_id: SingleStore connection ID and aggregator ID
:return: True if query cancelled successfully, False otherwise
"""
try:
cursor.execute(f"KILL CONNECTION {cancel_query_id}")
except Exception: # pylint: disable=broad-except
return False
return True