Files
superset2/superset/explorables/base.py
2026-05-05 12:07:46 -04:00

550 lines
19 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Base protocol for explorable data sources in Superset.
An "explorable" is any data source that can be explored to create charts,
including SQL datasets, saved queries, and semantic layer views.
"""
from __future__ import annotations
from collections.abc import Hashable
from datetime import datetime
from typing import Any, Protocol, runtime_checkable, TYPE_CHECKING, TypedDict
if TYPE_CHECKING:
from superset.common.query_object import QueryObject
from superset.models.helpers import QueryResult
from superset.superset_typing import ExplorableData, QueryObjectDict
class TimeGrainDict(TypedDict):
    """
    Shape of a single time grain option returned by ``get_time_grains``.

    A time grain is a granularity used when grouping temporal data: it
    determines how timestamps are bucketed.

    Keys:
        name: Label shown for the grain, e.g. "Hour", "Day" or "Week".
        function: Expression that applies the grain; its form depends on
            the implementation. SQL datasources typically use a SQL
            expression template such as "DATE_TRUNC('hour', {col})".
        duration: ISO 8601 duration string, e.g. "PT1H", "P1D" or "P1W".
            The annotation also permits ``None``.
    """

    name: str
    function: str
    duration: str | None
@runtime_checkable
class MetricMetadata(Protocol):
    """
    Structural type for one metric exposed by an explorable data source.

    A metric holds either a SQL expression or a reference to a semantic
    layer measure. Every member below is a read-only property.

    Attributes:
        metric_name: Identifier that uniquely names the metric
        expression: SQL expression or reference used to compute the metric
        verbose_name: Display name shown in the UI
        description: Explanation of what the metric measures
        d3format: D3 format string applied to numeric values
        currency: Currency configuration of the metric (JSON object)
        warning_text: Warning shown when the metric is used
        certified_by: Person or entity who certified the metric
        certification_details: Further information about the certification
    """

    @property
    def metric_name(self) -> str:
        """Identifier that uniquely names the metric."""

    @property
    def expression(self) -> str:
        """SQL expression or reference used to compute the metric."""

    @property
    def verbose_name(self) -> str | None:
        """Display name shown in the UI."""

    @property
    def description(self) -> str | None:
        """Explanation of what the metric measures."""

    @property
    def d3format(self) -> str | None:
        """D3 format string applied to numeric values."""

    @property
    def currency(self) -> dict[str, Any] | None:
        """Currency configuration of the metric (JSON object)."""

    @property
    def warning_text(self) -> str | None:
        """Warning shown when the metric is used."""

    @property
    def certified_by(self) -> str | None:
        """Person or entity who certified the metric."""

    @property
    def certification_details(self) -> str | None:
        """Further information about the certification."""
@runtime_checkable
class ColumnMetadata(Protocol):
    """
    Structural type for one column/dimension exposed by an explorable.

    Columns are what users group by, filter on, and use for
    dimension-based analysis. Every member below is a read-only property.

    Attributes:
        column_name: Identifier that uniquely names the column
        type: SQL data type of the column, e.g. 'VARCHAR', 'INTEGER',
            'DATETIME'
        is_dttm: True when the column holds a date or time value
        verbose_name: Display name shown in the UI
        description: Explanation of what the column contains
        groupby: True when the column may be used for grouping/aggregation
        filterable: True when the column may be used in filters
        expression: SQL expression, present for calculated columns
        python_date_format: Python datetime format string for temporal
            columns
        advanced_data_type: Advanced data type classification
        extra: Additional metadata stored as JSON
    """

    @property
    def column_name(self) -> str:
        """Identifier that uniquely names the column."""

    @property
    def type(self) -> str:
        """SQL data type of the column."""

    @property
    def is_dttm(self) -> bool:
        """True when the column holds a date or time value."""

    @property
    def verbose_name(self) -> str | None:
        """Display name shown in the UI."""

    @property
    def description(self) -> str | None:
        """Explanation of what the column contains."""

    @property
    def groupby(self) -> bool:
        """True when the column may be used for grouping/aggregation."""

    @property
    def filterable(self) -> bool:
        """True when the column may be used in filters."""

    @property
    def expression(self) -> str | None:
        """SQL expression, present for calculated columns."""

    @property
    def python_date_format(self) -> str | None:
        """Python datetime format string for temporal columns."""

    @property
    def advanced_data_type(self) -> str | None:
        """Advanced data type classification."""

    @property
    def extra(self) -> str | None:
        """Additional metadata stored as JSON."""
@runtime_checkable
class Explorable(Protocol):
    """
    Protocol for objects that can be explored to create charts.

    This protocol defines the minimal interface required for a data source
    to be visualizable in Superset. It is implemented by:

    - BaseDatasource (SQL datasets and queries)
    - SemanticView (semantic layer views)
    - Future: Other data source types

    The protocol focuses on the essential methods and properties needed
    for query execution, caching, and security.
    """

    # =========================================================================
    # Core Query Interface
    # =========================================================================

    def get_query_result(self, query_object: QueryObject) -> QueryResult:
        """
        Execute a query and return results.

        This is the primary method for data retrieval. It takes a query
        object describing what data to fetch (columns, metrics, filters, time
        range, etc.) and returns a QueryResult containing a pandas DataFrame
        with the results.

        :param query_object: QueryObject describing the query
        :return: QueryResult containing:
            - df: pandas DataFrame with query results
            - query: string representation of the executed query
            - duration: query execution time
            - status: QueryStatus (SUCCESS/FAILED)
            - error_message: error details if query failed
        """

    def get_query_str(self, query_obj: QueryObjectDict) -> str:
        """
        Get the query string without executing.

        Returns a string representation of the query that would be executed
        for the given query object. This is used for display in the UI
        and debugging.

        :param query_obj: Dictionary describing the query
        :return: String representation of the query (SQL, GraphQL, etc.)
        """

    # =========================================================================
    # Identity & Metadata
    # =========================================================================

    @property
    def id(self) -> int | str:
        """
        Primary key identifier for this explorable.

        Used for database lookups such as row-level security filter
        resolution. Must be accessible without triggering expensive
        operations like database engine connections.

        :return: Primary key (typically int, but may be str for some
            implementations)
        """

    @property
    def uid(self) -> str:
        """
        Unique identifier for this explorable.

        Used as part of cache keys and for tracking. Should be stable
        across application restarts but change when the explorable's
        data or structure changes.

        Format convention: "{type}_{id}" (e.g., "table_123",
        "semantic_view_abc")

        :return: Unique identifier string
        """

    @property
    def type(self) -> str:
        """
        Type discriminator for this explorable.

        Identifies the kind of data source (e.g., 'table', 'query',
        'semantic_view'). Used for routing and type-specific behavior.

        :return: Type identifier string
        """

    @property
    def metrics(self) -> list[MetricMetadata]:
        """
        List of metric metadata objects.

        Each object should provide at minimum:
        - metric_name: str - the metric's name
        - expression: str - the metric's calculation expression

        Used for validation, autocomplete, and query building.

        :return: List of metric metadata objects
        """

    # TODO: rename to dimensions
    @property
    def columns(self) -> list[ColumnMetadata]:
        """
        List of column metadata objects.

        Each object should provide at minimum:
        - column_name: str - the column's name
        - type: str - the column's data type
        - is_dttm: bool - whether it's a datetime column

        Used for validation, autocomplete, and query building.

        :return: List of column metadata objects
        """

    # TODO: remove and use columns instead
    @property
    def column_names(self) -> list[str]:
        """
        List of available column names.

        A simple list of all column names in the explorable.
        Used for quick validation and filtering.

        :return: List of column name strings
        """

    @property
    def data(self) -> ExplorableData:
        """
        Full metadata representation sent to the frontend.

        This property returns a dictionary containing all the metadata
        needed by the Explore UI, including columns, metrics, and
        other configuration.

        Required keys in the returned dictionary:
        - id: unique identifier (int or str)
        - uid: unique string identifier
        - name: display name
        - type: explorable type ('table', 'query', 'semantic_view', etc.)
        - columns: list of column metadata dicts (with column_name, type,
          etc.)
        - metrics: list of metric metadata dicts (with metric_name,
          expression, etc.)
        - database: database metadata dict (with id, backend, etc.)

        Optional keys:
        - description: human-readable description
        - schema: schema name (if applicable)
        - catalog: catalog name (if applicable)
        - cache_timeout: default cache timeout
        - offset: timezone offset
        - owners: list of owner IDs
        - verbose_map: dict mapping column/metric names to display names

        :return: Dictionary with complete explorable metadata
        """

    # =========================================================================
    # Caching
    # =========================================================================

    @property
    def cache_timeout(self) -> int | None:
        """
        Default cache timeout in seconds.

        Determines how long query results should be cached.
        Returns None to use the system default cache timeout.

        :return: Cache timeout in seconds, or None for system default
        """

    @property
    def changed_on(self) -> datetime | None:
        """
        Last modification timestamp.

        Used for cache invalidation - when this changes, cached
        results for this explorable become invalid.

        :return: Datetime of last modification, or None
        """

    def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]:
        """
        Additional cache key components specific to this explorable.

        Provides explorable-specific values to include in cache keys.
        Used to ensure cache invalidation when the explorable's
        underlying data or configuration changes in ways not captured
        by uid or changed_on.

        :param query_obj: The query being executed
        :return: List of additional hashable values for cache key
        """

    # =========================================================================
    # Security
    # =========================================================================

    @property
    def perm(self) -> str:
        """
        Permission string for this explorable.

        Used by the security manager to check if a user has access
        to this data source. Format depends on the explorable type
        (e.g., "[database].[schema].[table]" for SQL tables).

        :return: Permission identifier string
        """

    # =========================================================================
    # Time/Date Handling
    # =========================================================================

    # NOTE(review): the docstring below states the unit as seconds; confirm
    # this against the code that applies the offset before relying on it.
    @property
    def offset(self) -> int:
        """
        Timezone offset for datetime columns.

        Used to normalize datetime values to the user's timezone.
        Returns 0 for UTC, or an offset in seconds.

        :return: Timezone offset in seconds (0 for UTC)
        """

    # =========================================================================
    # Time Granularity
    # =========================================================================

    def get_time_grains(self) -> list[TimeGrainDict]:
        """
        Get available time granularities for temporal grouping.

        Returns a list of time grain options that can be used for grouping
        temporal data. Each time grain specifies how to bucket timestamps
        (e.g., by hour, day, week, month).

        Each dictionary in the returned list should contain:
        - name: str - Display name (e.g., "Hour", "Day", "Week")
        - function: str - How to apply the grain (implementation-specific)
        - duration: str | None - ISO 8601 duration string (e.g., "PT1H",
          "P1D", "P1W"), or None as permitted by ``TimeGrainDict``

        For SQL datasources, the function is typically a SQL expression
        template like "DATE_TRUNC('hour', {col})". For semantic layers, it
        might be a semantic layer-specific identifier like "hour" or "day".

        Return an empty list if time grains are not supported or applicable.

        Example return value:
        ```python
        [
            {
                "name": "Second",
                "function": "DATE_TRUNC('second', {col})",
                "duration": "PT1S",
            },
            {
                "name": "Minute",
                "function": "DATE_TRUNC('minute', {col})",
                "duration": "PT1M",
            },
            {
                "name": "Hour",
                "function": "DATE_TRUNC('hour', {col})",
                "duration": "PT1H",
            },
            {
                "name": "Day",
                "function": "DATE_TRUNC('day', {col})",
                "duration": "P1D",
            },
        ]
        ```

        :return: List of time grain dictionaries (empty list if not
            supported)
        """

    # =========================================================================
    # Drilling
    # =========================================================================

    def has_drill_by_columns(self, column_names: list[str]) -> bool:
        """
        Check if the specified columns support drill-by operations.

        Drill-by allows users to navigate from aggregated views to detailed
        data by grouping on specific dimensions. This method determines
        whether the given columns can be used for drill-by in the current
        datasource.

        For SQL datasources, this typically checks if columns are marked as
        groupable in the metadata. For semantic views, it checks against the
        semantic layer's dimension definitions.

        :param column_names: List of column names to check
        :return: True if all columns support drill-by, False otherwise
        """

    # =========================================================================
    # Optional Properties
    # =========================================================================

    @property
    def is_rls_supported(self) -> bool:
        """
        Whether this explorable supports Row Level Security.

        Row Level Security (RLS) allows filtering data based on user
        identity. SQL-based datasources typically support this via SQL
        queries, while semantic layers may handle security at the semantic
        layer level.

        :return: True if RLS is supported, False otherwise
        """

    @property
    def query_language(self) -> str | None:
        """
        Query language identifier for syntax highlighting.

        Specifies the language used in queries for proper syntax highlighting
        in the UI (e.g., 'sql', 'graphql', 'jsoniq').

        :return: Language identifier string, or None if not applicable
        """

    # =========================================================================
    # Compatibility
    # =========================================================================

    def get_compatible_metrics(
        self,
        selected_metrics: list[str],
        selected_dimensions: list[str],
    ) -> list[str]:
        """
        Return the names of metrics that can be queried alongside the current
        selection of metrics and dimensions.

        SQL datasets always return every metric name unchanged — there is no
        concept of incompatibility at the SQL layer. Semantic views delegate
        to ``SemanticView.get_compatible_metrics`` so the semantic layer can
        enforce its own join / grain constraints.

        :param selected_metrics: Metric names already chosen by the user.
        :param selected_dimensions: Dimension names already chosen by the
            user.
        :return: Names of metrics the user is still allowed to add.
        """

    def get_compatible_dimensions(
        self,
        selected_metrics: list[str],
        selected_dimensions: list[str],
    ) -> list[str]:
        """
        Return the names of dimensions that can be queried alongside the
        current selection of metrics and dimensions.

        SQL datasets always return every column name unchanged. Semantic
        views delegate to ``SemanticView.get_compatible_dimensions``.

        :param selected_metrics: Metric names already chosen by the user.
        :param selected_dimensions: Dimension names already chosen by the
            user.
        :return: Names of dimensions the user is still allowed to add.
        """