superset2/superset/explorables/base.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Base protocol for explorable data sources in Superset.

An "explorable" is any data source that can be explored to create charts,
including SQL datasets, saved queries, and semantic layer views.
"""

from __future__ import annotations

from collections.abc import Hashable
from datetime import datetime
from typing import Any, Protocol, runtime_checkable, TYPE_CHECKING, TypedDict

if TYPE_CHECKING:
    from superset.common.query_object import QueryObject
    from superset.models.helpers import QueryResult
    from superset.superset_typing import ExplorableData, QueryObjectDict


class TimeGrainDict(TypedDict):
    """
    TypedDict for time grain options returned by get_time_grains.

    Represents a time granularity option that can be used for grouping
    temporal data. Each time grain specifies how to bucket timestamps.

    All three keys are required; only ``duration`` is allowed to hold ``None``.

    Attributes:
        name: Display name for the time grain (e.g., "Hour", "Day", "Week")
        function: Implementation-specific expression for applying the grain.
            For SQL datasources, this is typically a SQL expression template
            like "DATE_TRUNC('hour', {col})".
        duration: ISO 8601 duration string (e.g., "PT1H", "P1D", "P1W"),
            or None. NOTE(review): presumably None means the grain has no
            fixed duration - confirm against implementations.
    """

    # Human-readable label shown for the grain.
    name: str
    # Implementation-specific expression or identifier used to apply the grain.
    function: str
    # ISO 8601 duration string; the annotation also permits None.
    duration: str | None


@runtime_checkable
class MetricMetadata(Protocol):
    """
    Structural interface for the metadata of a single metric.

    Any object exposing the read-only attributes below describes a metric
    that is available on an explorable data source. A metric is defined
    either by a SQL expression or by a reference to a semantic layer measure.

    The protocol is ``runtime_checkable``, so ``isinstance`` checks confirm
    that each attribute is present; they do not validate the value types.

    Attributes:
        metric_name: Unique identifier for the metric
        expression: SQL expression or reference used to calculate the metric
        verbose_name: Human-readable name shown in the UI
        description: Explanation of what the metric represents
        d3format: D3 format string applied to numeric values
        currency: Currency configuration for the metric (JSON object)
        warning_text: Warning message shown when the metric is used
        certified_by: Person or entity that certified the metric
        certification_details: Details about the certification
    """

    @property
    def metric_name(self) -> str:
        """Return the unique identifier of the metric."""
        ...

    @property
    def expression(self) -> str:
        """Return the SQL expression or reference that calculates the metric."""
        ...

    @property
    def verbose_name(self) -> str | None:
        """Return the human-readable display name, if any."""
        ...

    @property
    def description(self) -> str | None:
        """Return the description of what the metric represents, if any."""
        ...

    @property
    def d3format(self) -> str | None:
        """Return the D3 format string for numeric values, if any."""
        ...

    @property
    def currency(self) -> dict[str, Any] | None:
        """Return the currency configuration (JSON object), if any."""
        ...

    @property
    def warning_text(self) -> str | None:
        """Return the warning message to display with this metric, if any."""
        ...

    @property
    def certified_by(self) -> str | None:
        """Return the person or entity that certified the metric, if any."""
        ...

    @property
    def certification_details(self) -> str | None:
        """Return the details about the certification, if any."""
        ...


@runtime_checkable
class ColumnMetadata(Protocol):
    """
    Structural interface for the metadata of a single column.

    Any object exposing the read-only attributes below describes a
    column/dimension that is available on an explorable data source.
    Columns are used for grouping, filtering, and dimension-based analysis.

    The protocol is ``runtime_checkable``, so ``isinstance`` checks confirm
    that each attribute is present; they do not validate the value types.

    Attributes:
        column_name: Unique identifier for the column
        type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME')
        is_dttm: Whether the column holds a date or time value
        verbose_name: Human-readable name shown in the UI
        description: Explanation of what the column represents
        groupby: Whether the column may be used for grouping/aggregation
        filterable: Whether the column may be used in filters
        expression: SQL expression, when this is a calculated column
        python_date_format: Python datetime format string for temporal columns
        advanced_data_type: Advanced data type classification
        extra: Additional metadata stored as JSON
    """

    @property
    def column_name(self) -> str:
        """Return the unique identifier of the column."""
        ...

    @property
    def type(self) -> str:
        """Return the SQL data type of the column."""
        ...

    @property
    def is_dttm(self) -> bool:
        """Return whether the column holds a date or time value."""
        ...

    @property
    def verbose_name(self) -> str | None:
        """Return the human-readable display name, if any."""
        ...

    @property
    def description(self) -> str | None:
        """Return the description of what the column represents, if any."""
        ...

    @property
    def groupby(self) -> bool:
        """Return whether the column may be used for grouping/aggregation."""
        ...

    @property
    def filterable(self) -> bool:
        """Return whether the column may be used in filters."""
        ...

    @property
    def expression(self) -> str | None:
        """Return the SQL expression when this is a calculated column."""
        ...

    @property
    def python_date_format(self) -> str | None:
        """Return the Python datetime format string for temporal columns."""
        ...

    @property
    def advanced_data_type(self) -> str | None:
        """Return the advanced data type classification, if any."""
        ...

    @property
    def extra(self) -> str | None:
        """Return the additional metadata stored as JSON, if any."""
        ...


@runtime_checkable
class Explorable(Protocol):
    """
    Structural interface for anything that can be explored to build charts.

    The protocol captures the minimal surface a data source has to expose
    in order to be visualizable in Superset. It is implemented by:
    - BaseDatasource (SQL datasets and queries)
    - SemanticView (semantic layer views)
    - Future: Other data source types

    Only the methods and properties essential for query execution, caching,
    and security are part of the protocol.

    The class is ``runtime_checkable``: ``isinstance(obj, Explorable)``
    verifies that every member below is present on ``obj``, but it does not
    validate signatures or return types.
    """

    # =========================================================================
    # Core Query Interface
    # =========================================================================

    def get_query_result(self, query_object: QueryObject) -> QueryResult:
        """
        Run the query described by ``query_object`` and return its outcome.

        This is the main entry point for retrieving data. The query object
        states what to fetch (columns, metrics, filters, time range, etc.),
        and the returned QueryResult wraps a pandas DataFrame with the rows.

        :param query_object: QueryObject describing the query

        :return: QueryResult containing:
            - df: pandas DataFrame with query results
            - query: string representation of the executed query
            - duration: query execution time
            - status: QueryStatus (SUCCESS/FAILED)
            - error_message: error details if query failed
        """
        ...

    def get_query_str(self, query_obj: QueryObjectDict) -> str:
        """
        Render the query as a string without running it.

        The returned text represents the query that would be executed for
        the given query object. It is shown in the UI and is useful for
        debugging.

        :param query_obj: Dictionary describing the query
        :return: String representation of the query (SQL, GraphQL, etc.)
        """
        ...

    # =========================================================================
    # Identity & Metadata
    # =========================================================================

    @property
    def id(self) -> int | str:
        """
        Primary key of this explorable.

        Database lookups, such as row-level security filter resolution,
        rely on this value. Reading it must not trigger expensive
        operations like database engine connections.

        :return: Primary key (typically int, but may be str for some implementations)
        """
        ...

    @property
    def uid(self) -> str:
        """
        Unique identifier of this explorable.

        The value becomes part of cache keys and is used for tracking. It
        should survive application restarts unchanged, yet change whenever
        the explorable's data or structure changes.

        Format convention: "{type}_{id}" (e.g., "table_123", "semantic_view_abc")

        :return: Unique identifier string
        """
        ...

    @property
    def type(self) -> str:
        """
        Type discriminator of this explorable.

        Names the kind of data source (e.g., 'table', 'query', 'semantic_view')
        and drives routing and type-specific behavior.

        :return: Type identifier string
        """
        ...

    @property
    def metrics(self) -> list[MetricMetadata]:
        """
        Metric metadata objects available on this explorable.

        At a minimum, every object should provide:
        - metric_name: str - the metric's name
        - expression: str - the metric's calculation expression

        Consumed by validation, autocomplete, and query building.

        :return: List of metric metadata objects
        """
        ...

    # TODO: rename to dimensions
    @property
    def columns(self) -> list[ColumnMetadata]:
        """
        Column metadata objects available on this explorable.

        At a minimum, every object should provide:
        - column_name: str - the column's name
        - type: str - the column's data type
        - is_dttm: bool - whether it's a datetime column

        Consumed by validation, autocomplete, and query building.

        :return: List of column metadata objects
        """
        ...

    # TODO: remove and use columns instead
    @property
    def column_names(self) -> list[str]:
        """
        Names of all available columns.

        A flat list of every column name in the explorable, intended for
        quick validation and filtering.

        :return: List of column name strings
        """
        ...

    @property
    def data(self) -> ExplorableData:
        """
        Complete metadata payload sent to the frontend.

        The returned dictionary carries everything the Explore UI needs,
        including columns, metrics, and other configuration.

        Required keys in the returned dictionary:
        - id: unique identifier (int or str)
        - uid: unique string identifier
        - name: display name
        - type: explorable type ('table', 'query', 'semantic_view', etc.)
        - columns: list of column metadata dicts (with column_name, type, etc.)
        - metrics: list of metric metadata dicts (with metric_name, expression, etc.)
        - database: database metadata dict (with id, backend, etc.)

        Optional keys:
        - description: human-readable description
        - schema: schema name (if applicable)
        - catalog: catalog name (if applicable)
        - cache_timeout: default cache timeout
        - offset: timezone offset
        - owners: list of owner IDs
        - verbose_map: dict mapping column/metric names to display names

        :return: Dictionary with complete explorable metadata
        """
        ...

    # =========================================================================
    # Caching
    # =========================================================================

    @property
    def cache_timeout(self) -> int | None:
        """
        Default cache timeout, in seconds.

        Controls how long query results stay cached. A value of None means
        the system default cache timeout applies.

        :return: Cache timeout in seconds, or None for system default
        """
        ...

    @property
    def changed_on(self) -> datetime | None:
        """
        Timestamp of the last modification.

        Drives cache invalidation: once this value changes, results cached
        for this explorable are no longer valid.

        :return: Datetime of last modification, or None
        """
        ...

    def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]:
        """
        Explorable-specific components to add to the cache key.

        The returned values are mixed into cache keys so that cached
        results are invalidated when the explorable's underlying data or
        configuration changes in ways that uid or changed_on do not capture.

        :param query_obj: The query being executed
        :return: List of additional hashable values for cache key
        """
        ...

    # =========================================================================
    # Security
    # =========================================================================

    @property
    def perm(self) -> str:
        """
        Permission string of this explorable.

        The security manager uses it to decide whether a user may access
        this data source. The format depends on the explorable type
        (e.g., "[database].[schema].[table]" for SQL tables).

        :return: Permission identifier string
        """
        ...

    # =========================================================================
    # Time/Date Handling
    # =========================================================================

    @property
    def offset(self) -> int:
        """
        Timezone offset applied to datetime columns.

        Datetime values are normalized to the user's timezone with this
        offset. It is 0 for UTC, or otherwise an offset in seconds.

        :return: Timezone offset in seconds (0 for UTC)
        """
        ...

    # =========================================================================
    # Time Granularity
    # =========================================================================

    def get_time_grains(self) -> list[TimeGrainDict]:
        """
        List the time granularities available for temporal grouping.

        Every entry is a time grain option that can be used to group
        temporal data, i.e. a rule for bucketing timestamps (e.g., by
        hour, day, week, month).

        Each dictionary in the returned list should contain:
        - name: str - Display name (e.g., "Hour", "Day", "Week")
        - function: str - How to apply the grain (implementation-specific)
        - duration: str | None - ISO 8601 duration string
          (e.g., "PT1H", "P1D", "P1W")

        For SQL datasources, the function is typically a SQL expression template
        like "DATE_TRUNC('hour', {col})". For semantic layers, it might be a
        semantic layer-specific identifier like "hour" or "day".

        Return an empty list if time grains are not supported or applicable.

        Example return value:
        ```python
        [
            {
                "name": "Second",
                "function": "DATE_TRUNC('second', {col})",
                "duration": "PT1S",
            },
            {
                "name": "Minute",
                "function": "DATE_TRUNC('minute', {col})",
                "duration": "PT1M",
            },
            {
                "name": "Hour",
                "function": "DATE_TRUNC('hour', {col})",
                "duration": "PT1H",
            },
            {
                "name": "Day",
                "function": "DATE_TRUNC('day', {col})",
                "duration": "P1D",
            },
        ]
        ```

        :return: List of time grain dictionaries (empty list if not supported)
        """
        ...

    # =========================================================================
    # Drilling
    # =========================================================================

    def has_drill_by_columns(self, column_names: list[str]) -> bool:
        """
        Tell whether the given columns support drill-by operations.

        Drill-by lets users move from aggregated views to detailed data by
        grouping on specific dimensions. This method decides whether the
        given columns can serve that purpose in the current datasource.

        For SQL datasources, this typically checks if columns are marked as
        groupable in the metadata. For semantic views, it checks against the
        semantic layer's dimension definitions.

        :param column_names: List of column names to check
        :return: True if all columns support drill-by, False otherwise
        """
        ...

    # =========================================================================
    # Optional Properties
    # =========================================================================
    # NOTE: these are still protocol members, so runtime ``isinstance`` checks
    # against this protocol require them to be present on the object.

    @property
    def is_rls_supported(self) -> bool:
        """
        Whether Row Level Security is supported by this explorable.

        Row Level Security (RLS) filters data based on user identity.
        SQL-based datasources typically support this via SQL queries, while
        semantic layers may handle security at the semantic layer level.

        :return: True if RLS is supported, False otherwise
        """
        ...

    @property
    def query_language(self) -> str | None:
        """
        Identifier of the query language, used for syntax highlighting.

        Names the language queries are written in so the UI can highlight
        them properly (e.g., 'sql', 'graphql', 'jsoniq').

        :return: Language identifier string, or None if not applicable
        """
        ...

    # =========================================================================
    # Compatibility
    # =========================================================================

    def get_compatible_metrics(
        self,
        selected_metrics: list[str],
        selected_dimensions: list[str],
    ) -> list[str]:
        """
        List the metrics that may be queried together with the current
        selection of metrics and dimensions.

        SQL datasets always return every metric name unchanged, because
        the SQL layer has no concept of incompatibility. Semantic views
        delegate to ``SemanticView.get_compatible_metrics`` so the semantic
        layer can enforce its own join / grain constraints.

        :param selected_metrics: Metric names already chosen by the user.
        :param selected_dimensions: Dimension names already chosen by the user.
        :return: Names of metrics the user is still allowed to add.
        """
        ...

    def get_compatible_dimensions(
        self,
        selected_metrics: list[str],
        selected_dimensions: list[str],
    ) -> list[str]:
        """
        List the dimensions that may be queried together with the current
        selection of metrics and dimensions.

        SQL datasets always return every column name unchanged. Semantic
        views delegate to ``SemanticView.get_compatible_dimensions``.

        :param selected_metrics: Metric names already chosen by the user.
        :param selected_dimensions: Dimension names already chosen by the user.
        :return: Names of dimensions the user is still allowed to add.
        """
        ...