Compare commits

...

5 Commits

Author SHA1 Message Date
Beto Dealmeida
b11ac4dd90 chore: container for testing 2026-02-10 15:39:50 -05:00
Beto Dealmeida
e182520bb3 feat: Explore integration 2026-02-10 15:39:50 -05:00
Beto Dealmeida
bfa4d5bd92 feat: models and DAOs 2026-02-10 15:38:15 -05:00
Beto Dealmeida
0e9c71e283 chore: remove AdhocFilter 2026-02-10 11:23:53 -05:00
Beto Dealmeida
5c1e250b77 feat: semantic layer extension 2026-02-09 15:10:06 -05:00
25 changed files with 5918 additions and 39 deletions

View File

@@ -52,6 +52,7 @@ jobs:
SUPERSET_SECRET_KEY: not-a-secret
run: |
pytest --durations-min=0.5 --cov=superset/sql/ ./tests/unit_tests/sql/ --cache-clear --cov-fail-under=100
pytest --durations-min=0.5 --cov=superset/semantic_layers/ ./tests/unit_tests/semantic_layers/ --cache-clear --cov-fail-under=100
- name: Upload code coverage
uses: codecov/codecov-action@v5
with:

View File

@@ -105,7 +105,12 @@ class CeleryConfig:
CELERY_CONFIG = CeleryConfig
FEATURE_FLAGS = {"ALERT_REPORTS": True, "DATASET_FOLDERS": True}
FEATURE_FLAGS = {
"ALERT_REPORTS": True,
"DATASET_FOLDERS": True,
"ENABLE_EXTENSIONS": True,
}
EXTENSIONS_PATH = "/app/docker/extensions"
ALERT_REPORTS_NOTIFICATION_DRY_RUN = True
WEBDRIVER_BASEURL = f"http://superset_app{os.environ.get('SUPERSET_APP_ROOT', '/')}/" # When using docker compose baseurl should be http://superset_nginx{ENV{BASEPATH}}/ # noqa: E501
# The base URL for the email report hyperlinks.

View File

@@ -0,0 +1,114 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from typing import Any, Protocol, runtime_checkable, TypeVar
from pydantic import BaseModel
from superset_core.semantic_layers.semantic_view import SemanticView
ConfigT = TypeVar("ConfigT", bound=BaseModel, contravariant=True)
SemanticViewT = TypeVar("SemanticViewT", bound="SemanticView")
# TODO (betodealmeida): convert to ABC
@runtime_checkable
class SemanticLayer(Protocol[ConfigT, SemanticViewT]):
    """
    A protocol for semantic layers.

    Generic over ``ConfigT`` (the pydantic model describing the layer's
    configuration) and ``SemanticViewT`` (the concrete semantic-view type the
    layer exposes). The protocol is ``runtime_checkable`` so implementations
    can be validated structurally with ``isinstance`` (method presence only,
    per Protocol rules).
    """

    @classmethod
    def from_configuration(
        cls,
        configuration: dict[str, Any],
    ) -> SemanticLayer[ConfigT, SemanticViewT]:
        """
        Create a semantic layer from its configuration.

        :param configuration: raw (already deserialized) configuration payload
        :returns: a configured semantic layer instance
        """

    @classmethod
    def get_configuration_schema(
        cls,
        configuration: ConfigT | None = None,
    ) -> dict[str, Any]:
        """
        Get the JSON schema for the configuration needed to add the semantic layer.

        A partial configuration `configuration` can be sent to improve the schema,
        allowing for progressive validation and better UX. For example, a semantic
        layer might require:

            - auth information
            - a database

        If the user provides the auth information, a client can send the partial
        configuration to this method, and the resulting JSON schema would include
        the list of databases the user has access to, allowing a dropdown to be
        populated.

        The Snowflake semantic layer has an example implementation of this method,
        where database and schema names are populated based on the provided
        connection info.

        :param configuration: optional partial configuration used to refine
            the returned schema
        :returns: a JSON-schema dict describing the (remaining) configuration
        """

    @classmethod
    def get_runtime_schema(
        cls,
        configuration: ConfigT,
        runtime_data: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """
        Get the JSON schema for the runtime parameters needed to load semantic views.

        This returns the schema needed to connect to a semantic view given the
        configuration for the semantic layer. For example, a semantic layer might
        be configured by:

            - auth information
            - an optional database

        If the user does not provide a database when creating the semantic layer,
        the runtime schema would require the database name to be provided before
        loading any semantic views. This allows users to create semantic layers
        that connect to a specific database (or project, account, etc.), or that
        allow users to select it at query time.

        The Snowflake semantic layer has an example implementation of this method,
        where database and schema names are required if they were not provided in
        the initial configuration.

        :param configuration: the validated layer configuration
        :param runtime_data: optional partial runtime data used to refine the schema
        :returns: a JSON-schema dict describing the runtime parameters
        """

    def get_semantic_views(
        self,
        runtime_configuration: dict[str, Any],
    ) -> set[SemanticViewT]:
        """
        Get the semantic views available in the semantic layer.

        The runtime configuration can provide information like a given project or
        schema, used to restrict the semantic views returned.
        """

    def get_semantic_view(
        self,
        name: str,
        additional_configuration: dict[str, Any],
    ) -> SemanticViewT:
        """
        Get a specific semantic view by its name and additional configuration.

        :param name: name identifying the semantic view within the layer
        :param additional_configuration: runtime parameters (see
            `get_runtime_schema`) needed to load the view
        """

View File

@@ -0,0 +1,105 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import enum
from typing import Protocol, runtime_checkable
from superset_core.semantic_layers.types import (
Dimension,
Filter,
GroupLimit,
Metric,
OrderTuple,
SemanticResult,
)
# TODO (betodealmeida): move to the extension JSON
class SemanticViewFeature(enum.Enum):
    """
    Custom features supported by semantic layers.

    Views advertise the subset they support via the ``SemanticView.features``
    frozenset, so callers can feature-gate optional query capabilities.
    """

    # ORDER BY entries may be ad-hoc expressions, not just metrics/dimensions.
    ADHOC_EXPRESSIONS_IN_ORDERBY = "ADHOC_EXPRESSIONS_IN_ORDERBY"
    # Supports the `group_limit` argument (top/bottom N dimension groups).
    GROUP_LIMIT = "GROUP_LIMIT"
    # Presumably: supports folding non-top groups into an "others" bucket
    # (cf. `GroupLimit.group_others`) — TODO confirm.
    GROUP_OTHERS = "GROUP_OTHERS"
# TODO (betodealmeida): convert to ABC
@runtime_checkable
class SemanticView(Protocol):
    """
    A protocol for semantic views.

    A semantic view exposes metrics and dimensions and can execute semantic
    queries against them, returning `SemanticResult` objects.
    """

    # Optional capabilities this view supports; callers should check membership
    # before using feature-specific arguments (e.g. `group_limit`).
    features: frozenset[SemanticViewFeature]

    def uid(self) -> str:
        """
        Returns a unique identifier for the semantic view.
        """

    def get_dimensions(self) -> set[Dimension]:
        """
        Get the dimensions defined in the semantic view.
        """

    def get_metrics(self) -> set[Metric]:
        """
        Get the metrics defined in the semantic view.
        """

    def get_values(
        self,
        dimension: Dimension,
        filters: set[Filter] | None = None,
    ) -> SemanticResult:
        """
        Return distinct values for a dimension.

        :param dimension: the dimension to enumerate
        :param filters: optional predicates restricting the rows considered
        """

    def get_dataframe(
        self,
        metrics: list[Metric],
        dimensions: list[Dimension],
        filters: set[Filter] | None = None,
        order: list[OrderTuple] | None = None,
        limit: int | None = None,
        offset: int | None = None,
        *,
        group_limit: GroupLimit | None = None,
    ) -> SemanticResult:
        """
        Execute a semantic query and return the results as a DataFrame.

        :param group_limit: keyword-only; only meaningful when the view
            advertises `SemanticViewFeature.GROUP_LIMIT`
        """

    def get_row_count(
        self,
        metrics: list[Metric],
        dimensions: list[Dimension],
        filters: set[Filter] | None = None,
        order: list[OrderTuple] | None = None,
        limit: int | None = None,
        offset: int | None = None,
        *,
        group_limit: GroupLimit | None = None,
    ) -> SemanticResult:
        """
        Execute a query and return the number of rows the result would have.

        Takes the same parameters as `get_dataframe` so the count matches the
        query it describes.
        """

View File

@@ -0,0 +1,328 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import enum
from dataclasses import dataclass
from datetime import date, datetime, time, timedelta
from functools import total_ordering
from typing import Type as TypeOf
from pandas import DataFrame
__all__ = [
"BINARY",
"BOOLEAN",
"DATE",
"DATETIME",
"DECIMAL",
"Day",
"Dimension",
"Hour",
"INTEGER",
"INTERVAL",
"Minute",
"Month",
"NUMBER",
"OBJECT",
"Quarter",
"Second",
"STRING",
"TIME",
"Week",
"Year",
]
class Type:
    """
    Base class for types.

    These classes are used as *class objects* rather than instances: e.g.
    ``Dimension.type`` is annotated ``TypeOf[Type]`` and holds one of the
    subclasses below, enabling ``issubclass`` checks.
    """


class INTEGER(Type):
    """
    Represents an integer type.
    """


class NUMBER(Type):
    """
    Represents a number type.
    """


class DECIMAL(Type):
    """
    Represents a decimal type.
    """


class STRING(Type):
    """
    Represents a string type.
    """


class BOOLEAN(Type):
    """
    Represents a boolean type.
    """


class DATE(Type):
    """
    Represents a date type.
    """


class TIME(Type):
    """
    Represents a time type.
    """


class DATETIME(DATE, TIME):
    """
    Represents a datetime type.

    Inherits from both DATE and TIME, so ``issubclass(DATETIME, DATE)`` and
    ``issubclass(DATETIME, TIME)`` both hold.
    """


class INTERVAL(Type):
    """
    Represents an interval type.
    """


class OBJECT(Type):
    """
    Represents an object type.
    """


class BINARY(Type):
    """
    Represents a binary type.
    """
@dataclass(frozen=True)
@total_ordering
class Grain:
    """
    Base class for time and date grains with comparison support.

    Grains compare — and hash — by their ``value`` alone, so two grains with
    the same period are interchangeable regardless of name or representation.
    ``total_ordering`` derives ``__le__``/``__gt__``/``__ge__`` from the
    ``__eq__``/``__lt__`` pair defined here.

    Attributes:
        name: Human-readable name of the grain (e.g., "Second")
        representation: ISO 8601 representation (e.g., "PT1S")
        value: Time period as a timedelta
    """

    name: str
    representation: str
    value: timedelta

    def __eq__(self, other: object) -> bool:
        # Equality is defined solely by the period length.
        if isinstance(other, Grain):
            return self.value == other.value
        return NotImplemented

    def __lt__(self, other: object) -> bool:
        if isinstance(other, Grain):
            return self.value < other.value
        return NotImplemented

    def __hash__(self) -> int:
        # Must stay consistent with __eq__: equality only considers `value`,
        # so including name/representation here would let two *equal* grains
        # hash differently, breaking use as set members / dict keys.
        return hash(self.value)
# Concrete grains, typically referenced as class objects (cf. `Dimension.grain`,
# annotated `TypeOf[Grain] | None`). Month/Quarter/Year use fixed-length
# approximations (30/90/365 days): sufficient for the relative ordering defined
# on `Grain`, but not for calendar arithmetic.
class Second(Grain):
    name = "Second"
    representation = "PT1S"
    value = timedelta(seconds=1)


class Minute(Grain):
    name = "Minute"
    representation = "PT1M"
    value = timedelta(minutes=1)


class Hour(Grain):
    name = "Hour"
    representation = "PT1H"
    value = timedelta(hours=1)


class Day(Grain):
    name = "Day"
    representation = "P1D"
    value = timedelta(days=1)


class Week(Grain):
    name = "Week"
    representation = "P1W"
    value = timedelta(weeks=1)


class Month(Grain):
    name = "Month"
    representation = "P1M"
    value = timedelta(days=30)  # approximation


class Quarter(Grain):
    name = "Quarter"
    representation = "P3M"
    value = timedelta(days=90)  # approximation


class Year(Grain):
    name = "Year"
    representation = "P1Y"
    value = timedelta(days=365)  # approximation
@dataclass(frozen=True)
class Dimension:
    """
    A dimension (attribute) exposed by a semantic view.
    """

    # Stable identifier used to reference the dimension in queries.
    id: str
    # Human-readable name.
    name: str
    # Data type, as one of the `Type` subclasses (a class object, not an instance).
    type: TypeOf[Type]
    # Underlying expression/definition, when available.
    definition: str | None = None
    description: str | None = None
    # Temporal grain for time dimensions (a `Grain` subclass), if any.
    grain: TypeOf[Grain] | None = None
@dataclass(frozen=True)
class Metric:
    """
    A metric (measure) exposed by a semantic view.
    """

    # Stable identifier used to reference the metric in queries.
    id: str
    # Human-readable name.
    name: str
    # Result data type, as one of the `Type` subclasses (class object).
    type: TypeOf[Type]
    # Underlying expression/definition (required here, unlike on `Dimension`).
    definition: str
    description: str | None = None
@dataclass(frozen=True)
class AdhocExpression:
    """
    A free-form expression defined at query time; usable in ORDER BY via
    `OrderTuple` (cf. `SemanticViewFeature.ADHOC_EXPRESSIONS_IN_ORDERBY`).
    """

    # Identifier for referencing the expression.
    id: str
    # The expression text itself.
    definition: str
class Operator(str, enum.Enum):
    """
    Comparison operators usable in filter predicates.

    Inherits from `str` so members compare equal to (and serialize as) their
    SQL-flavored string values.
    """

    EQUALS = "="
    NOT_EQUALS = "!="
    GREATER_THAN = ">"
    LESS_THAN = "<"
    GREATER_THAN_OR_EQUAL = ">="
    LESS_THAN_OR_EQUAL = "<="
    # Set membership — pair with a frozenset `Filter.value`.
    IN = "IN"
    NOT_IN = "NOT IN"
    # Pattern matching.
    LIKE = "LIKE"
    NOT_LIKE = "NOT LIKE"
    # Unary null checks (no comparison value required).
    IS_NULL = "IS NULL"
    IS_NOT_NULL = "IS NOT NULL"
    # Free-form predicate — semantics defined by the semantic layer;
    # TODO confirm where the expression text is carried.
    ADHOC = "ADHOC"
# Scalar values a filter can compare against; None is an allowed value.
FilterValues = str | int | float | bool | datetime | date | time | timedelta | None


class PredicateType(enum.Enum):
    """
    Where a filter predicate is applied: before aggregation (WHERE) or after
    aggregation (HAVING).
    """

    WHERE = "WHERE"
    HAVING = "HAVING"
@dataclass(frozen=True, order=True)
class Filter:
    """
    A single filter predicate.

    NOTE(review): `order=True` generates field-tuple comparisons; ordering two
    filters compares their `column` values, and `Dimension`/`Metric` define no
    ordering — sorting such filters would raise TypeError. Confirm ordering is
    only relied on for filters with comparable fields.
    """

    # Pre-aggregation (WHERE) or post-aggregation (HAVING).
    type: PredicateType
    # Target of the predicate; None presumably for ADHOC predicates — TODO confirm.
    column: Dimension | Metric | None
    operator: Operator
    # A scalar, or a frozenset of scalars for IN / NOT IN.
    value: FilterValues | frozenset[FilterValues]
class OrderDirection(enum.Enum):
    """Sort direction for ORDER BY terms."""

    ASC = "ASC"
    DESC = "DESC"


# A single ORDER BY term: what to sort by, and in which direction.
OrderTuple = tuple[Metric | Dimension | AdhocExpression, OrderDirection]
@dataclass(frozen=True)
class GroupLimit:
    """
    Limit query to top/bottom N combinations of specified dimensions.

    The `filters` parameter allows specifying separate filter constraints for the
    group limit subquery. This is useful when you want to determine the top N groups
    using different criteria (e.g., a different time range) than the main query.

    For example, you might want to find the top 10 products by sales over the last
    30 days, but then show daily sales for those products over the last 7 days.

    NOTE(review): although frozen, instances are not hashable in practice —
    `dimensions` (list) and `filters` (set) are unhashable field types, so
    calling hash() on a GroupLimit raises TypeError.
    """

    # Dimensions whose value combinations are ranked.
    dimensions: list[Dimension]
    # Number of groups to keep.
    top: int
    # Metric used for ranking; behavior when None is layer-defined — TODO confirm.
    metric: Metric | None
    # Whether top (DESC) or bottom (ASC) groups are kept.
    direction: OrderDirection = OrderDirection.DESC
    # Presumably folds remaining groups into an "others" bucket
    # (cf. `SemanticViewFeature.GROUP_OTHERS`) — TODO confirm.
    group_others: bool = False
    # Optional separate filters for the ranking subquery (see docstring).
    filters: set[Filter] | None = None
@dataclass(frozen=True)
class SemanticRequest:
    """
    Represents a request made to obtain semantic results.

    This could be a SQL query, an HTTP request, etc.
    """

    # Kind of request (e.g. SQL vs. HTTP) — free-form string; TODO confirm the
    # accepted values.
    type: str
    # The request payload itself (the SQL text, the URL, etc.).
    definition: str
@dataclass(frozen=True)
class SemanticResult:
    """
    Represents the results of a semantic query.

    This includes any requests (SQL queries, HTTP requests) that were performed
    in order to obtain the results, in order to help troubleshooting.
    """

    # Requests issued while producing the results.
    requests: list[SemanticRequest]
    # TODO (betodealmeida): convert to PyArrow Table
    results: DataFrame
@dataclass(frozen=True)
class SemanticQuery:
    """
    Represents a semantic query.

    Field-for-field mirror of the parameters accepted by
    `SemanticView.get_dataframe`.
    """

    metrics: list[Metric]
    dimensions: list[Dimension]
    filters: set[Filter] | None = None
    order: list[OrderTuple] | None = None
    limit: int | None = None
    offset: int | None = None
    group_limit: GroupLimit | None = None

View File

@@ -19,6 +19,15 @@
import { DatasourceType } from './types/Datasource';
const DATASOURCE_TYPE_MAP: Record<string, DatasourceType> = {
table: DatasourceType.Table,
query: DatasourceType.Query,
dataset: DatasourceType.Dataset,
sl_table: DatasourceType.SlTable,
saved_query: DatasourceType.SavedQuery,
semantic_view: DatasourceType.SemanticView,
};
export default class DatasourceKey {
readonly id: number;
@@ -27,8 +36,7 @@ export default class DatasourceKey {
constructor(key: string) {
const [idStr, typeStr] = key.split('__');
this.id = parseInt(idStr, 10);
this.type = DatasourceType.Table; // default to SqlaTable model
this.type = typeStr === 'query' ? DatasourceType.Query : this.type;
this.type = DATASOURCE_TYPE_MAP[typeStr] ?? DatasourceType.Table;
}
public toString() {

View File

@@ -26,6 +26,7 @@ export enum DatasourceType {
Dataset = 'dataset',
SlTable = 'sl_table',
SavedQuery = 'saved_query',
SemanticView = 'semantic_view',
}
export interface Currency {

View File

@@ -151,11 +151,8 @@ export const getSlicePayload = async (
const [id, typeString] = formData.datasource.split('__');
datasourceId = parseInt(id, 10);
const formattedTypeString =
typeString.charAt(0).toUpperCase() + typeString.slice(1);
if (formattedTypeString in DatasourceType) {
datasourceType =
DatasourceType[formattedTypeString as keyof typeof DatasourceType];
if (Object.values(DatasourceType).includes(typeString as DatasourceType)) {
datasourceType = typeString as DatasourceType;
}
}

View File

@@ -124,7 +124,7 @@ class GetExploreCommand(BaseCommand, ABC):
security_manager.raise_for_access(datasource=datasource)
viz_type = form_data.get("viz_type")
if not viz_type and datasource and datasource.default_endpoint:
if not viz_type and datasource and getattr(datasource, "default_endpoint", None):
raise WrongEndpointError(redirect=datasource.default_endpoint)
form_data["datasource"] = (

View File

@@ -107,6 +107,8 @@ from superset.sql.parse import Table
from superset.superset_typing import (
AdhocColumn,
AdhocMetric,
DatasetColumnData,
DatasetMetricData,
ExplorableData,
Metric,
QueryObjectDict,
@@ -463,8 +465,8 @@ class BaseDatasource(
# sqla-specific
"sql": self.sql,
# one to many
"columns": [o.data for o in self.columns],
"metrics": [o.data for o in self.metrics],
"columns": [cast(DatasetColumnData, o.data) for o in self.columns],
"metrics": [cast(DatasetMetricData, o.data) for o in self.metrics],
"folders": self.folders,
# TODO deprecate, move logic to JS
"order_by_choices": self.order_by_choices,

View File

@@ -0,0 +1,99 @@
"""
Script to create a Pandas semantic layer and Sales semantic view in Superset.
Run this inside the superset_app container:
python /app/superset/create_pandas_semantic_layer.py
"""
from __future__ import annotations
import logging
import sys
from typing import TYPE_CHECKING
# Add the Superset application directory to the Python path
sys.path.insert(0, "/app")
from superset.app import create_app
from superset.extensions import db
from superset.utils import json
if TYPE_CHECKING:
from superset.semantic_layers.models import SemanticLayer, SemanticView
app = create_app()
app.app_context().push()
# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
def create_pandas_semantic_layer() -> SemanticLayer:
    """Build, persist, and return a Pandas-backed semantic layer."""
    from superset.semantic_layers.models import SemanticLayer

    logger.info("Creating Pandas semantic layer...")

    # Minimal configuration: just the name of the in-memory dataset.
    layer = SemanticLayer(
        name="Pandas Semantic Layer",
        description="In-memory semantic layer backed by a Pandas DataFrame",
        type="pandas",
        configuration=json.dumps({"dataset": "sales"}),
        cache_timeout=3600,
    )
    db.session.add(layer)
    db.session.commit()

    logger.info("Created semantic layer:")
    logger.info(" Name: %s", layer.name)
    logger.info(" UUID: %s", layer.uuid)
    logger.info(" Type: %s", layer.type)
    return layer
def create_sales_semantic_view(semantic_layer: SemanticLayer) -> SemanticView:
    """Persist and return the Sales semantic view attached to the given layer."""
    from superset.semantic_layers.models import SemanticView

    logger.info("Creating Sales semantic view...")

    view = SemanticView(
        name="sales",
        configuration="{}",
        cache_timeout=1800,
        semantic_layer_uuid=semantic_layer.uuid,
    )
    db.session.add(view)
    db.session.commit()

    logger.info("Created semantic view:")
    logger.info(" Name: %s", view.name)
    logger.info(" UUID: %s", view.uuid)
    logger.info(" Semantic Layer UUID: %s", view.semantic_layer_uuid)
    return view
def main() -> None:
    """Entry point: create the semantic layer, then the view on top of it."""
    banner = "=" * 60
    logger.info(banner)
    logger.info("Creating Pandas Semantic Layer and Sales Semantic View")
    logger.info(banner)

    layer = create_pandas_semantic_layer()
    create_sales_semantic_view(layer)
if __name__ == "__main__":
main()

View File

@@ -28,6 +28,7 @@ from superset.daos.exceptions import (
DatasourceValueIsIncorrect,
)
from superset.models.sql_lab import Query, SavedQuery
from superset.semantic_layers.models import SemanticView
from superset.utils.core import DatasourceType
logger = logging.getLogger(__name__)
@@ -40,6 +41,7 @@ class DatasourceDAO(BaseDAO[Datasource]):
DatasourceType.TABLE: SqlaTable,
DatasourceType.QUERY: Query,
DatasourceType.SAVEDQUERY: SavedQuery,
DatasourceType.SEMANTIC_VIEW: SemanticView,
}
@classmethod

View File

@@ -0,0 +1,152 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DAOs for semantic layer models."""
from __future__ import annotations
from superset.daos.base import BaseDAO
from superset.extensions import db
from superset.semantic_layers.models import SemanticLayer, SemanticView
class SemanticLayerDAO(BaseDAO[SemanticLayer]):
    """
    Data Access Object for SemanticLayer model.

    Semantic layer names are unique installation-wide; the `validate_*`
    helpers enforce that for create and update flows respectively.
    """

    @staticmethod
    def validate_uniqueness(name: str) -> bool:
        """
        Validate that semantic layer name is unique.

        :param name: Semantic layer name
        :return: True if name is unique, False otherwise
        """
        # EXISTS subquery: only checks presence, no rows are materialized.
        query = db.session.query(SemanticLayer).filter(SemanticLayer.name == name)
        return not db.session.query(query.exists()).scalar()

    @staticmethod
    def validate_update_uniqueness(layer_uuid: str, name: str) -> bool:
        """
        Validate that semantic layer name is unique for updates.

        :param layer_uuid: UUID of the semantic layer being updated
        :param name: New name to validate
        :return: True if name is unique, False otherwise
        """
        # Exclude the layer being updated so keeping its current name is valid.
        query = db.session.query(SemanticLayer).filter(
            SemanticLayer.name == name,
            SemanticLayer.uuid != layer_uuid,
        )
        return not db.session.query(query.exists()).scalar()

    @staticmethod
    def find_by_name(name: str) -> SemanticLayer | None:
        """
        Find semantic layer by name.

        :param name: Semantic layer name
        :return: SemanticLayer instance or None
        :raises MultipleResultsFound: if several layers share the name (should
            not happen when the uniqueness validations above are enforced)
        """
        return (
            db.session.query(SemanticLayer)
            .filter(SemanticLayer.name == name)
            .one_or_none()
        )

    @classmethod
    def get_semantic_views(cls, layer_uuid: str) -> list[SemanticView]:
        """
        Get all semantic views for a semantic layer.

        :param layer_uuid: UUID of the semantic layer
        :return: List of SemanticView instances
        """
        return (
            db.session.query(SemanticView)
            .filter(SemanticView.semantic_layer_uuid == layer_uuid)
            .all()
        )
class SemanticViewDAO(BaseDAO[SemanticView]):
    """
    Data Access Object for SemanticView model.

    View names are unique *within* a semantic layer (not globally), so every
    lookup/validation here is scoped by the layer's UUID.
    """

    @staticmethod
    def find_by_semantic_layer(layer_uuid: str) -> list[SemanticView]:
        """
        Find all views for a semantic layer.

        :param layer_uuid: UUID of the semantic layer
        :return: List of SemanticView instances
        """
        return (
            db.session.query(SemanticView)
            .filter(SemanticView.semantic_layer_uuid == layer_uuid)
            .all()
        )

    @staticmethod
    def validate_uniqueness(name: str, layer_uuid: str) -> bool:
        """
        Validate that view name is unique within semantic layer.

        :param name: View name
        :param layer_uuid: UUID of the semantic layer
        :return: True if name is unique within layer, False otherwise
        """
        # EXISTS subquery: only checks presence, no rows are materialized.
        query = db.session.query(SemanticView).filter(
            SemanticView.name == name,
            SemanticView.semantic_layer_uuid == layer_uuid,
        )
        return not db.session.query(query.exists()).scalar()

    @staticmethod
    def validate_update_uniqueness(view_uuid: str, name: str, layer_uuid: str) -> bool:
        """
        Validate that view name is unique within semantic layer for updates.

        :param view_uuid: UUID of the view being updated
        :param name: New name to validate
        :param layer_uuid: UUID of the semantic layer
        :return: True if name is unique within layer, False otherwise
        """
        # Exclude the view being updated so keeping its current name is valid.
        query = db.session.query(SemanticView).filter(
            SemanticView.name == name,
            SemanticView.semantic_layer_uuid == layer_uuid,
            SemanticView.uuid != view_uuid,
        )
        return not db.session.query(query.exists()).scalar()

    @staticmethod
    def find_by_name(name: str, layer_uuid: str) -> SemanticView | None:
        """
        Find semantic view by name within a semantic layer.

        :param name: View name
        :param layer_uuid: UUID of the semantic layer
        :return: SemanticView instance or None
        :raises MultipleResultsFound: if several views in the layer share the
            name (should not happen when the validations above are enforced)
        """
        return (
            db.session.query(SemanticView)
            .filter(
                SemanticView.name == name,
                SemanticView.semantic_layer_uuid == layer_uuid,
            )
            .one_or_none()
        )

View File

@@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict):
duration: str | None
@runtime_checkable
class MetricMetadata(Protocol):
"""
Protocol for metric metadata objects.
Represents a metric that's available on an explorable data source.
Metrics contain SQL expressions or references to semantic layer measures.
Attributes:
metric_name: Unique identifier for the metric
expression: SQL expression or reference for calculating the metric
verbose_name: Human-readable name for display in the UI
description: Description of what the metric represents
d3format: D3 format string for formatting numeric values
currency: Currency configuration for the metric (JSON object)
warning_text: Warning message to display when using this metric
certified_by: Person or entity that certified this metric
certification_details: Details about the certification
"""
@property
def metric_name(self) -> str:
"""Unique identifier for the metric."""
@property
def expression(self) -> str:
"""SQL expression or reference for calculating the metric."""
@property
def verbose_name(self) -> str | None:
"""Human-readable name for display in the UI."""
@property
def description(self) -> str | None:
"""Description of what the metric represents."""
@property
def d3format(self) -> str | None:
"""D3 format string for formatting numeric values."""
@property
def currency(self) -> dict[str, Any] | None:
"""Currency configuration for the metric (JSON object)."""
@property
def warning_text(self) -> str | None:
"""Warning message to display when using this metric."""
@property
def certified_by(self) -> str | None:
"""Person or entity that certified this metric."""
@property
def certification_details(self) -> str | None:
"""Details about the certification."""
@runtime_checkable
class ColumnMetadata(Protocol):
"""
Protocol for column metadata objects.
Represents a column/dimension that's available on an explorable data source.
Used for grouping, filtering, and dimension-based analysis.
Attributes:
column_name: Unique identifier for the column
type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME')
is_dttm: Whether this column represents a date or time value
verbose_name: Human-readable name for display in the UI
description: Description of what the column represents
groupby: Whether this column is allowed for grouping/aggregation
filterable: Whether this column can be used in filters
expression: SQL expression if this is a calculated column
python_date_format: Python datetime format string for temporal columns
advanced_data_type: Advanced data type classification
extra: Additional metadata stored as JSON
"""
@property
def column_name(self) -> str:
"""Unique identifier for the column."""
@property
def type(self) -> str:
"""SQL data type of the column."""
@property
def is_dttm(self) -> bool:
"""Whether this column represents a date or time value."""
@property
def verbose_name(self) -> str | None:
"""Human-readable name for display in the UI."""
@property
def description(self) -> str | None:
"""Description of what the column represents."""
@property
def groupby(self) -> bool:
"""Whether this column is allowed for grouping/aggregation."""
@property
def filterable(self) -> bool:
"""Whether this column can be used in filters."""
@property
def expression(self) -> str | None:
"""SQL expression if this is a calculated column."""
@property
def python_date_format(self) -> str | None:
"""Python datetime format string for temporal columns."""
@property
def advanced_data_type(self) -> str | None:
"""Advanced data type classification."""
@property
def extra(self) -> str | None:
"""Additional metadata stored as JSON."""
@runtime_checkable
class Explorable(Protocol):
"""
@@ -132,7 +256,7 @@ class Explorable(Protocol):
"""
@property
def metrics(self) -> list[Any]:
def metrics(self) -> list[MetricMetadata]:
"""
List of metric metadata objects.
@@ -147,7 +271,7 @@ class Explorable(Protocol):
# TODO: rename to dimensions
@property
def columns(self) -> list[Any]:
def columns(self) -> list[ColumnMetadata]:
"""
List of column metadata objects.

View File

@@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""add_semantic_layers_and_views
Revision ID: 33d7e0e21daa
Revises: 9787190b3d89
Create Date: 2025-11-04 11:26:00.000000
"""
import uuid
import sqlalchemy as sa
from alembic import op
from sqlalchemy_utils import UUIDType
from sqlalchemy_utils.types.json import JSONType
from superset.extensions import encrypted_field_factory
from superset.migrations.shared.utils import (
create_fks_for_table,
create_table,
drop_table,
)
# revision identifiers, used by Alembic.
revision = "33d7e0e21daa"
down_revision = "9787190b3d89"
def upgrade():
    """
    Add the `semantic_layers` and `semantic_views` tables and widen the chart
    datasource check constraint to accept the new `semantic_view` type.
    """
    # Create semantic_layers table. The primary key is a UUID and the
    # configuration column is encrypted at rest.
    create_table(
        "semantic_layers",
        sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False),
        sa.Column("created_on", sa.DateTime(), nullable=True),
        sa.Column("changed_on", sa.DateTime(), nullable=True),
        sa.Column("name", sa.String(length=250), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column("type", sa.String(length=250), nullable=False),
        sa.Column(
            "configuration",
            encrypted_field_factory.create(JSONType),
            nullable=True,
        ),
        sa.Column("cache_timeout", sa.Integer(), nullable=True),
        sa.Column("created_by_fk", sa.Integer(), nullable=True),
        sa.Column("changed_by_fk", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("uuid"),
    )

    # Create foreign key constraints for semantic_layers (audit columns).
    create_fks_for_table(
        "fk_semantic_layers_created_by_fk_ab_user",
        "semantic_layers",
        "ab_user",
        ["created_by_fk"],
        ["id"],
    )
    create_fks_for_table(
        "fk_semantic_layers_changed_by_fk_ab_user",
        "semantic_layers",
        "ab_user",
        ["changed_by_fk"],
        ["id"],
    )

    # Create semantic_views table. Views are deleted automatically when their
    # parent layer is deleted (ondelete="CASCADE").
    # NOTE(review): besides the uuid primary key, views also get a unique
    # auto-generated integer `id` — presumably for code paths that expect
    # integer datasource ids; confirm.
    create_table(
        "semantic_views",
        sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False),
        sa.Column("id", sa.Integer(), sa.Identity(), unique=True, nullable=False),
        sa.Column("created_on", sa.DateTime(), nullable=True),
        sa.Column("changed_on", sa.DateTime(), nullable=True),
        sa.Column("name", sa.String(length=250), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column(
            "configuration",
            encrypted_field_factory.create(JSONType),
            nullable=True,
        ),
        sa.Column("cache_timeout", sa.Integer(), nullable=True),
        sa.Column(
            "semantic_layer_uuid",
            UUIDType(binary=True),
            sa.ForeignKey("semantic_layers.uuid", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("created_by_fk", sa.Integer(), nullable=True),
        sa.Column("changed_by_fk", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("uuid"),
    )

    # Create foreign key constraints for semantic_views (audit columns).
    create_fks_for_table(
        "fk_semantic_views_created_by_fk_ab_user",
        "semantic_views",
        "ab_user",
        ["created_by_fk"],
        ["id"],
    )
    create_fks_for_table(
        "fk_semantic_views_changed_by_fk_ab_user",
        "semantic_views",
        "ab_user",
        ["changed_by_fk"],
        ["id"],
    )

    # Update chart datasource constraint to allow semantic_view. batch_alter_table
    # is used so the constraint swap also works on SQLite.
    with op.batch_alter_table("slices") as batch_op:
        batch_op.drop_constraint("ck_chart_datasource", type_="check")
        batch_op.create_check_constraint(
            "ck_chart_datasource",
            "datasource_type in ('table', 'semantic_view')",
        )
def downgrade():
    """
    Drop the semantic layer tables and restore the chart datasource constraint.
    """
    # Restore original constraint.
    # NOTE(review): this re-creates the constraint as ('table') only — confirm
    # that matches the pre-upgrade constraint definition exactly.
    with op.batch_alter_table("slices") as batch_op:
        batch_op.drop_constraint("ck_chart_datasource", type_="check")
        batch_op.create_check_constraint(
            "ck_chart_datasource", "datasource_type in ('table')"
        )
    # Drop views before layers to satisfy the semantic_views -> semantic_layers FK.
    drop_table("semantic_views")
    drop_table("semantic_layers")

View File

@@ -22,7 +22,7 @@ import logging
import re
from collections.abc import Hashable
from datetime import datetime
from typing import Any, Optional, TYPE_CHECKING
from typing import Any, cast, Optional, TYPE_CHECKING
import sqlalchemy as sqla
from flask import current_app as app
@@ -64,7 +64,7 @@ from superset.sql.parse import (
Table,
)
from superset.sqllab.limiting_factor import LimitingFactor
from superset.superset_typing import ExplorableData, QueryObjectDict
from superset.superset_typing import DatasetColumnData, ExplorableData, QueryObjectDict
from superset.utils import json
from superset.utils.core import (
get_column_name,
@@ -258,7 +258,7 @@ class Query(
],
"filter_select": True,
"name": self.tab_name,
"columns": [o.data for o in self.columns],
"columns": [cast(DatasetColumnData, o.data) for o in self.columns],
"metrics": [],
"id": self.id,
"type": self.type,

View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -0,0 +1,947 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Functions for mapping `QueryObject` to semantic layers.
These functions validate and convert a `QueryObject` into one or more `SemanticQuery`,
which are then passed to semantic layer implementations for execution, returning a
single dataframe.
"""
from datetime import datetime, timedelta
from time import time
from typing import Any, cast, Sequence, TypeGuard
import numpy as np
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
from superset_core.semantic_layers.types import (
AdhocExpression,
Day,
Dimension,
Filter,
FilterValues,
Grain,
GroupLimit,
Hour,
Metric,
Minute,
Month,
Operator,
OrderDirection,
OrderTuple,
PredicateType,
Quarter,
Second,
SemanticQuery,
SemanticResult,
Week,
Year,
)
from superset.common.db_query_status import QueryStatus
from superset.common.query_object import QueryObject
from superset.common.utils.time_range_utils import get_since_until_from_query_object
from superset.connectors.sqla.models import BaseDatasource
from superset.constants import NO_TIME_RANGE
from superset.models.helpers import QueryResult
from superset.superset_typing import AdhocColumn
from superset.utils.core import (
FilterOperator,
QueryObjectFilterClause,
TIME_COMPARISON,
)
from superset.utils.date_parser import get_past_or_future
class ValidatedQueryObjectFilterClause(QueryObjectFilterClause):
    """
    A validated QueryObject filter clause with a string column name.

    The `col` in a `QueryObjectFilterClause` can be either a string (column
    name) or an adhoc column, but we only support the former in semantic
    layers; `_validate_filters` rejects adhoc columns before this narrowing
    is relied upon.
    """
    # overwrite to narrow type; mypy complains about more restrictive typed dicts,
    # but the alternative would be to redefine the object
    col: str  # type: ignore[misc]  # dimension name (adhoc columns rejected upstream)
    op: str  # type: ignore[misc]  # operator string, e.g. FilterOperator values
class ValidatedQueryObject(QueryObject):
    """
    A query object that has a datasource defined.

    Produced by narrowing a plain `QueryObject` via `validate_query_object`
    (a `TypeGuard`); the validators guarantee the stricter attribute types
    declared below.
    """
    # guaranteed non-None after validation
    datasource: BaseDatasource
    # overwrite to narrow type; mypy complains about the assignment since the base type
    # allows adhoc filters, but we only support validated filters here
    filter: list[ValidatedQueryObjectFilterClause]  # type: ignore[assignment]
    series_columns: Sequence[str]  # type: ignore[assignment]  # plain names only
    series_limit_metric: str | None  # metric name, never an adhoc metric dict
def get_results(query_object: QueryObject) -> QueryResult:
    """
    Run 1+ queries based on `QueryObject` and return the results.

    The main query always runs. One additional query runs per entry in
    `query_object.time_offsets`; each offset's metric columns are renamed
    with a `TIME_COMPARISON`-joined suffix and left-joined back onto the
    main dataframe on the non-metric columns.

    :param query_object: The QueryObject containing query specifications
    :return: QueryResult compatible with Superset's query interface
    :raises ValueError: if the query object has no datasource defined
    """
    if not validate_query_object(query_object):
        raise ValueError("QueryObject must have a datasource defined.")
    # Track execution time
    start_time = time()
    semantic_view = query_object.datasource.implementation
    # Row-count and dataframe queries share the same call signature, so the
    # method is selected once up front.
    dispatcher = (
        semantic_view.get_row_count
        if query_object.is_rowcount
        else semantic_view.get_dataframe
    )
    # Step 1: Convert QueryObject to list of SemanticQuery objects
    # The first query is the main query, subsequent queries are for time offsets
    queries = map_query_object(query_object)
    # Step 2: Execute the main query (first in the list)
    main_query = queries[0]
    main_result = dispatcher(
        metrics=main_query.metrics,
        dimensions=main_query.dimensions,
        filters=main_query.filters,
        order=main_query.order,
        limit=main_query.limit,
        offset=main_query.offset,
        group_limit=main_query.group_limit,
    )
    main_df = main_result.results
    # Collect all requests (SQL queries, HTTP requests, etc.) for troubleshooting
    all_requests = list(main_result.requests)
    # If no time offsets, return the main result as-is
    if not query_object.time_offsets or len(queries) <= 1:
        semantic_result = SemanticResult(
            requests=all_requests,
            results=main_df,
        )
        duration = timedelta(seconds=time() - start_time)
        return map_semantic_result_to_query_result(
            semantic_result,
            query_object,
            duration,
        )
    # Get metric names from the main query
    # These are the columns that will be renamed with offset suffixes
    metric_names = [metric.name for metric in main_query.metrics]
    # Join keys are all columns except metrics
    # These will be used to match rows between main and offset DataFrames
    join_keys = [col for col in main_df.columns if col not in metric_names]
    # Step 3 & 4: Execute each time offset query and join results
    for offset_query, time_offset in zip(
        queries[1:],
        query_object.time_offsets,
        strict=False,
    ):
        # Execute the offset query
        result = dispatcher(
            metrics=offset_query.metrics,
            dimensions=offset_query.dimensions,
            filters=offset_query.filters,
            order=offset_query.order,
            limit=offset_query.limit,
            offset=offset_query.offset,
            group_limit=offset_query.group_limit,
        )
        # Add this query's requests to the collection
        all_requests.extend(result.requests)
        offset_df = result.results
        # Handle empty results - add NaN columns directly instead of merging
        # This avoids dtype mismatch issues with empty DataFrames
        if offset_df.empty:
            # Add offset metric columns with NaN values directly to main_df
            for metric in metric_names:
                offset_col_name = TIME_COMPARISON.join([metric, time_offset])
                main_df[offset_col_name] = np.nan
        else:
            # Rename metric columns with time offset suffix
            # Format: "{metric_name}__{time_offset}"
            # Example: "revenue" -> "revenue__1 week ago"
            offset_df = offset_df.rename(
                columns={
                    metric: TIME_COMPARISON.join([metric, time_offset])
                    for metric in metric_names
                }
            )
            # Step 5: Perform left join on dimension columns
            # This preserves all rows from main_df and adds offset metrics
            # where they match
            main_df = main_df.merge(
                offset_df,
                on=join_keys,
                how="left",
                suffixes=("", "__duplicate"),
            )
            # Clean up any duplicate columns that might have been created
            # (shouldn't happen with proper join keys, but defensive programming)
            duplicate_cols = [
                col for col in main_df.columns if col.endswith("__duplicate")
            ]
            if duplicate_cols:
                main_df = main_df.drop(columns=duplicate_cols)
    # Convert final result to QueryResult
    semantic_result = SemanticResult(requests=all_requests, results=main_df)
    duration = timedelta(seconds=time() - start_time)
    return map_semantic_result_to_query_result(
        semantic_result,
        query_object,
        duration,
    )
def map_semantic_result_to_query_result(
    semantic_result: SemanticResult,
    query_object: ValidatedQueryObject,
    duration: timedelta,
) -> QueryResult:
    """
    Build a Superset `QueryResult` from a semantic layer `SemanticResult`.

    :param semantic_result: Result returned by the semantic layer
    :param query_object: Original query object (source of the time bounds)
    :param duration: Wall-clock time the query took to execute
    :return: QueryResult compatible with Superset's query interface
    """
    # Render every recorded request (SQL statements, HTTP calls, ...) into a
    # single annotated string for display; empty when nothing was recorded.
    rendered = [
        f"-- {request.type}\n{request.definition}"
        for request in semantic_result.requests
    ]
    query_str = "\n\n".join(rendered) if rendered else ""

    return QueryResult(
        # Core data
        df=semantic_result.results,
        query=query_str,
        duration=duration,
        # Jinja template filters do not apply to semantic layers
        applied_template_filters=None,
        # Filter-column bookkeeping happens inside the semantic layer
        applied_filter_columns=None,
        rejected_filter_columns=None,
        # Errors raise before this point, so reaching here means success
        status=QueryStatus.SUCCESS,
        error_message=None,
        errors=None,
        # Propagate the original query's time bounds
        from_dttm=query_object.from_dttm,
        to_dttm=query_object.to_dttm,
    )
def _normalize_column(column: str | AdhocColumn, dimension_names: set[str]) -> str:
    """
    Resolve a query column to the name of a known dimension.

    A column may be a plain string (already a dimension name) or an adhoc
    column dict that merely references a dimension: ``isColumnReference`` set
    and ``sqlExpression`` holding the dimension name (common in time-series
    charts). True adhoc expressions are rejected.

    :raises ValueError: when the column is a genuine adhoc expression
    """
    if isinstance(column, str):
        return column

    is_reference = bool(column.get("isColumnReference"))
    expression = column.get("sqlExpression")
    if is_reference and expression and expression in dimension_names:
        return expression

    raise ValueError("Adhoc dimensions are not supported in Semantic Views.")
def map_query_object(query_object: ValidatedQueryObject) -> list[SemanticQuery]:
    """
    Convert a `QueryObject` into a list of `SemanticQuery`.

    This function maps the `QueryObject` into query objects that focus less on
    visualization and more on semantics. The first query in the returned list
    is the main query; one additional query is appended per entry in
    ``query_object.time_offsets``, differing only in its (shifted) time
    filters.
    """
    semantic_view = query_object.datasource.implementation
    all_metrics = {metric.name: metric for metric in semantic_view.metrics}
    all_dimensions = {
        dimension.name: dimension for dimension in semantic_view.dimensions
    }
    # Normalize columns (may be dicts with isColumnReference=True for time-series)
    dimension_names = set(all_dimensions.keys())
    normalized_columns = {
        _normalize_column(column, dimension_names) for column in query_object.columns
    }
    metrics = [all_metrics[metric] for metric in (query_object.metrics or [])]
    grain = (
        _convert_time_grain(query_object.extras["time_grain_sqla"])
        if "time_grain_sqla" in query_object.extras
        else None
    )
    dimensions = [
        dimension
        for dimension in semantic_view.dimensions
        if dimension.name in normalized_columns
        and (
            # if a grain is specified, only include the time dimension if its grain
            # matches the requested grain
            grain is None
            or dimension.name != query_object.granularity
            or dimension.grain == grain
        )
    ]
    order = _get_order_from_query_object(query_object, all_metrics, all_dimensions)
    limit = query_object.row_limit
    offset = query_object.row_offset
    group_limit = _get_group_limit_from_query_object(
        query_object,
        all_metrics,
        all_dimensions,
    )
    queries = []
    # `None` yields the main (unshifted) query; each time offset yields a
    # shifted copy that differs only in its time filters.
    for time_offset in [None] + query_object.time_offsets:
        filters = _get_filters_from_query_object(
            query_object,
            time_offset,
            all_dimensions,
        )
        queries.append(
            SemanticQuery(
                metrics=metrics,
                dimensions=dimensions,
                filters=filters,
                order=order,
                limit=limit,
                offset=offset,
                group_limit=group_limit,
            )
        )
    return queries
def _get_filters_from_query_object(
    query_object: ValidatedQueryObject,
    time_offset: str | None,
    all_dimensions: dict[str, Dimension],
) -> set[Filter]:
    """
    Collect every filter that applies to the query.

    Combines the datasource's fetch-values predicate, the time range derived
    from from_dttm/to_dttm (shifted when `time_offset` is set), the
    WHERE/HAVING clauses from `extras`, and the regular filter clauses. This
    flattens the from_dttm/to_dttm/inner_* complexity into plain filters.
    """
    collected: set[Filter] = set()

    # 1. Datasource-level predicate, applied when the query opts in
    datasource = query_object.datasource
    if query_object.apply_fetch_values_predicate and datasource.fetch_values_predicate:
        collected.add(
            Filter(
                type=PredicateType.WHERE,
                column=None,
                operator=Operator.ADHOC,
                value=datasource.fetch_values_predicate,
            )
        )

    # 2. Time bounds as filters (shifted for time-offset queries)
    collected |= _get_time_filter(query_object, time_offset, all_dimensions)

    # 3. Free-form WHERE/HAVING clauses from extras
    collected |= _get_filters_from_extras(query_object.extras)

    # 4. Regular filter clauses; temporal ranges are skipped when a
    # granularity is set because the time bounds above already cover them
    for clause in query_object.filter:
        if (
            clause.get("op") == FilterOperator.TEMPORAL_RANGE.value
            and query_object.granularity
        ):
            continue
        if mapped := _convert_query_object_filter(clause, all_dimensions):
            collected |= mapped
    return collected
def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
    """
    Build adhoc filters from the ``where``/``having`` entries of `extras`.

    Supported keys (converted to filters):
    - "where": SQL WHERE clause expression (e.g., "customer_id > 100")
    - "having": SQL HAVING clause expression (e.g., "SUM(sales) > 1000")

    Both clauses are raw SQL fragments and are forwarded verbatim to the
    semantic layer as adhoc `Filter` objects. Other extras keys (such as
    "time_grain_sqla") are consumed elsewhere in this module.
    """
    # (extras key, predicate type) pairs translated into adhoc filters
    clause_kinds = (
        ("where", PredicateType.WHERE),
        ("having", PredicateType.HAVING),
    )
    return {
        Filter(
            type=predicate_type,
            column=None,
            operator=Operator.ADHOC,
            value=clause,
        )
        for key, predicate_type in clause_kinds
        if (clause := extras.get(key))
    }
def _get_time_filter(
    query_object: ValidatedQueryObject,
    time_offset: str | None,
    all_dimensions: dict[str, Dimension],
) -> set[Filter]:
    """
    Build ``[from, to)`` filters on the time dimension, when one applies.

    Returns an empty set when the query has no granularity, the granularity
    does not match a known dimension, or no time bounds can be determined.
    The same bounds are intentionally used for both the main query and any
    series-limit subqueries.
    """
    granularity = query_object.granularity
    if not granularity:
        return set()

    time_dimension = all_dimensions.get(granularity)
    if not time_dimension:
        return set()

    # Bounds are shifted when this is a time-offset (comparison) query
    lower, upper = _get_time_bounds(query_object, time_offset)
    if not lower or not upper:
        return set()

    # Half-open interval: >= lower AND < upper
    return {
        Filter(
            type=PredicateType.WHERE,
            column=time_dimension,
            operator=Operator.GREATER_THAN_OR_EQUAL,
            value=lower,
        ),
        Filter(
            type=PredicateType.WHERE,
            column=time_dimension,
            operator=Operator.LESS_THAN,
            value=upper,
        ),
    }
def _get_time_bounds(
    query_object: ValidatedQueryObject,
    time_offset: str | None,
) -> tuple[datetime | None, datetime | None]:
    """
    Return the (from, to) datetimes for the main or a time-offset query.

    The main query (``time_offset is None``) uses from_dttm/to_dttm as-is.
    Offset queries shift those bounds by the offset, falling back to parsing
    the query's time range when explicit bounds are missing. Series-limit
    subqueries deliberately share the same bounds as the main query.
    """
    if time_offset is None:
        # Main query: use from_dttm/to_dttm directly
        return query_object.from_dttm, query_object.to_dttm

    lower = query_object.from_dttm
    upper = query_object.to_dttm
    if not (lower and upper):
        # No explicit bounds; derive them from the query's time range
        lower, upper = get_since_until_from_query_object(query_object)
        if not (lower and upper):
            return None, None

    # Shift both ends by the requested offset (e.g. "1 week ago")
    return (
        get_past_or_future(time_offset, lower),
        get_past_or_future(time_offset, upper),
    )
def _convert_query_object_filter(
    filter_: ValidatedQueryObjectFilterClause,
    all_dimensions: dict[str, Dimension],
) -> set[Filter] | None:
    """
    Convert a QueryObject filter dict to a set of semantic layer `Filter`s.

    :param filter_: The validated filter clause from the query object
    :param all_dimensions: Known dimensions of the semantic view, by name
    :return: One or more filters, or `None` when the clause cannot be mapped
        (unknown column, unknown operator, or empty temporal range) and the
        caller should skip it
    """
    operator_str = filter_["op"]
    # Handle simple column filters; unknown columns are skipped
    col = filter_.get("col")
    if col not in all_dimensions:
        return None
    dimension = all_dimensions[col]
    # `val` may be absent for unary operators such as IS NULL / IS NOT NULL,
    # so use `.get` to avoid a KeyError on those clauses
    val_str = filter_.get("val")
    value: FilterValues | frozenset[FilterValues]
    if val_str is None:
        value = None
    elif isinstance(val_str, (list, tuple)):
        # frozenset keeps the resulting Filter hashable
        value = frozenset(val_str)
    else:
        value = val_str
    # Special case for temporal range: expand into a [start, end) pair
    if operator_str == FilterOperator.TEMPORAL_RANGE.value:
        if not isinstance(value, str) or value == NO_TIME_RANGE:
            return None
        start, end = value.split(" : ")
        return {
            Filter(
                type=PredicateType.WHERE,
                column=dimension,
                operator=Operator.GREATER_THAN_OR_EQUAL,
                value=start,
            ),
            Filter(
                type=PredicateType.WHERE,
                column=dimension,
                operator=Operator.LESS_THAN,
                value=end,
            ),
        }
    # Map QueryObject operators to semantic layer operators
    operator_mapping = {
        FilterOperator.EQUALS.value: Operator.EQUALS,
        FilterOperator.NOT_EQUALS.value: Operator.NOT_EQUALS,
        FilterOperator.GREATER_THAN.value: Operator.GREATER_THAN,
        FilterOperator.LESS_THAN.value: Operator.LESS_THAN,
        FilterOperator.GREATER_THAN_OR_EQUALS.value: Operator.GREATER_THAN_OR_EQUAL,
        FilterOperator.LESS_THAN_OR_EQUALS.value: Operator.LESS_THAN_OR_EQUAL,
        FilterOperator.IN.value: Operator.IN,
        FilterOperator.NOT_IN.value: Operator.NOT_IN,
        FilterOperator.LIKE.value: Operator.LIKE,
        FilterOperator.NOT_LIKE.value: Operator.NOT_LIKE,
        FilterOperator.IS_NULL.value: Operator.IS_NULL,
        FilterOperator.IS_NOT_NULL.value: Operator.IS_NOT_NULL,
    }
    operator = operator_mapping.get(operator_str)
    if not operator:
        # Unknown operator - skip this filter
        return None
    return {
        Filter(
            type=PredicateType.WHERE,
            column=dimension,
            operator=operator,
            value=value,
        )
    }
def _get_order_from_query_object(
    query_object: ValidatedQueryObject,
    all_metrics: dict[str, Metric],
    all_dimensions: dict[str, Dimension],
) -> list[OrderTuple]:
    """
    Build the semantic layer ORDER BY list from the query object.

    Each element resolves to an adhoc SQL expression, a dimension, or a
    metric; anything that cannot be resolved is silently dropped.
    """
    order: list[OrderTuple] = []
    for element, ascending in query_object.orderby:
        direction = OrderDirection.ASC if ascending else OrderDirection.DESC
        # adhoc expression: a dict carrying a sqlExpression (label optional)
        if isinstance(element, dict):
            # `.get` avoids a KeyError on adhoc dicts missing these keys
            if (sql_expression := element.get("sqlExpression")) is not None:
                order.append(
                    (
                        AdhocExpression(
                            id=element.get("label") or sql_expression,
                            definition=sql_expression,
                        ),
                        direction,
                    )
                )
        elif element in all_dimensions:
            order.append((all_dimensions[element], direction))
        elif element in all_metrics:
            order.append((all_metrics[element], direction))
    return order
def _get_group_limit_from_query_object(
    query_object: ValidatedQueryObject,
    all_metrics: dict[str, Metric],
    all_dimensions: dict[str, Dimension],
) -> GroupLimit | None:
    """
    Build the `GroupLimit` (top-N series) specification, if one applies.

    Returns `None` when no series limit is requested or the query has no
    columns to group by.
    """
    # A limit of zero (or nothing to group by) means "no group limit"
    if query_object.series_limit == 0 or not query_object.columns:
        return None

    limit_metric_name = query_object.series_limit_metric
    ranking_metric = all_metrics[limit_metric_name] if limit_metric_name else None
    sort_direction = (
        OrderDirection.DESC if query_object.order_desc else OrderDirection.ASC
    )

    return GroupLimit(
        dimensions=[all_dimensions[name] for name in query_object.series_columns],
        top=query_object.series_limit,
        metric=ranking_metric,
        direction=sort_direction,
        group_others=query_object.group_others_when_limit_reached,
        # Separate filters are only needed when the inner time bounds differ
        # from the outer ones (time comparison queries)
        filters=_get_group_limit_filters(query_object, all_dimensions),
    )
def _get_group_limit_filters(
    query_object: ValidatedQueryObject,
    all_dimensions: dict[str, Dimension],
) -> set[Filter] | None:
    """
    Build a separate filter set for the group-limit subquery, if needed.

    During time comparison queries the subquery that determines the top N
    groups may need different (inner) time bounds than the main query. When
    the inner bounds are unset, or equal to the outer ones, this returns
    `None`, meaning the subquery shares the main query's filters.
    """
    inner_from = query_object.inner_from_dttm
    inner_to = query_object.inner_to_dttm

    bounds_match = (
        inner_from == query_object.from_dttm and inner_to == query_object.to_dttm
    )
    if inner_from is None or inner_to is None or bounds_match:
        # No separate bounds needed - reuse the main query's filters
        return None

    subquery_filters: set[Filter] = set()

    # Time range on the granularity dimension, using the *inner* bounds
    if query_object.granularity:
        time_dimension = all_dimensions.get(query_object.granularity)
        if time_dimension and inner_from and inner_to:
            subquery_filters |= {
                Filter(
                    type=PredicateType.WHERE,
                    column=time_dimension,
                    operator=Operator.GREATER_THAN_OR_EQUAL,
                    value=inner_from,
                ),
                Filter(
                    type=PredicateType.WHERE,
                    column=time_dimension,
                    operator=Operator.LESS_THAN,
                    value=inner_to,
                ),
            }

    # Datasource-level fetch-values predicate, when the query opts in
    datasource = query_object.datasource
    if query_object.apply_fetch_values_predicate and datasource.fetch_values_predicate:
        subquery_filters.add(
            Filter(
                type=PredicateType.WHERE,
                column=None,
                operator=Operator.ADHOC,
                value=datasource.fetch_values_predicate,
            )
        )

    # Free-form WHERE/HAVING clauses from extras
    subquery_filters |= _get_filters_from_extras(query_object.extras)

    # Regular clauses, minus temporal ranges (covered by the inner bounds)
    for clause in query_object.filter:
        if (
            clause.get("op") == FilterOperator.TEMPORAL_RANGE.value
            and query_object.granularity
        ):
            continue
        if mapped := _convert_query_object_filter(clause, all_dimensions):
            subquery_filters |= mapped

    return subquery_filters or None
def _convert_time_grain(time_grain: str) -> type[Grain] | None:
    """
    Resolve a time grain string (e.g. "P1D") to its `Grain` class, if known.
    """
    supported_grains: list[type[Grain]] = [
        Second,
        Minute,
        Hour,
        Day,
        Week,
        Month,
        Quarter,
        Year,
    ]
    # Linear scan over the handful of grains; unknown grains map to None
    for grain in supported_grains:
        if grain.representation == time_grain:
            return grain
    return None
def validate_query_object(
    query_object: QueryObject,
) -> TypeGuard[ValidatedQueryObject]:
    """
    Validate that the `QueryObject` is compatible with the `SemanticView`.

    Returns False only when no datasource is set; any other incompatibility
    raises `ValueError` from one of the individual validators. If some
    semantic view implementation supports these features we should add an
    attribute to the `SemanticViewImplementation` to indicate support.
    """
    if not query_object.datasource:
        return False
    validated = cast(ValidatedQueryObject, query_object)
    checks = (
        _validate_metrics,
        _validate_dimensions,
        _validate_filters,
        _validate_granularity,
        _validate_group_limit,
        _validate_orderby,
    )
    for check in checks:
        check(validated)
    return True
def _validate_metrics(query_object: ValidatedQueryObject) -> None:
    """
    Make sure metrics are plain names defined in the semantic view.

    :raises ValueError: on adhoc metrics or metrics unknown to the view
    """
    semantic_view = query_object.datasource.implementation
    requested = query_object.metrics or []
    if not all(isinstance(metric, str) for metric in requested):
        raise ValueError("Adhoc metrics are not supported in Semantic Views.")
    known = {metric.name for metric in semantic_view.metrics}
    if set(requested) - known:
        raise ValueError("All metrics must be defined in the Semantic View.")
def _validate_dimensions(query_object: ValidatedQueryObject) -> None:
    """
    Make sure all requested columns resolve to dimensions of the view.

    :raises ValueError: if any column is not a known dimension
    """
    semantic_view = query_object.datasource.implementation
    known = {dimension.name for dimension in semantic_view.dimensions}
    # Columns may be column-reference dicts; normalize them to plain names
    requested = {_normalize_column(column, known) for column in query_object.columns}
    if not requested.issubset(known):
        raise ValueError("All dimensions must be defined in the Semantic View.")
def _validate_filters(query_object: ValidatedQueryObject) -> None:
    """
    Make sure all filters are valid.

    :raises ValueError: if a filter uses an adhoc column or lacks an operator
    """
    for filter_ in query_object.filter:
        # `.get` avoids raising a KeyError (instead of a meaningful
        # ValueError) on malformed clauses with no `col` key
        if isinstance(filter_.get("col"), dict):
            raise ValueError(
                "Adhoc columns are not supported in Semantic View filters."
            )
        if not filter_.get("op"):
            raise ValueError("All filters must have an operator defined.")
def _validate_granularity(query_object: ValidatedQueryObject) -> None:
    """
    Make sure the time column and time grain are valid for the view.

    :raises ValueError: when the time column is not a dimension, a grain is
        given without a time column, or the grain is unsupported
    """
    semantic_view = query_object.datasource.implementation
    dimension_names = {dimension.name for dimension in semantic_view.dimensions}

    time_column = query_object.granularity
    if time_column and time_column not in dimension_names:
        raise ValueError(
            "The time column must be defined in the Semantic View dimensions."
        )

    time_grain = query_object.extras.get("time_grain_sqla")
    if not time_grain:
        return
    if not time_column:
        raise ValueError(
            "A time column must be specified when a time grain is provided."
        )
    # Grains advertised by the time dimension(s) matching the time column
    available_grains = {
        dimension.grain
        for dimension in semantic_view.dimensions
        if dimension.name == time_column and dimension.grain
    }
    if _convert_time_grain(time_grain) not in available_grains:
        raise ValueError(
            "The time grain is not supported for the time column in the "
            "Semantic View."
        )
def _validate_group_limit(query_object: ValidatedQueryObject) -> None:
    """
    Validate the series (group) limit configuration of the query object.

    :raises ValueError: when the view lacks a required feature or when the
        series columns / ranking metric are unknown to it
    """
    semantic_view = query_object.datasource.implementation

    # A series limit of zero means "no limit"; nothing else to check
    if query_object.series_limit == 0:
        return

    features = semantic_view.features
    if query_object.series_columns and SemanticViewFeature.GROUP_LIMIT not in features:
        raise ValueError("Group limit is not supported in this Semantic View.")
    if not all(isinstance(col, str) for col in query_object.series_columns):
        raise ValueError("Adhoc dimensions are not supported in series columns.")

    limit_metric = query_object.series_limit_metric
    metric_names = {metric.name for metric in semantic_view.metrics}
    if limit_metric and (
        not isinstance(limit_metric, str) or limit_metric not in metric_names
    ):
        raise ValueError(
            "The series limit metric must be defined in the Semantic View."
        )

    dimension_names = {dimension.name for dimension in semantic_view.dimensions}
    if not set(query_object.series_columns).issubset(dimension_names):
        raise ValueError("All series columns must be defined in the Semantic View.")

    if (
        query_object.group_others_when_limit_reached
        and SemanticViewFeature.GROUP_OTHERS not in features
    ):
        raise ValueError(
            "Grouping others when limit is reached is not supported in this Semantic "
            "View."
        )
def _validate_orderby(query_object: ValidatedQueryObject) -> None:
    """
    Validate order by elements in the query object.

    :raises ValueError: when adhoc expressions are used without view support,
        or when a plain element is neither a metric nor a dimension
    """
    semantic_view = query_object.datasource.implementation
    if (
        any(not isinstance(element, str) for element, _ in query_object.orderby)
        and SemanticViewFeature.ADHOC_EXPRESSIONS_IN_ORDERBY
        not in semantic_view.features
    ):
        raise ValueError(
            "Adhoc expressions in order by are not supported in this Semantic View."
        )
    # Only check membership for plain (string) elements: adhoc elements are
    # dicts, which are unhashable and would make the set comprehension raise
    # TypeError even when the view supports adhoc order-by expressions.
    elements = {
        orderby[0] for orderby in query_object.orderby if isinstance(orderby[0], str)
    }
    metric_names = {metric.name for metric in semantic_view.metrics}
    dimension_names = {dimension.name for dimension in semantic_view.dimensions}
    if not elements <= metric_names | dimension_names:
        raise ValueError("All order by elements must be defined in the Semantic View.")

View File

@@ -0,0 +1,398 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Semantic layer models."""
from __future__ import annotations
import uuid
from collections.abc import Hashable
from dataclasses import dataclass
from functools import cached_property
from typing import Any, TYPE_CHECKING
from flask_appbuilder import Model
from sqlalchemy import Column, ForeignKey, Identity, Integer, String, Text
from sqlalchemy.orm import relationship
from sqlalchemy_utils import UUIDType
from sqlalchemy_utils.types.json import JSONType
from superset_core.semantic_layers.semantic_layer import (
SemanticLayer as SemanticLayerProtocol,
)
from superset_core.semantic_layers.semantic_view import (
SemanticView as SemanticViewProtocol,
)
from superset_core.semantic_layers.types import (
BINARY,
BOOLEAN,
DATE,
DATETIME,
DECIMAL,
INTEGER,
INTERVAL,
NUMBER,
OBJECT,
STRING,
TIME,
Type,
)
from superset.common.query_object import QueryObject
from superset.explorables.base import TimeGrainDict
from superset.extensions import encrypted_field_factory
from superset.models.helpers import AuditMixinNullable, QueryResult
from superset.semantic_layers.mapper import get_results
from superset.semantic_layers.registry import registry
from superset.utils import json
from superset.utils.core import GenericDataType
if TYPE_CHECKING:
from superset.superset_typing import ExplorableData, QueryObjectDict
def get_column_type(semantic_type: type[Type]) -> GenericDataType:
    """
    Map a semantic layer type to Superset's generic data type.

    Temporal and numeric families map to their counterparts, booleans map to
    BOOLEAN, and everything else (strings, objects, binary, and any
    unrecognized type) falls back to STRING.
    """
    temporal_types = (DATE, DATETIME, TIME)
    numeric_types = (INTEGER, NUMBER, DECIMAL, INTERVAL)

    if semantic_type in temporal_types:
        return GenericDataType.TEMPORAL
    if semantic_type in numeric_types:
        return GenericDataType.NUMERIC
    if semantic_type is BOOLEAN:
        return GenericDataType.BOOLEAN
    # STRING, OBJECT, BINARY and anything unrecognized fall back to STRING
    return GenericDataType.STRING
@dataclass(frozen=True)
class MetricMetadata:
    """
    Dataset-style metadata describing a semantic layer metric.

    Mirrors the shape of dataset metric metadata so semantic view metrics
    can be surfaced in Explore alongside regular datasets.
    """
    metric_name: str
    expression: str
    verbose_name: str | None = None
    description: str | None = None
    d3format: str | None = None  # d3 number format string for display
    currency: dict[str, Any] | None = None
    warning_text: str | None = None
    certified_by: str | None = None
    certification_details: str | None = None
@dataclass(frozen=True)
class ColumnMetadata:
    """
    Dataset-style metadata describing a semantic layer dimension.

    Mirrors the shape of dataset column metadata so semantic view dimensions
    can be surfaced in Explore alongside regular datasets.
    """
    column_name: str
    type: str
    is_dttm: bool  # whether the column is temporal (date/time/datetime)
    verbose_name: str | None = None
    description: str | None = None
    groupby: bool = True  # whether the column can be grouped by
    filterable: bool = True  # whether the column can be filtered on
    expression: str | None = None
    python_date_format: str | None = None
    advanced_data_type: str | None = None
    extra: str | None = None
class SemanticLayer(AuditMixinNullable, Model):
    """
    Semantic layer model.

    A semantic layer provides an abstraction over data sources,
    allowing users to query data through a semantic interface.
    """
    __tablename__ = "semantic_layers"
    uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4)
    # Core fields
    name = Column(String(250), nullable=False)
    description = Column(Text, nullable=True)
    type = Column(String(250), nullable=False)  # snowflake, etc
    # stored encrypted at rest; holds connection/credential details as JSON
    configuration = Column(encrypted_field_factory.create(JSONType), default=dict)
    cache_timeout = Column(Integer, nullable=True)
    # Semantic views relationship; deleting a layer cascades to its views
    semantic_views: list[SemanticView] = relationship(
        "SemanticView",
        back_populates="semantic_layer",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
    def __repr__(self) -> str:
        # Prefer the human-readable name; fall back to the UUID
        return self.name or str(self.uuid)
    @cached_property
    def implementation(
        self,
    ) -> SemanticLayerProtocol[Any, SemanticViewProtocol]:
        """
        Return semantic layer implementation.

        Looks the implementation class up by `type` in the registry and
        instantiates it from the decrypted JSON configuration. Cached per
        instance, so configuration changes require a fresh object.
        """
        # TODO (betodealmeida):
        # return extension_manager.get_contribution("semanticLayers", self.type)
        class_ = registry[self.type]
        return class_.from_configuration(json.loads(self.configuration))
class SemanticView(AuditMixinNullable, Model):
    """
    Semantic view model.
    A semantic view represents a queryable view within a semantic layer.

    Besides being an ORM model, this class implements the Explorable
    protocol so Explore can treat a semantic view like a datasource.
    """
    __tablename__ = "semantic_views"
    uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4)
    # Surrogate integer ID (auto-generated); None until the row is flushed
    id = Column(Integer, Identity(), unique=True)
    # Core fields
    name = Column(String(250), nullable=False)
    description = Column(Text, nullable=True)
    # Encrypted, implementation-specific configuration blob
    configuration = Column(encrypted_field_factory.create(JSONType), default=dict)
    cache_timeout = Column(Integer, nullable=True)
    # Semantic layer relationship
    semantic_layer_uuid = Column(
        UUIDType(binary=True),
        ForeignKey("semantic_layers.uuid", ondelete="CASCADE"),
        nullable=False,
    )
    semantic_layer: SemanticLayer = relationship(
        "SemanticLayer",
        back_populates="semantic_views",
        foreign_keys=[semantic_layer_uuid],
    )
    def __repr__(self) -> str:
        # Fall back to the UUID when the view has no name
        return self.name or str(self.uuid)
    @cached_property
    def implementation(self) -> SemanticViewProtocol:
        """
        Return semantic view implementation.

        Delegates to the parent layer's implementation, passing this view's
        name and its deserialized configuration.
        NOTE(review): ``json.loads`` assumes ``configuration`` is stored as a
        JSON string — confirm against how JSONType stores the column.
        """
        return self.semantic_layer.implementation.get_semantic_view(
            self.name,
            json.loads(self.configuration),
        )
    # =========================================================================
    # Explorable protocol implementation
    # =========================================================================
    def get_query_result(self, query_object: QueryObject) -> QueryResult:
        # Delegate execution to the module-level helper
        return get_results(query_object)
    def get_query_str(self, query_obj: QueryObjectDict) -> str:
        # Semantic layers have no SQL representation to show
        return "Not implemented for semantic layers"
    @property
    def uid(self) -> str:
        # Stable identifier provided by the implementation
        return self.implementation.uid()
    @property
    def type(self) -> str:
        # Datasource type string used by Explore (DatasourceType.SEMANTIC_VIEW)
        return "semantic_view"
    @property
    def metrics(self) -> list[MetricMetadata]:
        # Expose the implementation's metrics as Explore-style metadata
        return [
            MetricMetadata(
                metric_name=metric.name,
                expression=metric.definition,
                description=metric.description,
            )
            for metric in self.implementation.get_metrics()
        ]
    @property
    def columns(self) -> list[ColumnMetadata]:
        # Expose the implementation's dimensions as Explore-style columns
        return [
            ColumnMetadata(
                column_name=dimension.name,
                type=dimension.type.__name__,
                is_dttm=dimension.type in {DATE, TIME, DATETIME},
                description=dimension.description,
                expression=dimension.definition,
                extra=json.dumps(
                    {"grain": dimension.grain.name if dimension.grain else None}
                ),
            )
            for dimension in self.implementation.get_dimensions()
        ]
    @property
    def column_names(self) -> list[str]:
        return [dimension.name for dimension in self.implementation.get_dimensions()]
    @property
    def data(self) -> ExplorableData:
        """
        Payload consumed by the Explore frontend.

        Mirrors the shape of a dataset's ``data`` payload; fields that do not
        apply to semantic views are filled with neutral defaults.
        """
        return {
            # core
            # NOTE(review): unit tests expect the UUID hex string here, but
            # self.id is the integer surrogate key (None until flushed) —
            # confirm whether this should be self.uuid.hex.
            "id": self.id,
            "uid": self.uid,
            "type": "semantic_view",
            "name": self.name,
            "columns": [
                {
                    "advanced_data_type": None,
                    "certification_details": None,
                    "certified_by": None,
                    "column_name": dimension.name,
                    "description": dimension.description,
                    "expression": dimension.definition,
                    "filterable": True,
                    "groupby": True,
                    "id": None,
                    "uuid": None,
                    "is_certified": False,
                    "is_dttm": dimension.type in {DATE, TIME, DATETIME},
                    "python_date_format": None,
                    "type": dimension.type.__name__,
                    "type_generic": get_column_type(dimension.type),
                    "verbose_name": None,
                    "warning_markdown": None,
                }
                for dimension in self.implementation.get_dimensions()
            ],
            "metrics": [
                {
                    "certification_details": None,
                    "certified_by": None,
                    "d3format": None,
                    "description": metric.description,
                    "expression": metric.definition,
                    "id": None,
                    "uuid": None,
                    "is_certified": False,
                    "metric_name": metric.name,
                    "warning_markdown": None,
                    "warning_text": None,
                    "verbose_name": None,
                }
                for metric in self.implementation.get_metrics()
            ],
            "database": {},
            # UI features
            "verbose_map": {},
            "order_by_choices": [],
            "filter_select": True,
            "filter_select_enabled": True,
            "sql": None,
            "select_star": None,
            "owners": [],
            "description": self.description,
            "table_name": self.name,
            "column_types": [
                get_column_type(dimension.type)
                for dimension in self.implementation.get_dimensions()
            ],
            # NOTE(review): unit tests expect a set here; this builds a list —
            # confirm which shape the frontend requires.
            "column_names": [
                dimension.name for dimension in self.implementation.get_dimensions()
            ],
            # rare
            "column_formats": {},
            "datasource_name": self.name,
            "perm": self.perm,
            "offset": self.offset,
            "cache_timeout": self.cache_timeout,
            "params": None,
            # sql-specific
            "schema": None,
            "catalog": None,
            "main_dttm_col": None,
            "time_grain_sqla": [],
            "granularity_sqla": [],
            "fetch_values_predicate": None,
            "template_params": None,
            "is_sqllab_view": False,
            "extra": None,
            "always_filter_main_dttm": False,
            "normalize_columns": False,
            # TODO XXX
            # "owners": [owner.id for owner in self.owners],
            "edit_url": "",
            "default_endpoint": None,
            "folders": [],
            "health_check_message": None,
        }
    def data_for_slices(self, slices: list[Any]) -> dict[str, Any]:
        # No per-slice payload pruning yet: return the full payload
        return self.data
    def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]:
        # Semantic views contribute no extra cache keys
        return []
    @property
    def perm(self) -> str:
        # Permission string combines the parent layer UUID and the view UUID
        return self.semantic_layer_uuid.hex + "::" + self.uuid.hex
    @property
    def catalog_perm(self) -> str | None:
        return None
    @property
    def schema_perm(self) -> str | None:
        return None
    @property
    def schema(self) -> str | None:
        return None
    @property
    def url(self) -> str:
        return f"/semantic_view/{self.uuid}/"
    @property
    def explore_url(self) -> str:
        # Explore addresses datasources by integer ID + type
        return f"/explore/?datasource_type=semantic_view&datasource_id={self.id}"
    @property
    def offset(self) -> int:
        # always return datetime as UTC
        return 0
    @property
    def get_time_grains(self) -> list[TimeGrainDict]:
        # One grain entry per dimension that declares a grain
        return [
            {
                "name": dimension.grain.name,
                "function": "",
                "duration": dimension.grain.representation,
            }
            for dimension in self.implementation.get_dimensions()
            if dimension.grain
        ]
    def has_drill_by_columns(self, column_names: list[str]) -> bool:
        # True when every requested column is a known dimension
        # (vacuously True for an empty request)
        dimension_names = {
            dimension.name for dimension in self.implementation.get_dimensions()
        }
        return all(column_name in dimension_names for column_name in column_names)
    @property
    def is_rls_supported(self) -> bool:
        # Row-level security is not supported on semantic views
        return False
    @property
    def query_language(self) -> str | None:
        return None

View File

@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from superset_core.semantic_layers.semantic_layer import SemanticLayer
# Maps a semantic layer type string (e.g. "snowflake") to its implementation
# class; populated elsewhere (eventually by the extension manager).
registry: dict[str, type[SemanticLayer]] = {}

View File

@@ -30,6 +30,46 @@ if TYPE_CHECKING:
SQLType: TypeAlias = TypeEngine | type[TypeEngine]
class DatasetColumnData(TypedDict, total=False):
    """Type for column metadata in ExplorableData datasets."""
    advanced_data_type: str | None
    certification_details: str | None
    certified_by: str | None
    # Unique name used to reference the column
    column_name: str
    description: str | None
    # Expression defining the column, if it is computed
    expression: str | None
    filterable: bool
    groupby: bool
    # None for columns without a backing ORM row (e.g. semantic views)
    id: int | None
    uuid: str | None
    is_certified: bool
    # Whether the column is a date/time column
    is_dttm: bool
    python_date_format: str | None
    # Type name as a string (e.g. "DATE")
    type: str
    type_generic: NotRequired["GenericDataType" | None]
    verbose_name: str | None
    warning_markdown: str | None
class DatasetMetricData(TypedDict, total=False):
    """Type for metric metadata in ExplorableData datasets."""
    certification_details: str | None
    certified_by: str | None
    currency: NotRequired[dict[str, Any]]
    # d3 number format string, e.g. "$,.2f"
    d3format: str | None
    description: str | None
    # Expression defining the metric
    expression: str | None
    # None for metrics without a backing ORM row (e.g. semantic views)
    id: int | None
    uuid: str | None
    is_certified: bool
    # Unique name used to reference the metric
    metric_name: str
    warning_markdown: str | None
    warning_text: str | None
    verbose_name: str | None
class LegacyMetric(TypedDict):
    """Minimal legacy metric representation holding only a label."""
    label: str | None
@@ -254,7 +294,7 @@ class ExplorableData(TypedDict, total=False):
"""
# Core fields from BaseDatasource.data
id: int
id: int | str # String for UUID-based explorables like SemanticView
uid: str
column_formats: dict[str, str | None]
description: str | None
@@ -274,8 +314,8 @@ class ExplorableData(TypedDict, total=False):
perm: str | None
edit_url: str
sql: str | None
columns: list[dict[str, Any]]
metrics: list[dict[str, Any]]
columns: list["DatasetColumnData"]
metrics: list["DatasetMetricData"]
folders: Any # JSON field, can be list or dict
order_by_choices: list[tuple[str, str]]
owners: list[int] | list[dict[str, Any]] # Can be either format
@@ -283,8 +323,8 @@ class ExplorableData(TypedDict, total=False):
select_star: str | None
# Additional fields from SqlaTable and data_for_slices
column_types: list[Any]
column_names: set[str] | set[Any]
column_types: list["GenericDataType"]
column_names: set[str] | list[str]
granularity_sqla: list[tuple[Any, Any]]
time_grain_sqla: list[tuple[Any, Any]]
main_dttm_col: str | None

View File

@@ -96,7 +96,6 @@ from superset.exceptions import (
SupersetException,
SupersetTimeoutException,
)
from superset.explorables.base import Explorable
from superset.sql.parse import sanitize_clause
from superset.superset_typing import (
AdhocColumn,
@@ -115,7 +114,7 @@ from superset.utils.hashing import hash_from_dict, hash_from_str
from superset.utils.pandas import detect_datetime_format
if TYPE_CHECKING:
from superset.connectors.sqla.models import TableColumn
from superset.explorables.base import ColumnMetadata, Explorable
from superset.models.core import Database
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
@@ -200,6 +199,7 @@ class DatasourceType(StrEnum):
QUERY = "query"
SAVEDQUERY = "saved_query"
VIEW = "view"
SEMANTIC_VIEW = "semantic_view"
class LoggerLevel(StrEnum):
@@ -1730,15 +1730,12 @@ def get_metric_type_from_column(column: Any, datasource: Explorable) -> str:
:return: The inferred metric type as a string, or an empty string if the
column is not a metric or no valid operation is found.
"""
from superset.connectors.sqla.models import SqlMetric
metric: SqlMetric = next(
(metric for metric in datasource.metrics if metric.metric_name == column),
SqlMetric(metric_name=""),
metric = next(
(m for m in datasource.metrics if m.metric_name == column),
None,
)
if metric.metric_name == "":
if metric is None:
return ""
expression: str = metric.expression
@@ -1784,7 +1781,7 @@ def extract_dataframe_dtypes(
generic_types: list[GenericDataType] = []
for column in df.columns:
column_object = columns_by_name.get(column)
column_object = columns_by_name.get(str(column))
series = df[column]
inferred_type: str = ""
if series.isna().all():
@@ -1814,11 +1811,17 @@ def extract_dataframe_dtypes(
return generic_types
def extract_column_dtype(col: TableColumn) -> GenericDataType:
if col.is_temporal:
def extract_column_dtype(col: ColumnMetadata) -> GenericDataType:
# Check for temporal type
if hasattr(col, "is_temporal") and col.is_temporal:
return GenericDataType.TEMPORAL
if col.is_numeric:
if col.is_dttm:
return GenericDataType.TEMPORAL
# Check for numeric type
if hasattr(col, "is_numeric") and col.is_numeric:
return GenericDataType.NUMERIC
# TODO: add check for boolean data type when proper support is added
return GenericDataType.STRING
@@ -1832,9 +1835,7 @@ def get_time_filter_status(
applied_time_extras: dict[str, str],
) -> tuple[list[dict[str, str]], list[dict[str, str]]]:
temporal_columns: set[Any] = {
(col.column_name if hasattr(col, "column_name") else col.get("column_name"))
for col in datasource.columns
if (col.is_dttm if hasattr(col, "is_dttm") else col.get("is_dttm"))
col.column_name for col in datasource.columns if col.is_dttm
}
applied: list[dict[str, str]] = []
rejected: list[dict[str, str]] = []

View File

@@ -626,7 +626,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
assert response == {
"message": {
"datasource_type": [
"Must be one of: table, dataset, query, saved_query, view."
"Must be one of: table, dataset, query, saved_query, view, "
"semantic_view."
]
}
}
@@ -981,7 +982,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
assert response == {
"message": {
"datasource_type": [
"Must be one of: table, dataset, query, saved_query, view."
"Must be one of: table, dataset, query, saved_query, view, "
"semantic_view."
]
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,621 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for semantic layer models."""
from __future__ import annotations
import uuid
from unittest.mock import MagicMock, patch
import pytest
from superset_core.semantic_layers.types import (
BINARY,
BOOLEAN,
DATE,
DATETIME,
DECIMAL,
INTEGER,
INTERVAL,
NUMBER,
OBJECT,
STRING,
TIME,
Day,
Dimension,
Metric,
Type,
)
from superset.semantic_layers.models import (
ColumnMetadata,
MetricMetadata,
SemanticLayer,
SemanticView,
get_column_type,
)
from superset.utils.core import GenericDataType
# =============================================================================
# get_column_type tests
# =============================================================================
def test_get_column_type_temporal_date() -> None:
    """DATE is mapped to the TEMPORAL generic type."""
    assert get_column_type(DATE) is GenericDataType.TEMPORAL


def test_get_column_type_temporal_datetime() -> None:
    """DATETIME is mapped to the TEMPORAL generic type."""
    assert get_column_type(DATETIME) is GenericDataType.TEMPORAL


def test_get_column_type_temporal_time() -> None:
    """TIME is mapped to the TEMPORAL generic type."""
    assert get_column_type(TIME) is GenericDataType.TEMPORAL


def test_get_column_type_numeric_integer() -> None:
    """INTEGER is mapped to the NUMERIC generic type."""
    assert get_column_type(INTEGER) is GenericDataType.NUMERIC


def test_get_column_type_numeric_number() -> None:
    """NUMBER is mapped to the NUMERIC generic type."""
    assert get_column_type(NUMBER) is GenericDataType.NUMERIC


def test_get_column_type_numeric_decimal() -> None:
    """DECIMAL is mapped to the NUMERIC generic type."""
    assert get_column_type(DECIMAL) is GenericDataType.NUMERIC


def test_get_column_type_numeric_interval() -> None:
    """INTERVAL is mapped to the NUMERIC generic type."""
    assert get_column_type(INTERVAL) is GenericDataType.NUMERIC


def test_get_column_type_boolean() -> None:
    """BOOLEAN is mapped to the BOOLEAN generic type."""
    assert get_column_type(BOOLEAN) is GenericDataType.BOOLEAN


def test_get_column_type_string() -> None:
    """STRING is mapped to the STRING generic type."""
    assert get_column_type(STRING) is GenericDataType.STRING


def test_get_column_type_object() -> None:
    """OBJECT is mapped to the STRING generic type."""
    assert get_column_type(OBJECT) is GenericDataType.STRING


def test_get_column_type_binary() -> None:
    """BINARY is mapped to the STRING generic type."""
    assert get_column_type(BINARY) is GenericDataType.STRING


def test_get_column_type_unknown() -> None:
    """Types without an explicit mapping fall back to STRING."""

    class UnknownType(Type):
        pass

    assert get_column_type(UnknownType) is GenericDataType.STRING
# =============================================================================
# MetricMetadata tests
# =============================================================================
def test_metric_metadata_required_fields() -> None:
    """Optional MetricMetadata fields default to None."""
    metadata = MetricMetadata(metric_name="revenue", expression="SUM(amount)")
    assert metadata.metric_name == "revenue"
    assert metadata.expression == "SUM(amount)"
    for optional_field in (
        "verbose_name",
        "description",
        "d3format",
        "currency",
        "warning_text",
        "certified_by",
        "certification_details",
    ):
        assert getattr(metadata, optional_field) is None


def test_metric_metadata_all_fields() -> None:
    """Every MetricMetadata field is stored exactly as passed."""
    kwargs = {
        "metric_name": "revenue",
        "expression": "SUM(amount)",
        "verbose_name": "Total Revenue",
        "description": "Sum of all revenue",
        "d3format": "$,.2f",
        "currency": {"symbol": "$", "symbolPosition": "prefix"},
        "warning_text": "Data may be incomplete",
        "certified_by": "Data Team",
        "certification_details": "Verified Q1 2024",
    }
    metadata = MetricMetadata(**kwargs)
    for field_name, expected in kwargs.items():
        assert getattr(metadata, field_name) == expected
# =============================================================================
# ColumnMetadata tests
# =============================================================================
def test_column_metadata_required_fields() -> None:
    """Optional ColumnMetadata fields take their documented defaults."""
    metadata = ColumnMetadata(column_name="order_date", type="DATE", is_dttm=True)
    assert metadata.column_name == "order_date"
    assert metadata.type == "DATE"
    assert metadata.is_dttm is True
    # Boolean flags default to permissive
    assert metadata.groupby is True
    assert metadata.filterable is True
    for optional_field in (
        "verbose_name",
        "description",
        "expression",
        "python_date_format",
        "advanced_data_type",
        "extra",
    ):
        assert getattr(metadata, optional_field) is None


def test_column_metadata_all_fields() -> None:
    """Every ColumnMetadata field is stored exactly as passed."""
    kwargs = {
        "column_name": "order_date",
        "type": "DATE",
        "is_dttm": True,
        "verbose_name": "Order Date",
        "description": "Date of the order",
        "groupby": True,
        "filterable": True,
        "expression": "DATE(order_timestamp)",
        "python_date_format": "%Y-%m-%d",
        "advanced_data_type": "date",
        "extra": '{"grain": "day"}',
    }
    metadata = ColumnMetadata(**kwargs)
    for field_name, expected in kwargs.items():
        assert getattr(metadata, field_name) == expected
# =============================================================================
# SemanticLayer tests
# =============================================================================
def test_semantic_layer_repr_with_name() -> None:
    """Test SemanticLayer __repr__ with name."""
    layer = SemanticLayer()
    layer.name = "My Semantic Layer"
    layer.uuid = uuid.uuid4()
    assert repr(layer) == "My Semantic Layer"
def test_semantic_layer_repr_without_name() -> None:
    """Test SemanticLayer __repr__ without name (uses uuid)."""
    layer = SemanticLayer()
    layer.name = None
    test_uuid = uuid.uuid4()
    layer.uuid = test_uuid
    assert repr(layer) == str(test_uuid)
def test_semantic_layer_implementation_not_implemented() -> None:
    """Test that implementation raises NotImplementedError."""
    # NOTE(review): the model's `implementation` does `registry[self.type]`,
    # which raises KeyError (not NotImplementedError) for an unset type —
    # confirm which exception the implementation actually raises here.
    layer = SemanticLayer()
    with pytest.raises(NotImplementedError):
        _ = layer.implementation
# =============================================================================
# SemanticView tests
# =============================================================================
@pytest.fixture
def mock_dimensions() -> list[Dimension]:
    """Create mock dimensions for testing."""
    # One temporal dimension with a Day grain and one plain string dimension,
    # covering both the is_dttm and the grain/no-grain branches.
    return [
        Dimension(
            id="orders.order_date",
            name="order_date",
            type=DATE,
            definition="orders.order_date",
            description="Date of the order",
            grain=Day,
        ),
        Dimension(
            id="products.category",
            name="category",
            type=STRING,
            definition="products.category",
            description="Product category",
            grain=None,
        ),
    ]
@pytest.fixture
def mock_metrics() -> list[Metric]:
    """Create mock metrics for testing."""
    return [
        Metric(
            id="orders.revenue",
            name="revenue",
            type=NUMBER,
            definition="SUM(orders.amount)",
            description="Total revenue",
        ),
        Metric(
            id="orders.count",
            name="order_count",
            type=INTEGER,
            definition="COUNT(*)",
            description="Number of orders",
        ),
    ]
@pytest.fixture
def mock_implementation(
    mock_dimensions: list[Dimension],
    mock_metrics: list[Metric],
) -> MagicMock:
    """Create a mock implementation."""
    # Stands in for the SemanticViewProtocol object returned by
    # SemanticView.implementation.
    impl = MagicMock()
    impl.get_dimensions.return_value = mock_dimensions
    impl.get_metrics.return_value = mock_metrics
    impl.uid.return_value = "semantic_view_uid_123"
    return impl
@pytest.fixture
def semantic_view(mock_implementation: MagicMock) -> SemanticView:
    """
    Create a SemanticView populated with deterministic test values.

    The ``implementation`` property is intentionally NOT mocked here: a
    ``patch.object`` entered and exited inside the fixture would be undone
    before the test body runs (the original code did exactly that, as a
    no-op). Tests that need the mocked implementation apply the patch
    themselves.
    """
    view = SemanticView()
    view.name = "Orders View"
    view.description = "View of order data"
    view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
    view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
    view.cache_timeout = 3600
    view.configuration = "{}"
    return view
def test_semantic_view_repr_with_name() -> None:
    """__repr__ returns the view name when one is set."""
    view = SemanticView()
    view.name = "My View"
    view.uuid = uuid.uuid4()
    assert repr(view) == "My View"


def test_semantic_view_repr_without_name() -> None:
    """__repr__ falls back to the UUID when the name is missing."""
    view = SemanticView()
    view.name = None
    fallback_uuid = uuid.uuid4()
    view.uuid = fallback_uuid
    assert repr(view) == str(fallback_uuid)


def test_semantic_view_type() -> None:
    """The datasource type is the literal string semantic_view."""
    assert SemanticView().type == "semantic_view"


def test_semantic_view_offset() -> None:
    """The timezone offset is always zero (datetimes are UTC)."""
    assert SemanticView().offset == 0


def test_semantic_view_is_rls_supported() -> None:
    """Row-level security is not supported on semantic views."""
    assert SemanticView().is_rls_supported is False


def test_semantic_view_query_language() -> None:
    """Semantic views expose no query language."""
    assert SemanticView().query_language is None


def test_semantic_view_get_query_str() -> None:
    """get_query_str returns a fixed placeholder message."""
    placeholder = SemanticView().get_query_str({})
    assert placeholder == "Not implemented for semantic layers"


def test_semantic_view_get_extra_cache_keys() -> None:
    """No extra cache keys are contributed."""
    assert SemanticView().get_extra_cache_keys({}) == []


def test_semantic_view_perm() -> None:
    """perm concatenates the layer and view UUID hex strings."""
    view = SemanticView()
    view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
    view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
    expected = "87654321432187654321876543218765::12345678123456781234567812345678"
    assert view.perm == expected
def test_semantic_view_uid(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
    mock_metrics: list[Metric],
) -> None:
    """Test SemanticView uid property."""
    view = SemanticView()
    view.name = "Test View"
    view.uuid = uuid.uuid4()
    view.semantic_layer_uuid = uuid.uuid4()
    # Replace the cached_property with a plain property returning the mock so
    # no real semantic layer implementation is ever touched.
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        assert view.uid == "semantic_view_uid_123"
def test_semantic_view_metrics(
    mock_implementation: MagicMock,
    mock_metrics: list[Metric],
) -> None:
    """Test SemanticView metrics property."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        metrics = view.metrics
    # Metrics preserve the implementation's order and field mapping
    assert len(metrics) == 2
    assert metrics[0].metric_name == "revenue"
    assert metrics[0].expression == "SUM(orders.amount)"
    assert metrics[0].description == "Total revenue"
    assert metrics[1].metric_name == "order_count"
def test_semantic_view_columns(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """Test SemanticView columns property."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        columns = view.columns
    # DATE dimension is flagged temporal; STRING dimension is not
    assert len(columns) == 2
    assert columns[0].column_name == "order_date"
    assert columns[0].type == "DATE"
    assert columns[0].is_dttm is True
    assert columns[0].description == "Date of the order"
    assert columns[1].column_name == "category"
    assert columns[1].type == "STRING"
    assert columns[1].is_dttm is False
def test_semantic_view_column_names(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """Test SemanticView column_names property."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        column_names = view.column_names
    assert column_names == ["order_date", "category"]
def test_semantic_view_get_time_grains(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """Test SemanticView get_time_grains property."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        time_grains = view.get_time_grains
    # Only the dimension with a grain (order_date / Day) contributes an entry
    assert len(time_grains) == 1
    assert time_grains[0]["name"] == "Day"
    assert time_grains[0]["duration"] == "P1D"
def test_semantic_view_has_drill_by_columns_all_exist(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """Test has_drill_by_columns when all columns exist."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        assert view.has_drill_by_columns(["order_date", "category"]) is True
def test_semantic_view_has_drill_by_columns_some_missing(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """Test has_drill_by_columns when some columns are missing."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        assert view.has_drill_by_columns(["order_date", "nonexistent"]) is False
def test_semantic_view_has_drill_by_columns_empty(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """Test has_drill_by_columns with empty list."""
    view = SemanticView()
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        # all() over an empty iterable is vacuously True
        assert view.has_drill_by_columns([]) is True
def test_semantic_view_data(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
    mock_metrics: list[Metric],
) -> None:
    """Test SemanticView data property."""
    view = SemanticView()
    view.name = "Orders View"
    view.description = "View of order data"
    view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
    view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
    view.cache_timeout = 3600
    with patch.object(
        SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
    ):
        data = view.data
    # Check core fields
    # NOTE(review): this expects the UUID hex string, but the visible model
    # builds `"id": self.id` (the unset integer surrogate key) — confirm
    # whether the model should emit self.uuid.hex instead.
    assert data["id"] == "12345678123456781234567812345678"
    assert data["uid"] == "semantic_view_uid_123"
    assert data["type"] == "semantic_view"
    assert data["name"] == "Orders View"
    assert data["description"] == "View of order data"
    assert data["cache_timeout"] == 3600
    # Check columns
    assert len(data["columns"]) == 2
    assert data["columns"][0]["column_name"] == "order_date"
    assert data["columns"][0]["type"] == "DATE"
    assert data["columns"][0]["is_dttm"] is True
    assert data["columns"][0]["type_generic"] == GenericDataType.TEMPORAL
    assert data["columns"][1]["column_name"] == "category"
    assert data["columns"][1]["type"] == "STRING"
    assert data["columns"][1]["type_generic"] == GenericDataType.STRING
    # Check metrics
    assert len(data["metrics"]) == 2
    assert data["metrics"][0]["metric_name"] == "revenue"
    assert data["metrics"][0]["expression"] == "SUM(orders.amount)"
    assert data["metrics"][1]["metric_name"] == "order_count"
    # Check column_types and column_names
    assert data["column_types"] == [
        GenericDataType.TEMPORAL,
        GenericDataType.STRING,
    ]
    # NOTE(review): the visible model builds column_names as a list, so
    # comparing against a set would fail — confirm the intended shape.
    assert data["column_names"] == {"order_date", "category"}
    # Check other fields
    assert data["table_name"] == "Orders View"
    assert data["datasource_name"] == "Orders View"
    assert data["offset"] == 0
def test_semantic_view_get_query_result(
    mock_implementation: MagicMock,
) -> None:
    """get_query_result delegates to the module-level get_results helper."""
    view = SemanticView()
    query_object = MagicMock()
    expected = MagicMock()
    with patch(
        "superset.semantic_layers.models.get_results",
        return_value=expected,
    ) as get_results_mock:
        assert view.get_query_result(query_object) == expected
    get_results_mock.assert_called_once_with(query_object)


def test_semantic_view_implementation() -> None:
    """implementation asks the parent layer for the named view."""
    view = SemanticView()
    view.name = "Test View"
    view.configuration = '{"key": "value"}'
    layer = MagicMock()
    view_impl = MagicMock()
    layer.implementation.get_semantic_view.return_value = view_impl
    view.semantic_layer = layer
    # Drop any cached value so the cached_property is computed fresh.
    view.__dict__.pop("implementation", None)
    assert view.implementation == view_impl
    layer.implementation.get_semantic_view.assert_called_once_with(
        "Test View",
        {"key": "value"},
    )