Compare commits

...

4 Commits

Author          SHA1        Message                        Date
Beto Dealmeida  b11ac4dd90  chore: container for testing   2026-02-10 15:39:50 -05:00
Beto Dealmeida  e182520bb3  feat: Explore integration      2026-02-10 15:39:50 -05:00
Beto Dealmeida  bfa4d5bd92  feat: models and DAOs          2026-02-10 15:38:15 -05:00
Beto Dealmeida  0e9c71e283  chore: remove AdhocFilter      2026-02-10 11:23:53 -05:00
22 changed files with 1729 additions and 86 deletions

View File

@@ -105,7 +105,12 @@ class CeleryConfig:
CELERY_CONFIG = CeleryConfig
FEATURE_FLAGS = {"ALERT_REPORTS": True, "DATASET_FOLDERS": True}
FEATURE_FLAGS = {
"ALERT_REPORTS": True,
"DATASET_FOLDERS": True,
"ENABLE_EXTENSIONS": True,
}
EXTENSIONS_PATH = "/app/docker/extensions"
ALERT_REPORTS_NOTIFICATION_DRY_RUN = True
WEBDRIVER_BASEURL = f"http://superset_app{os.environ.get('SUPERSET_APP_ROOT', '/')}/" # When using docker compose baseurl should be http://superset_nginx{ENV{BASEPATH}}/ # noqa: E501
# The base URL for the email report hyperlinks.
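
Note: a quick way to confirm the new flag is active, as a minimal sketch
assuming the standard feature_flag_manager API:

# Run inside the container, e.g. via `superset shell`.
from superset.extensions import feature_flag_manager

assert feature_flag_manager.is_feature_enabled("ENABLE_EXTENSIONS")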

View File

@@ -21,7 +21,6 @@ import enum
from typing import Protocol, runtime_checkable
from superset_core.semantic_layers.types import (
AdhocFilter,
Dimension,
Filter,
GroupLimit,
@@ -69,7 +68,7 @@ class SemanticView(Protocol):
def get_values(
self,
dimension: Dimension,
filters: set[Filter | AdhocFilter] | None = None,
filters: set[Filter] | None = None,
) -> SemanticResult:
"""
Return distinct values for a dimension.
@@ -79,7 +78,7 @@ class SemanticView(Protocol):
self,
metrics: list[Metric],
dimensions: list[Dimension],
filters: set[Filter | AdhocFilter] | None = None,
filters: set[Filter] | None = None,
order: list[OrderTuple] | None = None,
limit: int | None = None,
offset: int | None = None,
@@ -94,7 +93,7 @@ class SemanticView(Protocol):
self,
metrics: list[Metric],
dimensions: list[Dimension],
filters: set[Filter | AdhocFilter] | None = None,
filters: set[Filter] | None = None,
order: list[OrderTuple] | None = None,
limit: int | None = None,
offset: int | None = None,

View File

@@ -216,7 +216,7 @@ class Metric:
name: str
type: TypeOf[Type]
definition: str | None
definition: str
description: str | None = None
@@ -239,6 +239,7 @@ class Operator(str, enum.Enum):
NOT_LIKE = "NOT LIKE"
IS_NULL = "IS NULL"
IS_NOT_NULL = "IS NOT NULL"
ADHOC = "ADHOC"
FilterValues = str | int | float | bool | datetime | date | time | timedelta | None
@@ -252,19 +253,11 @@ class PredicateType(enum.Enum):
@dataclass(frozen=True, order=True)
class Filter:
type: PredicateType
column: Dimension | Metric
column: Dimension | Metric | None
operator: Operator
value: FilterValues | frozenset[FilterValues]
# TODO (betodealmeida): convert into Operator:
# Filter(type=..., column=None, operator=Operator.AdHoc, value="some definition")
@dataclass(frozen=True, order=True)
class AdhocFilter:
type: PredicateType
definition: str
class OrderDirection(enum.Enum):
ASC = "ASC"
DESC = "DESC"
@@ -291,7 +284,7 @@ class GroupLimit:
metric: Metric | None
direction: OrderDirection = OrderDirection.DESC
group_others: bool = False
filters: set[Filter | AdhocFilter] | None = None
filters: set[Filter] | None = None
@dataclass(frozen=True)
@@ -328,7 +321,7 @@ class SemanticQuery:
metrics: list[Metric]
dimensions: list[Dimension]
filters: set[Filter | AdhocFilter] | None = None
filters: set[Filter] | None = None
order: list[OrderTuple] | None = None
limit: int | None = None
offset: int | None = None
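
Note: with this change an adhoc SQL predicate is expressed as a regular Filter
using the new ADHOC operator and column=None, instead of a separate AdhocFilter
class. A minimal sketch of the consolidated construction:

from superset_core.semantic_layers.types import Filter, Operator, PredicateType

# Free-form WHERE predicate, previously AdhocFilter(type=..., definition=...)
adhoc = Filter(
    type=PredicateType.WHERE,
    column=None,
    operator=Operator.ADHOC,
    value="customer_id > 100",
)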

View File

@@ -19,6 +19,15 @@
import { DatasourceType } from './types/Datasource';
const DATASOURCE_TYPE_MAP: Record<string, DatasourceType> = {
table: DatasourceType.Table,
query: DatasourceType.Query,
dataset: DatasourceType.Dataset,
sl_table: DatasourceType.SlTable,
saved_query: DatasourceType.SavedQuery,
semantic_view: DatasourceType.SemanticView,
};
export default class DatasourceKey {
readonly id: number;
@@ -27,8 +36,7 @@ export default class DatasourceKey {
constructor(key: string) {
const [idStr, typeStr] = key.split('__');
this.id = parseInt(idStr, 10);
this.type = DatasourceType.Table; // default to SqlaTable model
this.type = typeStr === 'query' ? DatasourceType.Query : this.type;
this.type = DATASOURCE_TYPE_MAP[typeStr] ?? DatasourceType.Table;
}
public toString() {

View File

@@ -26,6 +26,7 @@ export enum DatasourceType {
Dataset = 'dataset',
SlTable = 'sl_table',
SavedQuery = 'saved_query',
SemanticView = 'semantic_view',
}
export interface Currency {

View File

@@ -151,11 +151,8 @@ export const getSlicePayload = async (
const [id, typeString] = formData.datasource.split('__');
datasourceId = parseInt(id, 10);
const formattedTypeString =
typeString.charAt(0).toUpperCase() + typeString.slice(1);
if (formattedTypeString in DatasourceType) {
datasourceType =
DatasourceType[formattedTypeString as keyof typeof DatasourceType];
if (Object.values(DatasourceType).includes(typeString as DatasourceType)) {
datasourceType = typeString as DatasourceType;
}
}

View File

@@ -124,7 +124,7 @@ class GetExploreCommand(BaseCommand, ABC):
security_manager.raise_for_access(datasource=datasource)
viz_type = form_data.get("viz_type")
if not viz_type and datasource and datasource.default_endpoint:
if not viz_type and datasource and getattr(datasource, "default_endpoint", None):
raise WrongEndpointError(redirect=datasource.default_endpoint)
form_data["datasource"] = (
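
Note: the getattr guard keeps the redirect working for datasources that define
default_endpoint while tolerating explorables (such as the new SemanticView)
that never define the attribute. A minimal sketch of the pattern, with two
hypothetical datasource classes:

class LegacyTable:
    default_endpoint = "/some/endpoint/"

class NewExplorable:
    pass  # no default_endpoint attribute at all

for ds in (LegacyTable(), NewExplorable()):
    # attribute access that would raise AttributeError now yields None
    print(getattr(ds, "default_endpoint", None))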

View File

@@ -107,6 +107,8 @@ from superset.sql.parse import Table
from superset.superset_typing import (
AdhocColumn,
AdhocMetric,
DatasetColumnData,
DatasetMetricData,
ExplorableData,
Metric,
QueryObjectDict,
@@ -463,8 +465,8 @@ class BaseDatasource(
# sqla-specific
"sql": self.sql,
# one to many
"columns": [o.data for o in self.columns],
"metrics": [o.data for o in self.metrics],
"columns": [cast(DatasetColumnData, o.data) for o in self.columns],
"metrics": [cast(DatasetMetricData, o.data) for o in self.metrics],
"folders": self.folders,
# TODO deprecate, move logic to JS
"order_by_choices": self.order_by_choices,

View File

@@ -0,0 +1,99 @@
"""
Script to create a Pandas semantic layer and Sales semantic view in Superset.
Run this inside the superset_app container:
python /app/superset/create_pandas_semantic_layer.py
"""
from __future__ import annotations
import logging
import sys
from typing import TYPE_CHECKING
# Add the Superset application directory to the Python path
sys.path.insert(0, "/app")
from superset.app import create_app
from superset.extensions import db
from superset.utils import json
if TYPE_CHECKING:
from superset.semantic_layers.models import SemanticLayer, SemanticView
app = create_app()
app.app_context().push()
# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
def create_pandas_semantic_layer() -> SemanticLayer:
"""Create a Pandas semantic layer with minimal configuration."""
from superset.semantic_layers.models import SemanticLayer
logger.info("Creating Pandas semantic layer...")
configuration = {
"dataset": "sales",
}
semantic_layer = SemanticLayer(
name="Pandas Semantic Layer",
description="In-memory semantic layer backed by a Pandas DataFrame",
type="pandas",
configuration=json.dumps(configuration),
cache_timeout=3600,
)
db.session.add(semantic_layer)
db.session.commit()
logger.info("Created semantic layer:")
logger.info(" Name: %s", semantic_layer.name)
logger.info(" UUID: %s", semantic_layer.uuid)
logger.info(" Type: %s", semantic_layer.type)
return semantic_layer
def create_sales_semantic_view(semantic_layer: SemanticLayer) -> SemanticView:
"""Create the Sales semantic view."""
from superset.semantic_layers.models import SemanticView
logger.info("Creating Sales semantic view...")
semantic_view = SemanticView(
name="sales",
configuration="{}",
cache_timeout=1800,
semantic_layer_uuid=semantic_layer.uuid,
)
db.session.add(semantic_view)
db.session.commit()
logger.info("Created semantic view:")
logger.info(" Name: %s", semantic_view.name)
logger.info(" UUID: %s", semantic_view.uuid)
logger.info(" Semantic Layer UUID: %s", semantic_view.semantic_layer_uuid)
return semantic_view
def main() -> None:
"""Main script execution."""
logger.info("=" * 60)
logger.info("Creating Pandas Semantic Layer and Sales Semantic View")
logger.info("=" * 60)
semantic_layer = create_pandas_semantic_layer()
create_sales_semantic_view(semantic_layer)
if __name__ == "__main__":
main()

View File

@@ -28,6 +28,7 @@ from superset.daos.exceptions import (
DatasourceValueIsIncorrect,
)
from superset.models.sql_lab import Query, SavedQuery
from superset.semantic_layers.models import SemanticView
from superset.utils.core import DatasourceType
logger = logging.getLogger(__name__)
@@ -40,6 +41,7 @@ class DatasourceDAO(BaseDAO[Datasource]):
DatasourceType.TABLE: SqlaTable,
DatasourceType.QUERY: Query,
DatasourceType.SAVEDQUERY: SavedQuery,
DatasourceType.SEMANTIC_VIEW: SemanticView,
}
@classmethod
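
Note: with the new entry, a "semantic_view" datasource type resolves to the
SemanticView model. A minimal sketch, assuming the mapping shown above is the
DAO's `sources` attribute as in Superset's DatasourceDAO:

from superset.daos.datasource import DatasourceDAO
from superset.utils.core import DatasourceType

model_cls = DatasourceDAO.sources[DatasourceType.SEMANTIC_VIEW]
print(model_cls.__name__)  # SemanticView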

View File

@@ -0,0 +1,152 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DAOs for semantic layer models."""
from __future__ import annotations
from superset.daos.base import BaseDAO
from superset.extensions import db
from superset.semantic_layers.models import SemanticLayer, SemanticView
class SemanticLayerDAO(BaseDAO[SemanticLayer]):
"""
Data Access Object for SemanticLayer model.
"""
@staticmethod
def validate_uniqueness(name: str) -> bool:
"""
Validate that semantic layer name is unique.
:param name: Semantic layer name
:return: True if name is unique, False otherwise
"""
query = db.session.query(SemanticLayer).filter(SemanticLayer.name == name)
return not db.session.query(query.exists()).scalar()
@staticmethod
def validate_update_uniqueness(layer_uuid: str, name: str) -> bool:
"""
Validate that semantic layer name is unique for updates.
:param layer_uuid: UUID of the semantic layer being updated
:param name: New name to validate
:return: True if name is unique, False otherwise
"""
query = db.session.query(SemanticLayer).filter(
SemanticLayer.name == name,
SemanticLayer.uuid != layer_uuid,
)
return not db.session.query(query.exists()).scalar()
@staticmethod
def find_by_name(name: str) -> SemanticLayer | None:
"""
Find semantic layer by name.
:param name: Semantic layer name
:return: SemanticLayer instance or None
"""
return (
db.session.query(SemanticLayer)
.filter(SemanticLayer.name == name)
.one_or_none()
)
@classmethod
def get_semantic_views(cls, layer_uuid: str) -> list[SemanticView]:
"""
Get all semantic views for a semantic layer.
:param layer_uuid: UUID of the semantic layer
:return: List of SemanticView instances
"""
return (
db.session.query(SemanticView)
.filter(SemanticView.semantic_layer_uuid == layer_uuid)
.all()
)
class SemanticViewDAO(BaseDAO[SemanticView]):
"""Data Access Object for SemanticView model."""
@staticmethod
def find_by_semantic_layer(layer_uuid: str) -> list[SemanticView]:
"""
Find all views for a semantic layer.
:param layer_uuid: UUID of the semantic layer
:return: List of SemanticView instances
"""
return (
db.session.query(SemanticView)
.filter(SemanticView.semantic_layer_uuid == layer_uuid)
.all()
)
@staticmethod
def validate_uniqueness(name: str, layer_uuid: str) -> bool:
"""
Validate that view name is unique within semantic layer.
:param name: View name
:param layer_uuid: UUID of the semantic layer
:return: True if name is unique within layer, False otherwise
"""
query = db.session.query(SemanticView).filter(
SemanticView.name == name,
SemanticView.semantic_layer_uuid == layer_uuid,
)
return not db.session.query(query.exists()).scalar()
@staticmethod
def validate_update_uniqueness(view_uuid: str, name: str, layer_uuid: str) -> bool:
"""
Validate that view name is unique within semantic layer for updates.
:param view_uuid: UUID of the view being updated
:param name: New name to validate
:param layer_uuid: UUID of the semantic layer
:return: True if name is unique within layer, False otherwise
"""
query = db.session.query(SemanticView).filter(
SemanticView.name == name,
SemanticView.semantic_layer_uuid == layer_uuid,
SemanticView.uuid != view_uuid,
)
return not db.session.query(query.exists()).scalar()
@staticmethod
def find_by_name(name: str, layer_uuid: str) -> SemanticView | None:
"""
Find semantic view by name within a semantic layer.
:param name: View name
:param layer_uuid: UUID of the semantic layer
:return: SemanticView instance or None
"""
return (
db.session.query(SemanticView)
.filter(
SemanticView.name == name,
SemanticView.semantic_layer_uuid == layer_uuid,
)
.one_or_none()
)
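
Note: a minimal usage sketch for the DAO helpers above; the import path is an
assumption (file names are not shown in this diff) and the code must run
inside an app context with a bound session:

# Hypothetical module path for the new DAOs.
from superset.daos.semantic_layer import SemanticLayerDAO, SemanticViewDAO

layer = SemanticLayerDAO.find_by_name("Pandas Semantic Layer")
if layer is not None:
    if SemanticViewDAO.validate_uniqueness("sales", layer.uuid):
        print("safe to create a view named 'sales' under this layer")
    for view in SemanticLayerDAO.get_semantic_views(layer.uuid):
        print(view.name)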

View File

@@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict):
duration: str | None
@runtime_checkable
class MetricMetadata(Protocol):
"""
Protocol for metric metadata objects.
Represents a metric that's available on an explorable data source.
Metrics contain SQL expressions or references to semantic layer measures.
Attributes:
metric_name: Unique identifier for the metric
expression: SQL expression or reference for calculating the metric
verbose_name: Human-readable name for display in the UI
description: Description of what the metric represents
d3format: D3 format string for formatting numeric values
currency: Currency configuration for the metric (JSON object)
warning_text: Warning message to display when using this metric
certified_by: Person or entity that certified this metric
certification_details: Details about the certification
"""
@property
def metric_name(self) -> str:
"""Unique identifier for the metric."""
@property
def expression(self) -> str:
"""SQL expression or reference for calculating the metric."""
@property
def verbose_name(self) -> str | None:
"""Human-readable name for display in the UI."""
@property
def description(self) -> str | None:
"""Description of what the metric represents."""
@property
def d3format(self) -> str | None:
"""D3 format string for formatting numeric values."""
@property
def currency(self) -> dict[str, Any] | None:
"""Currency configuration for the metric (JSON object)."""
@property
def warning_text(self) -> str | None:
"""Warning message to display when using this metric."""
@property
def certified_by(self) -> str | None:
"""Person or entity that certified this metric."""
@property
def certification_details(self) -> str | None:
"""Details about the certification."""
@runtime_checkable
class ColumnMetadata(Protocol):
"""
Protocol for column metadata objects.
Represents a column/dimension that's available on an explorable data source.
Used for grouping, filtering, and dimension-based analysis.
Attributes:
column_name: Unique identifier for the column
type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME')
is_dttm: Whether this column represents a date or time value
verbose_name: Human-readable name for display in the UI
description: Description of what the column represents
groupby: Whether this column is allowed for grouping/aggregation
filterable: Whether this column can be used in filters
expression: SQL expression if this is a calculated column
python_date_format: Python datetime format string for temporal columns
advanced_data_type: Advanced data type classification
extra: Additional metadata stored as JSON
"""
@property
def column_name(self) -> str:
"""Unique identifier for the column."""
@property
def type(self) -> str:
"""SQL data type of the column."""
@property
def is_dttm(self) -> bool:
"""Whether this column represents a date or time value."""
@property
def verbose_name(self) -> str | None:
"""Human-readable name for display in the UI."""
@property
def description(self) -> str | None:
"""Description of what the column represents."""
@property
def groupby(self) -> bool:
"""Whether this column is allowed for grouping/aggregation."""
@property
def filterable(self) -> bool:
"""Whether this column can be used in filters."""
@property
def expression(self) -> str | None:
"""SQL expression if this is a calculated column."""
@property
def python_date_format(self) -> str | None:
"""Python datetime format string for temporal columns."""
@property
def advanced_data_type(self) -> str | None:
"""Advanced data type classification."""
@property
def extra(self) -> str | None:
"""Additional metadata stored as JSON."""
@runtime_checkable
class Explorable(Protocol):
"""
@@ -132,7 +256,7 @@ class Explorable(Protocol):
"""
@property
def metrics(self) -> list[Any]:
def metrics(self) -> list[MetricMetadata]:
"""
List of metric metadata objects.
@@ -147,7 +271,7 @@ class Explorable(Protocol):
# TODO: rename to dimensions
@property
def columns(self) -> list[Any]:
def columns(self) -> list[ColumnMetadata]:
"""
List of column metadata objects.
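
Note: because these protocols are runtime_checkable, any object exposing the
right attributes satisfies them; no inheritance is required. A minimal sketch
with a plain dataclass standing in for a column:

from __future__ import annotations

from dataclasses import dataclass

from superset.explorables.base import ColumnMetadata

@dataclass(frozen=True)
class MyColumn:
    column_name: str
    type: str
    is_dttm: bool
    verbose_name: str | None = None
    description: str | None = None
    groupby: bool = True
    filterable: bool = True
    expression: str | None = None
    python_date_format: str | None = None
    advanced_data_type: str | None = None
    extra: str | None = None

# isinstance only checks attribute presence for runtime_checkable protocols
assert isinstance(MyColumn("order_date", "DATE", True), ColumnMetadata)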

View File

@@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""add_semantic_layers_and_views
Revision ID: 33d7e0e21daa
Revises: 9787190b3d89
Create Date: 2025-11-04 11:26:00.000000
"""
import uuid
import sqlalchemy as sa
from alembic import op
from sqlalchemy_utils import UUIDType
from sqlalchemy_utils.types.json import JSONType
from superset.extensions import encrypted_field_factory
from superset.migrations.shared.utils import (
create_fks_for_table,
create_table,
drop_table,
)
# revision identifiers, used by Alembic.
revision = "33d7e0e21daa"
down_revision = "9787190b3d89"
def upgrade():
# Create semantic_layers table
create_table(
"semantic_layers",
sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False),
sa.Column("created_on", sa.DateTime(), nullable=True),
sa.Column("changed_on", sa.DateTime(), nullable=True),
sa.Column("name", sa.String(length=250), nullable=False),
sa.Column("description", sa.Text(), nullable=True),
sa.Column("type", sa.String(length=250), nullable=False),
sa.Column(
"configuration",
encrypted_field_factory.create(JSONType),
nullable=True,
),
sa.Column("cache_timeout", sa.Integer(), nullable=True),
sa.Column("created_by_fk", sa.Integer(), nullable=True),
sa.Column("changed_by_fk", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("uuid"),
)
# Create foreign key constraints for semantic_layers
create_fks_for_table(
"fk_semantic_layers_created_by_fk_ab_user",
"semantic_layers",
"ab_user",
["created_by_fk"],
["id"],
)
create_fks_for_table(
"fk_semantic_layers_changed_by_fk_ab_user",
"semantic_layers",
"ab_user",
["changed_by_fk"],
["id"],
)
# Create semantic_views table
create_table(
"semantic_views",
sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False),
sa.Column("id", sa.Integer(), sa.Identity(), unique=True, nullable=False),
sa.Column("created_on", sa.DateTime(), nullable=True),
sa.Column("changed_on", sa.DateTime(), nullable=True),
sa.Column("name", sa.String(length=250), nullable=False),
sa.Column("description", sa.Text(), nullable=True),
sa.Column(
"configuration",
encrypted_field_factory.create(JSONType),
nullable=True,
),
sa.Column("cache_timeout", sa.Integer(), nullable=True),
sa.Column(
"semantic_layer_uuid",
UUIDType(binary=True),
sa.ForeignKey("semantic_layers.uuid", ondelete="CASCADE"),
nullable=False,
),
sa.Column("created_by_fk", sa.Integer(), nullable=True),
sa.Column("changed_by_fk", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("uuid"),
)
# Create foreign key constraints for semantic_views
create_fks_for_table(
"fk_semantic_views_created_by_fk_ab_user",
"semantic_views",
"ab_user",
["created_by_fk"],
["id"],
)
create_fks_for_table(
"fk_semantic_views_changed_by_fk_ab_user",
"semantic_views",
"ab_user",
["changed_by_fk"],
["id"],
)
# Update chart datasource constraint to allow semantic_view
with op.batch_alter_table("slices") as batch_op:
batch_op.drop_constraint("ck_chart_datasource", type_="check")
batch_op.create_check_constraint(
"ck_chart_datasource",
"datasource_type in ('table', 'semantic_view')",
)
def downgrade():
# Restore original constraint
with op.batch_alter_table("slices") as batch_op:
batch_op.drop_constraint("ck_chart_datasource", type_="check")
batch_op.create_check_constraint(
"ck_chart_datasource", "datasource_type in ('table')"
)
drop_table("semantic_views")
drop_table("semantic_layers")

View File

@@ -22,7 +22,7 @@ import logging
import re
from collections.abc import Hashable
from datetime import datetime
from typing import Any, Optional, TYPE_CHECKING
from typing import Any, cast, Optional, TYPE_CHECKING
import sqlalchemy as sqla
from flask import current_app as app
@@ -64,7 +64,7 @@ from superset.sql.parse import (
Table,
)
from superset.sqllab.limiting_factor import LimitingFactor
from superset.superset_typing import ExplorableData, QueryObjectDict
from superset.superset_typing import DatasetColumnData, ExplorableData, QueryObjectDict
from superset.utils import json
from superset.utils.core import (
get_column_name,
@@ -258,7 +258,7 @@ class Query(
],
"filter_select": True,
"name": self.tab_name,
"columns": [o.data for o in self.columns],
"columns": [cast(DatasetColumnData, o.data) for o in self.columns],
"metrics": [],
"id": self.id,
"type": self.type,

View File

@@ -32,7 +32,6 @@ import numpy as np
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
from superset_core.semantic_layers.types import (
AdhocExpression,
AdhocFilter,
Day,
Dimension,
Filter,
@@ -370,14 +369,14 @@ def _get_filters_from_query_object(
query_object: ValidatedQueryObject,
time_offset: str | None,
all_dimensions: dict[str, Dimension],
) -> set[Filter | AdhocFilter]:
) -> set[Filter]:
"""
Extract all filters from the query object, including time range filters.
This simplifies the complexity of from_dttm/to_dttm/inner_from_dttm/inner_to_dttm
by converting all time constraints into filters.
"""
filters: set[Filter | AdhocFilter] = set()
filters: set[Filter] = set()
# 1. Add fetch values predicate if present
if (
@@ -385,9 +384,11 @@ def _get_filters_from_query_object(
and query_object.datasource.fetch_values_predicate
):
filters.add(
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition=query_object.datasource.fetch_values_predicate,
column=None,
operator=Operator.ADHOC,
value=query_object.datasource.fetch_values_predicate,
)
)
@@ -415,7 +416,7 @@ def _get_filters_from_query_object(
return filters
def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
"""
Extract filters from the extras dict.
@@ -430,25 +431,29 @@ def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
Handled in _convert_time_grain() and used for dimension grain matching
Note: The WHERE and HAVING clauses from extras are SQL expressions that
are passed through as-is to the semantic layer as AdhocFilter objects.
are passed through as-is to the semantic layer as adhoc Filter objects.
"""
filters: set[AdhocFilter] = set()
filters: set[Filter] = set()
# Add WHERE clause from extras
if where_clause := extras.get("where"):
filters.add(
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition=where_clause,
column=None,
operator=Operator.ADHOC,
value=where_clause,
)
)
# Add HAVING clause from extras
if having_clause := extras.get("having"):
filters.add(
AdhocFilter(
Filter(
type=PredicateType.HAVING,
definition=having_clause,
column=None,
operator=Operator.ADHOC,
value=having_clause,
)
)
@@ -540,7 +545,7 @@ def _convert_query_object_filter(
all_dimensions: dict[str, Dimension],
) -> set[Filter] | None:
"""
Convert a QueryObject filter dict to a semantic layer Filter or AdhocFilter.
Convert a QueryObject filter dict to a semantic layer Filter.
"""
operator_str = filter_["op"]
@@ -676,7 +681,7 @@ def _get_group_limit_from_query_object(
def _get_group_limit_filters(
query_object: ValidatedQueryObject,
all_dimensions: dict[str, Dimension],
) -> set[Filter | AdhocFilter] | None:
) -> set[Filter] | None:
"""
Get separate filters for the group limit subquery if needed.
@@ -699,7 +704,7 @@ def _get_group_limit_filters(
return None
# Create separate filters for the group limit subquery
filters: set[Filter | AdhocFilter] = set()
filters: set[Filter] = set()
# Add time range filter using inner bounds
if query_object.granularity:
@@ -732,9 +737,11 @@ def _get_group_limit_filters(
and query_object.datasource.fetch_values_predicate
):
filters.add(
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition=query_object.datasource.fetch_values_predicate,
column=None,
operator=Operator.ADHOC,
value=query_object.datasource.fetch_values_predicate,
)
)

View File

@@ -0,0 +1,398 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Semantic layer models."""
from __future__ import annotations
import uuid
from collections.abc import Hashable
from dataclasses import dataclass
from functools import cached_property
from typing import Any, TYPE_CHECKING
from flask_appbuilder import Model
from sqlalchemy import Column, ForeignKey, Identity, Integer, String, Text
from sqlalchemy.orm import relationship
from sqlalchemy_utils import UUIDType
from sqlalchemy_utils.types.json import JSONType
from superset_core.semantic_layers.semantic_layer import (
SemanticLayer as SemanticLayerProtocol,
)
from superset_core.semantic_layers.semantic_view import (
SemanticView as SemanticViewProtocol,
)
from superset_core.semantic_layers.types import (
BINARY,
BOOLEAN,
DATE,
DATETIME,
DECIMAL,
INTEGER,
INTERVAL,
NUMBER,
OBJECT,
STRING,
TIME,
Type,
)
from superset.common.query_object import QueryObject
from superset.explorables.base import TimeGrainDict
from superset.extensions import encrypted_field_factory
from superset.models.helpers import AuditMixinNullable, QueryResult
from superset.semantic_layers.mapper import get_results
from superset.semantic_layers.registry import registry
from superset.utils import json
from superset.utils.core import GenericDataType
if TYPE_CHECKING:
from superset.superset_typing import ExplorableData, QueryObjectDict
def get_column_type(semantic_type: type[Type]) -> GenericDataType:
"""
Map semantic layer types to generic data types.
"""
if semantic_type in {DATE, DATETIME, TIME}:
return GenericDataType.TEMPORAL
if semantic_type in {INTEGER, NUMBER, DECIMAL, INTERVAL}:
return GenericDataType.NUMERIC
if semantic_type is BOOLEAN:
return GenericDataType.BOOLEAN
if semantic_type in {STRING, OBJECT, BINARY}:
return GenericDataType.STRING
return GenericDataType.STRING
@dataclass(frozen=True)
class MetricMetadata:
metric_name: str
expression: str
verbose_name: str | None = None
description: str | None = None
d3format: str | None = None
currency: dict[str, Any] | None = None
warning_text: str | None = None
certified_by: str | None = None
certification_details: str | None = None
@dataclass(frozen=True)
class ColumnMetadata:
column_name: str
type: str
is_dttm: bool
verbose_name: str | None = None
description: str | None = None
groupby: bool = True
filterable: bool = True
expression: str | None = None
python_date_format: str | None = None
advanced_data_type: str | None = None
extra: str | None = None
class SemanticLayer(AuditMixinNullable, Model):
"""
Semantic layer model.
A semantic layer provides an abstraction over data sources,
allowing users to query data through a semantic interface.
"""
__tablename__ = "semantic_layers"
uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4)
# Core fields
name = Column(String(250), nullable=False)
description = Column(Text, nullable=True)
type = Column(String(250), nullable=False) # snowflake, etc
configuration = Column(encrypted_field_factory.create(JSONType), default=dict)
cache_timeout = Column(Integer, nullable=True)
# Semantic views relationship
semantic_views: list[SemanticView] = relationship(
"SemanticView",
back_populates="semantic_layer",
cascade="all, delete-orphan",
passive_deletes=True,
)
def __repr__(self) -> str:
return self.name or str(self.uuid)
@cached_property
def implementation(
self,
) -> SemanticLayerProtocol[Any, SemanticViewProtocol]:
"""
Return semantic layer implementation.
"""
# TODO (betodealmeida):
# return extension_manager.get_contribution("semanticLayers", self.type)
class_ = registry[self.type]
return class_.from_configuration(json.loads(self.configuration))
class SemanticView(AuditMixinNullable, Model):
"""
Semantic view model.
A semantic view represents a queryable view within a semantic layer.
"""
__tablename__ = "semantic_views"
uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4)
id = Column(Integer, Identity(), unique=True)
# Core fields
name = Column(String(250), nullable=False)
description = Column(Text, nullable=True)
configuration = Column(encrypted_field_factory.create(JSONType), default=dict)
cache_timeout = Column(Integer, nullable=True)
# Semantic layer relationship
semantic_layer_uuid = Column(
UUIDType(binary=True),
ForeignKey("semantic_layers.uuid", ondelete="CASCADE"),
nullable=False,
)
semantic_layer: SemanticLayer = relationship(
"SemanticLayer",
back_populates="semantic_views",
foreign_keys=[semantic_layer_uuid],
)
def __repr__(self) -> str:
return self.name or str(self.uuid)
@cached_property
def implementation(self) -> SemanticViewProtocol:
"""
Return semantic view implementation.
"""
return self.semantic_layer.implementation.get_semantic_view(
self.name,
json.loads(self.configuration),
)
# =========================================================================
# Explorable protocol implementation
# =========================================================================
def get_query_result(self, query_object: QueryObject) -> QueryResult:
return get_results(query_object)
def get_query_str(self, query_obj: QueryObjectDict) -> str:
return "Not implemented for semantic layers"
@property
def uid(self) -> str:
return self.implementation.uid()
@property
def type(self) -> str:
return "semantic_view"
@property
def metrics(self) -> list[MetricMetadata]:
return [
MetricMetadata(
metric_name=metric.name,
expression=metric.definition,
description=metric.description,
)
for metric in self.implementation.get_metrics()
]
@property
def columns(self) -> list[ColumnMetadata]:
return [
ColumnMetadata(
column_name=dimension.name,
type=dimension.type.__name__,
is_dttm=dimension.type in {DATE, TIME, DATETIME},
description=dimension.description,
expression=dimension.definition,
extra=json.dumps(
{"grain": dimension.grain.name if dimension.grain else None}
),
)
for dimension in self.implementation.get_dimensions()
]
@property
def column_names(self) -> list[str]:
return [dimension.name for dimension in self.implementation.get_dimensions()]
@property
def data(self) -> ExplorableData:
return {
# core
"id": self.id,
"uid": self.uid,
"type": "semantic_view",
"name": self.name,
"columns": [
{
"advanced_data_type": None,
"certification_details": None,
"certified_by": None,
"column_name": dimension.name,
"description": dimension.description,
"expression": dimension.definition,
"filterable": True,
"groupby": True,
"id": None,
"uuid": None,
"is_certified": False,
"is_dttm": dimension.type in {DATE, TIME, DATETIME},
"python_date_format": None,
"type": dimension.type.__name__,
"type_generic": get_column_type(dimension.type),
"verbose_name": None,
"warning_markdown": None,
}
for dimension in self.implementation.get_dimensions()
],
"metrics": [
{
"certification_details": None,
"certified_by": None,
"d3format": None,
"description": metric.description,
"expression": metric.definition,
"id": None,
"uuid": None,
"is_certified": False,
"metric_name": metric.name,
"warning_markdown": None,
"warning_text": None,
"verbose_name": None,
}
for metric in self.implementation.get_metrics()
],
"database": {},
# UI features
"verbose_map": {},
"order_by_choices": [],
"filter_select": True,
"filter_select_enabled": True,
"sql": None,
"select_star": None,
"owners": [],
"description": self.description,
"table_name": self.name,
"column_types": [
get_column_type(dimension.type)
for dimension in self.implementation.get_dimensions()
],
"column_names": [
dimension.name for dimension in self.implementation.get_dimensions()
],
# rare
"column_formats": {},
"datasource_name": self.name,
"perm": self.perm,
"offset": self.offset,
"cache_timeout": self.cache_timeout,
"params": None,
# sql-specific
"schema": None,
"catalog": None,
"main_dttm_col": None,
"time_grain_sqla": [],
"granularity_sqla": [],
"fetch_values_predicate": None,
"template_params": None,
"is_sqllab_view": False,
"extra": None,
"always_filter_main_dttm": False,
"normalize_columns": False,
# TODO XXX
# "owners": [owner.id for owner in self.owners],
"edit_url": "",
"default_endpoint": None,
"folders": [],
"health_check_message": None,
}
def data_for_slices(self, slices: list[Any]) -> dict[str, Any]:
return self.data
def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]:
return []
@property
def perm(self) -> str:
return self.semantic_layer_uuid.hex + "::" + self.uuid.hex
@property
def catalog_perm(self) -> str | None:
return None
@property
def schema_perm(self) -> str | None:
return None
@property
def schema(self) -> str | None:
return None
@property
def url(self) -> str:
return f"/semantic_view/{self.uuid}/"
@property
def explore_url(self) -> str:
return f"/explore/?datasource_type=semantic_view&datasource_id={self.id}"
@property
def offset(self) -> int:
# always return datetime as UTC
return 0
@property
def get_time_grains(self) -> list[TimeGrainDict]:
return [
{
"name": dimension.grain.name,
"function": "",
"duration": dimension.grain.representation,
}
for dimension in self.implementation.get_dimensions()
if dimension.grain
]
def has_drill_by_columns(self, column_names: list[str]) -> bool:
dimension_names = {
dimension.name for dimension in self.implementation.get_dimensions()
}
return all(column_name in dimension_names for column_name in column_names)
@property
def is_rls_supported(self) -> bool:
return False
@property
def query_language(self) -> str | None:
return None

View File

@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from superset_core.semantic_layers.semantic_layer import SemanticLayer
registry: dict[str, type[SemanticLayer]] = {}
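
Note: entries are keyed by SemanticLayer.type, and SemanticLayer.implementation
resolves through registry[self.type] before calling from_configuration(). A
minimal registration sketch; PandasSemanticLayer and its module are
hypothetical, since these hunks wire the registry but ship no concrete
implementation:

from superset.semantic_layers.registry import registry

from my_extension.pandas_layer import PandasSemanticLayer  # hypothetical

registry["pandas"] = PandasSemanticLayer
# A SemanticLayer row with type="pandas" now resolves to this class.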

View File

@@ -30,6 +30,46 @@ if TYPE_CHECKING:
SQLType: TypeAlias = TypeEngine | type[TypeEngine]
class DatasetColumnData(TypedDict, total=False):
"""Type for column metadata in ExplorableData datasets."""
advanced_data_type: str | None
certification_details: str | None
certified_by: str | None
column_name: str
description: str | None
expression: str | None
filterable: bool
groupby: bool
id: int | None
uuid: str | None
is_certified: bool
is_dttm: bool
python_date_format: str | None
type: str
type_generic: NotRequired["GenericDataType" | None]
verbose_name: str | None
warning_markdown: str | None
class DatasetMetricData(TypedDict, total=False):
"""Type for metric metadata in ExplorableData datasets."""
certification_details: str | None
certified_by: str | None
currency: NotRequired[dict[str, Any]]
d3format: str | None
description: str | None
expression: str | None
id: int | None
uuid: str | None
is_certified: bool
metric_name: str
warning_markdown: str | None
warning_text: str | None
verbose_name: str | None
class LegacyMetric(TypedDict):
label: str | None
@@ -254,7 +294,7 @@ class ExplorableData(TypedDict, total=False):
"""
# Core fields from BaseDatasource.data
id: int
id: int | str # String for UUID-based explorables like SemanticView
uid: str
column_formats: dict[str, str | None]
description: str | None
@@ -274,8 +314,8 @@ class ExplorableData(TypedDict, total=False):
perm: str | None
edit_url: str
sql: str | None
columns: list[dict[str, Any]]
metrics: list[dict[str, Any]]
columns: list["DatasetColumnData"]
metrics: list["DatasetMetricData"]
folders: Any # JSON field, can be list or dict
order_by_choices: list[tuple[str, str]]
owners: list[int] | list[dict[str, Any]] # Can be either format
@@ -283,8 +323,8 @@ class ExplorableData(TypedDict, total=False):
select_star: str | None
# Additional fields from SqlaTable and data_for_slices
column_types: list[Any]
column_names: set[str] | set[Any]
column_types: list["GenericDataType"]
column_names: set[str] | list[str]
granularity_sqla: list[tuple[Any, Any]]
time_grain_sqla: list[tuple[Any, Any]]
main_dttm_col: str | None
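
Note: both TypedDicts are declared with total=False, so a payload may carry any
subset of the keys and still type-check. A minimal sketch:

from superset.superset_typing import DatasetColumnData, DatasetMetricData

col: DatasetColumnData = {
    "column_name": "order_date",
    "type": "DATE",
    "is_dttm": True,
}
metric: DatasetMetricData = {
    "metric_name": "revenue",
    "expression": "SUM(amount)",
}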

View File

@@ -96,7 +96,6 @@ from superset.exceptions import (
SupersetException,
SupersetTimeoutException,
)
from superset.explorables.base import Explorable
from superset.sql.parse import sanitize_clause
from superset.superset_typing import (
AdhocColumn,
@@ -115,7 +114,7 @@ from superset.utils.hashing import hash_from_dict, hash_from_str
from superset.utils.pandas import detect_datetime_format
if TYPE_CHECKING:
from superset.connectors.sqla.models import TableColumn
from superset.explorables.base import ColumnMetadata, Explorable
from superset.models.core import Database
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
@@ -200,6 +199,7 @@ class DatasourceType(StrEnum):
QUERY = "query"
SAVEDQUERY = "saved_query"
VIEW = "view"
SEMANTIC_VIEW = "semantic_view"
class LoggerLevel(StrEnum):
@@ -1730,15 +1730,12 @@ def get_metric_type_from_column(column: Any, datasource: Explorable) -> str:
:return: The inferred metric type as a string, or an empty string if the
column is not a metric or no valid operation is found.
"""
from superset.connectors.sqla.models import SqlMetric
metric: SqlMetric = next(
(metric for metric in datasource.metrics if metric.metric_name == column),
SqlMetric(metric_name=""),
metric = next(
(m for m in datasource.metrics if m.metric_name == column),
None,
)
if metric.metric_name == "":
if metric is None:
return ""
expression: str = metric.expression
@@ -1784,7 +1781,7 @@ def extract_dataframe_dtypes(
generic_types: list[GenericDataType] = []
for column in df.columns:
column_object = columns_by_name.get(column)
column_object = columns_by_name.get(str(column))
series = df[column]
inferred_type: str = ""
if series.isna().all():
@@ -1814,11 +1811,17 @@ def extract_dataframe_dtypes(
return generic_types
def extract_column_dtype(col: TableColumn) -> GenericDataType:
if col.is_temporal:
def extract_column_dtype(col: ColumnMetadata) -> GenericDataType:
# Check for temporal type
if hasattr(col, "is_temporal") and col.is_temporal:
return GenericDataType.TEMPORAL
if col.is_numeric:
if col.is_dttm:
return GenericDataType.TEMPORAL
# Check for numeric type
if hasattr(col, "is_numeric") and col.is_numeric:
return GenericDataType.NUMERIC
# TODO: add check for boolean data type when proper support is added
return GenericDataType.STRING
@@ -1832,9 +1835,7 @@ def get_time_filter_status(
applied_time_extras: dict[str, str],
) -> tuple[list[dict[str, str]], list[dict[str, str]]]:
temporal_columns: set[Any] = {
(col.column_name if hasattr(col, "column_name") else col.get("column_name"))
for col in datasource.columns
if (col.is_dttm if hasattr(col, "is_dttm") else col.get("is_dttm"))
col.column_name for col in datasource.columns if col.is_dttm
}
applied: list[dict[str, str]] = []
rejected: list[dict[str, str]] = []
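
Note: extract_column_dtype now accepts any ColumnMetadata-shaped object: legacy
TableColumn-style objects expose is_temporal/is_numeric, while the new
dataclass-based metadata only exposes is_dttm, and the hasattr probes handle
both. A minimal sketch, assuming these hunks are superset/utils/core.py:

from superset.semantic_layers.models import ColumnMetadata
from superset.utils.core import GenericDataType, extract_column_dtype

class LegacyColumn:
    is_temporal = False
    is_numeric = True
    is_dttm = False

assert extract_column_dtype(LegacyColumn()) == GenericDataType.NUMERIC
assert (
    extract_column_dtype(ColumnMetadata("order_date", "DATE", is_dttm=True))
    == GenericDataType.TEMPORAL
)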

View File

@@ -626,7 +626,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
assert response == {
"message": {
"datasource_type": [
"Must be one of: table, dataset, query, saved_query, view."
"Must be one of: table, dataset, query, saved_query, view, "
"semantic_view."
]
}
}
@@ -981,7 +982,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
assert response == {
"message": {
"datasource_type": [
"Must be one of: table, dataset, query, saved_query, view."
"Must be one of: table, dataset, query, saved_query, view, "
"semantic_view."
]
}
}

View File

@@ -24,7 +24,6 @@ from pytest_mock import MockerFixture
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
from superset_core.semantic_layers.types import (
AdhocExpression,
AdhocFilter,
Day,
Dimension,
Filter,
@@ -202,9 +201,11 @@ def test_get_filters_from_extras_where() -> None:
assert len(result) == 1
filter_ = next(iter(result))
assert isinstance(filter_, AdhocFilter)
assert isinstance(filter_, Filter)
assert filter_.type == PredicateType.WHERE
assert filter_.definition == "customer_id > 100"
assert filter_.column is None
assert filter_.operator == Operator.ADHOC
assert filter_.value == "customer_id > 100"
def test_get_filters_from_extras_having() -> None:
@@ -215,7 +216,12 @@ def test_get_filters_from_extras_having() -> None:
result = _get_filters_from_extras(extras)
assert result == {
AdhocFilter(type=PredicateType.HAVING, definition="SUM(sales) > 1000"),
Filter(
type=PredicateType.HAVING,
column=None,
operator=Operator.ADHOC,
value="SUM(sales) > 1000",
),
}
@@ -230,8 +236,18 @@ def test_get_filters_from_extras_both() -> None:
result = _get_filters_from_extras(extras)
assert result == {
AdhocFilter(type=PredicateType.WHERE, definition="region = 'US'"),
AdhocFilter(type=PredicateType.HAVING, definition="COUNT(*) > 10"),
Filter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value="region = 'US'",
),
Filter(
type=PredicateType.HAVING,
column=None,
operator=Operator.ADHOC,
value="COUNT(*) > 10",
),
}
@@ -450,9 +466,11 @@ def test_get_filters_from_query_object_with_extras(mock_datasource: MagicMock) -
operator=Operator.LESS_THAN,
value=datetime(2025, 10, 22),
),
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition="customer_id > 100",
column=None,
operator=Operator.ADHOC,
value="customer_id > 100",
),
}
@@ -494,9 +512,11 @@ def test_get_filters_from_query_object_with_fetch_values(
operator=Operator.LESS_THAN,
value=datetime(2025, 10, 22),
),
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition="tenant_id = 123",
column=None,
operator=Operator.ADHOC,
value="tenant_id = 123",
),
}
@@ -796,9 +816,11 @@ def test_get_group_limit_filters_with_extras(mock_datasource: MagicMock) -> None
operator=Operator.LESS_THAN,
value=datetime(2025, 10, 22),
),
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition="customer_id > 100",
column=None,
operator=Operator.ADHOC,
value="customer_id > 100",
),
}
@@ -2019,9 +2041,11 @@ def test_get_group_limit_filters_with_fetch_values_predicate(
assert result is not None
assert (
AdhocFilter(
Filter(
type=PredicateType.WHERE,
definition="tenant_id = 123",
column=None,
operator=Operator.ADHOC,
value="tenant_id = 123",
)
in result
)
@@ -2372,6 +2396,7 @@ def test_get_filters_from_query_object_with_filter_loop(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]
@@ -2444,6 +2469,7 @@ def test_get_group_limit_filters_with_filter_loop(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]
@@ -2555,6 +2581,7 @@ def test_get_filters_from_query_object_filter_returns_none(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]
@@ -2607,6 +2634,7 @@ def test_get_group_limit_filters_filter_returns_none(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]

View File

@@ -0,0 +1,621 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for semantic layer models."""
from __future__ import annotations
import uuid
from unittest.mock import MagicMock, patch
import pytest
from superset_core.semantic_layers.types import (
BINARY,
BOOLEAN,
DATE,
DATETIME,
DECIMAL,
INTEGER,
INTERVAL,
NUMBER,
OBJECT,
STRING,
TIME,
Day,
Dimension,
Metric,
Type,
)
from superset.semantic_layers.models import (
ColumnMetadata,
MetricMetadata,
SemanticLayer,
SemanticView,
get_column_type,
)
from superset.utils.core import GenericDataType
# =============================================================================
# get_column_type tests
# =============================================================================
def test_get_column_type_temporal_date() -> None:
"""Test that DATE maps to TEMPORAL."""
assert get_column_type(DATE) == GenericDataType.TEMPORAL
def test_get_column_type_temporal_datetime() -> None:
"""Test that DATETIME maps to TEMPORAL."""
assert get_column_type(DATETIME) == GenericDataType.TEMPORAL
def test_get_column_type_temporal_time() -> None:
"""Test that TIME maps to TEMPORAL."""
assert get_column_type(TIME) == GenericDataType.TEMPORAL
def test_get_column_type_numeric_integer() -> None:
"""Test that INTEGER maps to NUMERIC."""
assert get_column_type(INTEGER) == GenericDataType.NUMERIC
def test_get_column_type_numeric_number() -> None:
"""Test that NUMBER maps to NUMERIC."""
assert get_column_type(NUMBER) == GenericDataType.NUMERIC
def test_get_column_type_numeric_decimal() -> None:
"""Test that DECIMAL maps to NUMERIC."""
assert get_column_type(DECIMAL) == GenericDataType.NUMERIC
def test_get_column_type_numeric_interval() -> None:
"""Test that INTERVAL maps to NUMERIC."""
assert get_column_type(INTERVAL) == GenericDataType.NUMERIC
def test_get_column_type_boolean() -> None:
"""Test that BOOLEAN maps to BOOLEAN."""
assert get_column_type(BOOLEAN) == GenericDataType.BOOLEAN
def test_get_column_type_string() -> None:
"""Test that STRING maps to STRING."""
assert get_column_type(STRING) == GenericDataType.STRING
def test_get_column_type_object() -> None:
"""Test that OBJECT maps to STRING."""
assert get_column_type(OBJECT) == GenericDataType.STRING
def test_get_column_type_binary() -> None:
"""Test that BINARY maps to STRING."""
assert get_column_type(BINARY) == GenericDataType.STRING
def test_get_column_type_unknown() -> None:
"""Test that unknown types default to STRING."""
class UnknownType(Type):
pass
assert get_column_type(UnknownType) == GenericDataType.STRING
# =============================================================================
# MetricMetadata tests
# =============================================================================
def test_metric_metadata_required_fields() -> None:
"""Test MetricMetadata with required fields only."""
metadata = MetricMetadata(
metric_name="revenue",
expression="SUM(amount)",
)
assert metadata.metric_name == "revenue"
assert metadata.expression == "SUM(amount)"
assert metadata.verbose_name is None
assert metadata.description is None
assert metadata.d3format is None
assert metadata.currency is None
assert metadata.warning_text is None
assert metadata.certified_by is None
assert metadata.certification_details is None
def test_metric_metadata_all_fields() -> None:
"""Test MetricMetadata with all fields."""
metadata = MetricMetadata(
metric_name="revenue",
expression="SUM(amount)",
verbose_name="Total Revenue",
description="Sum of all revenue",
d3format="$,.2f",
currency={"symbol": "$", "symbolPosition": "prefix"},
warning_text="Data may be incomplete",
certified_by="Data Team",
certification_details="Verified Q1 2024",
)
assert metadata.metric_name == "revenue"
assert metadata.expression == "SUM(amount)"
assert metadata.verbose_name == "Total Revenue"
assert metadata.description == "Sum of all revenue"
assert metadata.d3format == "$,.2f"
assert metadata.currency == {"symbol": "$", "symbolPosition": "prefix"}
assert metadata.warning_text == "Data may be incomplete"
assert metadata.certified_by == "Data Team"
assert metadata.certification_details == "Verified Q1 2024"
# =============================================================================
# ColumnMetadata tests
# =============================================================================
def test_column_metadata_required_fields() -> None:
"""Test ColumnMetadata with required fields only."""
metadata = ColumnMetadata(
column_name="order_date",
type="DATE",
is_dttm=True,
)
assert metadata.column_name == "order_date"
assert metadata.type == "DATE"
assert metadata.is_dttm is True
assert metadata.verbose_name is None
assert metadata.description is None
assert metadata.groupby is True
assert metadata.filterable is True
assert metadata.expression is None
assert metadata.python_date_format is None
assert metadata.advanced_data_type is None
assert metadata.extra is None
def test_column_metadata_all_fields() -> None:
"""Test ColumnMetadata with all fields."""
metadata = ColumnMetadata(
column_name="order_date",
type="DATE",
is_dttm=True,
verbose_name="Order Date",
description="Date of the order",
groupby=True,
filterable=True,
expression="DATE(order_timestamp)",
python_date_format="%Y-%m-%d",
advanced_data_type="date",
extra='{"grain": "day"}',
)
assert metadata.column_name == "order_date"
assert metadata.type == "DATE"
assert metadata.is_dttm is True
assert metadata.verbose_name == "Order Date"
assert metadata.description == "Date of the order"
assert metadata.groupby is True
assert metadata.filterable is True
assert metadata.expression == "DATE(order_timestamp)"
assert metadata.python_date_format == "%Y-%m-%d"
assert metadata.advanced_data_type == "date"
assert metadata.extra == '{"grain": "day"}'
# =============================================================================
# SemanticLayer tests
# =============================================================================
def test_semantic_layer_repr_with_name() -> None:
"""Test SemanticLayer __repr__ with name."""
layer = SemanticLayer()
layer.name = "My Semantic Layer"
layer.uuid = uuid.uuid4()
assert repr(layer) == "My Semantic Layer"
def test_semantic_layer_repr_without_name() -> None:
"""Test SemanticLayer __repr__ without name (uses uuid)."""
layer = SemanticLayer()
layer.name = None
test_uuid = uuid.uuid4()
layer.uuid = test_uuid
assert repr(layer) == str(test_uuid)
def test_semantic_layer_implementation_not_implemented() -> None:
"""Test that implementation raises NotImplementedError."""
layer = SemanticLayer()
with pytest.raises(NotImplementedError):
_ = layer.implementation
# =============================================================================
# SemanticView tests
# =============================================================================
@pytest.fixture
def mock_dimensions() -> list[Dimension]:
"""Create mock dimensions for testing."""
return [
Dimension(
id="orders.order_date",
name="order_date",
type=DATE,
definition="orders.order_date",
description="Date of the order",
grain=Day,
),
Dimension(
id="products.category",
name="category",
type=STRING,
definition="products.category",
description="Product category",
grain=None,
),
]
@pytest.fixture
def mock_metrics() -> list[Metric]:
"""Create mock metrics for testing."""
return [
Metric(
id="orders.revenue",
name="revenue",
type=NUMBER,
definition="SUM(orders.amount)",
description="Total revenue",
),
Metric(
id="orders.count",
name="order_count",
type=INTEGER,
definition="COUNT(*)",
description="Number of orders",
),
]
@pytest.fixture
def mock_implementation(
mock_dimensions: list[Dimension],
mock_metrics: list[Metric],
) -> MagicMock:
"""Create a mock implementation."""
impl = MagicMock()
impl.get_dimensions.return_value = mock_dimensions
impl.get_metrics.return_value = mock_metrics
impl.uid.return_value = "semantic_view_uid_123"
return impl
@pytest.fixture
def semantic_view(mock_implementation: MagicMock) -> SemanticView:
"""Create a SemanticView with mocked implementation."""
view = SemanticView()
view.name = "Orders View"
view.description = "View of order data"
view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
view.cache_timeout = 3600
view.configuration = "{}"
# Mock the implementation property
with patch.object(
SemanticView,
"implementation",
new_callable=lambda: property(lambda self: mock_implementation),
):
# We need to return the view but the patch won't persist
pass
return view
def test_semantic_view_repr_with_name() -> None:
"""Test SemanticView __repr__ with name."""
view = SemanticView()
view.name = "My View"
view.uuid = uuid.uuid4()
assert repr(view) == "My View"
def test_semantic_view_repr_without_name() -> None:
"""Test SemanticView __repr__ without name (uses uuid)."""
view = SemanticView()
view.name = None
test_uuid = uuid.uuid4()
view.uuid = test_uuid
assert repr(view) == str(test_uuid)
def test_semantic_view_type() -> None:
"""Test SemanticView type property."""
view = SemanticView()
assert view.type == "semantic_view"
def test_semantic_view_offset() -> None:
"""Test SemanticView offset property."""
view = SemanticView()
assert view.offset == 0
def test_semantic_view_is_rls_supported() -> None:
"""Test SemanticView is_rls_supported property."""
view = SemanticView()
assert view.is_rls_supported is False
def test_semantic_view_query_language() -> None:
"""Test SemanticView query_language property."""
view = SemanticView()
assert view.query_language is None
def test_semantic_view_get_query_str() -> None:
"""Test SemanticView get_query_str method."""
view = SemanticView()
result = view.get_query_str({})
assert result == "Not implemented for semantic layers"
def test_semantic_view_get_extra_cache_keys() -> None:
"""Test SemanticView get_extra_cache_keys method."""
view = SemanticView()
result = view.get_extra_cache_keys({})
assert result == []
def test_semantic_view_perm() -> None:
"""Test SemanticView perm property."""
view = SemanticView()
view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
assert view.perm == "87654321432187654321876543218765::12345678123456781234567812345678"
def test_semantic_view_uid(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
mock_metrics: list[Metric],
) -> None:
"""Test SemanticView uid property."""
view = SemanticView()
view.name = "Test View"
view.uuid = uuid.uuid4()
view.semantic_layer_uuid = uuid.uuid4()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
assert view.uid == "semantic_view_uid_123"
def test_semantic_view_metrics(
mock_implementation: MagicMock,
mock_metrics: list[Metric],
) -> None:
"""Test SemanticView metrics property."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
metrics = view.metrics
assert len(metrics) == 2
assert metrics[0].metric_name == "revenue"
assert metrics[0].expression == "SUM(orders.amount)"
assert metrics[0].description == "Total revenue"
assert metrics[1].metric_name == "order_count"
def test_semantic_view_columns(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
) -> None:
"""Test SemanticView columns property."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
columns = view.columns
assert len(columns) == 2
assert columns[0].column_name == "order_date"
assert columns[0].type == "DATE"
assert columns[0].is_dttm is True
assert columns[0].description == "Date of the order"
assert columns[1].column_name == "category"
assert columns[1].type == "STRING"
assert columns[1].is_dttm is False
def test_semantic_view_column_names(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
) -> None:
"""Test SemanticView column_names property."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
column_names = view.column_names
assert column_names == ["order_date", "category"]
def test_semantic_view_get_time_grains(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
) -> None:
"""Test SemanticView get_time_grains property."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
time_grains = view.get_time_grains
assert len(time_grains) == 1
assert time_grains[0]["name"] == "Day"
assert time_grains[0]["duration"] == "P1D"
def test_semantic_view_has_drill_by_columns_all_exist(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
) -> None:
"""Test has_drill_by_columns when all columns exist."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
assert view.has_drill_by_columns(["order_date", "category"]) is True
def test_semantic_view_has_drill_by_columns_some_missing(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
) -> None:
"""Test has_drill_by_columns when some columns are missing."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
assert view.has_drill_by_columns(["order_date", "nonexistent"]) is False
def test_semantic_view_has_drill_by_columns_empty(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
) -> None:
"""Test has_drill_by_columns with empty list."""
view = SemanticView()
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
assert view.has_drill_by_columns([]) is True
def test_semantic_view_data(
mock_implementation: MagicMock,
mock_dimensions: list[Dimension],
mock_metrics: list[Metric],
) -> None:
"""Test SemanticView data property."""
view = SemanticView()
view.name = "Orders View"
view.description = "View of order data"
view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
view.cache_timeout = 3600
with patch.object(
SemanticView, "implementation", new_callable=lambda: property(lambda s: mock_implementation)
):
data = view.data
# Check core fields
assert data["id"] == "12345678123456781234567812345678"
assert data["uid"] == "semantic_view_uid_123"
assert data["type"] == "semantic_view"
assert data["name"] == "Orders View"
assert data["description"] == "View of order data"
assert data["cache_timeout"] == 3600
# Check columns
assert len(data["columns"]) == 2
assert data["columns"][0]["column_name"] == "order_date"
assert data["columns"][0]["type"] == "DATE"
assert data["columns"][0]["is_dttm"] is True
assert data["columns"][0]["type_generic"] == GenericDataType.TEMPORAL
assert data["columns"][1]["column_name"] == "category"
assert data["columns"][1]["type"] == "STRING"
assert data["columns"][1]["type_generic"] == GenericDataType.STRING
# Check metrics
assert len(data["metrics"]) == 2
assert data["metrics"][0]["metric_name"] == "revenue"
assert data["metrics"][0]["expression"] == "SUM(orders.amount)"
assert data["metrics"][1]["metric_name"] == "order_count"
# Check column_types and column_names
assert data["column_types"] == [
GenericDataType.TEMPORAL,
GenericDataType.STRING,
]
assert data["column_names"] == {"order_date", "category"}
# Check other fields
assert data["table_name"] == "Orders View"
assert data["datasource_name"] == "Orders View"
assert data["offset"] == 0
def test_semantic_view_get_query_result(
mock_implementation: MagicMock,
) -> None:
"""Test SemanticView get_query_result method."""
view = SemanticView()
mock_query_object = MagicMock()
mock_result = MagicMock()
with patch(
"superset.semantic_layers.models.get_results",
return_value=mock_result,
) as mock_get_results:
result = view.get_query_result(mock_query_object)
mock_get_results.assert_called_once_with(mock_query_object)
assert result == mock_result
def test_semantic_view_implementation() -> None:
"""Test SemanticView implementation property."""
view = SemanticView()
view.name = "Test View"
view.configuration = '{"key": "value"}'
mock_semantic_layer = MagicMock()
mock_semantic_view_impl = MagicMock()
mock_semantic_layer.implementation.get_semantic_view.return_value = (
mock_semantic_view_impl
)
view.semantic_layer = mock_semantic_layer
# Clear cached property if it exists
if "implementation" in view.__dict__:
del view.__dict__["implementation"]
result = view.implementation
mock_semantic_layer.implementation.get_semantic_view.assert_called_once_with(
"Test View",
{"key": "value"},
)
assert result == mock_semantic_view_impl