Compare commits

..

9 Commits

Author SHA1 Message Date
Beto Dealmeida
508aad1603 Enable extension in Docker 2026-02-09 14:57:40 -05:00
Beto Dealmeida
954cf32ca4 Temporary registry 2026-02-09 14:57:10 -05:00
Beto Dealmeida
552c685a6b Fix models 2026-02-09 13:55:34 -05:00
Beto Dealmeida
a26c91c4e2 Fix mapper 2026-02-09 13:54:11 -05:00
Beto Dealmeida
3c8835bd75 Fix migration 2026-02-09 10:48:05 -05:00
Beto Dealmeida
955d8bc205 Frontend support 2026-02-06 19:02:51 -05:00
Beto Dealmeida
cd8e27d33c API integration 2026-02-06 16:28:08 -05:00
Beto Dealmeida
d0962bd32f feat: models and DAOs 2026-02-06 16:27:58 -05:00
Beto Dealmeida
28870168cd feat: semantic layer extension 2026-02-06 13:42:39 -05:00
5 changed files with 47 additions and 172 deletions

View File

@@ -21,6 +21,7 @@ import enum
from typing import Protocol, runtime_checkable
from superset_core.semantic_layers.types import (
AdhocFilter,
Dimension,
Filter,
GroupLimit,
@@ -68,7 +69,7 @@ class SemanticView(Protocol):
def get_values(
self,
dimension: Dimension,
filters: set[Filter] | None = None,
filters: set[Filter | AdhocFilter] | None = None,
) -> SemanticResult:
"""
Return distinct values for a dimension.
@@ -78,7 +79,7 @@ class SemanticView(Protocol):
self,
metrics: list[Metric],
dimensions: list[Dimension],
filters: set[Filter] | None = None,
filters: set[Filter | AdhocFilter] | None = None,
order: list[OrderTuple] | None = None,
limit: int | None = None,
offset: int | None = None,
@@ -93,7 +94,7 @@ class SemanticView(Protocol):
self,
metrics: list[Metric],
dimensions: list[Dimension],
filters: set[Filter] | None = None,
filters: set[Filter | AdhocFilter] | None = None,
order: list[OrderTuple] | None = None,
limit: int | None = None,
offset: int | None = None,

View File

@@ -239,7 +239,6 @@ class Operator(str, enum.Enum):
NOT_LIKE = "NOT LIKE"
IS_NULL = "IS NULL"
IS_NOT_NULL = "IS NOT NULL"
ADHOC = "ADHOC"
FilterValues = str | int | float | bool | datetime | date | time | timedelta | None
@@ -253,11 +252,19 @@ class PredicateType(enum.Enum):
@dataclass(frozen=True, order=True)
class Filter:
type: PredicateType
column: Dimension | Metric | None
column: Dimension | Metric
operator: Operator
value: FilterValues | frozenset[FilterValues]
# TODO (betodealmeida): convert this into an Operator, i.e. express adhoc filters as:
# Filter(type=..., column=None, operator=Operator.ADHOC, value="some definition")
@dataclass(frozen=True, order=True)
class AdhocFilter:
    """
    A filter expressed as a free-form definition string.

    Unlike a structured filter, it carries no column, operator or value;
    the whole predicate lives in ``definition``.
    """

    # Kind of predicate this filter represents (a ``PredicateType`` member).
    type: PredicateType
    # The filter expression itself, stored as an opaque string.
    definition: str
class OrderDirection(enum.Enum):
ASC = "ASC"
DESC = "DESC"
@@ -284,7 +291,7 @@ class GroupLimit:
metric: Metric | None
direction: OrderDirection = OrderDirection.DESC
group_others: bool = False
filters: set[Filter] | None = None
filters: set[Filter | AdhocFilter] | None = None
@dataclass(frozen=True)
@@ -321,7 +328,7 @@ class SemanticQuery:
metrics: list[Metric]
dimensions: list[Dimension]
filters: set[Filter] | None = None
filters: set[Filter | AdhocFilter] | None = None
order: list[OrderTuple] | None = None
limit: int | None = None
offset: int | None = None

View File

@@ -1,99 +0,0 @@
"""
Script to create a Pandas semantic layer and Sales semantic view in Superset.
Run this inside the superset_app container:
python /app/superset/create_pandas_semantic_layer.py
"""
from __future__ import annotations
import logging
import sys
from typing import TYPE_CHECKING
# Add the Superset application directory to the Python path
sys.path.insert(0, "/app")
from superset.app import create_app
from superset.extensions import db
from superset.utils import json
if TYPE_CHECKING:
from superset.semantic_layers.models import SemanticLayer, SemanticView
# Build the application and push an application context at import time.
# NOTE(review): presumably this is what allows the functions below to use
# `db.session` without wrapping each call in `with app.app_context()` — confirm.
app = create_app()
app.app_context().push()
# Configure logging: INFO level, each record prefixed with timestamp and level.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def create_pandas_semantic_layer() -> SemanticLayer:
    """
    Persist a new Pandas semantic layer and return it.

    The layer's configuration holds a single ``dataset`` key set to
    ``sales`` and is stored serialized as JSON. The row is added to the
    session and committed before its details are logged.
    """
    # Imported locally: the module-level import is guarded by TYPE_CHECKING.
    from superset.semantic_layers.models import SemanticLayer

    logger.info("Creating Pandas semantic layer...")

    layer = SemanticLayer(
        name="Pandas Semantic Layer",
        description="In-memory semantic layer backed by a Pandas DataFrame",
        type="pandas",
        configuration=json.dumps({"dataset": "sales"}),
        cache_timeout=3600,
    )
    db.session.add(layer)
    db.session.commit()

    # Report the persisted record, one attribute per log line.
    logger.info("Created semantic layer:")
    for template, attribute in (
        (" Name: %s", "name"),
        (" UUID: %s", "uuid"),
        (" Type: %s", "type"),
    ):
        logger.info(template, getattr(layer, attribute))

    return layer
def create_sales_semantic_view(semantic_layer: SemanticLayer) -> SemanticView:
    """
    Persist the ``sales`` semantic view attached to ``semantic_layer``.

    The view is linked to its parent through the layer's UUID, added to
    the session and committed; the stored record is then logged and
    returned.
    """
    # Imported locally: the module-level import is guarded by TYPE_CHECKING.
    from superset.semantic_layers.models import SemanticView

    logger.info("Creating Sales semantic view...")

    view = SemanticView(
        name="sales",
        configuration="{}",
        cache_timeout=1800,
        semantic_layer_uuid=semantic_layer.uuid,
    )
    db.session.add(view)
    db.session.commit()

    # Report the persisted record, one attribute per log line.
    logger.info("Created semantic view:")
    for template, attribute in (
        (" Name: %s", "name"),
        (" UUID: %s", "uuid"),
        (" Semantic Layer UUID: %s", "semantic_layer_uuid"),
    ):
        logger.info(template, getattr(view, attribute))

    return view
def main() -> None:
    """Log a banner, then create the semantic layer followed by its view."""
    banner = "=" * 60
    logger.info(banner)
    logger.info("Creating Pandas Semantic Layer and Sales Semantic View")
    logger.info(banner)

    # The view needs the freshly created layer, so the layer is built first.
    create_sales_semantic_view(create_pandas_semantic_layer())


if __name__ == "__main__":
    main()

View File

@@ -32,6 +32,7 @@ import numpy as np
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
from superset_core.semantic_layers.types import (
AdhocExpression,
AdhocFilter,
Day,
Dimension,
Filter,
@@ -369,14 +370,14 @@ def _get_filters_from_query_object(
query_object: ValidatedQueryObject,
time_offset: str | None,
all_dimensions: dict[str, Dimension],
) -> set[Filter]:
) -> set[Filter | AdhocFilter]:
"""
Extract all filters from the query object, including time range filters.
This simplifies the complexity of from_dttm/to_dttm/inner_from_dttm/inner_to_dttm
by converting all time constraints into filters.
"""
filters: set[Filter] = set()
filters: set[Filter | AdhocFilter] = set()
# 1. Add fetch values predicate if present
if (
@@ -384,11 +385,9 @@ def _get_filters_from_query_object(
and query_object.datasource.fetch_values_predicate
):
filters.add(
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value=query_object.datasource.fetch_values_predicate,
definition=query_object.datasource.fetch_values_predicate,
)
)
@@ -416,7 +415,7 @@ def _get_filters_from_query_object(
return filters
def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
"""
Extract filters from the extras dict.
@@ -431,29 +430,25 @@ def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
Handled in _convert_time_grain() and used for dimension grain matching
Note: The WHERE and HAVING clauses from extras are SQL expressions that
are passed through as-is to the semantic layer as adhoc Filter objects.
are passed through as-is to the semantic layer as AdhocFilter objects.
"""
filters: set[Filter] = set()
filters: set[AdhocFilter] = set()
# Add WHERE clause from extras
if where_clause := extras.get("where"):
filters.add(
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value=where_clause,
definition=where_clause,
)
)
# Add HAVING clause from extras
if having_clause := extras.get("having"):
filters.add(
Filter(
AdhocFilter(
type=PredicateType.HAVING,
column=None,
operator=Operator.ADHOC,
value=having_clause,
definition=having_clause,
)
)
@@ -545,7 +540,7 @@ def _convert_query_object_filter(
all_dimensions: dict[str, Dimension],
) -> set[Filter] | None:
"""
Convert a QueryObject filter dict to a semantic layer Filter.
Convert a QueryObject filter dict to a semantic layer Filter or AdhocFilter.
"""
operator_str = filter_["op"]
@@ -569,6 +564,7 @@ def _convert_query_object_filter(
if operator_str == FilterOperator.TEMPORAL_RANGE.value:
if not isinstance(value, str) or value == NO_TIME_RANGE:
return None
start, end = value.split(" : ")
return {
Filter(
@@ -681,7 +677,7 @@ def _get_group_limit_from_query_object(
def _get_group_limit_filters(
query_object: ValidatedQueryObject,
all_dimensions: dict[str, Dimension],
) -> set[Filter] | None:
) -> set[Filter | AdhocFilter] | None:
"""
Get separate filters for the group limit subquery if needed.
@@ -704,7 +700,7 @@ def _get_group_limit_filters(
return None
# Create separate filters for the group limit subquery
filters: set[Filter] = set()
filters: set[Filter | AdhocFilter] = set()
# Add time range filter using inner bounds
if query_object.granularity:
@@ -737,11 +733,9 @@ def _get_group_limit_filters(
and query_object.datasource.fetch_values_predicate
):
filters.add(
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value=query_object.datasource.fetch_values_predicate,
definition=query_object.datasource.fetch_values_predicate,
)
)

View File

@@ -24,6 +24,7 @@ from pytest_mock import MockerFixture
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
from superset_core.semantic_layers.types import (
AdhocExpression,
AdhocFilter,
Day,
Dimension,
Filter,
@@ -201,11 +202,9 @@ def test_get_filters_from_extras_where() -> None:
assert len(result) == 1
filter_ = next(iter(result))
assert isinstance(filter_, Filter)
assert isinstance(filter_, AdhocFilter)
assert filter_.type == PredicateType.WHERE
assert filter_.column is None
assert filter_.operator == Operator.ADHOC
assert filter_.value == "customer_id > 100"
assert filter_.definition == "customer_id > 100"
def test_get_filters_from_extras_having() -> None:
@@ -216,12 +215,7 @@ def test_get_filters_from_extras_having() -> None:
result = _get_filters_from_extras(extras)
assert result == {
Filter(
type=PredicateType.HAVING,
column=None,
operator=Operator.ADHOC,
value="SUM(sales) > 1000",
),
AdhocFilter(type=PredicateType.HAVING, definition="SUM(sales) > 1000"),
}
@@ -236,18 +230,8 @@ def test_get_filters_from_extras_both() -> None:
result = _get_filters_from_extras(extras)
assert result == {
Filter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value="region = 'US'",
),
Filter(
type=PredicateType.HAVING,
column=None,
operator=Operator.ADHOC,
value="COUNT(*) > 10",
),
AdhocFilter(type=PredicateType.WHERE, definition="region = 'US'"),
AdhocFilter(type=PredicateType.HAVING, definition="COUNT(*) > 10"),
}
@@ -466,11 +450,9 @@ def test_get_filters_from_query_object_with_extras(mock_datasource: MagicMock) -
operator=Operator.LESS_THAN,
value=datetime(2025, 10, 22),
),
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value="customer_id > 100",
definition="customer_id > 100",
),
}
@@ -512,11 +494,9 @@ def test_get_filters_from_query_object_with_fetch_values(
operator=Operator.LESS_THAN,
value=datetime(2025, 10, 22),
),
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value="tenant_id = 123",
definition="tenant_id = 123",
),
}
@@ -816,11 +796,9 @@ def test_get_group_limit_filters_with_extras(mock_datasource: MagicMock) -> None
operator=Operator.LESS_THAN,
value=datetime(2025, 10, 22),
),
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value="customer_id > 100",
definition="customer_id > 100",
),
}
@@ -2041,11 +2019,9 @@ def test_get_group_limit_filters_with_fetch_values_predicate(
assert result is not None
assert (
Filter(
AdhocFilter(
type=PredicateType.WHERE,
column=None,
operator=Operator.ADHOC,
value="tenant_id = 123",
definition="tenant_id = 123",
)
in result
)
@@ -2396,7 +2372,6 @@ def test_get_filters_from_query_object_with_filter_loop(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]
@@ -2469,7 +2444,6 @@ def test_get_group_limit_filters_with_filter_loop(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]
@@ -2581,7 +2555,6 @@ def test_get_filters_from_query_object_filter_returns_none(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]
@@ -2634,7 +2607,6 @@ def test_get_group_limit_filters_filter_returns_none(
f
for f in result
if isinstance(f, Filter)
and f.column
and f.column.name == "category"
and f.operator == Operator.EQUALS
]