mirror of
https://github.com/apache/superset.git
synced 2026-07-01 20:35:35 +00:00
Compare commits
9 Commits
semantic-l
...
semantic-l
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
508aad1603 | ||
|
|
954cf32ca4 | ||
|
|
552c685a6b | ||
|
|
a26c91c4e2 | ||
|
|
3c8835bd75 | ||
|
|
955d8bc205 | ||
|
|
cd8e27d33c | ||
|
|
d0962bd32f | ||
|
|
28870168cd |
@@ -21,6 +21,7 @@ import enum
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocFilter,
|
||||
Dimension,
|
||||
Filter,
|
||||
GroupLimit,
|
||||
@@ -68,7 +69,7 @@ class SemanticView(Protocol):
|
||||
def get_values(
|
||||
self,
|
||||
dimension: Dimension,
|
||||
filters: set[Filter] | None = None,
|
||||
filters: set[Filter | AdhocFilter] | None = None,
|
||||
) -> SemanticResult:
|
||||
"""
|
||||
Return distinct values for a dimension.
|
||||
@@ -78,7 +79,7 @@ class SemanticView(Protocol):
|
||||
self,
|
||||
metrics: list[Metric],
|
||||
dimensions: list[Dimension],
|
||||
filters: set[Filter] | None = None,
|
||||
filters: set[Filter | AdhocFilter] | None = None,
|
||||
order: list[OrderTuple] | None = None,
|
||||
limit: int | None = None,
|
||||
offset: int | None = None,
|
||||
@@ -93,7 +94,7 @@ class SemanticView(Protocol):
|
||||
self,
|
||||
metrics: list[Metric],
|
||||
dimensions: list[Dimension],
|
||||
filters: set[Filter] | None = None,
|
||||
filters: set[Filter | AdhocFilter] | None = None,
|
||||
order: list[OrderTuple] | None = None,
|
||||
limit: int | None = None,
|
||||
offset: int | None = None,
|
||||
|
||||
@@ -239,7 +239,6 @@ class Operator(str, enum.Enum):
|
||||
NOT_LIKE = "NOT LIKE"
|
||||
IS_NULL = "IS NULL"
|
||||
IS_NOT_NULL = "IS NOT NULL"
|
||||
ADHOC = "ADHOC"
|
||||
|
||||
|
||||
FilterValues = str | int | float | bool | datetime | date | time | timedelta | None
|
||||
@@ -253,11 +252,19 @@ class PredicateType(enum.Enum):
|
||||
@dataclass(frozen=True, order=True)
|
||||
class Filter:
|
||||
type: PredicateType
|
||||
column: Dimension | Metric | None
|
||||
column: Dimension | Metric
|
||||
operator: Operator
|
||||
value: FilterValues | frozenset[FilterValues]
|
||||
|
||||
|
||||
# TODO (betodealmeida): convert into Operator:
|
||||
# Filter(type=..., column=None, operator=Operator.AdHoc, value="some definition")
|
||||
@dataclass(frozen=True, order=True)
|
||||
class AdhocFilter:
|
||||
type: PredicateType
|
||||
definition: str
|
||||
|
||||
|
||||
class OrderDirection(enum.Enum):
|
||||
ASC = "ASC"
|
||||
DESC = "DESC"
|
||||
@@ -284,7 +291,7 @@ class GroupLimit:
|
||||
metric: Metric | None
|
||||
direction: OrderDirection = OrderDirection.DESC
|
||||
group_others: bool = False
|
||||
filters: set[Filter] | None = None
|
||||
filters: set[Filter | AdhocFilter] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -321,7 +328,7 @@ class SemanticQuery:
|
||||
|
||||
metrics: list[Metric]
|
||||
dimensions: list[Dimension]
|
||||
filters: set[Filter] | None = None
|
||||
filters: set[Filter | AdhocFilter] | None = None
|
||||
order: list[OrderTuple] | None = None
|
||||
limit: int | None = None
|
||||
offset: int | None = None
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
"""
|
||||
Script to create a Pandas semantic layer and Sales semantic view in Superset.
|
||||
|
||||
Run this inside the superset_app container:
|
||||
python /app/superset/create_pandas_semantic_layer.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Add the Superset application directory to the Python path
|
||||
sys.path.insert(0, "/app")
|
||||
|
||||
from superset.app import create_app
|
||||
from superset.extensions import db
|
||||
from superset.utils import json
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.semantic_layers.models import SemanticLayer, SemanticView
|
||||
|
||||
# Build the Superset application and push an application context as soon as
# the module is loaded (presumably so the database session used by the
# functions below is bound to an app -- confirm against superset.extensions).
app = create_app()
app.app_context().push()

# Emit INFO-and-above records with a timestamp / level / message layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_pandas_semantic_layer() -> SemanticLayer:
    """
    Insert the "Pandas Semantic Layer" record and return it.

    The record is created with type ``pandas``, a ``cache_timeout`` of 3600 and
    a JSON-encoded configuration that names the ``sales`` dataset. It is added
    to ``db.session`` and committed, after which its name, UUID and type are
    logged.
    """
    # Imported locally: at module level the model is only imported under
    # TYPE_CHECKING, so it is not available at runtime otherwise.
    from superset.semantic_layers.models import SemanticLayer

    logger.info("Creating Pandas semantic layer...")

    layer = SemanticLayer(
        name="Pandas Semantic Layer",
        description="In-memory semantic layer backed by a Pandas DataFrame",
        type="pandas",
        configuration=json.dumps({"dataset": "sales"}),
        cache_timeout=3600,
    )

    # Persist the new row before reporting on it.
    db.session.add(layer)
    db.session.commit()

    logger.info("Created semantic layer:")
    logger.info(" Name: %s", layer.name)
    logger.info(" UUID: %s", layer.uuid)
    logger.info(" Type: %s", layer.type)

    return layer
|
||||
|
||||
|
||||
def create_sales_semantic_view(semantic_layer: SemanticLayer) -> SemanticView:
    """
    Insert the ``sales`` semantic view for ``semantic_layer`` and return it.

    The view is created with an empty JSON object as configuration, a
    ``cache_timeout`` of 1800 and ``semantic_layer_uuid`` set to the UUID of
    the given layer. It is added to ``db.session`` and committed, after which
    its name, UUID and parent layer UUID are logged.
    """
    # Imported locally: at module level the model is only imported under
    # TYPE_CHECKING, so it is not available at runtime otherwise.
    from superset.semantic_layers.models import SemanticView

    logger.info("Creating Sales semantic view...")

    view = SemanticView(
        name="sales",
        configuration="{}",
        cache_timeout=1800,
        semantic_layer_uuid=semantic_layer.uuid,
    )

    # Persist the new row before reporting on it.
    db.session.add(view)
    db.session.commit()

    logger.info("Created semantic view:")
    logger.info(" Name: %s", view.name)
    logger.info(" UUID: %s", view.uuid)
    logger.info(" Semantic Layer UUID: %s", view.semantic_layer_uuid)

    return view
|
||||
|
||||
|
||||
def main() -> None:
    """Log a banner, create the semantic layer, then create its Sales view."""
    rule = "=" * 60
    logger.info(rule)
    logger.info("Creating Pandas Semantic Layer and Sales Semantic View")
    logger.info(rule)

    # The view references the layer by UUID, so the layer is created first.
    layer = create_pandas_semantic_layer()
    create_sales_semantic_view(layer)


# Only run the creation steps when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -32,6 +32,7 @@ import numpy as np
|
||||
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocExpression,
|
||||
AdhocFilter,
|
||||
Day,
|
||||
Dimension,
|
||||
Filter,
|
||||
@@ -369,14 +370,14 @@ def _get_filters_from_query_object(
|
||||
query_object: ValidatedQueryObject,
|
||||
time_offset: str | None,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter]:
|
||||
) -> set[Filter | AdhocFilter]:
|
||||
"""
|
||||
Extract all filters from the query object, including time range filters.
|
||||
|
||||
This simplifies the complexity of from_dttm/to_dttm/inner_from_dttm/inner_to_dttm
|
||||
by converting all time constraints into filters.
|
||||
"""
|
||||
filters: set[Filter] = set()
|
||||
filters: set[Filter | AdhocFilter] = set()
|
||||
|
||||
# 1. Add fetch values predicate if present
|
||||
if (
|
||||
@@ -384,11 +385,9 @@ def _get_filters_from_query_object(
|
||||
and query_object.datasource.fetch_values_predicate
|
||||
):
|
||||
filters.add(
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=query_object.datasource.fetch_values_predicate,
|
||||
definition=query_object.datasource.fetch_values_predicate,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -416,7 +415,7 @@ def _get_filters_from_query_object(
|
||||
return filters
|
||||
|
||||
|
||||
def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
|
||||
def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
|
||||
"""
|
||||
Extract filters from the extras dict.
|
||||
|
||||
@@ -431,29 +430,25 @@ def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
|
||||
Handled in _convert_time_grain() and used for dimension grain matching
|
||||
|
||||
Note: The WHERE and HAVING clauses from extras are SQL expressions that
|
||||
are passed through as-is to the semantic layer as adhoc Filter objects.
|
||||
are passed through as-is to the semantic layer as AdhocFilter objects.
|
||||
"""
|
||||
filters: set[Filter] = set()
|
||||
filters: set[AdhocFilter] = set()
|
||||
|
||||
# Add WHERE clause from extras
|
||||
if where_clause := extras.get("where"):
|
||||
filters.add(
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=where_clause,
|
||||
definition=where_clause,
|
||||
)
|
||||
)
|
||||
|
||||
# Add HAVING clause from extras
|
||||
if having_clause := extras.get("having"):
|
||||
filters.add(
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.HAVING,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=having_clause,
|
||||
definition=having_clause,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -545,7 +540,7 @@ def _convert_query_object_filter(
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter] | None:
|
||||
"""
|
||||
Convert a QueryObject filter dict to a semantic layer Filter.
|
||||
Convert a QueryObject filter dict to a semantic layer Filter or AdhocFilter.
|
||||
"""
|
||||
operator_str = filter_["op"]
|
||||
|
||||
@@ -569,6 +564,7 @@ def _convert_query_object_filter(
|
||||
if operator_str == FilterOperator.TEMPORAL_RANGE.value:
|
||||
if not isinstance(value, str) or value == NO_TIME_RANGE:
|
||||
return None
|
||||
|
||||
start, end = value.split(" : ")
|
||||
return {
|
||||
Filter(
|
||||
@@ -681,7 +677,7 @@ def _get_group_limit_from_query_object(
|
||||
def _get_group_limit_filters(
|
||||
query_object: ValidatedQueryObject,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter] | None:
|
||||
) -> set[Filter | AdhocFilter] | None:
|
||||
"""
|
||||
Get separate filters for the group limit subquery if needed.
|
||||
|
||||
@@ -704,7 +700,7 @@ def _get_group_limit_filters(
|
||||
return None
|
||||
|
||||
# Create separate filters for the group limit subquery
|
||||
filters: set[Filter] = set()
|
||||
filters: set[Filter | AdhocFilter] = set()
|
||||
|
||||
# Add time range filter using inner bounds
|
||||
if query_object.granularity:
|
||||
@@ -737,11 +733,9 @@ def _get_group_limit_filters(
|
||||
and query_object.datasource.fetch_values_predicate
|
||||
):
|
||||
filters.add(
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=query_object.datasource.fetch_values_predicate,
|
||||
definition=query_object.datasource.fetch_values_predicate,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from pytest_mock import MockerFixture
|
||||
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocExpression,
|
||||
AdhocFilter,
|
||||
Day,
|
||||
Dimension,
|
||||
Filter,
|
||||
@@ -201,11 +202,9 @@ def test_get_filters_from_extras_where() -> None:
|
||||
|
||||
assert len(result) == 1
|
||||
filter_ = next(iter(result))
|
||||
assert isinstance(filter_, Filter)
|
||||
assert isinstance(filter_, AdhocFilter)
|
||||
assert filter_.type == PredicateType.WHERE
|
||||
assert filter_.column is None
|
||||
assert filter_.operator == Operator.ADHOC
|
||||
assert filter_.value == "customer_id > 100"
|
||||
assert filter_.definition == "customer_id > 100"
|
||||
|
||||
|
||||
def test_get_filters_from_extras_having() -> None:
|
||||
@@ -216,12 +215,7 @@ def test_get_filters_from_extras_having() -> None:
|
||||
result = _get_filters_from_extras(extras)
|
||||
|
||||
assert result == {
|
||||
Filter(
|
||||
type=PredicateType.HAVING,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="SUM(sales) > 1000",
|
||||
),
|
||||
AdhocFilter(type=PredicateType.HAVING, definition="SUM(sales) > 1000"),
|
||||
}
|
||||
|
||||
|
||||
@@ -236,18 +230,8 @@ def test_get_filters_from_extras_both() -> None:
|
||||
result = _get_filters_from_extras(extras)
|
||||
|
||||
assert result == {
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="region = 'US'",
|
||||
),
|
||||
Filter(
|
||||
type=PredicateType.HAVING,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="COUNT(*) > 10",
|
||||
),
|
||||
AdhocFilter(type=PredicateType.WHERE, definition="region = 'US'"),
|
||||
AdhocFilter(type=PredicateType.HAVING, definition="COUNT(*) > 10"),
|
||||
}
|
||||
|
||||
|
||||
@@ -466,11 +450,9 @@ def test_get_filters_from_query_object_with_extras(mock_datasource: MagicMock) -
|
||||
operator=Operator.LESS_THAN,
|
||||
value=datetime(2025, 10, 22),
|
||||
),
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="customer_id > 100",
|
||||
definition="customer_id > 100",
|
||||
),
|
||||
}
|
||||
|
||||
@@ -512,11 +494,9 @@ def test_get_filters_from_query_object_with_fetch_values(
|
||||
operator=Operator.LESS_THAN,
|
||||
value=datetime(2025, 10, 22),
|
||||
),
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="tenant_id = 123",
|
||||
definition="tenant_id = 123",
|
||||
),
|
||||
}
|
||||
|
||||
@@ -816,11 +796,9 @@ def test_get_group_limit_filters_with_extras(mock_datasource: MagicMock) -> None
|
||||
operator=Operator.LESS_THAN,
|
||||
value=datetime(2025, 10, 22),
|
||||
),
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="customer_id > 100",
|
||||
definition="customer_id > 100",
|
||||
),
|
||||
}
|
||||
|
||||
@@ -2041,11 +2019,9 @@ def test_get_group_limit_filters_with_fetch_values_predicate(
|
||||
|
||||
assert result is not None
|
||||
assert (
|
||||
Filter(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="tenant_id = 123",
|
||||
definition="tenant_id = 123",
|
||||
)
|
||||
in result
|
||||
)
|
||||
@@ -2396,7 +2372,6 @@ def test_get_filters_from_query_object_with_filter_loop(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
@@ -2469,7 +2444,6 @@ def test_get_group_limit_filters_with_filter_loop(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
@@ -2581,7 +2555,6 @@ def test_get_filters_from_query_object_filter_returns_none(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
@@ -2634,7 +2607,6 @@ def test_get_group_limit_filters_filter_returns_none(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user