mirror of
https://github.com/apache/superset.git
synced 2026-04-29 21:14:22 +00:00
Compare commits
2 Commits
semantic-l
...
semantic-l
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
08de4dbb5a | ||
|
|
4460d57e48 |
@@ -105,7 +105,12 @@ class CeleryConfig:
|
||||
|
||||
CELERY_CONFIG = CeleryConfig
|
||||
|
||||
FEATURE_FLAGS = {"ALERT_REPORTS": True, "DATASET_FOLDERS": True}
|
||||
FEATURE_FLAGS = {
|
||||
"ALERT_REPORTS": True,
|
||||
"DATASET_FOLDERS": True,
|
||||
"ENABLE_EXTENSIONS": True,
|
||||
}
|
||||
EXTENSIONS_PATH = "/app/docker/extensions"
|
||||
ALERT_REPORTS_NOTIFICATION_DRY_RUN = True
|
||||
WEBDRIVER_BASEURL = f"http://superset_app{os.environ.get('SUPERSET_APP_ROOT', '/')}/" # When using docker compose baseurl should be http://superset_nginx{ENV{BASEPATH}}/ # noqa: E501
|
||||
# The base URL for the email report hyperlinks.
|
||||
|
||||
@@ -21,7 +21,6 @@ import enum
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocFilter,
|
||||
Dimension,
|
||||
Filter,
|
||||
GroupLimit,
|
||||
@@ -69,7 +68,7 @@ class SemanticView(Protocol):
|
||||
def get_values(
|
||||
self,
|
||||
dimension: Dimension,
|
||||
filters: set[Filter | AdhocFilter] | None = None,
|
||||
filters: set[Filter] | None = None,
|
||||
) -> SemanticResult:
|
||||
"""
|
||||
Return distinct values for a dimension.
|
||||
@@ -79,7 +78,7 @@ class SemanticView(Protocol):
|
||||
self,
|
||||
metrics: list[Metric],
|
||||
dimensions: list[Dimension],
|
||||
filters: set[Filter | AdhocFilter] | None = None,
|
||||
filters: set[Filter] | None = None,
|
||||
order: list[OrderTuple] | None = None,
|
||||
limit: int | None = None,
|
||||
offset: int | None = None,
|
||||
@@ -94,7 +93,7 @@ class SemanticView(Protocol):
|
||||
self,
|
||||
metrics: list[Metric],
|
||||
dimensions: list[Dimension],
|
||||
filters: set[Filter | AdhocFilter] | None = None,
|
||||
filters: set[Filter] | None = None,
|
||||
order: list[OrderTuple] | None = None,
|
||||
limit: int | None = None,
|
||||
offset: int | None = None,
|
||||
|
||||
@@ -239,6 +239,7 @@ class Operator(str, enum.Enum):
|
||||
NOT_LIKE = "NOT LIKE"
|
||||
IS_NULL = "IS NULL"
|
||||
IS_NOT_NULL = "IS NOT NULL"
|
||||
ADHOC = "ADHOC"
|
||||
|
||||
|
||||
FilterValues = str | int | float | bool | datetime | date | time | timedelta | None
|
||||
@@ -252,19 +253,11 @@ class PredicateType(enum.Enum):
|
||||
@dataclass(frozen=True, order=True)
|
||||
class Filter:
|
||||
type: PredicateType
|
||||
column: Dimension | Metric
|
||||
column: Dimension | Metric | None
|
||||
operator: Operator
|
||||
value: FilterValues | frozenset[FilterValues]
|
||||
|
||||
|
||||
# TODO (betodealmeida): convert into Operator:
|
||||
# Filter(type=..., column=None, operator=Operator.AdHoc, value="some definition")
|
||||
@dataclass(frozen=True, order=True)
|
||||
class AdhocFilter:
|
||||
type: PredicateType
|
||||
definition: str
|
||||
|
||||
|
||||
class OrderDirection(enum.Enum):
|
||||
ASC = "ASC"
|
||||
DESC = "DESC"
|
||||
@@ -291,7 +284,7 @@ class GroupLimit:
|
||||
metric: Metric | None
|
||||
direction: OrderDirection = OrderDirection.DESC
|
||||
group_others: bool = False
|
||||
filters: set[Filter | AdhocFilter] | None = None
|
||||
filters: set[Filter] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -328,7 +321,7 @@ class SemanticQuery:
|
||||
|
||||
metrics: list[Metric]
|
||||
dimensions: list[Dimension]
|
||||
filters: set[Filter | AdhocFilter] | None = None
|
||||
filters: set[Filter] | None = None
|
||||
order: list[OrderTuple] | None = None
|
||||
limit: int | None = None
|
||||
offset: int | None = None
|
||||
|
||||
99
superset/create_pandas_semantic_layer.py
Normal file
99
superset/create_pandas_semantic_layer.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Script to create a Pandas semantic layer and Sales semantic view in Superset.
|
||||
|
||||
Run this inside the superset_app container:
|
||||
python /app/superset/create_pandas_semantic_layer.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Add the Superset application directory to the Python path
|
||||
sys.path.insert(0, "/app")
|
||||
|
||||
from superset.app import create_app
|
||||
from superset.extensions import db
|
||||
from superset.utils import json
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.semantic_layers.models import SemanticLayer, SemanticView
|
||||
|
||||
# Build the Superset Flask application and push an application context; the
# context is never popped, so it stays active for the rest of the script.
app = create_app()
app.app_context().push()

# Configure logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_pandas_semantic_layer() -> SemanticLayer:
    """
    Persist a ``SemanticLayer`` row of type ``"pandas"`` and return it.

    The layer's configuration is the JSON encoding of ``{"dataset": "sales"}``
    and its cache timeout is 3600. The object is added to ``db.session`` and
    committed, after which its name, UUID and type are logged.

    :returns: the committed ``SemanticLayer`` instance
    """
    # Runtime import: at module level this name is only imported under
    # TYPE_CHECKING for the annotations.
    from superset.semantic_layers.models import SemanticLayer

    logger.info("Creating Pandas semantic layer...")

    layer = SemanticLayer(
        name="Pandas Semantic Layer",
        description="In-memory semantic layer backed by a Pandas DataFrame",
        type="pandas",
        configuration=json.dumps({"dataset": "sales"}),
        cache_timeout=3600,
    )
    db.session.add(layer)
    db.session.commit()

    logger.info("Created semantic layer:")
    logger.info(" Name: %s", layer.name)
    logger.info(" UUID: %s", layer.uuid)
    logger.info(" Type: %s", layer.type)
    return layer
|
||||
|
||||
|
||||
def create_sales_semantic_view(semantic_layer: SemanticLayer) -> SemanticView:
    """
    Persist a ``SemanticView`` named ``"sales"`` that points at ``semantic_layer``.

    The view is created with an empty JSON object as configuration and a cache
    timeout of 1800, added to ``db.session`` and committed. Its name, UUID and
    semantic layer UUID are then logged.

    :param semantic_layer: layer whose ``uuid`` is stored on the new view
    :returns: the committed ``SemanticView`` instance
    """
    # Runtime import: at module level this name is only imported under
    # TYPE_CHECKING for the annotations.
    from superset.semantic_layers.models import SemanticView

    logger.info("Creating Sales semantic view...")

    layer_uuid = semantic_layer.uuid
    view = SemanticView(
        name="sales",
        configuration="{}",
        cache_timeout=1800,
        semantic_layer_uuid=layer_uuid,
    )
    db.session.add(view)
    db.session.commit()

    logger.info("Created semantic view:")
    logger.info(" Name: %s", view.name)
    logger.info(" UUID: %s", view.uuid)
    logger.info(" Semantic Layer UUID: %s", view.semantic_layer_uuid)
    return view
|
||||
|
||||
|
||||
def main() -> None:
    """Create the Pandas semantic layer, then the Sales view that uses it."""
    banner = "=" * 60
    logger.info(banner)
    logger.info("Creating Pandas Semantic Layer and Sales Semantic View")
    logger.info(banner)

    # The view needs the layer's UUID, so the layer is created first.
    create_sales_semantic_view(create_pandas_semantic_layer())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -32,7 +32,6 @@ import numpy as np
|
||||
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocExpression,
|
||||
AdhocFilter,
|
||||
Day,
|
||||
Dimension,
|
||||
Filter,
|
||||
@@ -370,14 +369,14 @@ def _get_filters_from_query_object(
|
||||
query_object: ValidatedQueryObject,
|
||||
time_offset: str | None,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter | AdhocFilter]:
|
||||
) -> set[Filter]:
|
||||
"""
|
||||
Extract all filters from the query object, including time range filters.
|
||||
|
||||
This simplifies the complexity of from_dttm/to_dttm/inner_from_dttm/inner_to_dttm
|
||||
by converting all time constraints into filters.
|
||||
"""
|
||||
filters: set[Filter | AdhocFilter] = set()
|
||||
filters: set[Filter] = set()
|
||||
|
||||
# 1. Add fetch values predicate if present
|
||||
if (
|
||||
@@ -385,9 +384,11 @@ def _get_filters_from_query_object(
|
||||
and query_object.datasource.fetch_values_predicate
|
||||
):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition=query_object.datasource.fetch_values_predicate,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=query_object.datasource.fetch_values_predicate,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -415,7 +416,7 @@ def _get_filters_from_query_object(
|
||||
return filters
|
||||
|
||||
|
||||
def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
|
||||
def _get_filters_from_extras(extras: dict[str, Any]) -> set[Filter]:
|
||||
"""
|
||||
Extract filters from the extras dict.
|
||||
|
||||
@@ -430,25 +431,29 @@ def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
|
||||
Handled in _convert_time_grain() and used for dimension grain matching
|
||||
|
||||
Note: The WHERE and HAVING clauses from extras are SQL expressions that
|
||||
are passed through as-is to the semantic layer as AdhocFilter objects.
|
||||
are passed through as-is to the semantic layer as adhoc Filter objects.
|
||||
"""
|
||||
filters: set[AdhocFilter] = set()
|
||||
filters: set[Filter] = set()
|
||||
|
||||
# Add WHERE clause from extras
|
||||
if where_clause := extras.get("where"):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition=where_clause,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=where_clause,
|
||||
)
|
||||
)
|
||||
|
||||
# Add HAVING clause from extras
|
||||
if having_clause := extras.get("having"):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.HAVING,
|
||||
definition=having_clause,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=having_clause,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -540,7 +545,7 @@ def _convert_query_object_filter(
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter] | None:
|
||||
"""
|
||||
Convert a QueryObject filter dict to a semantic layer Filter or AdhocFilter.
|
||||
Convert a QueryObject filter dict to a semantic layer Filter.
|
||||
"""
|
||||
operator_str = filter_["op"]
|
||||
|
||||
@@ -676,7 +681,7 @@ def _get_group_limit_from_query_object(
|
||||
def _get_group_limit_filters(
|
||||
query_object: ValidatedQueryObject,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter | AdhocFilter] | None:
|
||||
) -> set[Filter] | None:
|
||||
"""
|
||||
Get separate filters for the group limit subquery if needed.
|
||||
|
||||
@@ -699,7 +704,7 @@ def _get_group_limit_filters(
|
||||
return None
|
||||
|
||||
# Create separate filters for the group limit subquery
|
||||
filters: set[Filter | AdhocFilter] = set()
|
||||
filters: set[Filter] = set()
|
||||
|
||||
# Add time range filter using inner bounds
|
||||
if query_object.granularity:
|
||||
@@ -732,9 +737,11 @@ def _get_group_limit_filters(
|
||||
and query_object.datasource.fetch_values_predicate
|
||||
):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition=query_object.datasource.fetch_values_predicate,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value=query_object.datasource.fetch_values_predicate,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ from pytest_mock import MockerFixture
|
||||
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocExpression,
|
||||
AdhocFilter,
|
||||
Day,
|
||||
Dimension,
|
||||
Filter,
|
||||
@@ -202,9 +201,11 @@ def test_get_filters_from_extras_where() -> None:
|
||||
|
||||
assert len(result) == 1
|
||||
filter_ = next(iter(result))
|
||||
assert isinstance(filter_, AdhocFilter)
|
||||
assert isinstance(filter_, Filter)
|
||||
assert filter_.type == PredicateType.WHERE
|
||||
assert filter_.definition == "customer_id > 100"
|
||||
assert filter_.column is None
|
||||
assert filter_.operator == Operator.ADHOC
|
||||
assert filter_.value == "customer_id > 100"
|
||||
|
||||
|
||||
def test_get_filters_from_extras_having() -> None:
|
||||
@@ -215,7 +216,12 @@ def test_get_filters_from_extras_having() -> None:
|
||||
result = _get_filters_from_extras(extras)
|
||||
|
||||
assert result == {
|
||||
AdhocFilter(type=PredicateType.HAVING, definition="SUM(sales) > 1000"),
|
||||
Filter(
|
||||
type=PredicateType.HAVING,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="SUM(sales) > 1000",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -230,8 +236,18 @@ def test_get_filters_from_extras_both() -> None:
|
||||
result = _get_filters_from_extras(extras)
|
||||
|
||||
assert result == {
|
||||
AdhocFilter(type=PredicateType.WHERE, definition="region = 'US'"),
|
||||
AdhocFilter(type=PredicateType.HAVING, definition="COUNT(*) > 10"),
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="region = 'US'",
|
||||
),
|
||||
Filter(
|
||||
type=PredicateType.HAVING,
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="COUNT(*) > 10",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -450,9 +466,11 @@ def test_get_filters_from_query_object_with_extras(mock_datasource: MagicMock) -
|
||||
operator=Operator.LESS_THAN,
|
||||
value=datetime(2025, 10, 22),
|
||||
),
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition="customer_id > 100",
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="customer_id > 100",
|
||||
),
|
||||
}
|
||||
|
||||
@@ -494,9 +512,11 @@ def test_get_filters_from_query_object_with_fetch_values(
|
||||
operator=Operator.LESS_THAN,
|
||||
value=datetime(2025, 10, 22),
|
||||
),
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition="tenant_id = 123",
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="tenant_id = 123",
|
||||
),
|
||||
}
|
||||
|
||||
@@ -796,9 +816,11 @@ def test_get_group_limit_filters_with_extras(mock_datasource: MagicMock) -> None
|
||||
operator=Operator.LESS_THAN,
|
||||
value=datetime(2025, 10, 22),
|
||||
),
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition="customer_id > 100",
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="customer_id > 100",
|
||||
),
|
||||
}
|
||||
|
||||
@@ -2019,9 +2041,11 @@ def test_get_group_limit_filters_with_fetch_values_predicate(
|
||||
|
||||
assert result is not None
|
||||
assert (
|
||||
AdhocFilter(
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
definition="tenant_id = 123",
|
||||
column=None,
|
||||
operator=Operator.ADHOC,
|
||||
value="tenant_id = 123",
|
||||
)
|
||||
in result
|
||||
)
|
||||
@@ -2372,6 +2396,7 @@ def test_get_filters_from_query_object_with_filter_loop(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
@@ -2444,6 +2469,7 @@ def test_get_group_limit_filters_with_filter_loop(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
@@ -2555,6 +2581,7 @@ def test_get_filters_from_query_object_filter_returns_none(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
@@ -2607,6 +2634,7 @@ def test_get_group_limit_filters_filter_returns_none(
|
||||
f
|
||||
for f in result
|
||||
if isinstance(f, Filter)
|
||||
and f.column
|
||||
and f.column.name == "category"
|
||||
and f.operator == Operator.EQUALS
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user