diff --git a/superset-core/pyproject.toml b/superset-core/pyproject.toml index 30e681f0e89..a3ee4bcf45e 100644 --- a/superset-core/pyproject.toml +++ b/superset-core/pyproject.toml @@ -43,6 +43,7 @@ classifiers = [ ] dependencies = [ "flask-appbuilder>=5.0.2,<6", + "isodate>=0.7.0", "pyarrow>=16.0.0", "pydantic>=2.8.0", "sqlalchemy>=1.4.0,<2.0", diff --git a/superset-core/src/superset_core/semantic_layers/types.py b/superset-core/src/superset_core/semantic_layers/types.py index a6165872438..91c83e33dc2 100644 --- a/superset-core/src/superset_core/semantic_layers/types.py +++ b/superset-core/src/superset_core/semantic_layers/types.py @@ -20,178 +20,83 @@ from __future__ import annotations import enum from dataclasses import dataclass from datetime import date, datetime, time, timedelta -from functools import total_ordering -from typing import Type as TypeOf +import isodate import pyarrow as pa -class Type: - """ - Base class for types. - """ - - -class INTEGER(Type): - """ - Represents an integer type. - """ - - -class NUMBER(Type): - """ - Represents a number type. - """ - - -class DECIMAL(Type): - """ - Represents a decimal type. - """ - - -class STRING(Type): - """ - Represents a string type. - """ - - -class BOOLEAN(Type): - """ - Represents a boolean type. - """ - - -class DATE(Type): - """ - Represents a date type. - """ - - -class TIME(Type): - """ - Represents a time type. - """ - - -class DATETIME(DATE, TIME): - """ - Represents a datetime type. - """ - - -class INTERVAL(Type): - """ - Represents an interval type. - """ - - -class OBJECT(Type): - """ - Represents an object type. - """ - - -class BINARY(Type): - """ - Represents a binary type. - """ - - @dataclass(frozen=True) -@total_ordering class Grain: """ - Base class for time and date grains with comparison support. + Represents a time grain (e.g., day, month, year). Attributes: name: Human-readable name of the grain (e.g., "Second") - representation: ISO 8601 representation (e.g., "PT1S") - value: Time period as a timedelta + representation: ISO 8601 duration (e.g., "PT1S", "P1D", "P1M") """ name: str representation: str - value: timedelta + + def __post_init__(self) -> None: + isodate.parse_duration(self.representation) def __eq__(self, other: object) -> bool: if isinstance(other, Grain): - return self.value == other.value - return NotImplemented - - def __lt__(self, other: object) -> bool: - if isinstance(other, Grain): - return self.value < other.value + return self.representation == other.representation return NotImplemented def __hash__(self) -> int: - return hash((self.name, self.representation, self.value)) + return hash(self.representation) -class Second(Grain): - name = "Second" - representation = "PT1S" - value = timedelta(seconds=1) +class Grains: + """Pre-defined common grains and factory for custom ones.""" + SECOND = Grain("Second", "PT1S") + MINUTE = Grain("Minute", "PT1M") + HOUR = Grain("Hour", "PT1H") + DAY = Grain("Day", "P1D") + WEEK = Grain("Week", "P1W") + MONTH = Grain("Month", "P1M") + QUARTER = Grain("Quarter", "P3M") + YEAR = Grain("Year", "P1Y") -class Minute(Grain): - name = "Minute" - representation = "PT1M" - value = timedelta(minutes=1) + _REGISTRY: dict[str, Grain] = { + "PT1S": SECOND, + "PT1M": MINUTE, + "PT1H": HOUR, + "P1D": DAY, + "P1W": WEEK, + "P1M": MONTH, + "P3M": QUARTER, + "P1Y": YEAR, + } - -class Hour(Grain): - name = "Hour" - representation = "PT1H" - value = timedelta(hours=1) - - -class Day(Grain): - name = "Day" - representation = "P1D" - value = timedelta(days=1) - - -class Week(Grain): - name = "Week" - representation = "P1W" - value = timedelta(weeks=1) - - -class Month(Grain): - name = "Month" - representation = "P1M" - value = timedelta(days=30) - - -class Quarter(Grain): - name = "Quarter" - representation = "P3M" - value = timedelta(days=90) - - -class Year(Grain): - name = "Year" - representation = "P1Y" - value = timedelta(days=365) + @classmethod + def get(cls, representation: str, name: str | None = None) -> Grain: + """Return a pre-defined grain or create a custom one.""" + if grain := cls._REGISTRY.get(representation): + return grain + return Grain(name or representation, representation) @dataclass(frozen=True) class Dimension: id: str name: str - type: TypeOf[Type] + type: pa.DataType definition: str | None = None description: str | None = None - grain: TypeOf[Grain] | None = None + grain: Grain | None = None @dataclass(frozen=True) class Metric: id: str name: str - type: TypeOf[Type] + type: pa.DataType definition: str | None = None description: str | None = None diff --git a/superset/semantic_layers/mapper.py b/superset/semantic_layers/mapper.py index 66c86cf6d4e..f2c13865cdf 100644 --- a/superset/semantic_layers/mapper.py +++ b/superset/semantic_layers/mapper.py @@ -28,30 +28,24 @@ from datetime import datetime, timedelta from time import time from typing import Any, cast, Sequence, TypeGuard +import isodate import numpy as np import pyarrow as pa from superset_core.semantic_layers.types import ( AdhocExpression, - Day, Dimension, Filter, FilterValues, Grain, + Grains, GroupLimit, - Hour, Metric, - Minute, - Month, Operator, OrderDirection, OrderTuple, PredicateType, - Quarter, - Second, SemanticQuery, SemanticResult, - Week, - Year, ) from superset_core.semantic_layers.view import SemanticViewFeature @@ -746,25 +740,14 @@ def _get_group_limit_filters( return filters if filters else None -def _convert_time_grain(time_grain: str) -> type[Grain] | None: +def _convert_time_grain(time_grain: str) -> Grain | None: """ - Convert a time grain string from the query object to a Grain enum. + Convert a time grain string (ISO 8601 duration) to a Grain instance. """ - mapping = { - grain.representation: grain - for grain in [ - Second, - Minute, - Hour, - Day, - Week, - Month, - Quarter, - Year, - ] - } - - return mapping.get(time_grain) + try: + return Grains.get(time_grain) + except (ValueError, isodate.ISO8601Error): + return None def validate_query_object( diff --git a/tests/unit_tests/semantic_layers/mapper_test.py b/tests/unit_tests/semantic_layers/mapper_test.py index 1d9028ee41f..59587722065 100644 --- a/tests/unit_tests/semantic_layers/mapper_test.py +++ b/tests/unit_tests/semantic_layers/mapper_test.py @@ -24,28 +24,18 @@ import pytest from pytest_mock import MockerFixture from superset_core.semantic_layers.types import ( AdhocExpression, - Day, Dimension, Filter, Grain, + Grains, GroupLimit, - Hour, - INTEGER, Metric, - Minute, - Month, - NUMBER, Operator, OrderDirection, PredicateType, - Quarter, - Second, SemanticQuery, SemanticRequest, SemanticResult, - STRING, - Week, - Year, ) from superset_core.semantic_layers.view import SemanticViewFeature @@ -113,21 +103,21 @@ def mock_datasource(mocker: MockerFixture) -> MagicMock: time_dim = Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), description="Order date", definition="order_date", ) category_dim = Dimension( id="products.category", name="category", - type=STRING, + type=pa.utf8(), description="Product category", definition="category", ) region_dim = Dimension( id="customers.region", name="region", - type=STRING, + type=pa.utf8(), description="Customer region", definition="region", ) @@ -136,14 +126,14 @@ def mock_datasource(mocker: MockerFixture) -> MagicMock: sales_metric = Metric( id="orders.total_sales", name="total_sales", - type=NUMBER, + type=pa.float64(), definition="SUM(amount)", description="Total sales", ) count_metric = Metric( id="orders.order_count", name="order_count", - type=INTEGER, + type=pa.int64(), definition="COUNT(*)", description="Order count", ) @@ -169,14 +159,14 @@ def mock_datasource(mocker: MockerFixture) -> MagicMock: @pytest.mark.parametrize( "input_grain, expected_grain", [ - ("PT1S", Second), - ("PT1M", Minute), - ("PT1H", Hour), - ("P1D", Day), - ("P1W", Week), - ("P1M", Month), - ("P1Y", Year), - ("P3M", Quarter), + ("PT1S", Grains.SECOND), + ("PT1M", Grains.MINUTE), + ("PT1H", Grains.HOUR), + ("P1D", Grains.DAY), + ("P1W", Grains.WEEK), + ("P1M", Grains.MONTH), + ("P1Y", Grains.YEAR), + ("P3M", Grains.QUARTER), ("INVALID", None), ("", None), ], @@ -855,7 +845,7 @@ def test_map_query_object_basic(mock_datasource: MagicMock) -> None: Metric( id="orders.total_sales", name="total_sales", - type=NUMBER, + type=pa.float64(), definition="SUM(amount)", description="Total sales", ), @@ -864,7 +854,7 @@ def test_map_query_object_basic(mock_datasource: MagicMock) -> None: Dimension( id="products.category", name="category", - type=STRING, + type=pa.utf8(), definition="category", description="Product category", grain=None, @@ -876,7 +866,7 @@ def test_map_query_object_basic(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -889,7 +879,7 @@ def test_map_query_object_basic(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -930,7 +920,7 @@ def test_map_query_object_with_time_offsets(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -943,7 +933,7 @@ def test_map_query_object_with_time_offsets(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -958,7 +948,7 @@ def test_map_query_object_with_time_offsets(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -971,7 +961,7 @@ def test_map_query_object_with_time_offsets(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -986,7 +976,7 @@ def test_map_query_object_with_time_offsets(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -999,7 +989,7 @@ def test_map_query_object_with_time_offsets(mock_datasource: MagicMock) -> None: column=Dimension( id="orders.order_date", name="order_date", - type=STRING, + type=pa.utf8(), definition="order_date", description="Order date", grain=None, @@ -1111,9 +1101,11 @@ def test_validate_query_object_group_limit_not_supported_error( Test validation error when group limit not supported. """ mock_datasource = mocker.Mock() - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") - sales_metric = Metric("total_sales", "total_sales", NUMBER, "SUM(amount)", "Sales") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") + sales_metric = Metric( + "total_sales", "total_sales", pa.float64(), "SUM(amount)", "Sales" + ) mock_datasource.implementation.dimensions = {time_dim, category_dim} mock_datasource.implementation.metrics = {sales_metric} @@ -1168,7 +1160,7 @@ def test_convert_query_object_filter( Test filter with different operators. """ all_dimensions = { - "category": Dimension("category", "category", STRING, "category", "Category") + "category": Dimension("category", "category", pa.utf8(), "category", "Category") } filter_: ValidatedQueryObjectFilterClause = { @@ -1193,7 +1185,7 @@ def test_convert_query_object_filter_like() -> None: """ Test filter with LIKE operator. """ - all_dimensions = {"name": Dimension("name", "name", STRING, "name", "Name")} + all_dimensions = {"name": Dimension("name", "name", pa.utf8(), "name", "Name")} filter_: ValidatedQueryObjectFilterClause = { "op": "LIKE", @@ -1905,7 +1897,7 @@ def test_convert_query_object_filter_temporal_range_with_value() -> None: """ all_dimensions = { "order_date": Dimension( - "order_date", "order_date", STRING, "order_date", "Order date" + "order_date", "order_date", pa.utf8(), "order_date", "Order date" ) } filter_: ValidatedQueryObjectFilterClause = { @@ -2113,8 +2105,10 @@ def test_validate_metrics_adhoc_error( Test validation error for adhoc metrics. """ mock_datasource = mocker.Mock() - category_dim = Dimension("category", "category", STRING, "category", "Category") - sales_metric = Metric("total_sales", "total_sales", NUMBER, "SUM(amount)", "Sales") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") + sales_metric = Metric( + "total_sales", "total_sales", pa.float64(), "SUM(amount)", "Sales" + ) mock_datasource.implementation.dimensions = {category_dim} mock_datasource.implementation.metrics = {sales_metric} @@ -2235,9 +2229,11 @@ def test_validate_query_object_group_others_not_supported_error( Test validation error when group_others feature not supported. """ mock_datasource = mocker.Mock() - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") - sales_metric = Metric("total_sales", "total_sales", NUMBER, "SUM(amount)", "Sales") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") + sales_metric = Metric( + "total_sales", "total_sales", pa.float64(), "SUM(amount)", "Sales" + ) mock_datasource.implementation.dimensions = {time_dim, category_dim} mock_datasource.implementation.metrics = {sales_metric} @@ -2268,8 +2264,10 @@ def test_validate_query_object_adhoc_orderby_not_supported_error( Test validation error when adhoc expressions in orderby not supported. """ mock_datasource = mocker.Mock() - category_dim = Dimension("category", "category", STRING, "category", "Category") - sales_metric = Metric("total_sales", "total_sales", NUMBER, "SUM(amount)", "Sales") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") + sales_metric = Metric( + "total_sales", "total_sales", pa.float64(), "SUM(amount)", "Sales" + ) mock_datasource.implementation.dimensions = {category_dim} mock_datasource.implementation.metrics = {sales_metric} @@ -2353,8 +2351,8 @@ def test_get_filters_from_query_object_with_filter_loop( Test _get_filters_from_query_object processes filter array correctly. """ # Create dimensions - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"order_date": time_dim, "category": category_dim} # Create mock query object with filters @@ -2403,7 +2401,7 @@ def test_convert_query_object_filter_temporal_range_non_string_value() -> None: """ all_dimensions = { "order_date": Dimension( - "order_date", "order_date", STRING, "order_date", "Order date" + "order_date", "order_date", pa.utf8(), "order_date", "Order date" ) } filter_: ValidatedQueryObjectFilterClause = { @@ -2425,8 +2423,8 @@ def test_get_group_limit_filters_with_filter_loop( Test _get_group_limit_filters processes filter array correctly. """ # Create dimensions - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"order_date": time_dim, "category": category_dim} # Create mock query object with filters @@ -2488,7 +2486,9 @@ def test_validate_granularity_valid(mocker: MockerFixture) -> None: """ mock_datasource = mocker.Mock() - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date", Day) + time_dim = Dimension( + "order_date", "order_date", pa.utf8(), "order_date", "Date", Grains.DAY + ) mock_datasource.implementation.dimensions = {time_dim} @@ -2507,8 +2507,10 @@ def test_validate_group_limit_valid(mocker: MockerFixture) -> None: """ mock_datasource = mocker.Mock() - category_dim = Dimension("category", "category", STRING, "category", "Category") - sales_metric = Metric("total_sales", "total_sales", NUMBER, "SUM(amount)", "Sales") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") + sales_metric = Metric( + "total_sales", "total_sales", pa.float64(), "SUM(amount)", "Sales" + ) mock_datasource.implementation.dimensions = {category_dim} mock_datasource.implementation.metrics = {sales_metric} @@ -2535,8 +2537,8 @@ def test_get_filters_from_query_object_filter_returns_none( This covers the branch where the filter conversion fails and loop continues. """ # Create dimensions - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"order_date": time_dim, "category": category_dim} # Create mock query object with a filter that will return None @@ -2587,8 +2589,8 @@ def test_get_group_limit_filters_filter_returns_none( This covers the branch where the filter conversion fails and loop continues. """ # Create dimensions - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"order_date": time_dim, "category": category_dim} # Create mock query object with filters @@ -2664,8 +2666,8 @@ def test_get_group_limit_filters_granularity_missing_inner_from( Covers branch 704->729 where time_dimension exists but inner_from_dttm is None. """ # Create dimensions - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"order_date": time_dim, "category": category_dim} # Create mock query object with granularity but missing inner_from_dttm @@ -2693,8 +2695,8 @@ def test_get_group_limit_filters_granularity_missing_inner_to( Covers branch 704->729 where time_dimension exists but inner_to_dttm is None. """ # Create dimensions - time_dim = Dimension("order_date", "order_date", STRING, "order_date", "Date") - category_dim = Dimension("category", "category", STRING, "category", "Category") + time_dim = Dimension("order_date", "order_date", pa.utf8(), "order_date", "Date") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"order_date": time_dim, "category": category_dim} # Create mock query object with granularity but missing inner_to_dttm @@ -2722,7 +2724,7 @@ def test_get_group_limit_filters_no_granularity( This explicitly covers the branch 704->729 where granularity is Falsy. """ # Create dimensions - category_dim = Dimension("category", "category", STRING, "category", "Category") + category_dim = Dimension("category", "category", pa.utf8(), "category", "Category") all_dimensions = {"category": category_dim} # Create mock query object with no granularity