mirror of
https://github.com/apache/superset.git
synced 2026-06-14 12:09:14 +00:00
Compare commits
26 Commits
fix/deckgl
...
semantic-l
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
508aad1603 | ||
|
|
954cf32ca4 | ||
|
|
552c685a6b | ||
|
|
a26c91c4e2 | ||
|
|
3c8835bd75 | ||
|
|
955d8bc205 | ||
|
|
cd8e27d33c | ||
|
|
d0962bd32f | ||
|
|
28870168cd | ||
|
|
87d15d32c4 | ||
|
|
7d9a8a0c5a | ||
|
|
ddba88ffad | ||
|
|
1e50422a66 | ||
|
|
246dbd7f5c | ||
|
|
9b861b2848 | ||
|
|
1c35c3f6d0 | ||
|
|
b71654877f | ||
|
|
cd447ca1fd | ||
|
|
01ac966b83 | ||
|
|
97e5f0631d | ||
|
|
b7acb7984f | ||
|
|
d3919cf24f | ||
|
|
27889651b3 | ||
|
|
361fe6fe89 | ||
|
|
8506d70242 | ||
|
|
00a53eec2d |
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
@@ -20,7 +20,7 @@
|
||||
|
||||
# Notify PMC members of changes to GitHub Actions
|
||||
|
||||
/.github/ @villebro @geido @eschutho @rusackas @betodealmeida @nytai @mistercrunch @craig-rueda @kgabryje @dpgaspar @sadpandajoe
|
||||
/.github/ @villebro @geido @eschutho @rusackas @betodealmeida @nytai @mistercrunch @craig-rueda @kgabryje @dpgaspar @sadpandajoe @hainenber
|
||||
|
||||
# Notify PMC members of changes to required GitHub Actions
|
||||
|
||||
|
||||
2
.github/workflows/ephemeral-env-pr-close.yml
vendored
2
.github/workflows/ephemeral-env-pr-close.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v5
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
|
||||
4
.github/workflows/ephemeral-env.yml
vendored
4
.github/workflows/ephemeral-env.yml
vendored
@@ -189,7 +189,7 @@ jobs:
|
||||
--extra-flags "--build-arg INCLUDE_CHROMIUM=false"
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v5
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
@@ -225,7 +225,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v5
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
|
||||
@@ -52,6 +52,7 @@ jobs:
|
||||
SUPERSET_SECRET_KEY: not-a-secret
|
||||
run: |
|
||||
pytest --durations-min=0.5 --cov=superset/sql/ ./tests/unit_tests/sql/ --cache-clear --cov-fail-under=100
|
||||
pytest --durations-min=0.5 --cov=superset/semantic_layers/ ./tests/unit_tests/semantic_layers/ --cache-clear --cov-fail-under=100
|
||||
- name: Upload code coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
|
||||
@@ -159,8 +159,8 @@ services:
|
||||
SCARF_ANALYTICS: "${SCARF_ANALYTICS:-}"
|
||||
# configuring the dev-server to use the host.docker.internal to connect to the backend
|
||||
superset: "http://superset-light:8088"
|
||||
# Webpack dev server configuration
|
||||
WEBPACK_DEVSERVER_HOST: "${WEBPACK_DEVSERVER_HOST:-127.0.0.1}"
|
||||
# Webpack dev server must bind to 0.0.0.0 to be accessible from outside the container
|
||||
WEBPACK_DEVSERVER_HOST: "${WEBPACK_DEVSERVER_HOST:-0.0.0.0}"
|
||||
WEBPACK_DEVSERVER_PORT: "${WEBPACK_DEVSERVER_PORT:-9000}"
|
||||
ports:
|
||||
- "${NODE_PORT:-9001}:9000" # Parameterized port, accessible on all interfaces
|
||||
|
||||
@@ -175,7 +175,7 @@ services:
|
||||
SCARF_ANALYTICS: "${SCARF_ANALYTICS:-}"
|
||||
# configuring the dev-server to use the host.docker.internal to connect to the backend
|
||||
superset: "http://superset:8088"
|
||||
# Bind to all interfaces so Docker port mapping works
|
||||
# Webpack dev server must bind to 0.0.0.0 to be accessible from outside the container
|
||||
WEBPACK_DEVSERVER_HOST: "0.0.0.0"
|
||||
ports:
|
||||
- "127.0.0.1:${NODE_PORT:-9000}:9000" # exposing the dynamic webpack dev server
|
||||
|
||||
@@ -105,7 +105,12 @@ class CeleryConfig:
|
||||
|
||||
CELERY_CONFIG = CeleryConfig
|
||||
|
||||
FEATURE_FLAGS = {"ALERT_REPORTS": True, "DATASET_FOLDERS": True}
|
||||
FEATURE_FLAGS = {
|
||||
"ALERT_REPORTS": True,
|
||||
"DATASET_FOLDERS": True,
|
||||
"ENABLE_EXTENSIONS": True,
|
||||
}
|
||||
EXTENSIONS_PATH = "/app/docker/extensions"
|
||||
ALERT_REPORTS_NOTIFICATION_DRY_RUN = True
|
||||
WEBDRIVER_BASEURL = f"http://superset_app{os.environ.get('SUPERSET_APP_ROOT', '/')}/" # When using docker compose baseurl should be http://superset_nginx{ENV{BASEPATH}}/ # noqa: E501
|
||||
# The base URL for the email report hyperlinks.
|
||||
|
||||
@@ -66,7 +66,7 @@
|
||||
"@swc/core": "^1.15.11",
|
||||
"antd": "^6.2.3",
|
||||
"baseline-browser-mapping": "^2.9.19",
|
||||
"caniuse-lite": "^1.0.30001768",
|
||||
"caniuse-lite": "^1.0.30001769",
|
||||
"docusaurus-plugin-openapi-docs": "^4.6.0",
|
||||
"docusaurus-theme-openapi-docs": "^4.6.0",
|
||||
"js-yaml": "^4.1.1",
|
||||
@@ -76,7 +76,7 @@
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-github-btn": "^1.4.0",
|
||||
"react-resize-detector": "7.1.2",
|
||||
"react-resize-detector": "^9.1.1",
|
||||
"react-svg-pan-zoom": "^3.13.1",
|
||||
"react-table": "^7.8.0",
|
||||
"remark-import-partial": "^0.0.2",
|
||||
|
||||
3836
docs/yarn.lock
3836
docs/yarn.lock
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,114 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable, TypeVar
|
||||
|
||||
from pydantic import BaseModel
|
||||
from superset_core.semantic_layers.semantic_view import SemanticView
|
||||
|
||||
# Configuration model accepted by a semantic layer; must be a pydantic model.
ConfigT = TypeVar(
    "ConfigT",
    bound=BaseModel,
    contravariant=True,
)

# Concrete semantic view type produced by a semantic layer.
SemanticViewT = TypeVar(
    "SemanticViewT",
    bound="SemanticView",
)
|
||||
|
||||
|
||||
# TODO (betodealmeida): convert to ABC
|
||||
@runtime_checkable
class SemanticLayer(Protocol[ConfigT, SemanticViewT]):
    """
    Structural interface that semantic layer implementations must satisfy.

    The protocol is generic over the configuration model (``ConfigT``) and the
    semantic view type (``SemanticViewT``) that the layer exposes.
    """

    @classmethod
    def from_configuration(
        cls,
        configuration: dict[str, Any],
    ) -> SemanticLayer[ConfigT, SemanticViewT]:
        """
        Build a semantic layer instance from its configuration mapping.
        """
        ...

    @classmethod
    def get_configuration_schema(
        cls,
        configuration: ConfigT | None = None,
    ) -> dict[str, Any]:
        """
        Return the JSON schema describing the configuration of the semantic layer.

        Callers may pass a partial `configuration` so that the schema can be
        refined progressively, which enables step-by-step validation and a better
        UX. As an illustration, consider a semantic layer that needs:

            - auth information
            - a database

        Once the user has supplied the auth information, a client can submit that
        partial configuration here and receive a JSON schema that enumerates the
        databases the user can access, so a dropdown can be populated.

        The Snowflake semantic layer has an example implementation of this method,
        where database and schema names are populated based on the provided
        connection info.
        """
        ...

    @classmethod
    def get_runtime_schema(
        cls,
        configuration: ConfigT,
        runtime_data: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """
        Return the JSON schema of the runtime parameters needed to load views.

        Given the configuration of the semantic layer, this describes what is
        still required in order to connect to a semantic view. As an illustration,
        consider a semantic layer configured with:

            - auth information
            - an optional database

        When no database was given at creation time, the runtime schema requires
        the database name before any semantic view can be loaded. Semantic layers
        can therefore either be pinned to a specific database (or project,
        account, etc.) or let users choose it at query time.

        The Snowflake semantic layer has an example implementation of this method,
        where database and schema names are required if they were not provided in
        the initial configuration.
        """
        ...

    def get_semantic_views(
        self,
        runtime_configuration: dict[str, Any],
    ) -> set[SemanticViewT]:
        """
        Return the semantic views exposed by this semantic layer.

        `runtime_configuration` may carry information such as a project or a
        schema, which is used to narrow down the semantic views returned.
        """
        ...

    def get_semantic_view(
        self,
        name: str,
        additional_configuration: dict[str, Any],
    ) -> SemanticViewT:
        """
        Return a single semantic view, looked up by name and extra configuration.
        """
        ...
|
||||
106
superset-core/src/superset_core/semantic_layers/semantic_view.py
Normal file
106
superset-core/src/superset_core/semantic_layers/semantic_view.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocFilter,
|
||||
Dimension,
|
||||
Filter,
|
||||
GroupLimit,
|
||||
Metric,
|
||||
OrderTuple,
|
||||
SemanticResult,
|
||||
)
|
||||
|
||||
|
||||
# TODO (betodealmeida): move to the extension JSON
|
||||
class SemanticViewFeature(enum.Enum):
    """
    Optional capabilities that a semantic view can advertise.

    Every member's value is identical to its name.
    """

    ADHOC_EXPRESSIONS_IN_ORDERBY = "ADHOC_EXPRESSIONS_IN_ORDERBY"
    GROUP_LIMIT = "GROUP_LIMIT"
    GROUP_OTHERS = "GROUP_OTHERS"
|
||||
|
||||
|
||||
# TODO (betodealmeida): convert to ABC
|
||||
@runtime_checkable
class SemanticView(Protocol):
    """
    Structural interface that semantic view implementations must satisfy.
    """

    # Set of optional features supported by the view.
    features: frozenset[SemanticViewFeature]

    def uid(self) -> str:
        """
        Return an identifier that is unique to this semantic view.
        """
        ...

    def get_dimensions(self) -> set[Dimension]:
        """
        Return the dimensions declared by the semantic view.
        """
        ...

    def get_metrics(self) -> set[Metric]:
        """
        Return the metrics declared by the semantic view.
        """
        ...

    def get_values(
        self,
        dimension: Dimension,
        filters: set[Filter | AdhocFilter] | None = None,
    ) -> SemanticResult:
        """
        Return the distinct values of a dimension, optionally filtered.
        """
        ...

    def get_dataframe(
        self,
        metrics: list[Metric],
        dimensions: list[Dimension],
        filters: set[Filter | AdhocFilter] | None = None,
        order: list[OrderTuple] | None = None,
        limit: int | None = None,
        offset: int | None = None,
        *,
        group_limit: GroupLimit | None = None,
    ) -> SemanticResult:
        """
        Run a semantic query and return its results as a DataFrame.
        """
        ...

    def get_row_count(
        self,
        metrics: list[Metric],
        dimensions: list[Dimension],
        filters: set[Filter | AdhocFilter] | None = None,
        order: list[OrderTuple] | None = None,
        limit: int | None = None,
        offset: int | None = None,
        *,
        group_limit: GroupLimit | None = None,
    ) -> SemanticResult:
        """
        Run a query and return how many rows its result would contain.
        """
        ...
|
||||
335
superset-core/src/superset_core/semantic_layers/types.py
Normal file
335
superset-core/src/superset_core/semantic_layers/types.py
Normal file
@@ -0,0 +1,335 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, time, timedelta
|
||||
from functools import total_ordering
|
||||
from typing import Type as TypeOf
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
# Public API of this module. Besides the type markers and grains, it lists the
# query-model names (metrics, filters, ordering, results) that sibling modules
# import from here, so that star-imports and tooling see them as public too.
__all__ = [
    "AdhocExpression",
    "AdhocFilter",
    "BINARY",
    "BOOLEAN",
    "DATE",
    "DATETIME",
    "Day",
    "DECIMAL",
    "Dimension",
    "Filter",
    "FilterValues",
    "Grain",
    "GroupLimit",
    "Hour",
    "INTEGER",
    "INTERVAL",
    "Metric",
    "Minute",
    "Month",
    "NUMBER",
    "OBJECT",
    "Operator",
    "OrderDirection",
    "OrderTuple",
    "PredicateType",
    "Quarter",
    "Second",
    "SemanticQuery",
    "SemanticRequest",
    "SemanticResult",
    "STRING",
    "TIME",
    "Type",
    "Week",
    "Year",
]
|
||||
|
||||
|
||||
class Type:
    """
    Root of the type hierarchy; all concrete type markers derive from it.
    """
|
||||
|
||||
|
||||
class INTEGER(Type):
    """
    Type marker for integer values.
    """
|
||||
|
||||
|
||||
class NUMBER(Type):
    """
    Type marker for numeric values.
    """
|
||||
|
||||
|
||||
class DECIMAL(Type):
    """
    Type marker for decimal values.
    """
|
||||
|
||||
|
||||
class STRING(Type):
    """
    Type marker for string values.
    """
|
||||
|
||||
|
||||
class BOOLEAN(Type):
    """
    Type marker for boolean values.
    """
|
||||
|
||||
|
||||
class DATE(Type):
    """
    Type marker for date values.
    """
|
||||
|
||||
|
||||
class TIME(Type):
    """
    Type marker for time values.
    """
|
||||
|
||||
|
||||
class DATETIME(DATE, TIME):
    """
    Type marker for datetime values; it is both a ``DATE`` and a ``TIME``.
    """
|
||||
|
||||
|
||||
class INTERVAL(Type):
    """
    Type marker for interval values.
    """
|
||||
|
||||
|
||||
class OBJECT(Type):
    """
    Type marker for object values.
    """
|
||||
|
||||
|
||||
class BINARY(Type):
    """
    Type marker for binary values.
    """
|
||||
|
||||
|
||||
@dataclass(frozen=True)
@total_ordering
class Grain:
    """
    Base class for time and date grains with comparison support.

    Grains are compared, ordered and hashed by their ``value`` only, so two
    grains spanning the same period are interchangeable as dict keys and set
    members regardless of their name or representation.

    Attributes:
        name: Human-readable name of the grain (e.g., "Second")
        representation: ISO 8601 representation (e.g., "PT1S")
        value: Time period as a timedelta
    """

    name: str
    representation: str
    value: timedelta

    def __eq__(self, other: object) -> bool:
        """
        Return whether both grains span the same period.

        Returns ``NotImplemented`` for non-grain operands so that Python can
        fall back to the other operand's comparison.
        """
        if not isinstance(other, Grain):
            return NotImplemented
        return self.value == other.value

    def __lt__(self, other: object) -> bool:
        """
        Return whether this grain spans a shorter period than ``other``.

        The remaining ordering methods are derived by ``total_ordering``.
        """
        if not isinstance(other, Grain):
            return NotImplemented
        return self.value < other.value

    def __hash__(self) -> int:
        """
        Hash on ``value`` only, matching ``__eq__``.

        Equal objects must have equal hashes; hashing the name and the
        representation as well would break that for grains that share a period.
        """
        return hash(self.value)
|
||||
|
||||
|
||||
class Second(Grain):
    """
    Grain spanning one second.
    """

    name = "Second"
    representation = "PT1S"
    value = timedelta(seconds=1)
|
||||
|
||||
|
||||
class Minute(Grain):
    """
    Grain spanning one minute.
    """

    name = "Minute"
    representation = "PT1M"
    value = timedelta(minutes=1)
|
||||
|
||||
|
||||
class Hour(Grain):
    """
    Grain spanning one hour.
    """

    name = "Hour"
    representation = "PT1H"
    value = timedelta(hours=1)
|
||||
|
||||
|
||||
class Day(Grain):
    """
    Grain spanning one day.
    """

    name = "Day"
    representation = "P1D"
    value = timedelta(days=1)
|
||||
|
||||
|
||||
class Week(Grain):
    """
    Grain spanning one week.
    """

    name = "Week"
    representation = "P1W"
    value = timedelta(weeks=1)
|
||||
|
||||
|
||||
class Month(Grain):
    """
    Grain spanning one month, approximated as 30 days.
    """

    name = "Month"
    representation = "P1M"
    value = timedelta(days=30)
|
||||
|
||||
|
||||
class Quarter(Grain):
    """
    Grain spanning one quarter, approximated as 90 days.
    """

    name = "Quarter"
    representation = "P3M"
    value = timedelta(days=90)
|
||||
|
||||
|
||||
class Year(Grain):
    """
    Grain spanning one year, approximated as 365 days.
    """

    name = "Year"
    representation = "P1Y"
    value = timedelta(days=365)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Dimension:
    """
    Immutable description of a dimension exposed by a semantic view.
    """

    # Required fields.
    id: str
    name: str
    type: TypeOf[Type]  # a ``Type`` subclass, not an instance

    # Optional fields.
    definition: str | None = None
    description: str | None = None
    grain: TypeOf[Grain] | None = None  # a ``Grain`` subclass, when set
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Metric:
    """
    Immutable description of a metric exposed by a semantic view.
    """

    # Required fields; unlike dimensions, a metric always has a definition.
    id: str
    name: str
    type: TypeOf[Type]  # a ``Type`` subclass, not an instance
    definition: str

    # Optional fields.
    description: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AdhocExpression:
    """
    Immutable ad-hoc expression, made of an identifier and its definition.
    """

    id: str
    definition: str
|
||||
|
||||
|
||||
class Operator(str, enum.Enum):
    """
    Comparison operators usable in a filter.

    Members are also ``str`` instances whose value is the operator's text.
    """

    # Equality and ordering comparisons.
    EQUALS = "="
    NOT_EQUALS = "!="
    GREATER_THAN = ">"
    LESS_THAN = "<"
    GREATER_THAN_OR_EQUAL = ">="
    LESS_THAN_OR_EQUAL = "<="

    # Set membership.
    IN = "IN"
    NOT_IN = "NOT IN"

    # Pattern matching.
    LIKE = "LIKE"
    NOT_LIKE = "NOT LIKE"

    # Null checks.
    IS_NULL = "IS NULL"
    IS_NOT_NULL = "IS NOT NULL"
|
||||
|
||||
|
||||
FilterValues = str | int | float | bool | datetime | date | time | timedelta | None
|
||||
|
||||
|
||||
class PredicateType(enum.Enum):
    """
    Kind of predicate a filter is applied as: ``WHERE`` or ``HAVING``.
    """

    WHERE = "WHERE"
    HAVING = "HAVING"
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True)
class Filter:
    """
    Immutable structured filter: a column compared to a value by an operator.

    Ordering comparisons are generated field by field, in declaration order.
    """

    type: PredicateType
    column: Dimension | Metric
    operator: Operator
    value: FilterValues | frozenset[FilterValues]  # frozenset holds several values
|
||||
|
||||
|
||||
# TODO (betodealmeida): convert into Operator:
|
||||
# Filter(type=..., column=None, operator=Operator.AdHoc, value="some definition")
|
||||
@dataclass(frozen=True, order=True)
class AdhocFilter:
    """
    Immutable free-form filter, given as a predicate type and a definition.

    Ordering comparisons are generated field by field, in declaration order.
    """

    type: PredicateType
    definition: str
|
||||
|
||||
|
||||
class OrderDirection(enum.Enum):
    """
    Sort direction: ascending or descending.
    """

    ASC = "ASC"
    DESC = "DESC"
|
||||
|
||||
|
||||
# A single ordering entry: the element to sort by, paired with its direction.
OrderTuple = tuple[
    Metric | Dimension | AdhocExpression,
    OrderDirection,
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class GroupLimit:
    """
    Restrict a query to the top/bottom N combinations of the given dimensions.

    `filters` lets the group limit subquery use its own filter constraints. That
    helps when the top N groups should be picked using criteria (e.g., a different
    time range) other than the ones applied to the main query.

    For instance, the top 10 products could be chosen by their sales over the last
    30 days, while the query itself shows daily sales for those products over the
    last 7 days.
    """

    # Required fields.
    dimensions: list[Dimension]
    top: int
    metric: Metric | None

    # Optional fields.
    direction: OrderDirection = OrderDirection.DESC
    group_others: bool = False
    filters: set[Filter | AdhocFilter] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SemanticRequest:
    """
    A request that was issued in order to obtain semantic results.

    Examples include a SQL query or an HTTP request.
    """

    type: str
    definition: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SemanticResult:
    """
    Outcome of a semantic query.

    Alongside the results, it carries the requests (SQL queries, HTTP requests)
    that were performed to obtain them, which helps with troubleshooting.
    """

    requests: list[SemanticRequest]
    # TODO (betodealmeida): convert to PyArrow Table
    results: DataFrame
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SemanticQuery:
    """
    Immutable description of a semantic query.
    """

    # Required fields.
    metrics: list[Metric]
    dimensions: list[Dimension]

    # Optional fields.
    filters: set[Filter | AdhocFilter] | None = None
    order: list[OrderTuple] | None = None
    limit: int | None = None
    offset: int | None = None
    group_limit: GroupLimit | None = None
|
||||
1114
superset-embedded-sdk/package-lock.json
generated
1114
superset-embedded-sdk/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -54,7 +54,6 @@ module.exports = {
|
||||
['@babel/plugin-transform-runtime', { corejs: 3 }],
|
||||
// only used in packages/superset-ui-core/src/chart/components/reactify.tsx
|
||||
['babel-plugin-typescript-to-proptypes', { loose: true }],
|
||||
'react-hot-loader/babel',
|
||||
[
|
||||
'@emotion/babel-plugin',
|
||||
{
|
||||
|
||||
212
superset-frontend/package-lock.json
generated
212
superset-frontend/package-lock.json
generated
@@ -79,7 +79,7 @@
|
||||
"geostyler-openlayers-parser": "^4.3.0",
|
||||
"geostyler-style": "7.5.0",
|
||||
"geostyler-wfs-parser": "^2.0.3",
|
||||
"googleapis": "^171.2.0",
|
||||
"googleapis": "^171.4.0",
|
||||
"immer": "^11.1.3",
|
||||
"interweave": "^13.1.1",
|
||||
"jquery": "^4.0.0",
|
||||
@@ -89,7 +89,7 @@
|
||||
"json-stringify-pretty-compact": "^2.0.0",
|
||||
"lodash": "^4.17.23",
|
||||
"mapbox-gl": "^3.18.1",
|
||||
"markdown-to-jsx": "^9.6.1",
|
||||
"markdown-to-jsx": "^9.7.3",
|
||||
"match-sorter": "^6.3.4",
|
||||
"memoize-one": "^5.2.1",
|
||||
"mousetrap": "^1.6.5",
|
||||
@@ -107,13 +107,12 @@
|
||||
"react-dnd-html5-backend": "^11.1.3",
|
||||
"react-dom": "^17.0.2",
|
||||
"react-google-recaptcha": "^3.1.0",
|
||||
"react-hot-loader": "^4.13.1",
|
||||
"react-intersection-observer": "^10.0.2",
|
||||
"react-json-tree": "^0.20.0",
|
||||
"react-lines-ellipsis": "^0.16.1",
|
||||
"react-loadable": "^5.5.0",
|
||||
"react-redux": "^7.2.9",
|
||||
"react-resize-detector": "^7.1.2",
|
||||
"react-resize-detector": "^9.1.1",
|
||||
"react-reverse-portal": "^2.3.0",
|
||||
"react-router-dom": "^5.3.4",
|
||||
"react-search-input": "^0.11.3",
|
||||
@@ -144,9 +143,9 @@
|
||||
"@applitools/eyes-storybook": "^3.63.10",
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/compat-data": "^7.28.4",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/eslint-parser": "^7.28.6",
|
||||
"@babel/node": "^7.28.6",
|
||||
"@babel/node": "^7.29.0",
|
||||
"@babel/plugin-syntax-dynamic-import": "^7.8.3",
|
||||
"@babel/plugin-transform-export-namespace-from": "^7.27.1",
|
||||
"@babel/plugin-transform-modules-commonjs": "^7.28.6",
|
||||
@@ -161,7 +160,6 @@
|
||||
"@cypress/react": "^8.0.2",
|
||||
"@emotion/babel-plugin": "^11.13.5",
|
||||
"@emotion/jest": "^11.14.2",
|
||||
"@hot-loader/react-dom": "^17.0.2",
|
||||
"@istanbuljs/nyc-config-typescript": "^1.0.1",
|
||||
"@mihkeleidast/storybook-addon-source": "^1.0.1",
|
||||
"@playwright/test": "^1.58.1",
|
||||
@@ -192,7 +190,7 @@
|
||||
"@types/js-levenshtein": "^1.1.3",
|
||||
"@types/json-bigint": "^1.0.4",
|
||||
"@types/mousetrap": "^1.6.15",
|
||||
"@types/node": "^25.1.0",
|
||||
"@types/node": "^25.2.1",
|
||||
"@types/react": "^17.0.83",
|
||||
"@types/react-dom": "^17.0.26",
|
||||
"@types/react-loadable": "^5.5.11",
|
||||
@@ -1184,21 +1182,21 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/core": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.6.tgz",
|
||||
"integrity": "sha512-H3mcG6ZDLTlYfaSNi0iOKkigqMFvkTKlGUYlD8GW7nNOYRrevuA46iTypPyv+06V3fEmvvazfntkBU34L0azAw==",
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz",
|
||||
"integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.28.6",
|
||||
"@babel/generator": "^7.28.6",
|
||||
"@babel/code-frame": "^7.29.0",
|
||||
"@babel/generator": "^7.29.0",
|
||||
"@babel/helper-compilation-targets": "^7.28.6",
|
||||
"@babel/helper-module-transforms": "^7.28.6",
|
||||
"@babel/helpers": "^7.28.6",
|
||||
"@babel/parser": "^7.28.6",
|
||||
"@babel/parser": "^7.29.0",
|
||||
"@babel/template": "^7.28.6",
|
||||
"@babel/traverse": "^7.28.6",
|
||||
"@babel/types": "^7.28.6",
|
||||
"@babel/traverse": "^7.29.0",
|
||||
"@babel/types": "^7.29.0",
|
||||
"@jridgewell/remapping": "^2.3.5",
|
||||
"convert-source-map": "^2.0.0",
|
||||
"debug": "^4.1.0",
|
||||
@@ -1596,15 +1594,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/node": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/node/-/node-7.28.6.tgz",
|
||||
"integrity": "sha512-X3i8Zq/3Au5of74DzuYfgKS2oKRrYGcd5O5QlrB7AqspYKWy0XRTG69HDYnower3HRH/lL38IYAZVPh3OQMXVg==",
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/node/-/node-7.29.0.tgz",
|
||||
"integrity": "sha512-9UeU8F3rx2lOZXneEW2HTnTYdA8+fXP0kr54tk7d0fPomWNlZ6WJ2H9lunr5dSvr8FNY0CDnop3Km6jZ5NAUsQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/register": "^7.28.6",
|
||||
"commander": "^6.2.0",
|
||||
"core-js": "^3.30.2",
|
||||
"core-js": "^3.48.0",
|
||||
"node-environment-flags": "^1.0.5",
|
||||
"regenerator-runtime": "^0.14.0",
|
||||
"v8flags": "^3.1.1"
|
||||
@@ -5162,21 +5160,6 @@
|
||||
"@hapi/hoek": "^11.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@hot-loader/react-dom": {
|
||||
"version": "17.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@hot-loader/react-dom/-/react-dom-17.0.2.tgz",
|
||||
"integrity": "sha512-G2RZrFhsQClS+bdDh/Ojpk3SgocLPUGnvnJDTQYnmKSSwXtU+Yh+8QMs+Ia3zaAvBiOSpIIDSUxuN69cvKqrWg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.1.0",
|
||||
"object-assign": "^4.1.1",
|
||||
"scheduler": "^0.20.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "17.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@humanwhocodes/config-array": {
|
||||
"version": "0.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz",
|
||||
@@ -19949,9 +19932,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.0.tgz",
|
||||
"integrity": "sha512-DZ8VwRFUNzuqJ5khrvwMXHmvPe+zGayJhr2CDNiKB1WBE1ST8Djl00D0IC4vvNmHMdj6DlbYRIaFE7WHjlDl5w==",
|
||||
"version": "25.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.1.tgz",
|
||||
"integrity": "sha512-CPrnr8voK8vC6eEtyRzvMpgp3VyVRhgclonE7qYi6P9sXwYb59ucfrnmFBTaP0yUi8Gk4yZg/LlTJULGxvTNsg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
@@ -29209,11 +29192,6 @@
|
||||
"jspdf": "^2.5.1"
|
||||
}
|
||||
},
|
||||
"node_modules/dom-walk": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/dom-walk/-/dom-walk-0.1.2.tgz",
|
||||
"integrity": "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w=="
|
||||
},
|
||||
"node_modules/domelementtype": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
|
||||
@@ -34387,16 +34365,6 @@
|
||||
"dev": true,
|
||||
"license": "BSD-2-Clause"
|
||||
},
|
||||
"node_modules/global": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmjs.org/global/-/global-4.4.0.tgz",
|
||||
"integrity": "sha512-wv/LAoHdRE3BeTGz53FAamhGlPLhlssK45usmGFThIi4XqnBmjKQ16u+RNbP7WvigRZDxUsM0J3gcQ5yicaL0w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-document": "^2.19.0",
|
||||
"process": "^0.11.10"
|
||||
}
|
||||
},
|
||||
"node_modules/global-directory": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz",
|
||||
@@ -34592,9 +34560,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/googleapis": {
|
||||
"version": "171.2.0",
|
||||
"resolved": "https://registry.npmjs.org/googleapis/-/googleapis-171.2.0.tgz",
|
||||
"integrity": "sha512-6VGd2GpMBKs/qEMRjPRMh/y55xM8mMRMBBWEEzNJRMRo4arYMem2w+Bncct1+Xl2xq6ftL75acWE3Qo52f1RHw==",
|
||||
"version": "171.4.0",
|
||||
"resolved": "https://registry.npmjs.org/googleapis/-/googleapis-171.4.0.tgz",
|
||||
"integrity": "sha512-xybFL2SmmUgIifgsbsRQYRdNrSAYwxWZDmkZTGjUIaRnX5jPqR8el/cEvo6rCqh7iaZx6MfEPS/lrDgZ0bymkg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"google-auth-library": "^10.2.0",
|
||||
@@ -41301,6 +41269,7 @@
|
||||
"version": "2.2.3",
|
||||
"resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz",
|
||||
"integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"json5": "lib/cli.js"
|
||||
@@ -42619,20 +42588,6 @@
|
||||
"url": "https://opencollective.com/webpack"
|
||||
}
|
||||
},
|
||||
"node_modules/loader-utils": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz",
|
||||
"integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"big.js": "^5.2.2",
|
||||
"emojis-list": "^3.0.0",
|
||||
"json5": "^2.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/locate-path": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
|
||||
@@ -43189,9 +43144,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/markdown-to-jsx": {
|
||||
"version": "9.6.1",
|
||||
"resolved": "https://registry.npmjs.org/markdown-to-jsx/-/markdown-to-jsx-9.6.1.tgz",
|
||||
"integrity": "sha512-FLZPygHQzEGNUwIJ3Egdf3Lzm5M4ebswji8aMfM5tAq1vxkSw31a7OBmYXA3tg264PPyJw5UMDHw1fU+wFaQ9Q==",
|
||||
"version": "9.7.3",
|
||||
"resolved": "https://registry.npmjs.org/markdown-to-jsx/-/markdown-to-jsx-9.7.3.tgz",
|
||||
"integrity": "sha512-F+1BmeeUKNM7K2eDDaAOyrs1iusNNKbt3YyxYP2Al1Dr1op6hpk3/6wukArwPWh9d9O0C2ybiCTXc6L5CwKIHQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
@@ -44118,15 +44073,6 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/min-document": {
|
||||
"version": "2.19.1",
|
||||
"resolved": "https://registry.npmjs.org/min-document/-/min-document-2.19.1.tgz",
|
||||
"integrity": "sha512-8lqe85PkqQJzIcs2iD7xW/WSxcncC3/DPVbTOafKNJDIMXwGfwXS350mH4SJslomntN2iYtFBuC0yNO3CEap6g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dom-walk": "^0.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/min-indent": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz",
|
||||
@@ -50035,35 +49981,6 @@
|
||||
"react": ">=16.4.1"
|
||||
}
|
||||
},
|
||||
"node_modules/react-hot-loader": {
|
||||
"version": "4.13.1",
|
||||
"resolved": "https://registry.npmjs.org/react-hot-loader/-/react-hot-loader-4.13.1.tgz",
|
||||
"integrity": "sha512-ZlqCfVRqDJmMXTulUGic4lN7Ic1SXgHAFw7y/Jb7t25GBgTR0fYAJ8uY4mrpxjRyWGWmqw77qJQGnYbzCvBU7g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fast-levenshtein": "^2.0.6",
|
||||
"global": "^4.3.0",
|
||||
"hoist-non-react-statics": "^3.3.0",
|
||||
"loader-utils": "^2.0.3",
|
||||
"prop-types": "^15.6.1",
|
||||
"react-lifecycles-compat": "^3.0.4",
|
||||
"shallowequal": "^1.1.0",
|
||||
"source-map": "^0.7.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "^15.0.0 || ^16.0.0 || ^17.0.0",
|
||||
"react": "^15.0.0 || ^16.0.0 || ^17.0.0",
|
||||
"react-dom": "^15.0.0 || ^16.0.0 || ^17.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/react-intersection-observer": {
|
||||
"version": "10.0.2",
|
||||
"resolved": "https://registry.npmjs.org/react-intersection-observer/-/react-intersection-observer-10.0.2.tgz",
|
||||
@@ -50099,12 +50016,6 @@
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-lifecycles-compat": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/react-lifecycles-compat/-/react-lifecycles-compat-3.0.4.tgz",
|
||||
"integrity": "sha512-fBASbA6LnOU9dOU2eW7aQ8xmYBSXUIWr+UmF9b1efZBazGNO+rcXT/icdKnYm2pTwcRylVUYwW7H1PHfLekVzA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/react-lines-ellipsis": {
|
||||
"version": "0.16.1",
|
||||
"resolved": "https://registry.npmjs.org/react-lines-ellipsis/-/react-lines-ellipsis-0.16.1.tgz",
|
||||
@@ -50305,9 +50216,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/react-resize-detector": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://registry.npmjs.org/react-resize-detector/-/react-resize-detector-7.1.2.tgz",
|
||||
"integrity": "sha512-zXnPJ2m8+6oq9Nn8zsep/orts9vQv3elrpA+R8XTcW7DVVUJ9vwDwMXaBtykAYjMnkCIaOoK9vObyR7ZgFNlOw==",
|
||||
"version": "9.1.1",
|
||||
"resolved": "https://registry.npmjs.org/react-resize-detector/-/react-resize-detector-9.1.1.tgz",
|
||||
"integrity": "sha512-siLzop7i4xIvZIACE/PHTvRegA8QRCEt0TfmvJ/qCIFQJ4U+3NuYcF8tNDmDWxfIn+X1eNCyY2rauH4KRxge8w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.21"
|
||||
@@ -53409,12 +53320,6 @@
|
||||
"integrity": "sha512-b6i4ZpVuUxB9h5gfCxPiusKYkqTMOjEbBs4wMaFbkfia4yFv92UKZ6Df8WXcKbn08JNL/abvg3FnMAOfakDvUw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/shallowequal": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/shallowequal/-/shallowequal-1.1.0.tgz",
|
||||
"integrity": "sha512-y0m1JoUZSlPAjXVtPPW70aZWfIL/dSP7AFkRnniLCrK/8MDKog3TySTBmckD+RObVxH0v4Tox67+F14PdED2oQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/shapefile": {
|
||||
"version": "0.3.1",
|
||||
"resolved": "https://registry.npmjs.org/shapefile/-/shapefile-0.3.1.tgz",
|
||||
@@ -61020,7 +60925,7 @@
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
@@ -63820,7 +63725,7 @@
|
||||
"@types/d3-time-format": "^4.0.3",
|
||||
"@types/jquery": "^3.5.33",
|
||||
"@types/lodash": "^4.17.23",
|
||||
"@types/node": "^25.1.0",
|
||||
"@types/node": "^25.2.1",
|
||||
"@types/prop-types": "^15.7.15",
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@types/react-table": "^7.7.20",
|
||||
@@ -64880,6 +64785,19 @@
|
||||
"react-dom": ">=17.0.0"
|
||||
}
|
||||
},
|
||||
"packages/superset-ui-core/node_modules/react-resize-detector": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://registry.npmjs.org/react-resize-detector/-/react-resize-detector-7.1.2.tgz",
|
||||
"integrity": "sha512-zXnPJ2m8+6oq9Nn8zsep/orts9vQv3elrpA+R8XTcW7DVVUJ9vwDwMXaBtykAYjMnkCIaOoK9vObyR7ZgFNlOw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.21"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.0.0 || ^17.0.0 || ^18.0.0",
|
||||
"react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0"
|
||||
}
|
||||
},
|
||||
"packages/superset-ui-core/node_modules/react-syntax-highlighter": {
|
||||
"version": "16.1.0",
|
||||
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-16.1.0.tgz",
|
||||
@@ -64998,7 +64916,7 @@
|
||||
"react-resizable": "^3.1.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
@@ -65901,7 +65819,7 @@
|
||||
"@luma.gl/core": "~9.2.2",
|
||||
"@luma.gl/engine": "~9.2.6",
|
||||
"@luma.gl/shadertools": "~9.2.6",
|
||||
"@luma.gl/webgl": "~9.2.2",
|
||||
"@luma.gl/webgl": "~9.2.6",
|
||||
"@mapbox/geojson-extent": "^1.0.1",
|
||||
"@mapbox/tiny-sdf": "^2.0.7",
|
||||
"@math.gl/web-mercator": "^4.1.0",
|
||||
@@ -66052,9 +65970,9 @@
|
||||
}
|
||||
},
|
||||
"plugins/legacy-preset-chart-deckgl/node_modules/@luma.gl/constants": {
|
||||
"version": "9.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@luma.gl/constants/-/constants-9.2.4.tgz",
|
||||
"integrity": "sha512-Cy0OAg3uJbbHhPJGeik6ZhII0EMokTXmo6MtP7dyUrS+pSG5N176G4WYD9zS4DaJm2cLUW4UJzsu5B4Cd8rBuw==",
|
||||
"version": "9.2.6",
|
||||
"resolved": "https://registry.npmjs.org/@luma.gl/constants/-/constants-9.2.6.tgz",
|
||||
"integrity": "sha512-rvFFrJuSm5JIWbDHFuR4Q2s4eudO3Ggsv0TsGKn9eqvO7bBiPm/ANugHredvh3KviEyYuMZZxtfJvBdr3kzldg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"plugins/legacy-preset-chart-deckgl/node_modules/@luma.gl/engine": {
|
||||
@@ -66105,6 +66023,20 @@
|
||||
"@luma.gl/core": "~9.2.0"
|
||||
}
|
||||
},
|
||||
"plugins/legacy-preset-chart-deckgl/node_modules/@luma.gl/webgl": {
|
||||
"version": "9.2.6",
|
||||
"resolved": "https://registry.npmjs.org/@luma.gl/webgl/-/webgl-9.2.6.tgz",
|
||||
"integrity": "sha512-NGBTdxJMk7j8Ygr1zuTyAvr1Tw+EpupMIQo7RelFjEsZXg6pujFqiDMM+rgxex8voCeuhWBJc7Rs+MoSqd46UQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@luma.gl/constants": "9.2.6",
|
||||
"@math.gl/types": "^4.1.0",
|
||||
"@probe.gl/env": "^4.0.8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@luma.gl/core": "~9.2.0"
|
||||
}
|
||||
},
|
||||
"plugins/legacy-preset-chart-deckgl/node_modules/@mapbox/tiny-sdf": {
|
||||
"version": "2.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@mapbox/tiny-sdf/-/tiny-sdf-2.0.7.tgz",
|
||||
@@ -66400,20 +66332,6 @@
|
||||
"react-dom": "^17.0.2"
|
||||
}
|
||||
},
|
||||
"plugins/plugin-chart-pivot-table/node_modules/@babel/types": {
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
|
||||
"integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-string-parser": "^7.27.1",
|
||||
"@babel/helper-validator-identifier": "^7.28.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"plugins/plugin-chart-table": {
|
||||
"name": "@superset-ui/plugin-chart-table",
|
||||
"version": "0.20.3",
|
||||
|
||||
@@ -161,7 +161,7 @@
|
||||
"geostyler-openlayers-parser": "^4.3.0",
|
||||
"geostyler-style": "7.5.0",
|
||||
"geostyler-wfs-parser": "^2.0.3",
|
||||
"googleapis": "^171.2.0",
|
||||
"googleapis": "^171.4.0",
|
||||
"immer": "^11.1.3",
|
||||
"interweave": "^13.1.1",
|
||||
"jquery": "^4.0.0",
|
||||
@@ -171,7 +171,7 @@
|
||||
"json-stringify-pretty-compact": "^2.0.0",
|
||||
"lodash": "^4.17.23",
|
||||
"mapbox-gl": "^3.18.1",
|
||||
"markdown-to-jsx": "^9.6.1",
|
||||
"markdown-to-jsx": "^9.7.3",
|
||||
"match-sorter": "^6.3.4",
|
||||
"memoize-one": "^5.2.1",
|
||||
"mousetrap": "^1.6.5",
|
||||
@@ -189,13 +189,12 @@
|
||||
"react-dnd-html5-backend": "^11.1.3",
|
||||
"react-dom": "^17.0.2",
|
||||
"react-google-recaptcha": "^3.1.0",
|
||||
"react-hot-loader": "^4.13.1",
|
||||
"react-intersection-observer": "^10.0.2",
|
||||
"react-json-tree": "^0.20.0",
|
||||
"react-lines-ellipsis": "^0.16.1",
|
||||
"react-loadable": "^5.5.0",
|
||||
"react-redux": "^7.2.9",
|
||||
"react-resize-detector": "^7.1.2",
|
||||
"react-resize-detector": "^9.1.1",
|
||||
"react-reverse-portal": "^2.3.0",
|
||||
"react-router-dom": "^5.3.4",
|
||||
"react-search-input": "^0.11.3",
|
||||
@@ -226,9 +225,9 @@
|
||||
"@applitools/eyes-storybook": "^3.63.10",
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/compat-data": "^7.28.4",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/eslint-parser": "^7.28.6",
|
||||
"@babel/node": "^7.28.6",
|
||||
"@babel/node": "^7.29.0",
|
||||
"@babel/plugin-syntax-dynamic-import": "^7.8.3",
|
||||
"@babel/plugin-transform-export-namespace-from": "^7.27.1",
|
||||
"@babel/plugin-transform-modules-commonjs": "^7.28.6",
|
||||
@@ -243,7 +242,6 @@
|
||||
"@cypress/react": "^8.0.2",
|
||||
"@emotion/babel-plugin": "^11.13.5",
|
||||
"@emotion/jest": "^11.14.2",
|
||||
"@hot-loader/react-dom": "^17.0.2",
|
||||
"@istanbuljs/nyc-config-typescript": "^1.0.1",
|
||||
"@mihkeleidast/storybook-addon-source": "^1.0.1",
|
||||
"@playwright/test": "^1.58.1",
|
||||
@@ -274,7 +272,7 @@
|
||||
"@types/js-levenshtein": "^1.1.3",
|
||||
"@types/json-bigint": "^1.0.4",
|
||||
"@types/mousetrap": "^1.6.15",
|
||||
"@types/node": "^25.1.0",
|
||||
"@types/node": "^25.2.1",
|
||||
"@types/react": "^17.0.83",
|
||||
"@types/react-dom": "^17.0.26",
|
||||
"@types/react-loadable": "^5.5.11",
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
|
||||
@@ -78,7 +78,7 @@
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@types/jquery": "^3.5.33",
|
||||
"@types/lodash": "^4.17.23",
|
||||
"@types/node": "^25.1.0",
|
||||
"@types/node": "^25.2.1",
|
||||
"@types/prop-types": "^15.7.15",
|
||||
"@types/rison": "0.1.0",
|
||||
"@types/seedrandom": "^3.0.8",
|
||||
|
||||
@@ -19,16 +19,25 @@
|
||||
|
||||
import { DatasourceType } from './types/Datasource';
|
||||
|
||||
const DATASOURCE_TYPE_MAP: Record<string, DatasourceType> = {
|
||||
table: DatasourceType.Table,
|
||||
query: DatasourceType.Query,
|
||||
dataset: DatasourceType.Dataset,
|
||||
sl_table: DatasourceType.SlTable,
|
||||
saved_query: DatasourceType.SavedQuery,
|
||||
semantic_view: DatasourceType.SemanticView,
|
||||
};
|
||||
|
||||
export default class DatasourceKey {
|
||||
readonly id: number;
|
||||
readonly id: number | string;
|
||||
|
||||
readonly type: DatasourceType;
|
||||
|
||||
constructor(key: string) {
|
||||
const [idStr, typeStr] = key.split('__');
|
||||
this.id = parseInt(idStr, 10);
|
||||
this.type = DatasourceType.Table; // default to SqlaTable model
|
||||
this.type = typeStr === 'query' ? DatasourceType.Query : this.type;
|
||||
const isNumeric = /^\d+$/.test(idStr);
|
||||
this.id = isNumeric ? parseInt(idStr, 10) : idStr;
|
||||
this.type = DATASOURCE_TYPE_MAP[typeStr] ?? DatasourceType.Table;
|
||||
}
|
||||
|
||||
public toString() {
|
||||
|
||||
@@ -26,6 +26,7 @@ export enum DatasourceType {
|
||||
Dataset = 'dataset',
|
||||
SlTable = 'sl_table',
|
||||
SavedQuery = 'saved_query',
|
||||
SemanticView = 'semantic_view',
|
||||
}
|
||||
|
||||
export interface Currency {
|
||||
@@ -37,7 +38,7 @@ export interface Currency {
|
||||
* Datasource metadata.
|
||||
*/
|
||||
export interface Datasource {
|
||||
id: number;
|
||||
id: number | string;
|
||||
name: string;
|
||||
type: DatasourceType;
|
||||
columns: Column[];
|
||||
|
||||
@@ -159,7 +159,7 @@ export interface QueryObject
|
||||
|
||||
export interface QueryContext {
|
||||
datasource: {
|
||||
id: number;
|
||||
id: number | string;
|
||||
type: DatasourceType;
|
||||
};
|
||||
/** Force refresh of all queries */
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
"react-resizable": "^3.1.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
"@luma.gl/core": "~9.2.2",
|
||||
"@luma.gl/engine": "~9.2.6",
|
||||
"@luma.gl/shadertools": "~9.2.6",
|
||||
"@luma.gl/webgl": "~9.2.2",
|
||||
"@luma.gl/webgl": "~9.2.6",
|
||||
"@mapbox/tiny-sdf": "^2.0.7",
|
||||
"@mapbox/geojson-extent": "^1.0.1",
|
||||
"@math.gl/web-mercator": "^4.1.0",
|
||||
|
||||
@@ -35,7 +35,7 @@ import {
|
||||
getStandardizedControls,
|
||||
} from '@superset-ui/chart-controls';
|
||||
import { DEFAULT_FORM_DATA } from './types';
|
||||
import { LABEL_POSITION } from '../constants';
|
||||
import { LabelPositionEnum } from '../types';
|
||||
import { legendSection } from '../controls';
|
||||
|
||||
const { labelType, labelPosition, numberFormat, showLabels, isCircle } =
|
||||
@@ -72,6 +72,22 @@ const radarMetricMinValue: { name: string; config: ControlFormItemSpec } = {
|
||||
},
|
||||
};
|
||||
|
||||
const getLabelPositionOptions = (): [LabelPositionEnum, string][] => [
|
||||
[LabelPositionEnum.Top, t('Top')],
|
||||
[LabelPositionEnum.Left, t('Left')],
|
||||
[LabelPositionEnum.Right, t('Right')],
|
||||
[LabelPositionEnum.Bottom, t('Bottom')],
|
||||
[LabelPositionEnum.Inside, t('Inside')],
|
||||
[LabelPositionEnum.InsideLeft, t('Inside left')],
|
||||
[LabelPositionEnum.InsideRight, t('Inside right')],
|
||||
[LabelPositionEnum.InsideTop, t('Inside top')],
|
||||
[LabelPositionEnum.InsideBottom, t('Inside bottom')],
|
||||
[LabelPositionEnum.InsideTopLeft, t('Inside top left')],
|
||||
[LabelPositionEnum.InsideBottomLeft, t('Inside bottom left')],
|
||||
[LabelPositionEnum.InsideTopRight, t('Inside top right')],
|
||||
[LabelPositionEnum.InsideBottomRight, t('Inside bottom right')],
|
||||
];
|
||||
|
||||
const config: ControlPanelConfig = {
|
||||
controlPanelSections: [
|
||||
{
|
||||
@@ -136,7 +152,7 @@ const config: ControlPanelConfig = {
|
||||
freeForm: false,
|
||||
label: t('Label position'),
|
||||
renderTrigger: true,
|
||||
choices: LABEL_POSITION,
|
||||
choices: getLabelPositionOptions(),
|
||||
default: labelPosition,
|
||||
description: D3_FORMAT_DOCS,
|
||||
},
|
||||
|
||||
@@ -21,7 +21,6 @@ import { t } from '@apache-superset/core';
|
||||
import { JsonValue, TimeGranularity } from '@superset-ui/core';
|
||||
import { ReactNode } from 'react';
|
||||
import {
|
||||
LabelPositionEnum,
|
||||
LegendFormData,
|
||||
LegendOrientation,
|
||||
LegendType,
|
||||
@@ -50,22 +49,6 @@ export const TIMESERIES_CONSTANTS = {
|
||||
horizontalBarLabelRightPadding: 70,
|
||||
};
|
||||
|
||||
export const LABEL_POSITION: [LabelPositionEnum, string][] = [
|
||||
[LabelPositionEnum.Top, 'Top'],
|
||||
[LabelPositionEnum.Left, 'Left'],
|
||||
[LabelPositionEnum.Right, 'Right'],
|
||||
[LabelPositionEnum.Bottom, 'Bottom'],
|
||||
[LabelPositionEnum.Inside, 'Inside'],
|
||||
[LabelPositionEnum.InsideLeft, 'Inside left'],
|
||||
[LabelPositionEnum.InsideRight, 'Inside right'],
|
||||
[LabelPositionEnum.InsideTop, 'Inside top'],
|
||||
[LabelPositionEnum.InsideBottom, 'Inside bottom'],
|
||||
[LabelPositionEnum.InsideTopLeft, 'Inside top left'],
|
||||
[LabelPositionEnum.InsideBottomLeft, 'Inside bottom left'],
|
||||
[LabelPositionEnum.InsideTopRight, 'Inside top right'],
|
||||
[LabelPositionEnum.InsideBottomRight, 'Inside bottom right'],
|
||||
];
|
||||
|
||||
export enum OpacityEnum {
|
||||
Transparent = 0,
|
||||
SemiTransparent = 0.3,
|
||||
|
||||
@@ -90,11 +90,15 @@ const ModalFooter = ({ formData, closeModal }: ModalFooterProps) => {
|
||||
findPermission('can_explore', 'Superset', state.user?.roles),
|
||||
);
|
||||
|
||||
const [datasource_id, datasource_type] = formData.datasource.split('__');
|
||||
const [datasourceIdStr, datasource_type] = formData.datasource.split('__');
|
||||
const isNumeric = /^\d+$/.test(datasourceIdStr);
|
||||
const datasource_id = isNumeric
|
||||
? parseInt(datasourceIdStr, 10)
|
||||
: datasourceIdStr;
|
||||
useEffect(() => {
|
||||
// short circuit if the user is embedded as explore is not available
|
||||
if (isEmbedded()) return;
|
||||
postFormData(Number(datasource_id), datasource_type, formData, 0)
|
||||
postFormData(datasource_id, datasource_type, formData, 0)
|
||||
.then(key => {
|
||||
setUrl(
|
||||
`/explore/?form_data_key=${key}&dashboard_page_id=${dashboardPageId}`,
|
||||
|
||||
@@ -272,7 +272,7 @@ export type Slice = {
|
||||
changed_on: number;
|
||||
changed_on_humanized: string;
|
||||
modified: string;
|
||||
datasource_id: number;
|
||||
datasource_id: number | string;
|
||||
datasource_type: DatasourceType;
|
||||
datasource_url: string;
|
||||
datasource_name: string;
|
||||
|
||||
@@ -144,12 +144,13 @@ export const getSlicePayload = async (
|
||||
...adhocFilters,
|
||||
dashboards,
|
||||
};
|
||||
let datasourceId = 0;
|
||||
let datasourceId: number | string = 0;
|
||||
let datasourceType: DatasourceType = DatasourceType.Table;
|
||||
|
||||
if (formData.datasource) {
|
||||
const [id, typeString] = formData.datasource.split('__');
|
||||
datasourceId = parseInt(id, 10);
|
||||
const isNumeric = /^\d+$/.test(id);
|
||||
datasourceId = isNumeric ? parseInt(id, 10) : id;
|
||||
|
||||
const formattedTypeString =
|
||||
typeString.charAt(0).toUpperCase() + typeString.slice(1);
|
||||
|
||||
@@ -20,7 +20,7 @@ import { SupersetClient, JsonObject, JsonResponse } from '@superset-ui/core';
|
||||
import { sanitizeFormData } from 'src/utils/sanitizeFormData';
|
||||
|
||||
type Payload = {
|
||||
datasource_id: number;
|
||||
datasource_id: number | string;
|
||||
datasource_type: string;
|
||||
form_data: string;
|
||||
chart_id?: number;
|
||||
@@ -36,7 +36,7 @@ const assembleEndpoint = (key?: string, tabId?: string) => {
|
||||
};
|
||||
|
||||
const assemblePayload = (
|
||||
datasourceId: number,
|
||||
datasourceId: number | string,
|
||||
datasourceType: string,
|
||||
formData: JsonObject,
|
||||
chartId?: number,
|
||||
@@ -53,7 +53,7 @@ const assemblePayload = (
|
||||
};
|
||||
|
||||
export const postFormData = (
|
||||
datasourceId: number,
|
||||
datasourceId: number | string,
|
||||
datasourceType: string,
|
||||
formData: JsonObject,
|
||||
chartId?: number,
|
||||
@@ -70,7 +70,7 @@ export const postFormData = (
|
||||
}).then((r: JsonResponse) => r.json.key);
|
||||
|
||||
export const putFormData = (
|
||||
datasourceId: number,
|
||||
datasourceId: number | string,
|
||||
datasourceType: string,
|
||||
key: string,
|
||||
formData: JsonObject,
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import { setConfig as setHotLoaderConfig } from 'react-hot-loader';
|
||||
import { configure, LanguagePack } from '@apache-superset/core/ui';
|
||||
import { makeApi, initFeatureFlags, SupersetClient } from '@superset-ui/core';
|
||||
import { extendedDayjs as dayjs } from '@superset-ui/core/utils/dates';
|
||||
@@ -40,11 +39,6 @@ import 'dayjs/plugin/localizedFormat';
|
||||
|
||||
configure();
|
||||
|
||||
// Set hot reloader config
|
||||
if (process.env.WEBPACK_MODE === 'development') {
|
||||
setHotLoaderConfig({ logLevel: 'debug', trackTailUpdates: false });
|
||||
}
|
||||
|
||||
// Grab initial bootstrap data
|
||||
const bootstrapData = getBootstrapData();
|
||||
|
||||
|
||||
@@ -17,3 +17,9 @@
|
||||
* under the License.
|
||||
*/
|
||||
import 'src/public-path';
|
||||
|
||||
// Accept HMR updates for this entry point
|
||||
declare const module: { hot?: { accept: () => void } };
|
||||
if (module.hot) {
|
||||
module.hot.accept();
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
* under the License.
|
||||
*/
|
||||
import { Suspense, useEffect } from 'react';
|
||||
import { hot } from 'react-hot-loader/root';
|
||||
import {
|
||||
BrowserRouter as Router,
|
||||
Switch,
|
||||
@@ -110,4 +109,4 @@ const App = () => (
|
||||
</Router>
|
||||
);
|
||||
|
||||
export default hot(App);
|
||||
export default App;
|
||||
|
||||
@@ -239,16 +239,10 @@ if (!isDevMode) {
|
||||
);
|
||||
}
|
||||
|
||||
const PREAMBLE = [path.join(APP_DIR, '/src/preamble.ts')];
|
||||
if (isDevMode) {
|
||||
// A Superset webpage normally includes two JS bundles in dev, `theme.ts` and
|
||||
// the main entrypoint. Only the main entry should have the dev server client,
|
||||
// otherwise the websocket client will initialize twice, creating two sockets.
|
||||
// Ref: https://github.com/gaearon/react-hot-loader/issues/141
|
||||
PREAMBLE.unshift(
|
||||
`webpack-dev-server/client?http://localhost:${devserverPort}`,
|
||||
);
|
||||
}
|
||||
// In dev mode, include theme.ts in preamble to avoid separate chunk HMR issues
|
||||
const PREAMBLE = isDevMode
|
||||
? [path.join(APP_DIR, 'src/theme.ts'), path.join(APP_DIR, 'src/preamble.ts')]
|
||||
: [path.join(APP_DIR, 'src/preamble.ts')];
|
||||
|
||||
function addPreamble(entry) {
|
||||
return PREAMBLE.concat([path.join(APP_DIR, entry)]);
|
||||
@@ -316,18 +310,29 @@ function createSwcLoader(syntax = 'typescript', tsx = true) {
|
||||
const config = {
|
||||
entry: {
|
||||
preamble: PREAMBLE,
|
||||
theme: path.join(APP_DIR, '/src/theme.ts'),
|
||||
// In dev mode, theme is included in preamble to avoid separate chunk HMR issues
|
||||
...(isDevMode ? {} : { theme: path.join(APP_DIR, 'src/theme.ts') }),
|
||||
menu: addPreamble('src/views/menu.tsx'),
|
||||
spa: addPreamble('/src/views/index.tsx'),
|
||||
embedded: addPreamble('/src/embedded/index.tsx'),
|
||||
spa: addPreamble('src/views/index.tsx'),
|
||||
embedded: addPreamble('src/embedded/index.tsx'),
|
||||
'service-worker': path.join(APP_DIR, 'src/service-worker.ts'),
|
||||
},
|
||||
cache: {
|
||||
type: 'filesystem', // Enable filesystem caching
|
||||
type: 'filesystem',
|
||||
cacheDirectory: path.resolve(__dirname, '.temp_cache'),
|
||||
// Separate cache for dev vs prod builds
|
||||
name: `${isDevMode ? 'development' : 'production'}-cache`,
|
||||
// Invalidate cache when these files change
|
||||
buildDependencies: {
|
||||
config: [__filename],
|
||||
config: [
|
||||
__filename,
|
||||
path.resolve(__dirname, 'package-lock.json'),
|
||||
path.resolve(__dirname, 'babel.config.js'),
|
||||
path.resolve(__dirname, 'tsconfig.json'),
|
||||
],
|
||||
},
|
||||
// Compress cache for smaller disk usage (slight CPU tradeoff)
|
||||
compression: isDevMode ? false : 'gzip',
|
||||
},
|
||||
output,
|
||||
stats: 'minimal',
|
||||
@@ -377,11 +382,9 @@ const config = {
|
||||
'prop-types-extra',
|
||||
'redux',
|
||||
'react-redux',
|
||||
'react-hot-loader',
|
||||
'react-sortable-hoc',
|
||||
'react-table',
|
||||
'react-ace',
|
||||
'@hot-loader.*',
|
||||
'webpack.*',
|
||||
'@?babel.*',
|
||||
'lodash.*',
|
||||
@@ -490,7 +493,10 @@ const config = {
|
||||
{
|
||||
test: /\.tsx?$/,
|
||||
exclude: [/\.test.tsx?$/, /node_modules/],
|
||||
use: ['thread-loader', createSwcLoader('typescript', true)],
|
||||
// Skip thread-loader in dev mode - it breaks HMR by running in worker threads
|
||||
use: isDevMode
|
||||
? [createSwcLoader('typescript', true)]
|
||||
: ['thread-loader', createSwcLoader('typescript', true)],
|
||||
},
|
||||
{
|
||||
test: /\.jsx?$/,
|
||||
@@ -647,10 +653,12 @@ if (isDevMode) {
|
||||
|
||||
config.devServer = {
|
||||
devMiddleware: {
|
||||
publicPath: '/static/assets/',
|
||||
writeToDisk: true,
|
||||
},
|
||||
historyApiFallback: true,
|
||||
hot: true,
|
||||
hot: 'only', // HMR only, no page reload fallback
|
||||
liveReload: false,
|
||||
host: devserverHost,
|
||||
port: devserverPort,
|
||||
allowedHosts: [
|
||||
@@ -670,7 +678,7 @@ if (isDevMode) {
|
||||
warnings: false,
|
||||
runtimeErrors: error => !/ResizeObserver/.test(error.message),
|
||||
},
|
||||
logging: 'error',
|
||||
logging: 'info', // Show HMR messages
|
||||
webSocketURL: {
|
||||
hostname: '0.0.0.0',
|
||||
pathname: '/ws',
|
||||
|
||||
@@ -50,6 +50,7 @@ function toDevHTML(originalHtml) {
|
||||
/(<head>\s*<title>)([\s\S]*)(<\/title>)/i,
|
||||
'$1[DEV] $2 $3',
|
||||
);
|
||||
|
||||
if (manifest) {
|
||||
const loaded = new Set();
|
||||
// replace bundled asset files, HTML comment tags generated by Jinja macros
|
||||
@@ -153,7 +154,17 @@ function processHTML(proxyResponse, response) {
|
||||
module.exports = newManifest => {
|
||||
manifest = newManifest;
|
||||
return {
|
||||
context: '/',
|
||||
context: path => {
|
||||
// Don't proxy hot update files - webpack-dev-server needs to serve these directly for HMR
|
||||
if (path.includes('.hot-update.')) {
|
||||
return false;
|
||||
}
|
||||
// Don't proxy WebSocket connections for HMR
|
||||
if (path === '/ws') {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
},
|
||||
target: backend,
|
||||
hostRewrite: true,
|
||||
changeOrigin: true,
|
||||
@@ -163,6 +174,9 @@ module.exports = newManifest => {
|
||||
try {
|
||||
copyHeaders(proxyResponse, response);
|
||||
if (isHTML(response)) {
|
||||
// For HTML responses, flush headers before processing starts
|
||||
// processHTML sets up async handlers that will call response.end()
|
||||
response.flushHeaders();
|
||||
processHTML(proxyResponse, response);
|
||||
} else {
|
||||
const isCSV = (proxyResponse.headers['content-type'] || '').includes(
|
||||
@@ -170,6 +184,7 @@ module.exports = newManifest => {
|
||||
);
|
||||
|
||||
if (isCSV) {
|
||||
response.flushHeaders();
|
||||
proxyResponse.on('data', chunk => {
|
||||
response.write(chunk);
|
||||
if (response.flush) {
|
||||
@@ -183,12 +198,15 @@ module.exports = newManifest => {
|
||||
response.end();
|
||||
});
|
||||
} else {
|
||||
response.flushHeaders();
|
||||
proxyResponse.pipe(response);
|
||||
}
|
||||
}
|
||||
response.flushHeaders();
|
||||
} catch (e) {
|
||||
response.setHeader('content-type', 'text/plain');
|
||||
// Only try to set headers if they haven't been sent yet
|
||||
if (!response.headersSent) {
|
||||
response.setHeader('content-type', 'text/plain');
|
||||
}
|
||||
response.write(`Error requesting ${request.path} from proxy:\n\n`);
|
||||
response.end(e.stack);
|
||||
}
|
||||
|
||||
14
superset-websocket/package-lock.json
generated
14
superset-websocket/package-lock.json
generated
@@ -25,7 +25,7 @@
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/jsonwebtoken": "^9.0.10",
|
||||
"@types/lodash": "^4.17.23",
|
||||
"@types/node": "^25.2.0",
|
||||
"@types/node": "^25.2.1",
|
||||
"@types/uuid": "^10.0.0",
|
||||
"@types/ws": "^8.18.1",
|
||||
"@typescript-eslint/eslint-plugin": "^8.54.0",
|
||||
@@ -1823,9 +1823,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.0.tgz",
|
||||
"integrity": "sha512-DZ8VwRFUNzuqJ5khrvwMXHmvPe+zGayJhr2CDNiKB1WBE1ST8Djl00D0IC4vvNmHMdj6DlbYRIaFE7WHjlDl5w==",
|
||||
"version": "25.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.1.tgz",
|
||||
"integrity": "sha512-CPrnr8voK8vC6eEtyRzvMpgp3VyVRhgclonE7qYi6P9sXwYb59ucfrnmFBTaP0yUi8Gk4yZg/LlTJULGxvTNsg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@@ -7944,9 +7944,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"@types/node": {
|
||||
"version": "25.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.0.tgz",
|
||||
"integrity": "sha512-DZ8VwRFUNzuqJ5khrvwMXHmvPe+zGayJhr2CDNiKB1WBE1ST8Djl00D0IC4vvNmHMdj6DlbYRIaFE7WHjlDl5w==",
|
||||
"version": "25.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.1.tgz",
|
||||
"integrity": "sha512-CPrnr8voK8vC6eEtyRzvMpgp3VyVRhgclonE7qYi6P9sXwYb59ucfrnmFBTaP0yUi8Gk4yZg/LlTJULGxvTNsg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"undici-types": "~7.16.0"
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/jsonwebtoken": "^9.0.10",
|
||||
"@types/lodash": "^4.17.23",
|
||||
"@types/node": "^25.2.0",
|
||||
"@types/node": "^25.2.1",
|
||||
"@types/uuid": "^10.0.0",
|
||||
"@types/ws": "^8.18.1",
|
||||
"@typescript-eslint/eslint-plugin": "^8.54.0",
|
||||
|
||||
@@ -18,6 +18,7 @@ import contextlib
|
||||
import logging
|
||||
from abc import ABC
|
||||
from typing import Any, cast, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from flask import request
|
||||
from flask_babel import lazy_gettext as _
|
||||
@@ -100,9 +101,12 @@ class GetExploreCommand(BaseCommand, ABC):
|
||||
use_slice_data=True,
|
||||
initial_form_data=initial_form_data,
|
||||
)
|
||||
ds_id: int | UUID | None = None
|
||||
try:
|
||||
self._datasource_id, self._datasource_type = get_datasource_info(
|
||||
self._datasource_id, self._datasource_type, form_data
|
||||
ds_id, self._datasource_type = get_datasource_info(
|
||||
self._datasource_id,
|
||||
self._datasource_type,
|
||||
form_data,
|
||||
)
|
||||
except SupersetException:
|
||||
self._datasource_id = None
|
||||
@@ -111,10 +115,11 @@ class GetExploreCommand(BaseCommand, ABC):
|
||||
|
||||
datasource: Optional[BaseDatasource] = None
|
||||
|
||||
if self._datasource_id is not None:
|
||||
if ds_id is not None:
|
||||
with contextlib.suppress(DatasourceNotFound):
|
||||
datasource = DatasourceDAO.get_datasource(
|
||||
cast(str, self._datasource_type), self._datasource_id
|
||||
cast(str, self._datasource_type),
|
||||
ds_id,
|
||||
)
|
||||
|
||||
datasource_name = _("[Missing Dataset]")
|
||||
@@ -124,7 +129,11 @@ class GetExploreCommand(BaseCommand, ABC):
|
||||
security_manager.raise_for_access(datasource=datasource)
|
||||
|
||||
viz_type = form_data.get("viz_type")
|
||||
if not viz_type and datasource and datasource.default_endpoint:
|
||||
if (
|
||||
not viz_type
|
||||
and datasource
|
||||
and getattr(datasource, "default_endpoint", None)
|
||||
):
|
||||
raise WrongEndpointError(redirect=datasource.default_endpoint)
|
||||
|
||||
form_data["datasource"] = (
|
||||
|
||||
@@ -14,14 +14,17 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class CommandParameters:
|
||||
permalink_key: Optional[str]
|
||||
form_data_key: Optional[str]
|
||||
datasource_id: Optional[int]
|
||||
datasource_type: Optional[str]
|
||||
permalink_key: str | None
|
||||
form_data_key: str | None
|
||||
datasource_id: int | str | None
|
||||
datasource_type: str | None
|
||||
slice_id: Optional[int]
|
||||
|
||||
@@ -107,6 +107,8 @@ from superset.sql.parse import Table
|
||||
from superset.superset_typing import (
|
||||
AdhocColumn,
|
||||
AdhocMetric,
|
||||
DatasetColumnData,
|
||||
DatasetMetricData,
|
||||
ExplorableData,
|
||||
Metric,
|
||||
QueryObjectDict,
|
||||
@@ -463,8 +465,8 @@ class BaseDatasource(
|
||||
# sqla-specific
|
||||
"sql": self.sql,
|
||||
# one to many
|
||||
"columns": [o.data for o in self.columns],
|
||||
"metrics": [o.data for o in self.metrics],
|
||||
"columns": [cast(DatasetColumnData, o.data) for o in self.columns],
|
||||
"metrics": [cast(DatasetMetricData, o.data) for o in self.metrics],
|
||||
"folders": self.folders,
|
||||
# TODO deprecate, move logic to JS
|
||||
"order_by_choices": self.order_by_choices,
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Union
|
||||
from uuid import UUID
|
||||
|
||||
from superset import db
|
||||
from superset.connectors.sqla.models import SqlaTable
|
||||
@@ -28,6 +28,7 @@ from superset.daos.exceptions import (
|
||||
DatasourceValueIsIncorrect,
|
||||
)
|
||||
from superset.models.sql_lab import Query, SavedQuery
|
||||
from superset.semantic_layers.models import SemanticView
|
||||
from superset.utils.core import DatasourceType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -40,13 +41,14 @@ class DatasourceDAO(BaseDAO[Datasource]):
|
||||
DatasourceType.TABLE: SqlaTable,
|
||||
DatasourceType.QUERY: Query,
|
||||
DatasourceType.SAVEDQUERY: SavedQuery,
|
||||
DatasourceType.SEMANTIC_VIEW: SemanticView,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_datasource(
|
||||
cls,
|
||||
datasource_type: Union[DatasourceType, str],
|
||||
database_id_or_uuid: int | str,
|
||||
database_id_or_uuid: int | str | UUID,
|
||||
) -> Datasource:
|
||||
if datasource_type not in cls.sources:
|
||||
raise DatasourceTypeNotSupportedError()
|
||||
@@ -57,7 +59,7 @@ class DatasourceDAO(BaseDAO[Datasource]):
|
||||
filter = model.id == int(database_id_or_uuid)
|
||||
else:
|
||||
try:
|
||||
uuid.UUID(str(database_id_or_uuid)) # uuid validation
|
||||
UUID(str(database_id_or_uuid)) # uuid validation
|
||||
filter = model.uuid == database_id_or_uuid
|
||||
except ValueError as err:
|
||||
logger.warning(
|
||||
|
||||
152
superset/daos/semantic_layer.py
Normal file
152
superset/daos/semantic_layer.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""DAOs for semantic layer models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from superset.daos.base import BaseDAO
|
||||
from superset.extensions import db
|
||||
from superset.semantic_layers.models import SemanticLayer, SemanticView
|
||||
|
||||
|
||||
class SemanticLayerDAO(BaseDAO[SemanticLayer]):
    """
    DAO providing name lookups and uniqueness checks for SemanticLayer.
    """

    @staticmethod
    def validate_uniqueness(name: str) -> bool:
        """
        Check that no semantic layer is already stored under ``name``.

        :param name: Candidate semantic layer name
        :return: True when no existing layer has this name, False otherwise
        """
        same_name = db.session.query(SemanticLayer).filter(SemanticLayer.name == name)
        name_taken = db.session.query(same_name.exists()).scalar()
        return not name_taken

    @staticmethod
    def validate_update_uniqueness(layer_uuid: str, name: str) -> bool:
        """
        Check that renaming a layer to ``name`` would not clash with another one.

        The layer identified by ``layer_uuid`` is excluded from the check, so
        keeping its current name is always accepted.

        :param layer_uuid: UUID of the semantic layer being updated
        :param name: Name the layer should get
        :return: True when no *other* layer has this name, False otherwise
        """
        others_with_name = (
            db.session.query(SemanticLayer)
            .filter(SemanticLayer.name == name)
            .filter(SemanticLayer.uuid != layer_uuid)
        )
        clash_found = db.session.query(others_with_name.exists()).scalar()
        return not clash_found

    @staticmethod
    def find_by_name(name: str) -> SemanticLayer | None:
        """
        Look up a semantic layer by its name.

        :param name: Semantic layer name
        :return: The matching SemanticLayer, or None when there is no match
        """
        lookup = db.session.query(SemanticLayer).filter(SemanticLayer.name == name)
        # one_or_none() yields None for zero rows and raises if several match.
        return lookup.one_or_none()

    @classmethod
    def get_semantic_views(cls, layer_uuid: str) -> list[SemanticView]:
        """
        Return every semantic view that belongs to the given layer.

        :param layer_uuid: UUID of the semantic layer
        :return: List of SemanticView instances (empty when there are none)
        """
        belongs_to_layer = SemanticView.semantic_layer_uuid == layer_uuid
        return db.session.query(SemanticView).filter(belongs_to_layer).all()
|
||||
|
||||
|
||||
class SemanticViewDAO(BaseDAO[SemanticView]):
    """DAO providing per-layer lookups and uniqueness checks for SemanticView."""

    @staticmethod
    def find_by_semantic_layer(layer_uuid: str) -> list[SemanticView]:
        """
        Return all views attached to one semantic layer.

        :param layer_uuid: UUID of the semantic layer
        :return: List of SemanticView instances (empty when there are none)
        """
        belongs_to_layer = SemanticView.semantic_layer_uuid == layer_uuid
        return db.session.query(SemanticView).filter(belongs_to_layer).all()

    @staticmethod
    def validate_uniqueness(name: str, layer_uuid: str) -> bool:
        """
        Check that ``name`` is not yet used by a view of the given layer.

        :param name: Candidate view name
        :param layer_uuid: UUID of the semantic layer
        :return: True when the layer has no view with this name, False otherwise
        """
        clashing = (
            db.session.query(SemanticView)
            .filter(SemanticView.name == name)
            .filter(SemanticView.semantic_layer_uuid == layer_uuid)
        )
        clash_found = db.session.query(clashing.exists()).scalar()
        return not clash_found

    @staticmethod
    def validate_update_uniqueness(view_uuid: str, name: str, layer_uuid: str) -> bool:
        """
        Check that renaming a view to ``name`` keeps names unique in its layer.

        The view identified by ``view_uuid`` is left out of the comparison, so
        keeping its current name is always accepted.

        :param view_uuid: UUID of the view being updated
        :param name: Name the view should get
        :param layer_uuid: UUID of the semantic layer
        :return: True when no *other* view of the layer has this name
        """
        other_views = (
            db.session.query(SemanticView)
            .filter(SemanticView.name == name)
            .filter(SemanticView.semantic_layer_uuid == layer_uuid)
            .filter(SemanticView.uuid != view_uuid)
        )
        clash_found = db.session.query(other_views.exists()).scalar()
        return not clash_found

    @staticmethod
    def find_by_name(name: str, layer_uuid: str) -> SemanticView | None:
        """
        Look up a view by name inside one semantic layer.

        :param name: View name
        :param layer_uuid: UUID of the semantic layer
        :return: The matching SemanticView, or None when there is no match
        """
        conditions = (
            SemanticView.name == name,
            SemanticView.semantic_layer_uuid == layer_uuid,
        )
        # one_or_none() yields None for zero rows and raises if several match.
        return db.session.query(SemanticView).filter(*conditions).one_or_none()
|
||||
@@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict):
|
||||
duration: str | None
|
||||
|
||||
|
||||
@runtime_checkable
class MetricMetadata(Protocol):
    """
    Structural type for the metric objects an explorable exposes.

    A metric carries either a SQL expression or a reference to a semantic
    layer measure, plus optional display and certification metadata. Any
    object offering the read-only attributes below satisfies the protocol;
    because it is runtime-checkable, ``isinstance`` can be used as well.
    """

    @property
    def metric_name(self) -> str:
        """Name that uniquely identifies the metric."""
        ...

    @property
    def expression(self) -> str:
        """SQL expression (or reference) used to compute the metric."""
        ...

    @property
    def verbose_name(self) -> str | None:
        """Display name shown in the UI, if any."""
        ...

    @property
    def description(self) -> str | None:
        """Explanation of what the metric measures, if any."""
        ...

    @property
    def d3format(self) -> str | None:
        """D3 format string applied to numeric values, if any."""
        ...

    @property
    def currency(self) -> dict[str, Any] | None:
        """Currency configuration as a JSON-style object, if any."""
        ...

    @property
    def warning_text(self) -> str | None:
        """Warning shown to users who pick this metric, if any."""
        ...

    @property
    def certified_by(self) -> str | None:
        """Person or entity that certified the metric, if any."""
        ...

    @property
    def certification_details(self) -> str | None:
        """Further details about the certification, if any."""
        ...
|
||||
|
||||
|
||||
@runtime_checkable
class ColumnMetadata(Protocol):
    """
    Structural type for the column (dimension) objects an explorable exposes.

    Columns are what charts group and filter by. Any object offering the
    read-only attributes below satisfies the protocol; because it is
    runtime-checkable, ``isinstance`` can be used as well.
    """

    @property
    def column_name(self) -> str:
        """Name that uniquely identifies the column."""
        ...

    @property
    def type(self) -> str:
        """SQL data type, e.g. 'VARCHAR', 'INTEGER' or 'DATETIME'."""
        ...

    @property
    def is_dttm(self) -> bool:
        """True when the column holds date or time values."""
        ...

    @property
    def verbose_name(self) -> str | None:
        """Display name shown in the UI, if any."""
        ...

    @property
    def description(self) -> str | None:
        """Explanation of what the column contains, if any."""
        ...

    @property
    def groupby(self) -> bool:
        """True when the column may be used for grouping/aggregation."""
        ...

    @property
    def filterable(self) -> bool:
        """True when the column may be used in filters."""
        ...

    @property
    def expression(self) -> str | None:
        """SQL expression backing a calculated column, if any."""
        ...

    @property
    def python_date_format(self) -> str | None:
        """Python datetime format string for temporal columns, if any."""
        ...

    @property
    def advanced_data_type(self) -> str | None:
        """Advanced data type classification, if any."""
        ...

    @property
    def extra(self) -> str | None:
        """Additional metadata serialized as JSON, if any."""
        ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Explorable(Protocol):
|
||||
"""
|
||||
@@ -132,7 +256,7 @@ class Explorable(Protocol):
|
||||
"""
|
||||
|
||||
@property
|
||||
def metrics(self) -> list[Any]:
|
||||
def metrics(self) -> list[MetricMetadata]:
|
||||
"""
|
||||
List of metric metadata objects.
|
||||
|
||||
@@ -147,7 +271,7 @@ class Explorable(Protocol):
|
||||
|
||||
# TODO: rename to dimensions
|
||||
@property
|
||||
def columns(self) -> list[Any]:
|
||||
def columns(self) -> list[ColumnMetadata]:
|
||||
"""
|
||||
List of column metadata objects.
|
||||
|
||||
|
||||
@@ -109,7 +109,7 @@ class ExploreRestApi(BaseSupersetApi):
|
||||
params = CommandParameters(
|
||||
permalink_key=request.args.get("permalink_key", type=str),
|
||||
form_data_key=request.args.get("form_data_key", type=str),
|
||||
datasource_id=request.args.get("datasource_id", type=int),
|
||||
datasource_id=request.args.get("datasource_id"),
|
||||
datasource_type=request.args.get("datasource_type", type=str),
|
||||
slice_id=request.args.get("slice_id", type=int),
|
||||
)
|
||||
|
||||
@@ -21,8 +21,6 @@ Pydantic schemas for chart-related responses
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Annotated, Any, Dict, List, Literal, Protocol
|
||||
|
||||
@@ -50,6 +48,10 @@ from superset.mcp_service.system.schemas import (
|
||||
TagInfo,
|
||||
UserInfo,
|
||||
)
|
||||
from superset.mcp_service.utils.sanitization import (
|
||||
sanitize_filter_value,
|
||||
sanitize_user_input,
|
||||
)
|
||||
|
||||
|
||||
class ChartLike(Protocol):
|
||||
@@ -357,113 +359,17 @@ class ColumnRef(BaseModel):
|
||||
@classmethod
|
||||
def sanitize_name(cls, v: str) -> str:
|
||||
"""Sanitize column name to prevent XSS and SQL injection."""
|
||||
if not v or not v.strip():
|
||||
raise ValueError("Column name cannot be empty")
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 255:
|
||||
raise ValueError(
|
||||
f"Column name too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 255 characters."
|
||||
)
|
||||
|
||||
# Remove HTML tags and decode entities
|
||||
sanitized = html.escape(v.strip())
|
||||
|
||||
# Check for dangerous HTML tags using substring checks (safe)
|
||||
dangerous_tags = ["<script", "</script>", "<iframe", "<object", "<embed"]
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Column name contains potentially malicious script content"
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries to match only actual URLs
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Column name contains potentially malicious URL scheme")
|
||||
|
||||
# Basic SQL injection patterns (basic protection)
|
||||
# Use simple patterns without backtracking
|
||||
dangerous_patterns = [
|
||||
r"[;|&$`]", # Dangerous shell characters
|
||||
r"\b(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
r"--", # SQL comment
|
||||
r"/\*", # SQL comment start (just check for start, not full pattern)
|
||||
]
|
||||
|
||||
for pattern in dangerous_patterns:
|
||||
if re.search(pattern, v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Column name contains potentially unsafe characters or SQL keywords"
|
||||
)
|
||||
|
||||
return sanitized
|
||||
# sanitize_user_input raises ValueError when allow_empty=False (default)
|
||||
# so the return value is guaranteed to be a non-None str
|
||||
return sanitize_user_input(
|
||||
v, "Column name", max_length=255, check_sql_keywords=True
|
||||
) # type: ignore[return-value]
|
||||
|
||||
@field_validator("label")
@classmethod
def sanitize_label(cls, v: str | None) -> str | None:
    """
    Sanitize the optional display label through the shared MCP sanitizer.

    All work is delegated to ``sanitize_user_input`` so labels follow the same
    rules as the other validated text fields instead of a private copy of
    them. With ``allow_empty=True`` a missing or blank label comes back as
    ``None``; labels longer than 500 characters and labels containing
    dangerous patterns make the helper raise ``ValueError``.

    :param v: Raw label value, possibly ``None``
    :return: Sanitized label, or ``None`` when no label was given
    :raises ValueError: If the helper rejects the value
    """
    return sanitize_user_input(v, "Label", max_length=500, allow_empty=True)
|
||||
|
||||
|
||||
class AxisConfig(BaseModel):
|
||||
@@ -496,112 +402,15 @@ class FilterConfig(BaseModel):
|
||||
@classmethod
|
||||
def sanitize_column(cls, v: str) -> str:
|
||||
"""Sanitize filter column name to prevent injection attacks."""
|
||||
if not v or not v.strip():
|
||||
raise ValueError("Filter column name cannot be empty")
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 255:
|
||||
raise ValueError(
|
||||
f"Filter column name too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 255 characters."
|
||||
)
|
||||
|
||||
# Remove HTML tags and decode entities
|
||||
sanitized = html.escape(v.strip())
|
||||
|
||||
# Check for dangerous HTML tags using substring checks (safe)
|
||||
dangerous_tags = ["<script", "</script>"]
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Filter column contains potentially malicious script content"
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Filter column contains potentially malicious URL scheme")
|
||||
|
||||
return sanitized
|
||||
|
||||
@staticmethod
|
||||
def _validate_string_value(v: str) -> None:
|
||||
"""Validate string filter value for security issues."""
|
||||
# Check for dangerous HTML tags and SQL procedures
|
||||
dangerous_substrings = [
|
||||
"<script",
|
||||
"</script>",
|
||||
"<iframe",
|
||||
"<object",
|
||||
"<embed",
|
||||
"xp_cmdshell",
|
||||
"sp_executesql",
|
||||
]
|
||||
v_lower = v.lower()
|
||||
for substring in dangerous_substrings:
|
||||
if substring in v_lower:
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious content. "
|
||||
"HTML tags and JavaScript are not allowed."
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Filter value contains potentially malicious URL scheme")
|
||||
|
||||
# SQL injection patterns
|
||||
sql_patterns = [
|
||||
r";\s*(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
r"'\s*OR\s*'",
|
||||
r"'\s*AND\s*'",
|
||||
r"--\s*",
|
||||
r"/\*",
|
||||
r"UNION\s+SELECT",
|
||||
]
|
||||
for pattern in sql_patterns:
|
||||
if re.search(pattern, v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious SQL patterns."
|
||||
)
|
||||
|
||||
# Check for other dangerous patterns
|
||||
if re.search(r"[;&|`$()]", v):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially unsafe shell characters."
|
||||
)
|
||||
if re.search(r"on\w+\s*=", v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious event handlers."
|
||||
)
|
||||
if re.search(r"\\x[0-9a-fA-F]{2}", v):
|
||||
raise ValueError("Filter value contains hex encoding which is not allowed.")
|
||||
# sanitize_user_input raises ValueError when allow_empty=False (default)
|
||||
# so the return value is guaranteed to be a non-None str
|
||||
return sanitize_user_input(v, "Filter column", max_length=255) # type: ignore[return-value]
|
||||
|
||||
@field_validator("value")
@classmethod
def sanitize_value(cls, v: str | int | float | bool) -> str | int | float | bool:
    """
    Sanitize the filter value through the shared MCP sanitizer.

    All work is delegated to ``sanitize_filter_value`` with a limit of 1000
    characters, replacing the hand-rolled checks that used to live here.
    NOTE(review): the helper is imported from
    ``superset.mcp_service.utils.sanitization``; presumably it passes
    non-string values through unchanged and raises ``ValueError`` for unsafe
    strings -- confirm against its implementation.

    :param v: Raw filter value
    :return: Sanitized filter value
    """
    return sanitize_filter_value(v, max_length=1000)
|
||||
|
||||
|
||||
# Actual chart types
|
||||
@@ -848,6 +657,11 @@ class ListChartsRequest(MetadataCacheControl):
|
||||
class GenerateChartRequest(QueryCacheControl):
|
||||
dataset_id: int | str = Field(..., description="Dataset identifier (ID, UUID)")
|
||||
config: ChartConfig = Field(..., description="Chart configuration")
|
||||
chart_name: str | None = Field(
|
||||
None,
|
||||
description="Custom chart name (optional, auto-generates if not provided)",
|
||||
max_length=255,
|
||||
)
|
||||
save_chart: bool = Field(
|
||||
default=False,
|
||||
description="Whether to permanently save the chart in Superset",
|
||||
@@ -861,6 +675,12 @@ class GenerateChartRequest(QueryCacheControl):
|
||||
description="List of preview formats to generate",
|
||||
)
|
||||
|
||||
@field_validator("chart_name")
@classmethod
def sanitize_chart_name(cls, v: str | None) -> str | None:
    """
    Clean the optional custom chart name with the shared MCP sanitizer.

    The name is limited to 255 characters; because ``allow_empty=True`` is
    passed, a missing or blank name is returned as ``None``.
    """
    cleaned_name = sanitize_user_input(
        v,
        "Chart name",
        max_length=255,
        allow_empty=True,
    )
    return cleaned_name
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_cache_timeout(self) -> "GenerateChartRequest":
|
||||
"""Validate cache timeout is non-negative."""
|
||||
@@ -911,62 +731,7 @@ class UpdateChartRequest(QueryCacheControl):
|
||||
@classmethod
|
||||
def sanitize_chart_name(cls, v: str | None) -> str | None:
|
||||
"""Sanitize chart name to prevent XSS attacks."""
|
||||
if v is None:
|
||||
return v
|
||||
|
||||
# Strip whitespace
|
||||
v = v.strip()
|
||||
if not v:
|
||||
return None
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 255:
|
||||
raise ValueError(
|
||||
f"Chart name too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 255 characters."
|
||||
)
|
||||
|
||||
# Check for dangerous HTML tags using substring checks (safe)
|
||||
dangerous_tags = [
|
||||
"<script",
|
||||
"</script>",
|
||||
"<iframe",
|
||||
"</iframe>",
|
||||
"<object",
|
||||
"</object>",
|
||||
"<embed",
|
||||
"</embed>",
|
||||
"<link",
|
||||
"<meta",
|
||||
]
|
||||
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Chart name contains potentially malicious content. "
|
||||
"HTML tags and JavaScript are not allowed in chart names."
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Chart name contains potentially malicious URL scheme")
|
||||
|
||||
# Check for event handlers with simple regex
|
||||
if re.search(r"on\w+\s*=", v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Chart name contains potentially malicious event handlers."
|
||||
)
|
||||
|
||||
# Filter dangerous Unicode characters
|
||||
v = re.sub(
|
||||
r"[\u200B-\u200D\uFEFF\u0000-\u0008\u000B\u000C\u000E-\u001F]", "", v
|
||||
)
|
||||
|
||||
# HTML escape the cleaned content
|
||||
sanitized = html.escape(v)
|
||||
|
||||
return sanitized if sanitized else None
|
||||
return sanitize_user_input(v, "Chart name", max_length=255, allow_empty=True)
|
||||
|
||||
|
||||
class UpdateChartPreviewRequest(FormDataCacheControl):
|
||||
|
||||
@@ -189,9 +189,9 @@ async def generate_chart( # noqa: C901
|
||||
await ctx.report_progress(2, 5, "Creating chart in database")
|
||||
from superset.commands.chart.create import CreateChartCommand
|
||||
|
||||
# Generate a chart name
|
||||
chart_name = generate_chart_name(request.config)
|
||||
await ctx.debug("Generated chart name: chart_name=%s" % (chart_name,))
|
||||
# Use custom chart name if provided, otherwise auto-generate
|
||||
chart_name = request.chart_name or generate_chart_name(request.config)
|
||||
await ctx.debug("Chart name: chart_name=%s" % (chart_name,))
|
||||
|
||||
# Find the dataset to get its numeric ID
|
||||
from superset.daos.dataset import DatasetDAO
|
||||
|
||||
283
superset/mcp_service/utils/sanitization.py
Normal file
283
superset/mcp_service/utils/sanitization.py
Normal file
@@ -0,0 +1,283 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Centralized sanitization utilities for MCP service user inputs.
|
||||
|
||||
This module uses the nh3 library (Rust-based HTML sanitizer) to strip malicious
|
||||
HTML tags and protocols from user inputs. nh3 is faster and safer than manual
|
||||
regex-based sanitization.
|
||||
|
||||
Key features:
|
||||
- Strips all HTML tags using nh3.clean() with no allowed tags
|
||||
- Blocks dangerous URL schemes (javascript:, vbscript:, data:)
|
||||
- Preserves safe text content (e.g., '&' stays as '&', not '&amp;')
|
||||
- Additional SQL injection protection for database-facing inputs
|
||||
"""
|
||||
|
||||
import html
|
||||
import re
|
||||
|
||||
import nh3
|
||||
|
||||
|
||||
def _strip_html_tags(value: str) -> str:
    """
    Strip all HTML tags from the input using nh3.

    Every layer of HTML entity encoding is decoded BEFORE the text is handed
    to nh3, so entity-encoded tags (e.g., ``&lt;script&gt;``) become real tags
    that nh3 can detect and remove. After nh3 has run, only ``&amp;`` is
    turned back into ``&`` (deliberately not a full ``html.unescape``): this
    keeps ampersands readable in display text without re-introducing angle
    brackets or other HTML-significant characters.

    Args:
        value: The input string that may contain HTML

    Returns:
        String with all HTML tags removed and ampersands preserved
    """
    # Decode repeatedly so nested encodings such as ``&amp;lt;script&amp;gt;``
    # cannot slip past the tag stripper. html.unescape() is a no-op on fully
    # decoded text, so the loop normally stops after a few passes; the cap is
    # defense-in-depth against pathological inputs.
    max_iterations = 100
    decoded = value
    for _ in range(max_iterations):
        unescaped = html.unescape(decoded)
        if unescaped == decoded:
            break
        decoded = unescaped

    # tags=set() makes nh3 drop every tag; url_schemes=set() allows no URL
    # scheme in anything that might remain.
    cleaned = nh3.clean(decoded, tags=set(), url_schemes=set())

    # Restore only "&amp;" -> "&" so text like "A & B" stays readable. A full
    # html.unescape() here would turn entities that nh3 emitted (e.g.
    # "&lt;script&gt;") back into raw angle brackets, creating an XSS vector.
    return cleaned.replace("&amp;", "&")
|
||||
|
||||
|
||||
def _check_dangerous_patterns(value: str, field_name: str) -> None:
    """
    Reject text that still carries script-like patterns after tag stripping.

    Two case-insensitive checks run in order: dangerous URL schemes written
    as plain text, then HTML event-handler assignments.

    Args:
        value: The input string to check
        field_name: Name of the field (used as prefix of the error message)

    Raises:
        ValueError: If the value contains a ``javascript:``, ``vbscript:`` or
            ``data:`` scheme, or an ``on<name>=`` event handler
    """
    flags = re.IGNORECASE

    # URL schemes; the leading \b keeps the scheme name from matching mid-word.
    scheme_hit = re.compile(r"\b(javascript|vbscript|data):", flags).search(value)
    if scheme_hit is not None:
        raise ValueError(f"{field_name} contains potentially malicious URL scheme")

    # Event handlers such as onclick= or onerror= (whitespace before "=" allowed).
    handler_hit = re.compile(r"on\w+\s*=", flags).search(value)
    if handler_hit is not None:
        raise ValueError(f"{field_name} contains potentially malicious event handler")
|
||||
|
||||
|
||||
def _check_sql_patterns(value: str, field_name: str) -> None:
    """
    Reject text that looks like an SQL injection attempt.

    Three checks run in order: destructive SQL keywords (case-insensitive,
    whole words), shell metacharacters or a ``--`` comment, and finally the
    start of a ``/*`` block comment.

    Args:
        value: The input string to check
        field_name: Name of the field (used as prefix of the error message)

    Raises:
        ValueError: If any of the three checks matches
    """
    keyword_hit = re.compile(
        r"\b(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
        re.IGNORECASE,
    ).search(value)
    if keyword_hit is not None:
        raise ValueError(f"{field_name} contains potentially unsafe SQL keywords")

    # Shell metacharacters plus the SQL line-comment marker.
    unsafe_char_hit = re.compile(r"[;|&$`]|--").search(value)
    if unsafe_char_hit is not None:
        raise ValueError(f"{field_name} contains potentially unsafe characters")

    # Only the opening of a block comment is looked for, not a full comment.
    if value.find("/*") != -1:
        raise ValueError(f"{field_name} contains potentially unsafe SQL comment syntax")
|
||||
|
||||
|
||||
def _remove_dangerous_unicode(value: str) -> str:
    """
    Drop invisible and control characters from the input.

    Removed are the zero-width characters U+200B..U+200D, the byte-order mark
    U+FEFF, and the control characters U+0000..U+001F except tab (U+0009),
    line feed (U+000A) and carriage return (U+000D).

    Args:
        value: The input string

    Returns:
        The input with those characters deleted; everything else is untouched
    """
    unsafe_chars = re.compile(
        r"[\u200B-\u200D\uFEFF\u0000-\u0008\u000B\u000C\u000E-\u001F]"
    )
    return unsafe_chars.sub("", value)
|
||||
|
||||
|
||||
def sanitize_user_input(
|
||||
value: str | None,
|
||||
field_name: str,
|
||||
max_length: int = 255,
|
||||
check_sql_keywords: bool = False,
|
||||
allow_empty: bool = False,
|
||||
) -> str | None:
|
||||
"""
|
||||
Centralized sanitization for user-provided text inputs.
|
||||
|
||||
Uses nh3 to strip HTML tags and performs additional security checks.
|
||||
|
||||
Args:
|
||||
value: The input string to sanitize
|
||||
field_name: Name of the field (for error messages)
|
||||
max_length: Maximum allowed length
|
||||
check_sql_keywords: Whether to check for SQL injection keywords
|
||||
allow_empty: Whether to allow empty/None values
|
||||
|
||||
Returns:
|
||||
Sanitized string, or None if allow_empty=True and value is empty
|
||||
|
||||
Raises:
|
||||
ValueError: If value fails security validation
|
||||
|
||||
Security checks performed:
|
||||
- Strips all HTML tags using nh3 (Rust-based sanitizer)
|
||||
- Blocks JavaScript/VBScript/data URL schemes
|
||||
- Blocks event handlers (onclick=, onerror=, etc.)
|
||||
- Removes dangerous Unicode characters (zero-width, control chars)
|
||||
- SQL keywords and shell metacharacters (when check_sql_keywords=True)
|
||||
"""
|
||||
if value is None:
|
||||
if allow_empty:
|
||||
return None
|
||||
raise ValueError(f"{field_name} cannot be empty")
|
||||
|
||||
value = value.strip()
|
||||
|
||||
if not value:
|
||||
if allow_empty:
|
||||
return None
|
||||
raise ValueError(f"{field_name} cannot be empty")
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(value) > max_length:
|
||||
raise ValueError(
|
||||
f"{field_name} too long ({len(value)} characters). "
|
||||
f"Maximum allowed length is {max_length} characters."
|
||||
)
|
||||
|
||||
# Strip all HTML tags using nh3
|
||||
value = _strip_html_tags(value)
|
||||
|
||||
# Check for dangerous patterns (URL schemes, event handlers)
|
||||
_check_dangerous_patterns(value, field_name)
|
||||
|
||||
# SQL keyword and shell metacharacter checks (for column names, etc.)
|
||||
if check_sql_keywords:
|
||||
_check_sql_patterns(value, field_name)
|
||||
|
||||
# Remove dangerous Unicode characters
|
||||
value = _remove_dangerous_unicode(value)
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def sanitize_filter_value(
|
||||
value: str | int | float | bool,
|
||||
max_length: int = 1000,
|
||||
) -> str | int | float | bool:
|
||||
"""
|
||||
Sanitize filter values which can be strings or other types.
|
||||
|
||||
For non-string values, returns as-is (no sanitization needed).
|
||||
For strings, uses nh3 to strip HTML and applies security validation.
|
||||
|
||||
Args:
|
||||
value: The filter value (string, int, float, or bool)
|
||||
max_length: Maximum length for string values
|
||||
|
||||
Returns:
|
||||
Sanitized value
|
||||
|
||||
Raises:
|
||||
ValueError: If string value fails security validation
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return value
|
||||
|
||||
value = value.strip()
|
||||
|
||||
# Length check first
|
||||
if len(value) > max_length:
|
||||
raise ValueError(
|
||||
f"Filter value too long ({len(value)} characters). "
|
||||
f"Maximum allowed length is {max_length} characters."
|
||||
)
|
||||
|
||||
# Strip all HTML tags using nh3
|
||||
value = _strip_html_tags(value)
|
||||
|
||||
# Check for dangerous patterns
|
||||
_check_dangerous_patterns(value, "Filter value")
|
||||
|
||||
# Check for dangerous SQL procedures (filter-specific)
|
||||
v_lower = value.lower()
|
||||
if "xp_cmdshell" in v_lower or "sp_executesql" in v_lower:
|
||||
raise ValueError("Filter value contains potentially malicious SQL procedures.")
|
||||
|
||||
# SQL injection patterns specific to filter values
|
||||
sql_patterns = [
|
||||
r";\s*(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
r"'\s*OR\s*'",
|
||||
r"'\s*AND\s*'",
|
||||
r"--\s*",
|
||||
r"/\*",
|
||||
r"UNION\s+SELECT",
|
||||
]
|
||||
for pattern in sql_patterns:
|
||||
if re.search(pattern, value, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious SQL patterns."
|
||||
)
|
||||
|
||||
# Check for shell metacharacters that could indicate injection attempts
|
||||
# Note: We allow '&' alone as it's common in text ("A & B") and is only
|
||||
# dangerous in shell contexts, not in database queries
|
||||
if re.search(r"[;|`$()]", value):
|
||||
raise ValueError("Filter value contains potentially unsafe shell characters.")
|
||||
|
||||
# Check for hex encoding
|
||||
if re.search(r"\\x[0-9a-fA-F]{2}", value):
|
||||
raise ValueError("Filter value contains hex encoding which is not allowed.")
|
||||
|
||||
# Remove dangerous Unicode characters
|
||||
value = _remove_dangerous_unicode(value)
|
||||
|
||||
return value
|
||||
@@ -0,0 +1,126 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""add_semantic_layers_and_views
|
||||
|
||||
Revision ID: 33d7e0e21daa
|
||||
Revises: 9787190b3d89
|
||||
Create Date: 2025-11-04 11:26:00.000000
|
||||
|
||||
"""
|
||||
|
||||
import uuid
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy_utils import UUIDType
|
||||
from sqlalchemy_utils.types.json import JSONType
|
||||
|
||||
from superset.extensions import encrypted_field_factory
|
||||
from superset.migrations.shared.utils import (
|
||||
create_fks_for_table,
|
||||
create_table,
|
||||
drop_table,
|
||||
)
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "33d7e0e21daa"
|
||||
down_revision = "9787190b3d89"
|
||||
|
||||
|
||||
def upgrade():
    """
    Create the `semantic_layers` and `semantic_views` tables.

    Both tables use a binary UUID primary key and carry `created_by_fk` /
    `changed_by_fk` audit columns, whose foreign keys to `ab_user.id` are added
    right after each table is created. Every semantic view references its
    parent layer through `semantic_layer_uuid` with ON DELETE CASCADE.
    """
    # --- semantic_layers -------------------------------------------------
    layer_columns = [
        sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False),
        sa.Column("created_on", sa.DateTime(), nullable=True),
        sa.Column("changed_on", sa.DateTime(), nullable=True),
        sa.Column("name", sa.String(length=250), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column("type", sa.String(length=250), nullable=False),
        # Stored through the encrypted field factory rather than as plain JSON
        sa.Column(
            "configuration",
            encrypted_field_factory.create(JSONType),
            nullable=True,
        ),
        sa.Column("cache_timeout", sa.Integer(), nullable=True),
        sa.Column("created_by_fk", sa.Integer(), nullable=True),
        sa.Column("changed_by_fk", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("uuid"),
    ]
    create_table("semantic_layers", *layer_columns)

    for constraint_name, audit_column in (
        ("fk_semantic_layers_created_by_fk_ab_user", "created_by_fk"),
        ("fk_semantic_layers_changed_by_fk_ab_user", "changed_by_fk"),
    ):
        create_fks_for_table(
            constraint_name,
            "semantic_layers",
            "ab_user",
            [audit_column],
            ["id"],
        )

    # --- semantic_views --------------------------------------------------
    view_columns = [
        sa.Column("uuid", UUIDType(binary=True), default=uuid.uuid4, nullable=False),
        sa.Column("created_on", sa.DateTime(), nullable=True),
        sa.Column("changed_on", sa.DateTime(), nullable=True),
        sa.Column("name", sa.String(length=250), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column(
            "configuration",
            encrypted_field_factory.create(JSONType),
            nullable=True,
        ),
        sa.Column("cache_timeout", sa.Integer(), nullable=True),
        # Deleting a layer removes its views as well
        sa.Column(
            "semantic_layer_uuid",
            UUIDType(binary=True),
            sa.ForeignKey("semantic_layers.uuid", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("created_by_fk", sa.Integer(), nullable=True),
        sa.Column("changed_by_fk", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("uuid"),
    ]
    create_table("semantic_views", *view_columns)

    for constraint_name, audit_column in (
        ("fk_semantic_views_created_by_fk_ab_user", "created_by_fk"),
        ("fk_semantic_views_changed_by_fk_ab_user", "changed_by_fk"),
    ):
        create_fks_for_table(
            constraint_name,
            "semantic_views",
            "ab_user",
            [audit_column],
            ["id"],
        )
|
||||
|
||||
|
||||
def downgrade():
    """Drop the tables created by `upgrade`, views before layers."""
    # semantic_views holds a foreign key to semantic_layers, so it goes first.
    for table_name in ("semantic_views", "semantic_layers"):
        drop_table(table_name)
|
||||
@@ -22,7 +22,7 @@ import logging
|
||||
import re
|
||||
from collections.abc import Hashable
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional, TYPE_CHECKING
|
||||
from typing import Any, cast, Optional, TYPE_CHECKING
|
||||
|
||||
import sqlalchemy as sqla
|
||||
from flask import current_app as app
|
||||
@@ -64,7 +64,7 @@ from superset.sql.parse import (
|
||||
Table,
|
||||
)
|
||||
from superset.sqllab.limiting_factor import LimitingFactor
|
||||
from superset.superset_typing import ExplorableData, QueryObjectDict
|
||||
from superset.superset_typing import DatasetColumnData, ExplorableData, QueryObjectDict
|
||||
from superset.utils import json
|
||||
from superset.utils.core import (
|
||||
get_column_name,
|
||||
@@ -258,7 +258,7 @@ class Query(
|
||||
],
|
||||
"filter_select": True,
|
||||
"name": self.tab_name,
|
||||
"columns": [o.data for o in self.columns],
|
||||
"columns": [cast(DatasetColumnData, o.data) for o in self.columns],
|
||||
"metrics": [],
|
||||
"id": self.id,
|
||||
"type": self.type,
|
||||
|
||||
16
superset/semantic_layers/__init__.py
Normal file
16
superset/semantic_layers/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
941
superset/semantic_layers/mapper.py
Normal file
941
superset/semantic_layers/mapper.py
Normal file
@@ -0,0 +1,941 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Functions for mapping `QueryObject` to semantic layers.
|
||||
|
||||
These functions validate and convert a `QueryObject` into one or more `SemanticQuery`,
|
||||
which are then passed to semantic layer implementations for execution, returning a
|
||||
single dataframe.
|
||||
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from time import time
|
||||
from typing import Any, cast, Sequence, TypeGuard
|
||||
|
||||
import numpy as np
|
||||
from superset_core.semantic_layers.semantic_view import SemanticViewFeature
|
||||
from superset_core.semantic_layers.types import (
|
||||
AdhocExpression,
|
||||
AdhocFilter,
|
||||
Day,
|
||||
Dimension,
|
||||
Filter,
|
||||
FilterValues,
|
||||
Grain,
|
||||
GroupLimit,
|
||||
Hour,
|
||||
Metric,
|
||||
Minute,
|
||||
Month,
|
||||
Operator,
|
||||
OrderDirection,
|
||||
OrderTuple,
|
||||
PredicateType,
|
||||
Quarter,
|
||||
Second,
|
||||
SemanticQuery,
|
||||
SemanticResult,
|
||||
Week,
|
||||
Year,
|
||||
)
|
||||
|
||||
from superset.common.db_query_status import QueryStatus
|
||||
from superset.common.query_object import QueryObject
|
||||
from superset.common.utils.time_range_utils import get_since_until_from_query_object
|
||||
from superset.connectors.sqla.models import BaseDatasource
|
||||
from superset.constants import NO_TIME_RANGE
|
||||
from superset.models.helpers import QueryResult
|
||||
from superset.superset_typing import AdhocColumn
|
||||
from superset.utils.core import (
|
||||
FilterOperator,
|
||||
QueryObjectFilterClause,
|
||||
TIME_COMPARISON,
|
||||
)
|
||||
from superset.utils.date_parser import get_past_or_future
|
||||
|
||||
|
||||
class ValidatedQueryObjectFilterClause(QueryObjectFilterClause):
    """
    A validated QueryObject filter clause with a string column name.

    The `col` in a `QueryObjectFilterClause` can be either a string (column name) or an
    adhoc column, but we only support the former in semantic layers.

    This class only narrows type annotations for static checking; it adds no
    runtime behavior of its own.
    """

    # overwrite to narrow type; mypy complains about more restrictive typed dicts,
    # but the alternative would be to redefine the object
    col: str  # type: ignore[misc]
    op: str  # type: ignore[misc]
|
||||
|
||||
|
||||
class ValidatedQueryObject(QueryObject):
    """
    A query object that has a datasource defined.

    The annotations below narrow the base `QueryObject` types to what the
    mapper functions in this module rely on; they are declarations for the type
    checker only.
    """

    # the mapper reads `datasource.implementation`, so a datasource must be set
    datasource: BaseDatasource

    # overwrite to narrow type; mypy complains about the assignment since the base type
    # allows adhoc filters, but we only support validated filters here
    filter: list[ValidatedQueryObjectFilterClause]  # type: ignore[assignment]
    # series columns and the series limit metric are plain names, used as
    # lookup keys into the semantic view's dimensions and metrics
    series_columns: Sequence[str]  # type: ignore[assignment]
    series_limit_metric: str | None
|
||||
|
||||
|
||||
def get_results(query_object: QueryObject) -> QueryResult:
    """
    Execute a `QueryObject` against its semantic view.

    The query object is mapped to a main `SemanticQuery` followed by one query
    per entry in `time_offsets`. Each offset result is left-joined onto the
    main dataframe on the non-metric columns, with its metric columns renamed
    to `<metric><TIME_COMPARISON><offset>`.

    :param query_object: The QueryObject containing query specifications
    :return: QueryResult compatible with Superset's query interface
    :raises ValueError: If `validate_query_object` rejects the query object
    """
    if not validate_query_object(query_object):
        raise ValueError("QueryObject must have a datasource defined.")

    # Timing covers mapping, every dispatched query, and the joins.
    started_at = time()

    view = query_object.datasource.implementation
    # Row-count and dataframe requests take the same keyword arguments.
    execute = view.get_row_count if query_object.is_rowcount else view.get_dataframe

    def run(semantic_query: SemanticQuery) -> SemanticResult:
        """Dispatch one semantic query to the view."""
        return execute(
            metrics=semantic_query.metrics,
            dimensions=semantic_query.dimensions,
            filters=semantic_query.filters,
            order=semantic_query.order,
            limit=semantic_query.limit,
            offset=semantic_query.offset,
            group_limit=semantic_query.group_limit,
        )

    def finish(requests, dataframe) -> QueryResult:
        """Wrap the accumulated requests and dataframe into a QueryResult."""
        return map_semantic_result_to_query_result(
            SemanticResult(requests=requests, results=dataframe),
            query_object,
            timedelta(seconds=time() - started_at),
        )

    # queries[0] is the main query; the rest line up with time_offsets.
    queries = map_query_object(query_object)
    main_query = queries[0]

    primary = run(main_query)
    combined_df = primary.results
    # Every request (SQL, HTTP, ...) is kept so it can be shown for troubleshooting.
    collected_requests = list(primary.requests)

    time_offsets = query_object.time_offsets
    if not time_offsets or len(queries) <= 1:
        return finish(collected_requests, combined_df)

    metric_names = [metric.name for metric in main_query.metrics]
    # Everything that is not a metric identifies a row and is used for joining.
    join_keys = [name for name in combined_df.columns if name not in metric_names]

    for offset_query, time_offset in zip(queries[1:], time_offsets, strict=False):
        outcome = run(offset_query)
        collected_requests.extend(outcome.requests)
        offset_df = outcome.results

        # e.g. "revenue" -> "revenue__1 week ago"
        offset_labels = {
            metric: TIME_COMPARISON.join([metric, time_offset])
            for metric in metric_names
        }

        if offset_df.empty:
            # Merging an empty frame can cause dtype mismatches, so the offset
            # columns are filled with NaN directly instead.
            for label in offset_labels.values():
                combined_df[label] = np.nan
            continue

        # Left join keeps every main row; unmatched offset metrics become NaN.
        combined_df = combined_df.merge(
            offset_df.rename(columns=offset_labels),
            on=join_keys,
            how="left",
            suffixes=("", "__duplicate"),
        )

        # Defensive cleanup of columns that collided during the merge.
        leftovers = [
            name for name in combined_df.columns if name.endswith("__duplicate")
        ]
        if leftovers:
            combined_df = combined_df.drop(columns=leftovers)

    return finish(collected_requests, combined_df)
|
||||
|
||||
|
||||
def map_semantic_result_to_query_result(
    semantic_result: SemanticResult,
    query_object: ValidatedQueryObject,
    duration: timedelta,
) -> QueryResult:
    """
    Build a `QueryResult` from a `SemanticResult`.

    The dataframe is passed through untouched. The query text is made of every
    recorded request, each prefixed with a `-- <type>` line and separated by a
    blank line. The status is always SUCCESS.

    :param semantic_result: Result from the semantic layer
    :param query_object: Original QueryObject (for passthrough attributes)
    :param duration: Time taken to execute the query
    :return: QueryResult compatible with Superset's query interface
    """
    requests = semantic_result.requests
    # There may be several requests, e.g. one per time comparison.
    rendered_requests = (
        "\n\n".join(
            f"-- {request.type}\n{request.definition}" for request in requests
        )
        if requests
        else ""
    )

    return QueryResult(
        df=semantic_result.results,
        query=rendered_requests,
        duration=duration,
        # Semantic layers do not use Jinja templates
        applied_template_filters=None,
        # Filter validation is left to the semantic layer itself
        applied_filter_columns=None,
        rejected_filter_columns=None,
        # Failures raise before this point, so only success is reported here
        status=QueryStatus.SUCCESS,
        error_message=None,
        errors=None,
        # Time range is copied from the original query object
        from_dttm=query_object.from_dttm,
        to_dttm=query_object.to_dttm,
    )
|
||||
|
||||
|
||||
def _normalize_column(column: str | AdhocColumn, dimension_names: set[str]) -> str:
|
||||
"""
|
||||
Normalize a column to its dimension name.
|
||||
|
||||
Columns can be either:
|
||||
- A string (dimension name directly)
|
||||
- An AdhocColumn with isColumnReference=True and sqlExpression containing the
|
||||
dimension name
|
||||
"""
|
||||
if isinstance(column, str):
|
||||
return column
|
||||
|
||||
# Handle column references (e.g., from time-series charts)
|
||||
if column.get("isColumnReference") and (sql_expr := column.get("sqlExpression")):
|
||||
if sql_expr in dimension_names:
|
||||
return sql_expr
|
||||
|
||||
raise ValueError("Adhoc dimensions are not supported in Semantic Views.")
|
||||
|
||||
|
||||
def map_query_object(query_object: ValidatedQueryObject) -> list[SemanticQuery]:
    """
    Convert a `QueryObject` into a list of `SemanticQuery`.

    This function maps the `QueryObject` into query objects that focus less on
    visualization and more on semantics.

    The first query in the returned list is the main query; it is followed by
    one query per entry in `query_object.time_offsets`, in the same order. All
    queries share metrics, dimensions, order, limit, offset and group limit and
    differ only in their filters.

    :param query_object: A validated query object with a semantic view datasource
    :return: The main query followed by the time offset queries
    :raises KeyError: If a requested metric is not defined on the semantic view
    :raises ValueError: If a column is an unsupported adhoc column
    """
    semantic_view = query_object.datasource.implementation

    all_metrics = {metric.name: metric for metric in semantic_view.metrics}
    all_dimensions = {
        dimension.name: dimension for dimension in semantic_view.dimensions
    }

    # Normalize columns (may be dicts with isColumnReference=True for time-series)
    dimension_names = set(all_dimensions.keys())
    normalized_columns = {
        _normalize_column(column, dimension_names) for column in query_object.columns
    }

    metrics = [all_metrics[metric] for metric in (query_object.metrics or [])]

    grain = (
        _convert_time_grain(query_object.extras["time_grain_sqla"])
        if "time_grain_sqla" in query_object.extras
        else None
    )
    dimensions = [
        dimension
        for dimension in semantic_view.dimensions
        if dimension.name in normalized_columns
        and (
            # if a grain is specified, only include the time dimension if its grain
            # matches the requested grain
            grain is None
            or dimension.name != query_object.granularity
            or dimension.grain == grain
        )
    ]

    order = _get_order_from_query_object(query_object, all_metrics, all_dimensions)
    limit = query_object.row_limit
    offset = query_object.row_offset

    group_limit = _get_group_limit_from_query_object(
        query_object,
        all_metrics,
        all_dimensions,
    )

    queries = []
    # `None` stands for the main (unshifted) query; each offset shifts the time
    # bounds of its filters.
    for time_offset in [None] + query_object.time_offsets:
        filters = _get_filters_from_query_object(
            query_object,
            time_offset,
            all_dimensions,
        )

        queries.append(
            SemanticQuery(
                metrics=metrics,
                dimensions=dimensions,
                filters=filters,
                order=order,
                limit=limit,
                offset=offset,
                group_limit=group_limit,
            )
        )

    return queries
|
||||
|
||||
|
||||
def _get_filters_from_query_object(
    query_object: ValidatedQueryObject,
    time_offset: str | None,
    all_dimensions: dict[str, Dimension],
) -> set[Filter | AdhocFilter]:
    """
    Collect every filter that applies to one (possibly time-shifted) query.

    All time constraints (from_dttm/to_dttm/inner_from_dttm/inner_to_dttm) are
    expressed as ordinary filters. Sources, in order: the datasource's fetch
    values predicate, the time range for `time_offset`, the WHERE/HAVING
    clauses in `extras`, and the entries of `query_object.filter`.
    """
    collected: set[Filter | AdhocFilter] = set()

    # 1. Fetch values predicate, when requested and defined on the datasource
    if query_object.apply_fetch_values_predicate:
        if predicate := query_object.datasource.fetch_values_predicate:
            collected.add(
                AdhocFilter(type=PredicateType.WHERE, definition=predicate)
            )

    # 2. Time range; for an offset query the bounds are already shifted
    collected.update(_get_time_filter(query_object, time_offset, all_dimensions))

    # 3. Raw WHERE / HAVING clauses from extras
    collected.update(_get_filters_from_extras(query_object.extras))

    # 4. Regular filter clauses
    has_granularity = bool(query_object.granularity)
    for clause in query_object.filter:
        is_temporal_range = clause.get("op") == FilterOperator.TEMPORAL_RANGE.value
        # With a granularity set, the time range from step 2 replaces these
        if is_temporal_range and has_granularity:
            continue

        collected.update(_convert_query_object_filter(clause, all_dimensions) or ())

    return collected
|
||||
|
||||
|
||||
def _get_filters_from_extras(extras: dict[str, Any]) -> set[AdhocFilter]:
|
||||
"""
|
||||
Extract filters from the extras dict.
|
||||
|
||||
The extras dict can contain various keys that affect query behavior:
|
||||
|
||||
Supported keys (converted to filters):
|
||||
- "where": SQL WHERE clause expression (e.g., "customer_id > 100")
|
||||
- "having": SQL HAVING clause expression (e.g., "SUM(sales) > 1000")
|
||||
|
||||
Other keys in extras (handled elsewhere in the mapper):
|
||||
- "time_grain_sqla": Time granularity (e.g., "P1D", "PT1H")
|
||||
Handled in _convert_time_grain() and used for dimension grain matching
|
||||
|
||||
Note: The WHERE and HAVING clauses from extras are SQL expressions that
|
||||
are passed through as-is to the semantic layer as AdhocFilter objects.
|
||||
"""
|
||||
filters: set[AdhocFilter] = set()
|
||||
|
||||
# Add WHERE clause from extras
|
||||
if where_clause := extras.get("where"):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
definition=where_clause,
|
||||
)
|
||||
)
|
||||
|
||||
# Add HAVING clause from extras
|
||||
if having_clause := extras.get("having"):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
type=PredicateType.HAVING,
|
||||
definition=having_clause,
|
||||
)
|
||||
)
|
||||
|
||||
return filters
|
||||
|
||||
|
||||
def _get_time_filter(
|
||||
query_object: ValidatedQueryObject,
|
||||
time_offset: str | None,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter]:
|
||||
"""
|
||||
Create a time range filter from the query object.
|
||||
|
||||
This handles both regular queries and time offset queries, simplifying the
|
||||
complexity of from_dttm/to_dttm/inner_from_dttm/inner_to_dttm by using the
|
||||
same time bounds for both the main query and series limit subqueries.
|
||||
"""
|
||||
filters: set[Filter] = set()
|
||||
|
||||
if not query_object.granularity:
|
||||
return filters
|
||||
|
||||
time_dimension = all_dimensions.get(query_object.granularity)
|
||||
if not time_dimension:
|
||||
return filters
|
||||
|
||||
# Get the appropriate time bounds based on whether this is a time offset query
|
||||
from_dttm, to_dttm = _get_time_bounds(query_object, time_offset)
|
||||
|
||||
if not from_dttm or not to_dttm:
|
||||
return filters
|
||||
|
||||
# Create a filter with >= and < operators
|
||||
return {
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=time_dimension,
|
||||
operator=Operator.GREATER_THAN_OR_EQUAL,
|
||||
value=from_dttm,
|
||||
),
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=time_dimension,
|
||||
operator=Operator.LESS_THAN,
|
||||
value=to_dttm,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _get_time_bounds(
|
||||
query_object: ValidatedQueryObject,
|
||||
time_offset: str | None,
|
||||
) -> tuple[datetime | None, datetime | None]:
|
||||
"""
|
||||
Get the appropriate time bounds for the query.
|
||||
|
||||
For regular queries (time_offset is None), returns from_dttm/to_dttm.
|
||||
For time offset queries, calculates the shifted bounds.
|
||||
|
||||
This simplifies the inner_from_dttm/inner_to_dttm complexity by using
|
||||
the same bounds for both main queries and series limit subqueries (Option 1).
|
||||
"""
|
||||
if time_offset is None:
|
||||
# Main query: use from_dttm/to_dttm directly
|
||||
return query_object.from_dttm, query_object.to_dttm
|
||||
|
||||
# Time offset query: calculate shifted bounds
|
||||
# Use from_dttm/to_dttm if available, otherwise try to get from time_range
|
||||
outer_from = query_object.from_dttm
|
||||
outer_to = query_object.to_dttm
|
||||
|
||||
if not outer_from or not outer_to:
|
||||
# Fall back to parsing time_range if from_dttm/to_dttm not set
|
||||
outer_from, outer_to = get_since_until_from_query_object(query_object)
|
||||
|
||||
if not outer_from or not outer_to:
|
||||
return None, None
|
||||
|
||||
# Apply the offset to both bounds
|
||||
offset_from = get_past_or_future(time_offset, outer_from)
|
||||
offset_to = get_past_or_future(time_offset, outer_to)
|
||||
|
||||
return offset_from, offset_to
|
||||
|
||||
|
||||
def _convert_query_object_filter(
|
||||
filter_: ValidatedQueryObjectFilterClause,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter] | None:
|
||||
"""
|
||||
Convert a QueryObject filter dict to a semantic layer Filter or AdhocFilter.
|
||||
"""
|
||||
operator_str = filter_["op"]
|
||||
|
||||
# Handle simple column filters
|
||||
col = filter_.get("col")
|
||||
if col not in all_dimensions:
|
||||
return None
|
||||
|
||||
dimension = all_dimensions[col]
|
||||
|
||||
val_str = filter_["val"]
|
||||
value: FilterValues | frozenset[FilterValues]
|
||||
if val_str is None:
|
||||
value = None
|
||||
elif isinstance(val_str, (list, tuple)):
|
||||
value = frozenset(val_str)
|
||||
else:
|
||||
value = val_str
|
||||
|
||||
# Special case for temporal range
|
||||
if operator_str == FilterOperator.TEMPORAL_RANGE.value:
|
||||
if not isinstance(value, str) or value == NO_TIME_RANGE:
|
||||
return None
|
||||
|
||||
start, end = value.split(" : ")
|
||||
return {
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=dimension,
|
||||
operator=Operator.GREATER_THAN_OR_EQUAL,
|
||||
value=start,
|
||||
),
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=dimension,
|
||||
operator=Operator.LESS_THAN,
|
||||
value=end,
|
||||
),
|
||||
}
|
||||
|
||||
# Map QueryObject operators to semantic layer operators
|
||||
operator_mapping = {
|
||||
FilterOperator.EQUALS.value: Operator.EQUALS,
|
||||
FilterOperator.NOT_EQUALS.value: Operator.NOT_EQUALS,
|
||||
FilterOperator.GREATER_THAN.value: Operator.GREATER_THAN,
|
||||
FilterOperator.LESS_THAN.value: Operator.LESS_THAN,
|
||||
FilterOperator.GREATER_THAN_OR_EQUALS.value: Operator.GREATER_THAN_OR_EQUAL,
|
||||
FilterOperator.LESS_THAN_OR_EQUALS.value: Operator.LESS_THAN_OR_EQUAL,
|
||||
FilterOperator.IN.value: Operator.IN,
|
||||
FilterOperator.NOT_IN.value: Operator.NOT_IN,
|
||||
FilterOperator.LIKE.value: Operator.LIKE,
|
||||
FilterOperator.NOT_LIKE.value: Operator.NOT_LIKE,
|
||||
FilterOperator.IS_NULL.value: Operator.IS_NULL,
|
||||
FilterOperator.IS_NOT_NULL.value: Operator.IS_NOT_NULL,
|
||||
}
|
||||
|
||||
operator = operator_mapping.get(operator_str)
|
||||
if not operator:
|
||||
# Unknown operator - create adhoc filter
|
||||
return None
|
||||
|
||||
return {
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=dimension,
|
||||
operator=operator,
|
||||
value=value,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
def _get_order_from_query_object(
|
||||
query_object: ValidatedQueryObject,
|
||||
all_metrics: dict[str, Metric],
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> list[OrderTuple]:
|
||||
order: list[OrderTuple] = []
|
||||
for element, ascending in query_object.orderby:
|
||||
direction = OrderDirection.ASC if ascending else OrderDirection.DESC
|
||||
|
||||
# adhoc
|
||||
if isinstance(element, dict):
|
||||
if element["sqlExpression"] is not None:
|
||||
order.append(
|
||||
(
|
||||
AdhocExpression(
|
||||
id=element["label"] or element["sqlExpression"],
|
||||
definition=element["sqlExpression"],
|
||||
),
|
||||
direction,
|
||||
)
|
||||
)
|
||||
elif element in all_dimensions:
|
||||
order.append((all_dimensions[element], direction))
|
||||
elif element in all_metrics:
|
||||
order.append((all_metrics[element], direction))
|
||||
|
||||
return order
|
||||
|
||||
|
||||
def _get_group_limit_from_query_object(
|
||||
query_object: ValidatedQueryObject,
|
||||
all_metrics: dict[str, Metric],
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> GroupLimit | None:
|
||||
# no limit
|
||||
if query_object.series_limit == 0 or not query_object.columns:
|
||||
return None
|
||||
|
||||
dimensions = [all_dimensions[dim_id] for dim_id in query_object.series_columns]
|
||||
top = query_object.series_limit
|
||||
metric = (
|
||||
all_metrics[query_object.series_limit_metric]
|
||||
if query_object.series_limit_metric
|
||||
else None
|
||||
)
|
||||
direction = OrderDirection.DESC if query_object.order_desc else OrderDirection.ASC
|
||||
group_others = query_object.group_others_when_limit_reached
|
||||
|
||||
# Check if we need separate filters for the group limit subquery
|
||||
# This happens when inner_from_dttm/inner_to_dttm differ from from_dttm/to_dttm
|
||||
group_limit_filters = _get_group_limit_filters(query_object, all_dimensions)
|
||||
|
||||
return GroupLimit(
|
||||
dimensions=dimensions,
|
||||
top=top,
|
||||
metric=metric,
|
||||
direction=direction,
|
||||
group_others=group_others,
|
||||
filters=group_limit_filters,
|
||||
)
|
||||
|
||||
|
||||
def _get_group_limit_filters(
|
||||
query_object: ValidatedQueryObject,
|
||||
all_dimensions: dict[str, Dimension],
|
||||
) -> set[Filter | AdhocFilter] | None:
|
||||
"""
|
||||
Get separate filters for the group limit subquery if needed.
|
||||
|
||||
This is used when inner_from_dttm/inner_to_dttm differ from from_dttm/to_dttm,
|
||||
which happens during time comparison queries. The group limit subquery may need
|
||||
different time bounds to determine the top N groups.
|
||||
|
||||
Returns None if the group limit should use the same filters as the main query.
|
||||
"""
|
||||
# Check if inner time bounds are explicitly set and differ from outer bounds
|
||||
if (
|
||||
query_object.inner_from_dttm is None
|
||||
or query_object.inner_to_dttm is None
|
||||
or (
|
||||
query_object.inner_from_dttm == query_object.from_dttm
|
||||
and query_object.inner_to_dttm == query_object.to_dttm
|
||||
)
|
||||
):
|
||||
# No separate bounds needed - use the same filters as the main query
|
||||
return None
|
||||
|
||||
# Create separate filters for the group limit subquery
|
||||
filters: set[Filter | AdhocFilter] = set()
|
||||
|
||||
# Add time range filter using inner bounds
|
||||
if query_object.granularity:
|
||||
time_dimension = all_dimensions.get(query_object.granularity)
|
||||
if (
|
||||
time_dimension
|
||||
and query_object.inner_from_dttm
|
||||
and query_object.inner_to_dttm
|
||||
):
|
||||
filters.update(
|
||||
{
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=time_dimension,
|
||||
operator=Operator.GREATER_THAN_OR_EQUAL,
|
||||
value=query_object.inner_from_dttm,
|
||||
),
|
||||
Filter(
|
||||
type=PredicateType.WHERE,
|
||||
column=time_dimension,
|
||||
operator=Operator.LESS_THAN,
|
||||
value=query_object.inner_to_dttm,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
# Add fetch values predicate if present
|
||||
if (
|
||||
query_object.apply_fetch_values_predicate
|
||||
and query_object.datasource.fetch_values_predicate
|
||||
):
|
||||
filters.add(
|
||||
AdhocFilter(
|
||||
type=PredicateType.WHERE,
|
||||
definition=query_object.datasource.fetch_values_predicate,
|
||||
)
|
||||
)
|
||||
|
||||
# Add filters from query_object.extras (WHERE and HAVING clauses)
|
||||
extras_filters = _get_filters_from_extras(query_object.extras)
|
||||
filters.update(extras_filters)
|
||||
|
||||
# Add all other non-temporal filters from query_object.filter
|
||||
for filter_ in query_object.filter:
|
||||
# Skip temporal range filters - we're using inner bounds instead
|
||||
if (
|
||||
filter_.get("op") == FilterOperator.TEMPORAL_RANGE.value
|
||||
and query_object.granularity
|
||||
):
|
||||
continue
|
||||
|
||||
if converted_filters := _convert_query_object_filter(filter_, all_dimensions):
|
||||
filters.update(converted_filters)
|
||||
|
||||
return filters if filters else None
|
||||
|
||||
|
||||
def _convert_time_grain(time_grain: str) -> type[Grain] | None:
    """
    Look up the grain class whose ``representation`` equals ``time_grain``.

    Returns ``None`` when none of the known grains matches.
    """
    known_grains = (Second, Minute, Hour, Day, Week, Month, Quarter, Year)
    grain_by_representation = {
        candidate.representation: candidate for candidate in known_grains
    }
    return grain_by_representation.get(time_grain)
|
||||
|
||||
|
||||
def validate_query_object(
|
||||
query_object: QueryObject,
|
||||
) -> TypeGuard[ValidatedQueryObject]:
|
||||
"""
|
||||
Validate that the `QueryObject` is compatible with the `SemanticView`.
|
||||
|
||||
If some semantic view implementation supports these features we should add an
|
||||
attribute to the `SemanticViewImplementation` to indicate support for them.
|
||||
"""
|
||||
if not query_object.datasource:
|
||||
return False
|
||||
|
||||
query_object = cast(ValidatedQueryObject, query_object)
|
||||
|
||||
_validate_metrics(query_object)
|
||||
_validate_dimensions(query_object)
|
||||
_validate_filters(query_object)
|
||||
_validate_granularity(query_object)
|
||||
_validate_group_limit(query_object)
|
||||
_validate_orderby(query_object)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _validate_metrics(query_object: ValidatedQueryObject) -> None:
|
||||
"""
|
||||
Make sure metrics are defined in the semantic view.
|
||||
"""
|
||||
semantic_view = query_object.datasource.implementation
|
||||
|
||||
if any(not isinstance(metric, str) for metric in (query_object.metrics or [])):
|
||||
raise ValueError("Adhoc metrics are not supported in Semantic Views.")
|
||||
|
||||
metric_names = {metric.name for metric in semantic_view.metrics}
|
||||
if not set(query_object.metrics or []) <= metric_names:
|
||||
raise ValueError("All metrics must be defined in the Semantic View.")
|
||||
|
||||
|
||||
def _validate_dimensions(query_object: ValidatedQueryObject) -> None:
|
||||
"""
|
||||
Make sure all dimensions are defined in the semantic view.
|
||||
"""
|
||||
semantic_view = query_object.datasource.implementation
|
||||
dimension_names = {dimension.name for dimension in semantic_view.dimensions}
|
||||
|
||||
# Normalize all columns to dimension names
|
||||
normalized_columns = [
|
||||
_normalize_column(column, dimension_names) for column in query_object.columns
|
||||
]
|
||||
|
||||
if not set(normalized_columns) <= dimension_names:
|
||||
raise ValueError("All dimensions must be defined in the Semantic View.")
|
||||
|
||||
|
||||
def _validate_filters(query_object: ValidatedQueryObject) -> None:
|
||||
"""
|
||||
Make sure all filters are valid.
|
||||
"""
|
||||
for filter_ in query_object.filter:
|
||||
if isinstance(filter_["col"], dict):
|
||||
raise ValueError(
|
||||
"Adhoc columns are not supported in Semantic View filters."
|
||||
)
|
||||
if not filter_.get("op"):
|
||||
raise ValueError("All filters must have an operator defined.")
|
||||
|
||||
|
||||
def _validate_granularity(query_object: ValidatedQueryObject) -> None:
|
||||
"""
|
||||
Make sure time column and time grain are valid.
|
||||
"""
|
||||
semantic_view = query_object.datasource.implementation
|
||||
dimension_names = {dimension.name for dimension in semantic_view.dimensions}
|
||||
|
||||
if time_column := query_object.granularity:
|
||||
if time_column not in dimension_names:
|
||||
raise ValueError(
|
||||
"The time column must be defined in the Semantic View dimensions."
|
||||
)
|
||||
|
||||
if time_grain := query_object.extras.get("time_grain_sqla"):
|
||||
if not time_column:
|
||||
raise ValueError(
|
||||
"A time column must be specified when a time grain is provided."
|
||||
)
|
||||
|
||||
supported_time_grains = {
|
||||
dimension.grain
|
||||
for dimension in semantic_view.dimensions
|
||||
if dimension.name == time_column and dimension.grain
|
||||
}
|
||||
if _convert_time_grain(time_grain) not in supported_time_grains:
|
||||
raise ValueError(
|
||||
"The time grain is not supported for the time column in the "
|
||||
"Semantic View."
|
||||
)
|
||||
|
||||
|
||||
def _validate_group_limit(query_object: ValidatedQueryObject) -> None:
|
||||
"""
|
||||
Validate group limit related features in the query object.
|
||||
"""
|
||||
semantic_view = query_object.datasource.implementation
|
||||
|
||||
# no limit
|
||||
if query_object.series_limit == 0:
|
||||
return
|
||||
|
||||
if (
|
||||
query_object.series_columns
|
||||
and SemanticViewFeature.GROUP_LIMIT not in semantic_view.features
|
||||
):
|
||||
raise ValueError("Group limit is not supported in this Semantic View.")
|
||||
|
||||
if any(not isinstance(col, str) for col in query_object.series_columns):
|
||||
raise ValueError("Adhoc dimensions are not supported in series columns.")
|
||||
|
||||
metric_names = {metric.name for metric in semantic_view.metrics}
|
||||
if query_object.series_limit_metric and (
|
||||
not isinstance(query_object.series_limit_metric, str)
|
||||
or query_object.series_limit_metric not in metric_names
|
||||
):
|
||||
raise ValueError(
|
||||
"The series limit metric must be defined in the Semantic View."
|
||||
)
|
||||
|
||||
dimension_names = {dimension.name for dimension in semantic_view.dimensions}
|
||||
if not set(query_object.series_columns) <= dimension_names:
|
||||
raise ValueError("All series columns must be defined in the Semantic View.")
|
||||
|
||||
if (
|
||||
query_object.group_others_when_limit_reached
|
||||
and SemanticViewFeature.GROUP_OTHERS not in semantic_view.features
|
||||
):
|
||||
raise ValueError(
|
||||
"Grouping others when limit is reached is not supported in this Semantic "
|
||||
"View."
|
||||
)
|
||||
|
||||
|
||||
def _validate_orderby(query_object: ValidatedQueryObject) -> None:
|
||||
"""
|
||||
Validate order by elements in the query object.
|
||||
"""
|
||||
semantic_view = query_object.datasource.implementation
|
||||
|
||||
if (
|
||||
any(not isinstance(element, str) for element, _ in query_object.orderby)
|
||||
and SemanticViewFeature.ADHOC_EXPRESSIONS_IN_ORDERBY
|
||||
not in semantic_view.features
|
||||
):
|
||||
raise ValueError(
|
||||
"Adhoc expressions in order by are not supported in this Semantic View."
|
||||
)
|
||||
|
||||
elements = {orderby[0] for orderby in query_object.orderby}
|
||||
metric_names = {metric.name for metric in semantic_view.metrics}
|
||||
dimension_names = {dimension.name for dimension in semantic_view.dimensions}
|
||||
if not elements <= metric_names | dimension_names:
|
||||
raise ValueError("All order by elements must be defined in the Semantic View.")
|
||||
374
superset/semantic_layers/models.py
Normal file
374
superset/semantic_layers/models.py
Normal file
@@ -0,0 +1,374 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""Semantic layer models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from collections.abc import Hashable
|
||||
from dataclasses import dataclass
|
||||
from functools import cached_property
|
||||
from typing import Any, TYPE_CHECKING
|
||||
|
||||
from flask_appbuilder import Model
|
||||
from sqlalchemy import Column, ForeignKey, Integer, String, Text
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy_utils import UUIDType
|
||||
from sqlalchemy_utils.types.json import JSONType
|
||||
from superset_core.semantic_layers.semantic_layer import (
|
||||
SemanticLayer as SemanticLayerProtocol,
|
||||
)
|
||||
from superset_core.semantic_layers.semantic_view import (
|
||||
SemanticView as SemanticViewProtocol,
|
||||
)
|
||||
from superset_core.semantic_layers.types import (
|
||||
BINARY,
|
||||
BOOLEAN,
|
||||
DATE,
|
||||
DATETIME,
|
||||
DECIMAL,
|
||||
INTEGER,
|
||||
INTERVAL,
|
||||
NUMBER,
|
||||
OBJECT,
|
||||
STRING,
|
||||
TIME,
|
||||
Type,
|
||||
)
|
||||
|
||||
from superset.common.query_object import QueryObject
|
||||
from superset.explorables.base import TimeGrainDict
|
||||
from superset.extensions import encrypted_field_factory
|
||||
from superset.models.helpers import AuditMixinNullable, QueryResult
|
||||
from superset.semantic_layers.mapper import get_results
|
||||
from superset.semantic_layers.registry import registry
|
||||
from superset.utils import json
|
||||
from superset.utils.core import GenericDataType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.superset_typing import ExplorableData, QueryObjectDict
|
||||
|
||||
|
||||
def get_column_type(semantic_type: type[Type]) -> GenericDataType:
    """
    Translate a semantic layer type into the matching generic data type.

    Date/time types are temporal, numeric types (including intervals) are
    numeric, ``BOOLEAN`` is boolean, and everything else is a string.
    """
    temporal_types = {DATE, DATETIME, TIME}
    numeric_types = {INTEGER, NUMBER, DECIMAL, INTERVAL}

    if semantic_type in temporal_types:
        return GenericDataType.TEMPORAL
    if semantic_type in numeric_types:
        return GenericDataType.NUMERIC
    if semantic_type is BOOLEAN:
        return GenericDataType.BOOLEAN

    # STRING, OBJECT and BINARY, as well as any unrecognized type.
    return GenericDataType.STRING
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MetricMetadata:
    """Immutable description of a metric exposed by a semantic view."""

    # Required: name of the metric and the expression that defines it.
    metric_name: str
    expression: str

    # Optional descriptive metadata; all default to None.
    verbose_name: str | None = None
    description: str | None = None
    d3format: str | None = None
    currency: dict[str, Any] | None = None
    warning_text: str | None = None
    certified_by: str | None = None
    certification_details: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ColumnMetadata:
    """Immutable description of a column exposed by a semantic view."""

    # Required: column name, type name and whether the column is temporal.
    column_name: str
    type: str
    is_dttm: bool

    # Optional descriptive metadata.
    verbose_name: str | None = None
    description: str | None = None

    # Columns can be grouped by and filtered on unless stated otherwise.
    groupby: bool = True
    filterable: bool = True

    # Remaining optional attributes; all default to None.
    expression: str | None = None
    python_date_format: str | None = None
    advanced_data_type: str | None = None
    extra: str | None = None
|
||||
|
||||
|
||||
class SemanticLayer(AuditMixinNullable, Model):
    """
    Semantic layer model.

    A semantic layer provides an abstraction over data sources,
    allowing users to query data through a semantic interface.
    """

    __tablename__ = "semantic_layers"

    # The right-hand side still refers to the `uuid` module; the class attribute
    # of the same name only exists once this assignment has run.
    uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4)

    # Core fields
    name = Column(String(250), nullable=False)
    description = Column(Text, nullable=True)
    type = Column(String(250), nullable=False)  # snowflake, etc

    configuration = Column(encrypted_field_factory.create(JSONType), default=dict)
    cache_timeout = Column(Integer, nullable=True)

    # Semantic views relationship
    semantic_views: list[SemanticView] = relationship(
        "SemanticView",
        back_populates="semantic_layer",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def __repr__(self) -> str:
        return self.name or str(self.uuid)

    @cached_property
    def implementation(
        self,
    ) -> SemanticLayerProtocol[Any, SemanticViewProtocol]:
        """
        Return semantic layer implementation.

        The implementation class is looked up in the registry by ``self.type``
        and built from the layer configuration. The result is cached on the
        instance.
        """
        # TODO (betodealmeida):
        # return extension_manager.get_contribution("semanticLayers", self.type)
        class_ = registry[self.type]

        # `configuration` is declared with `JSONType` and `default=dict`, so it
        # can already hold a decoded mapping; only serialized text is parsed.
        configuration = self.configuration
        if isinstance(configuration, (str, bytes)):
            configuration = json.loads(configuration)

        return class_.from_configuration(configuration)
|
||||
|
||||
|
||||
class SemanticView(AuditMixinNullable, Model):
    """
    Semantic view model.

    A semantic view represents a queryable view within a semantic layer.
    """

    __tablename__ = "semantic_views"

    # The right-hand side still refers to the `uuid` module; the class attribute
    # of the same name only exists once this assignment has run.
    uuid = Column(UUIDType(binary=True), primary_key=True, default=uuid.uuid4)

    # Core fields
    name = Column(String(250), nullable=False)
    description = Column(Text, nullable=True)

    configuration = Column(encrypted_field_factory.create(JSONType), default=dict)
    cache_timeout = Column(Integer, nullable=True)

    # Semantic layer relationship
    semantic_layer_uuid = Column(
        UUIDType(binary=True),
        ForeignKey("semantic_layers.uuid", ondelete="CASCADE"),
        nullable=False,
    )
    semantic_layer: SemanticLayer = relationship(
        "SemanticLayer",
        back_populates="semantic_views",
        foreign_keys=[semantic_layer_uuid],
    )

    def __repr__(self) -> str:
        return self.name or str(self.uuid)

    @cached_property
    def implementation(self) -> SemanticViewProtocol:
        """
        Return semantic view implementation.

        The view is obtained from the implementation of the parent semantic
        layer, using this view's name and configuration. The result is cached
        on the instance.
        """
        # `configuration` is declared with `JSONType` and `default=dict`, so it
        # can already hold a decoded mapping; only serialized text is parsed.
        configuration = self.configuration
        if isinstance(configuration, (str, bytes)):
            configuration = json.loads(configuration)

        return self.semantic_layer.implementation.get_semantic_view(
            self.name,
            configuration,
        )

    # =========================================================================
    # Explorable protocol implementation
    # =========================================================================

    def get_query_result(self, query_object: QueryObject) -> QueryResult:
        """Run the query object through the semantic layer mapper."""
        return get_results(query_object)

    def get_query_str(self, query_obj: QueryObjectDict) -> str:
        """Return a placeholder; query strings are not produced here."""
        return "Not implemented for semantic layers"

    @property
    def uid(self) -> str:
        """Identifier reported by the view implementation."""
        return self.implementation.uid()

    @property
    def type(self) -> str:
        """Explorable type, always ``"semantic_view"``."""
        return "semantic_view"

    @property
    def metrics(self) -> list[MetricMetadata]:
        """Metrics of the implementation, as ``MetricMetadata`` records."""
        return [
            MetricMetadata(
                metric_name=metric.name,
                expression=metric.definition,
                description=metric.description,
            )
            for metric in self.implementation.get_metrics()
        ]

    @property
    def columns(self) -> list[ColumnMetadata]:
        """Dimensions of the implementation, as ``ColumnMetadata`` records."""
        return [
            ColumnMetadata(
                column_name=dimension.name,
                type=dimension.type.__name__,
                is_dttm=dimension.type in {DATE, TIME, DATETIME},
                description=dimension.description,
                expression=dimension.definition,
                # The grain name (or null) is carried as a JSON string.
                extra=json.dumps(
                    {"grain": dimension.grain.name if dimension.grain else None}
                ),
            )
            for dimension in self.implementation.get_dimensions()
        ]

    @property
    def column_names(self) -> list[str]:
        """Names of all dimensions of the implementation."""
        return [dimension.name for dimension in self.implementation.get_dimensions()]

    @property
    def data(self) -> ExplorableData:
        """
        Describe the view as an ``ExplorableData`` payload.

        Fields with no counterpart in the view are filled with fixed empty
        values.
        """
        # Fetch the dimensions once and materialise them, so that "columns",
        # "column_types" and "column_names" are built from the same sequence.
        dimensions = list(self.implementation.get_dimensions())

        return {
            # core
            "id": self.uuid.hex,
            "uid": self.uid,
            "type": "semantic_view",
            "name": self.name,
            "columns": [
                {
                    "advanced_data_type": None,
                    "certification_details": None,
                    "certified_by": None,
                    "column_name": dimension.name,
                    "description": dimension.description,
                    "expression": dimension.definition,
                    "filterable": True,
                    "groupby": True,
                    "id": None,
                    "uuid": None,
                    "is_certified": False,
                    "is_dttm": dimension.type in {DATE, TIME, DATETIME},
                    "python_date_format": None,
                    "type": dimension.type.__name__,
                    "type_generic": get_column_type(dimension.type),
                    "verbose_name": None,
                    "warning_markdown": None,
                }
                for dimension in dimensions
            ],
            "metrics": [
                {
                    "certification_details": None,
                    "certified_by": None,
                    "d3format": None,
                    "description": metric.description,
                    "expression": metric.definition,
                    "id": None,
                    "uuid": None,
                    "is_certified": False,
                    "metric_name": metric.name,
                    "warning_markdown": None,
                    "warning_text": None,
                    "verbose_name": None,
                }
                for metric in self.implementation.get_metrics()
            ],
            "database": {},
            # UI features
            "verbose_map": {},
            "order_by_choices": [],
            "filter_select": True,
            "filter_select_enabled": True,
            "sql": None,
            "select_star": None,
            "owners": [],
            "description": self.description,
            "table_name": self.name,
            "column_types": [
                get_column_type(dimension.type) for dimension in dimensions
            ],
            "column_names": [dimension.name for dimension in dimensions],
            # rare
            "column_formats": {},
            "datasource_name": self.name,
            "perm": self.perm,
            "offset": self.offset,
            "cache_timeout": self.cache_timeout,
            "params": None,
            # sql-specific
            "schema": None,
            "catalog": None,
            "main_dttm_col": None,
            "time_grain_sqla": [],
            "granularity_sqla": [],
            "fetch_values_predicate": None,
            "template_params": None,
            "is_sqllab_view": False,
            "extra": None,
            "always_filter_main_dttm": False,
            "normalize_columns": False,
            # TODO XXX
            # "owners": [owner.id for owner in self.owners],
            "edit_url": "",
            "default_endpoint": None,
            "folders": [],
            "health_check_message": None,
        }

    def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]:
        """Return no extra cache keys."""
        return []

    @property
    def perm(self) -> str:
        """Permission string: ``<semantic layer uuid hex>::<view uuid hex>``."""
        return self.semantic_layer_uuid.hex + "::" + self.uuid.hex

    @property
    def offset(self) -> int:
        # always return datetime as UTC
        return 0

    # NOTE(review): exposed as a property despite the method-like name, so
    # `view.get_time_grains()` would try to call the returned list. Confirm how
    # callers are expected to access it.
    @property
    def get_time_grains(self) -> list[TimeGrainDict]:
        """One time grain entry for each dimension that declares a grain."""
        return [
            {
                "name": dimension.grain.name,
                "function": "",
                "duration": dimension.grain.representation,
            }
            for dimension in self.implementation.get_dimensions()
            if dimension.grain
        ]

    def has_drill_by_columns(self, column_names: list[str]) -> bool:
        """Tell whether every given column name is a dimension of the view."""
        dimension_names = {
            dimension.name for dimension in self.implementation.get_dimensions()
        }
        return all(column_name in dimension_names for column_name in column_names)

    @property
    def is_rls_supported(self) -> bool:
        """Row level security is not supported."""
        return False

    @property
    def query_language(self) -> str | None:
        """No query language is reported."""
        return None
|
||||
20
superset/semantic_layers/registry.py
Normal file
20
superset/semantic_layers/registry.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from superset_core.semantic_layers.semantic_layer import SemanticLayer
|
||||
|
||||
# Semantic layer implementation classes, keyed by a string identifier.
# Starts out empty.
registry: dict[str, type[SemanticLayer]] = {}
|
||||
File diff suppressed because one or more lines are too long
@@ -30,6 +30,46 @@ if TYPE_CHECKING:
|
||||
SQLType: TypeAlias = TypeEngine | type[TypeEngine]
|
||||
|
||||
|
||||
class DatasetColumnData(TypedDict, total=False):
|
||||
"""Type for column metadata in ExplorableData datasets."""
|
||||
|
||||
advanced_data_type: str | None
|
||||
certification_details: str | None
|
||||
certified_by: str | None
|
||||
column_name: str
|
||||
description: str | None
|
||||
expression: str | None
|
||||
filterable: bool
|
||||
groupby: bool
|
||||
id: int | None
|
||||
uuid: str | None
|
||||
is_certified: bool
|
||||
is_dttm: bool
|
||||
python_date_format: str | None
|
||||
type: str
|
||||
type_generic: NotRequired["GenericDataType" | None]
|
||||
verbose_name: str | None
|
||||
warning_markdown: str | None
|
||||
|
||||
|
||||
class DatasetMetricData(TypedDict, total=False):
|
||||
"""Type for metric metadata in ExplorableData datasets."""
|
||||
|
||||
certification_details: str | None
|
||||
certified_by: str | None
|
||||
currency: NotRequired[dict[str, Any]]
|
||||
d3format: str | None
|
||||
description: str | None
|
||||
expression: str | None
|
||||
id: int | None
|
||||
uuid: str | None
|
||||
is_certified: bool
|
||||
metric_name: str
|
||||
warning_markdown: str | None
|
||||
warning_text: str | None
|
||||
verbose_name: str | None
|
||||
|
||||
|
||||
class LegacyMetric(TypedDict):
|
||||
label: str | None
|
||||
|
||||
@@ -254,7 +294,7 @@ class ExplorableData(TypedDict, total=False):
|
||||
"""
|
||||
|
||||
# Core fields from BaseDatasource.data
|
||||
id: int
|
||||
id: int | str # String for UUID-based explorables like SemanticView
|
||||
uid: str
|
||||
column_formats: dict[str, str | None]
|
||||
description: str | None
|
||||
@@ -274,8 +314,8 @@ class ExplorableData(TypedDict, total=False):
|
||||
perm: str | None
|
||||
edit_url: str
|
||||
sql: str | None
|
||||
columns: list[dict[str, Any]]
|
||||
metrics: list[dict[str, Any]]
|
||||
columns: list["DatasetColumnData"]
|
||||
metrics: list["DatasetMetricData"]
|
||||
folders: Any # JSON field, can be list or dict
|
||||
order_by_choices: list[tuple[str, str]]
|
||||
owners: list[int] | list[dict[str, Any]] # Can be either format
|
||||
@@ -283,8 +323,8 @@ class ExplorableData(TypedDict, total=False):
|
||||
select_star: str | None
|
||||
|
||||
# Additional fields from SqlaTable and data_for_slices
|
||||
column_types: list[Any]
|
||||
column_names: set[str] | set[Any]
|
||||
column_types: list["GenericDataType"]
|
||||
column_names: set[str] | list[str]
|
||||
granularity_sqla: list[tuple[Any, Any]]
|
||||
time_grain_sqla: list[tuple[Any, Any]]
|
||||
main_dttm_col: str | None
|
||||
|
||||
@@ -11357,33 +11357,7 @@ msgid "Tree layout"
|
||||
msgstr "Diseño del árbol"
|
||||
|
||||
msgid "Tree orientation"
|
||||
Findings (brief):
|
||||
|
||||
- No git merge conflict markers found (no <<<<<<< / ======= / >>>>>>>).
|
||||
- PO header mismatch: "Language: en" — this is a Spanish file; set to "es".
|
||||
- Duplicate msgid entries with conflicting/empty translations:
|
||||
- " at line %(line)d" — one entry has " en la línea %(line)d", another has an empty msgstr.
|
||||
- "Dashboard cannot be copied due to invalid parameters." — appears multiple times with different/empty msgstr values.
|
||||
- "%(subtitle)s\nThis may be triggered by:\n %(issue)s" — msgstr is empty in one occurrence.
|
||||
- There are other repeated msgids with one occurrence left untranslated (examples: search for repeated msgid strings with one msgstr == "").
|
||||
- Empty translations (examples):
|
||||
- msgid "%(subtitle)s\nThis may be triggered by:\n %(issue)s" → msgstr "".
|
||||
- Several other msgid entries have msgstr "" (scan for msgstr "" occurrences).
|
||||
- Fuzzy entries present (e.g. entries annotated "#, fuzzy") — these need review and removal of the fuzzy flag after correction.
|
||||
- Typo in a translation: msgid "This action will permanently delete the user." → msgstr contains "uduario." (should be "usuario.").
|
||||
|
||||
Recommended next steps:
|
||||
- Fix header Language to "es".
|
||||
- Remove/fix duplicate msgids: consolidate into a single entry and keep the correct translation.
|
||||
- Fill in missing msgstr values (or mark as untranslated intentionally).
|
||||
- Review and resolve fuzzy entries, then remove the "fuzzy" flag.
|
||||
- Fix obvious typos (e.g., "uduario" → "usuario").
|
||||
|
||||
If you want, I can produce a patch that:
|
||||
- updates header Language to "es",
|
||||
- removes duplicate entries by keeping the first translated occurrence,
|
||||
- lists all msgids with empty msgstr for you to translate,
|
||||
or show exact locations (line ranges) for each problem. Which would you prefer?
|
||||
msgstr "Orientación del árbol"
|
||||
|
||||
msgid "Treemap"
|
||||
msgstr "Diagrama de árbol"
|
||||
|
||||
@@ -96,7 +96,6 @@ from superset.exceptions import (
|
||||
SupersetException,
|
||||
SupersetTimeoutException,
|
||||
)
|
||||
from superset.explorables.base import Explorable
|
||||
from superset.sql.parse import sanitize_clause
|
||||
from superset.superset_typing import (
|
||||
AdhocColumn,
|
||||
@@ -115,7 +114,7 @@ from superset.utils.hashing import hash_from_dict, hash_from_str
|
||||
from superset.utils.pandas import detect_datetime_format
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.connectors.sqla.models import TableColumn
|
||||
from superset.explorables.base import ColumnMetadata, Explorable
|
||||
from superset.models.core import Database
|
||||
|
||||
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
|
||||
@@ -200,6 +199,7 @@ class DatasourceType(StrEnum):
|
||||
QUERY = "query"
|
||||
SAVEDQUERY = "saved_query"
|
||||
VIEW = "view"
|
||||
SEMANTIC_VIEW = "semantic_view"
|
||||
|
||||
|
||||
class LoggerLevel(StrEnum):
|
||||
@@ -1730,15 +1730,12 @@ def get_metric_type_from_column(column: Any, datasource: Explorable) -> str:
|
||||
:return: The inferred metric type as a string, or an empty string if the
|
||||
column is not a metric or no valid operation is found.
|
||||
"""
|
||||
|
||||
from superset.connectors.sqla.models import SqlMetric
|
||||
|
||||
metric: SqlMetric = next(
|
||||
(metric for metric in datasource.metrics if metric.metric_name == column),
|
||||
SqlMetric(metric_name=""),
|
||||
metric = next(
|
||||
(m for m in datasource.metrics if m.metric_name == column),
|
||||
None,
|
||||
)
|
||||
|
||||
if metric.metric_name == "":
|
||||
if metric is None:
|
||||
return ""
|
||||
|
||||
expression: str = metric.expression
|
||||
@@ -1784,7 +1781,7 @@ def extract_dataframe_dtypes(
|
||||
|
||||
generic_types: list[GenericDataType] = []
|
||||
for column in df.columns:
|
||||
column_object = columns_by_name.get(column)
|
||||
column_object = columns_by_name.get(str(column))
|
||||
series = df[column]
|
||||
inferred_type: str = ""
|
||||
if series.isna().all():
|
||||
@@ -1814,11 +1811,17 @@ def extract_dataframe_dtypes(
|
||||
return generic_types
|
||||
|
||||
|
||||
def extract_column_dtype(col: TableColumn) -> GenericDataType:
|
||||
if col.is_temporal:
|
||||
def extract_column_dtype(col: ColumnMetadata) -> GenericDataType:
|
||||
# Check for temporal type
|
||||
if hasattr(col, "is_temporal") and col.is_temporal:
|
||||
return GenericDataType.TEMPORAL
|
||||
if col.is_numeric:
|
||||
if col.is_dttm:
|
||||
return GenericDataType.TEMPORAL
|
||||
|
||||
# Check for numeric type
|
||||
if hasattr(col, "is_numeric") and col.is_numeric:
|
||||
return GenericDataType.NUMERIC
|
||||
|
||||
# TODO: add check for boolean data type when proper support is added
|
||||
return GenericDataType.STRING
|
||||
|
||||
@@ -1832,9 +1835,7 @@ def get_time_filter_status(
|
||||
applied_time_extras: dict[str, str],
|
||||
) -> tuple[list[dict[str, str]], list[dict[str, str]]]:
|
||||
temporal_columns: set[Any] = {
|
||||
(col.column_name if hasattr(col, "column_name") else col.get("column_name"))
|
||||
for col in datasource.columns
|
||||
if (col.is_dttm if hasattr(col, "is_dttm") else col.get("is_dttm"))
|
||||
col.column_name for col in datasource.columns if col.is_dttm
|
||||
}
|
||||
applied: list[dict[str, str]] = []
|
||||
rejected: list[dict[str, str]] = []
|
||||
|
||||
@@ -24,6 +24,7 @@ import re
|
||||
from datetime import datetime
|
||||
from typing import Any, Callable, cast
|
||||
from urllib import parse
|
||||
from uuid import UUID
|
||||
|
||||
from flask import (
|
||||
abort,
|
||||
@@ -169,9 +170,9 @@ class Superset(BaseSupersetView):
|
||||
if viz_obj.has_error(payload):
|
||||
return json_error_response(payload=payload, status=400)
|
||||
response = {
|
||||
"data": payload["df"].to_dict("records")
|
||||
if payload["df"] is not None
|
||||
else [],
|
||||
"data": (
|
||||
payload["df"].to_dict("records") if payload["df"] is not None else []
|
||||
),
|
||||
"colnames": payload.get("colnames"),
|
||||
"coltypes": payload.get("coltypes"),
|
||||
"rowcount": payload.get("rowcount"),
|
||||
@@ -268,7 +269,9 @@ class Superset(BaseSupersetView):
|
||||
@check_resource_permissions(check_datasource_perms)
|
||||
@deprecated(eol_version="5.0.0")
|
||||
def explore_json(
|
||||
self, datasource_type: str | None = None, datasource_id: int | None = None
|
||||
self,
|
||||
datasource_type: str | None = None,
|
||||
datasource_id: int | str | None = None,
|
||||
) -> FlaskResponse:
|
||||
"""Serves all request that GET or POST form_data
|
||||
|
||||
@@ -302,8 +305,10 @@ class Superset(BaseSupersetView):
|
||||
|
||||
form_data = get_form_data()[0]
|
||||
try:
|
||||
datasource_id, datasource_type = get_datasource_info(
|
||||
datasource_id, datasource_type, form_data
|
||||
ds_id, datasource_type = get_datasource_info(
|
||||
datasource_id,
|
||||
datasource_type,
|
||||
form_data,
|
||||
)
|
||||
force = request.args.get("force") == "true"
|
||||
|
||||
@@ -316,7 +321,7 @@ class Superset(BaseSupersetView):
|
||||
with contextlib.suppress(CacheLoadError):
|
||||
viz_obj = get_viz(
|
||||
datasource_type=cast(str, datasource_type),
|
||||
datasource_id=datasource_id,
|
||||
datasource_id=ds_id,
|
||||
form_data=form_data,
|
||||
force_cached=True,
|
||||
force=force,
|
||||
@@ -343,7 +348,7 @@ class Superset(BaseSupersetView):
|
||||
|
||||
viz_obj = get_viz(
|
||||
datasource_type=cast(str, datasource_type),
|
||||
datasource_id=datasource_id,
|
||||
datasource_id=ds_id,
|
||||
form_data=form_data,
|
||||
force=force,
|
||||
)
|
||||
@@ -407,7 +412,7 @@ class Superset(BaseSupersetView):
|
||||
def explore( # noqa: C901
|
||||
self,
|
||||
datasource_type: str | None = None,
|
||||
datasource_id: int | None = None,
|
||||
datasource_id: int | str | None = None,
|
||||
key: str | None = None,
|
||||
) -> FlaskResponse:
|
||||
if request.method == "GET":
|
||||
@@ -451,21 +456,23 @@ class Superset(BaseSupersetView):
|
||||
|
||||
query_context = request.form.get("query_context")
|
||||
|
||||
ds_id: int | UUID | None = None
|
||||
try:
|
||||
datasource_id, datasource_type = get_datasource_info(
|
||||
datasource_id, datasource_type, form_data
|
||||
ds_id, datasource_type = get_datasource_info(
|
||||
datasource_id,
|
||||
datasource_type,
|
||||
form_data,
|
||||
)
|
||||
except SupersetException:
|
||||
datasource_id = None
|
||||
# fallback unknown datasource to table type
|
||||
datasource_type = SqlaTable.type
|
||||
|
||||
datasource: BaseDatasource | None = None
|
||||
if datasource_id is not None:
|
||||
if ds_id is not None:
|
||||
with contextlib.suppress(DatasetNotFoundError):
|
||||
datasource = DatasourceDAO.get_datasource(
|
||||
DatasourceType("table"),
|
||||
datasource_id,
|
||||
ds_id,
|
||||
)
|
||||
|
||||
datasource_name = datasource.name if datasource else _("[Missing Dataset]")
|
||||
|
||||
@@ -14,12 +14,16 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from functools import wraps
|
||||
from typing import Any, Callable, DefaultDict, Optional, Union
|
||||
from typing import Any, Callable, DefaultDict
|
||||
from urllib import parse
|
||||
from uuid import UUID
|
||||
|
||||
import msgpack
|
||||
import pyarrow as pa
|
||||
@@ -163,7 +167,7 @@ def get_permissions(
|
||||
def get_viz(
|
||||
form_data: FormData,
|
||||
datasource_type: str,
|
||||
datasource_id: int,
|
||||
datasource_id: int | UUID,
|
||||
force: bool = False,
|
||||
force_cached: bool = False,
|
||||
) -> BaseViz:
|
||||
@@ -186,10 +190,10 @@ def loads_request_json(request_json_data: str) -> dict[Any, Any]:
|
||||
|
||||
|
||||
def get_form_data(
|
||||
slice_id: Optional[int] = None,
|
||||
slice_id: int | None = None,
|
||||
use_slice_data: bool = False,
|
||||
initial_form_data: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[dict[str, Any], Optional[Slice]]:
|
||||
initial_form_data: dict[str, Any] | None = None,
|
||||
) -> tuple[dict[str, Any], Slice | None]:
|
||||
form_data: dict[str, Any] = initial_form_data or {}
|
||||
|
||||
if has_request_context():
|
||||
@@ -272,8 +276,10 @@ def add_sqllab_custom_filters(form_data: dict[Any, Any]) -> Any:
|
||||
|
||||
|
||||
def get_datasource_info(
|
||||
datasource_id: Optional[int], datasource_type: Optional[str], form_data: FormData
|
||||
) -> tuple[int, Optional[str]]:
|
||||
datasource_id: int | str | None,
|
||||
datasource_type: str | None,
|
||||
form_data: FormData,
|
||||
) -> tuple[int | UUID, str | None]:
|
||||
"""
|
||||
Compatibility layer for handling of datasource info
|
||||
|
||||
@@ -300,12 +306,16 @@ def get_datasource_info(
|
||||
_("The dataset associated with this chart no longer exists")
|
||||
)
|
||||
|
||||
datasource_id = int(datasource_id)
|
||||
return datasource_id, datasource_type
|
||||
# Convert datasource_id to appropriate type
|
||||
if isinstance(datasource_id, int):
|
||||
return datasource_id, datasource_type
|
||||
if datasource_id.isdigit():
|
||||
return int(datasource_id), datasource_type
|
||||
return UUID(datasource_id), datasource_type
|
||||
|
||||
|
||||
def apply_display_max_row_limit(
|
||||
sql_results: dict[str, Any], rows: Optional[int] = None
|
||||
sql_results: dict[str, Any], rows: int | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Given a `sql_results` nested structure, applies a limit to the number of rows
|
||||
@@ -482,8 +492,8 @@ def check_explore_cache_perms(_self: Any, cache_key: str) -> None:
|
||||
|
||||
def check_datasource_perms(
|
||||
_self: Any,
|
||||
datasource_type: Optional[str] = None,
|
||||
datasource_id: Optional[int] = None,
|
||||
datasource_type: str | None = None,
|
||||
datasource_id: int | str | None = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""
|
||||
@@ -500,8 +510,10 @@ def check_datasource_perms(
|
||||
form_data = kwargs["form_data"] if "form_data" in kwargs else get_form_data()[0]
|
||||
|
||||
try:
|
||||
datasource_id, datasource_type = get_datasource_info(
|
||||
datasource_id, datasource_type, form_data
|
||||
ds_id, datasource_type = get_datasource_info(
|
||||
datasource_id,
|
||||
datasource_type,
|
||||
form_data,
|
||||
)
|
||||
except SupersetException as ex:
|
||||
raise SupersetSecurityException(
|
||||
@@ -524,7 +536,7 @@ def check_datasource_perms(
|
||||
try:
|
||||
viz_obj = get_viz(
|
||||
datasource_type=datasource_type,
|
||||
datasource_id=datasource_id,
|
||||
datasource_id=ds_id,
|
||||
form_data=form_data,
|
||||
force=False,
|
||||
)
|
||||
@@ -541,7 +553,9 @@ def check_datasource_perms(
|
||||
|
||||
|
||||
def _deserialize_results_payload(
|
||||
payload: Union[bytes, str], query: Query, use_msgpack: Optional[bool] = False
|
||||
payload: bytes | str,
|
||||
query: Query,
|
||||
use_msgpack: bool | None = False,
|
||||
) -> dict[str, Any]:
|
||||
logger.debug("Deserializing from msgpack: %r", use_msgpack)
|
||||
if use_msgpack:
|
||||
@@ -579,9 +593,12 @@ def _deserialize_results_payload(
|
||||
|
||||
|
||||
def get_cta_schema_name(
|
||||
database: Database, user: ab_models.User, schema: str, sql: str
|
||||
) -> Optional[str]:
|
||||
func: Optional[Callable[[Database, ab_models.User, str, str], str]] = app.config[
|
||||
database: Database,
|
||||
user: ab_models.User,
|
||||
schema: str,
|
||||
sql: str,
|
||||
) -> str | None:
|
||||
func: Callable[[Database, ab_models.User, str, str], str] | None = app.config[
|
||||
"SQLLAB_CTAS_SCHEMA_NAME_FUNC"
|
||||
]
|
||||
if not func:
|
||||
|
||||
@@ -626,7 +626,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
|
||||
assert response == {
|
||||
"message": {
|
||||
"datasource_type": [
|
||||
"Must be one of: table, dataset, query, saved_query, view."
|
||||
"Must be one of: table, dataset, query, saved_query, view, "
|
||||
"semantic_view."
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -981,7 +982,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
|
||||
assert response == {
|
||||
"message": {
|
||||
"datasource_type": [
|
||||
"Must be one of: table, dataset, query, saved_query, view."
|
||||
"Must be one of: table, dataset, query, saved_query, view, "
|
||||
"semantic_view."
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
480
tests/unit_tests/mcp_service/utils/test_sanitization.py
Normal file
480
tests/unit_tests/mcp_service/utils/test_sanitization.py
Normal file
@@ -0,0 +1,480 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pytest
|
||||
|
||||
from superset.mcp_service.utils.sanitization import (
|
||||
_check_dangerous_patterns,
|
||||
_check_sql_patterns,
|
||||
_remove_dangerous_unicode,
|
||||
_strip_html_tags,
|
||||
sanitize_filter_value,
|
||||
sanitize_user_input,
|
||||
)
|
||||
|
||||
# --- _strip_html_tags tests ---
|
||||
|
||||
|
||||
def test_strip_html_tags_plain_text():
    """Text without any markup is returned unchanged."""
    cleaned = _strip_html_tags("hello world")
    assert cleaned == "hello world"
|
||||
|
||||
|
||||
def test_strip_html_tags_preserves_ampersand():
    """A bare ampersand in the input is kept as-is."""
    cleaned = _strip_html_tags("A & B")
    assert cleaned == "A & B"
|
||||
|
||||
|
||||
def test_strip_html_tags_preserves_multiple_ampersands():
    """Several bare ampersands in one string are all kept."""
    cleaned = _strip_html_tags("A & B & C")
    assert cleaned == "A & B & C"
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_bold_tags():
    """Bold tags are removed while their text content remains."""
    cleaned = _strip_html_tags("<b>hello</b>")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_script_tags():
    """Neither the opening nor the closing script tag survives."""
    cleaned = _strip_html_tags("<script>alert(1)</script>")
    for tag in ("<script>", "</script>"):
        assert tag not in cleaned
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_entity_encoded_script():
    """Entity-encoded tags must be decoded and stripped, not passed through."""
    # The input has to be entity-encoded; a raw "<script>" literal would only
    # repeat test_strip_html_tags_strips_script_tags.
    result = _strip_html_tags("&lt;script&gt;alert(1)&lt;/script&gt;")
    # Neither the decoded tag nor its encoded spelling may remain.
    assert "<script>" not in result
    assert "&lt;script&gt;" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_double_encoded_script():
    """Double-encoded entities must also be decoded and stripped."""
    # Two encoding layers: "<" -> "&lt;" -> "&amp;lt;".
    result = _strip_html_tags("&amp;lt;script&amp;gt;alert(1)&amp;lt;/script&amp;gt;")
    # Neither the fully decoded tag nor a partially decoded one may remain.
    assert "<script>" not in result
    assert "&lt;script&gt;" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_img_onerror():
    """An img tag is removed together with its onerror attribute."""
    cleaned = _strip_html_tags('<img src=x onerror="alert(1)">')
    for fragment in ("<img", "onerror"):
        assert fragment not in cleaned
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_div_tags():
    """Div tags are removed while their text content remains."""
    cleaned = _strip_html_tags("<div>content</div>")
    assert cleaned == "content"
|
||||
|
||||
|
||||
def test_strip_html_tags_preserves_less_than_in_text():
    """Operands around a bare ``<`` that forms no tag stay in the output."""
    cleaned = _strip_html_tags("5 < 10")
    for operand in ("5", "10"):
        assert operand in cleaned
|
||||
|
||||
|
||||
def test_strip_html_tags_empty_string():
    """An empty string stays empty."""
    cleaned = _strip_html_tags("")
    assert cleaned == ""
|
||||
|
||||
|
||||
def test_strip_html_tags_triple_encoded_script():
    """Triple-encoded entities must also be decoded and stripped."""
    # Three encoding layers: "<" -> "&lt;" -> "&amp;lt;" -> "&amp;amp;lt;".
    result = _strip_html_tags(
        "&amp;amp;lt;script&amp;amp;gt;alert(1)&amp;amp;lt;/script&amp;amp;gt;"
    )
    assert "<script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_mixed_encoded_and_raw():
    """Both raw and entity-encoded tags should be stripped."""
    # One raw tag pair and one entity-encoded pair, so both paths are covered.
    result = _strip_html_tags("<b>bold</b> and &lt;i&gt;italic&lt;/i&gt;")
    assert "<b>" not in result
    assert "<i>" not in result
    # The text content of both tags is kept.
    assert "bold" in result
    assert "italic" in result
|
||||
|
||||
|
||||
def test_strip_html_tags_deep_encoding_terminates():
    """Verify the iterative decode loop terminates on many encoding layers."""
    value = "<script>alert(1)</script>"
    # Add ten layers of entity encoding. "&" is escaped first so that each
    # pass re-encodes the entities produced by the previous pass.
    for _ in range(10):
        value = value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    result = _strip_html_tags(value)
    assert "<script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_entity_ampersand():
    """&amp; in input should become & in output."""
    # The input carries the entity; the expected output carries the bare "&".
    assert _strip_html_tags("A &amp; B") == "A & B"
|
||||
|
||||
|
||||
# --- _check_dangerous_patterns tests ---
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_safe_input():
    """Benign text passes the check without raising."""
    harmless = "hello world"
    _check_dangerous_patterns(harmless, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_javascript_scheme():
    """A ``javascript:`` scheme is rejected with a ValueError."""
    payload = "javascript:alert(1)"
    with pytest.raises(ValueError, match="malicious URL scheme"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_vbscript_scheme():
    """A ``vbscript:`` scheme is rejected with a ValueError."""
    payload = "vbscript:MsgBox"
    with pytest.raises(ValueError, match="malicious URL scheme"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_data_scheme():
    """A ``data:`` URL is rejected with a ValueError."""
    payload = "data:text/html,<script>"
    with pytest.raises(ValueError, match="malicious URL scheme"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_case_insensitive():
    """An upper-case scheme is rejected just like a lower-case one."""
    payload = "JAVASCRIPT:alert(1)"
    with pytest.raises(ValueError, match="malicious URL scheme"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_onclick():
    """An ``onclick=`` handler is rejected with a ValueError."""
    payload = "onclick=alert(1)"
    with pytest.raises(ValueError, match="malicious event handler"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_onerror():
    """An ``onerror`` handler is rejected even with spaces around ``=``."""
    payload = "onerror = alert(1)"
    with pytest.raises(ValueError, match="malicious event handler"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_onload():
    """An ``onload=`` handler is rejected with a ValueError."""
    payload = "onload=fetch('x')"
    with pytest.raises(ValueError, match="malicious event handler"):
        _check_dangerous_patterns(payload, "test")
|
||||
|
||||
|
||||
# --- _check_sql_patterns tests ---
|
||||
|
||||
|
||||
def test_check_sql_patterns_safe_input():
    """A plain identifier passes the SQL check without raising."""
    harmless = "revenue_total"
    _check_sql_patterns(harmless, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_drop_table():
    """A DROP TABLE statement is rejected with a ValueError."""
    payload = "DROP TABLE users"
    with pytest.raises(ValueError, match="unsafe SQL keywords"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_delete():
    """A DELETE statement is rejected with a ValueError."""
    payload = "DELETE FROM users"
    with pytest.raises(ValueError, match="unsafe SQL keywords"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_semicolon():
    """A semicolon in the value is rejected with a ValueError."""
    payload = "value; other"
    with pytest.raises(ValueError, match="unsafe characters"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_sql_comment_dash():
    """A ``--`` comment marker is rejected with a ValueError."""
    payload = "value -- comment"
    with pytest.raises(ValueError, match="unsafe characters"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_sql_comment_block():
    """A ``/* ... */`` block comment is rejected with a ValueError."""
    payload = "value /* comment */"
    with pytest.raises(ValueError, match="unsafe SQL comment"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_pipe():
    """A pipe character is rejected with a ValueError."""
    payload = "value | other"
    with pytest.raises(ValueError, match="unsafe characters"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_case_insensitive():
    """Lower-case SQL keywords are rejected like upper-case ones."""
    payload = "drop table users"
    with pytest.raises(ValueError, match="unsafe SQL keywords"):
        _check_sql_patterns(payload, "test")
|
||||
|
||||
|
||||
# --- _remove_dangerous_unicode tests ---
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_plain_text():
    """Plain ASCII text is returned unchanged."""
    cleaned = _remove_dangerous_unicode("hello")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_zero_width_space():
    """U+200B inside a word is removed."""
    cleaned = _remove_dangerous_unicode("he\u200bllo")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_zero_width_joiner():
    """U+200D inside a word is removed."""
    cleaned = _remove_dangerous_unicode("he\u200dllo")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_bom():
    """A leading U+FEFF is removed."""
    cleaned = _remove_dangerous_unicode("\ufeffhello")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_null_byte():
    """An embedded NUL byte is removed."""
    cleaned = _remove_dangerous_unicode("he\x00llo")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_preserves_normal_unicode():
    """Accented letters are left untouched."""
    cleaned = _remove_dangerous_unicode("café résumé")
    assert cleaned == "café résumé"
|
||||
|
||||
|
||||
# --- sanitize_user_input tests ---
|
||||
|
||||
|
||||
def test_sanitize_user_input_plain_text():
    """Plain text is returned unchanged."""
    cleaned = sanitize_user_input("hello", "test")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_preserves_ampersand():
    """A bare ampersand in the input is kept as-is."""
    cleaned = sanitize_user_input("A & B", "test")
    assert cleaned == "A & B"
|
||||
|
||||
|
||||
def test_sanitize_user_input_strips_html():
    """HTML tags are removed while their text content remains."""
    cleaned = sanitize_user_input("<b>hello</b>", "test")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_none_not_allowed():
    """``None`` is rejected when empty values are not allowed."""
    missing = None
    with pytest.raises(ValueError, match="cannot be empty"):
        sanitize_user_input(missing, "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_none_allowed():
    """``None`` is returned as ``None`` when ``allow_empty`` is set."""
    outcome = sanitize_user_input(None, "test", allow_empty=True)
    assert outcome is None
|
||||
|
||||
|
||||
def test_sanitize_user_input_empty_string_not_allowed():
    """An empty string is rejected when empty values are not allowed."""
    blank = ""
    with pytest.raises(ValueError, match="cannot be empty"):
        sanitize_user_input(blank, "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_empty_string_allowed():
    """An empty string yields ``None`` when ``allow_empty`` is set."""
    outcome = sanitize_user_input("", "test", allow_empty=True)
    assert outcome is None
|
||||
|
||||
|
||||
def test_sanitize_user_input_whitespace_only():
    """A whitespace-only value is rejected as empty."""
    blank = " "
    with pytest.raises(ValueError, match="cannot be empty"):
        sanitize_user_input(blank, "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_strips_whitespace():
    """Leading and trailing whitespace is trimmed from the result."""
    cleaned = sanitize_user_input(" hello ", "test")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_too_long():
    """A value one character over ``max_length`` is rejected."""
    oversized = "a" * 256
    with pytest.raises(ValueError, match="too long"):
        sanitize_user_input(oversized, "test", max_length=255)
|
||||
|
||||
|
||||
def test_sanitize_user_input_max_length_ok():
    """A value exactly ``max_length`` long is accepted unchanged."""
    at_limit = "a" * 255
    assert sanitize_user_input(at_limit, "test", max_length=255) == "a" * 255
|
||||
|
||||
|
||||
def test_sanitize_user_input_blocks_javascript():
    """A ``javascript:`` scheme is rejected with a ValueError."""
    payload = "javascript:alert(1)"
    with pytest.raises(ValueError, match="malicious URL scheme"):
        sanitize_user_input(payload, "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_blocks_event_handler():
    """An inline event handler is rejected with a ValueError."""
    payload = "onclick=alert(1)"
    with pytest.raises(ValueError, match="malicious event handler"):
        sanitize_user_input(payload, "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_sql_keywords_not_checked_by_default():
    """SQL keywords pass through when the SQL check is not requested."""
    cleaned = sanitize_user_input("DROP TABLE", "test")
    assert cleaned == "DROP TABLE"
|
||||
|
||||
|
||||
def test_sanitize_user_input_sql_keywords_checked_when_enabled():
    """SQL keywords are rejected once ``check_sql_keywords`` is enabled."""
    payload = "DROP TABLE users"
    with pytest.raises(ValueError, match="unsafe SQL keywords"):
        sanitize_user_input(payload, "test", check_sql_keywords=True)
|
||||
|
||||
|
||||
def test_sanitize_user_input_removes_zero_width_chars():
    """A zero-width space inside the value is removed."""
    cleaned = sanitize_user_input("hel\u200blo", "test")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_xss_entity_encoded():
    """Entity-encoded XSS attempts must be neutralized."""
    # The payload is entity-encoded so the decode-then-strip path is exercised.
    result = sanitize_user_input("&lt;script&gt;alert(1)&lt;/script&gt;", "test")
    assert "<script>" not in result
|
||||
|
||||
|
||||
def test_sanitize_user_input_entity_encoded_javascript():
    """Entity-encoded javascript: scheme should be caught after decoding."""
    # NOTE(review): the payload needs an entity in it, otherwise this repeats
    # test_sanitize_user_input_blocks_javascript. "&#58;" (the colon) is an
    # assumed spelling; confirm against the upstream test.
    with pytest.raises(ValueError, match="malicious URL scheme"):
        sanitize_user_input("javascript&#58;alert(1)", "test")
|
||||
|
||||
|
||||
# --- sanitize_filter_value tests ---
|
||||
|
||||
|
||||
def test_sanitize_filter_value_integer():
    """An integer filter value is returned unchanged."""
    outcome = sanitize_filter_value(42)
    assert outcome == 42
|
||||
|
||||
|
||||
def test_sanitize_filter_value_float():
    """A float filter value is returned unchanged."""
    outcome = sanitize_filter_value(3.14)
    assert outcome == 3.14
|
||||
|
||||
|
||||
def test_sanitize_filter_value_bool():
    """A boolean filter value comes back as the same ``True`` object."""
    outcome = sanitize_filter_value(True)
    assert outcome is True
|
||||
|
||||
|
||||
def test_sanitize_filter_value_plain_string():
    """A plain string filter value is returned unchanged."""
    outcome = sanitize_filter_value("hello")
    assert outcome == "hello"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_preserves_ampersand():
    """A bare ampersand in a filter value is kept as-is."""
    outcome = sanitize_filter_value("A & B")
    assert outcome == "A & B"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_strips_html():
    """HTML tags are removed while their text content remains."""
    outcome = sanitize_filter_value("<b>hello</b>")
    assert outcome == "hello"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_too_long():
    """A 1001-character filter value is rejected as too long."""
    oversized = "a" * 1001
    with pytest.raises(ValueError, match="too long"):
        sanitize_filter_value(oversized)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_javascript():
    """A ``javascript:`` scheme is rejected with a ValueError."""
    payload = "javascript:alert(1)"
    with pytest.raises(ValueError, match="malicious URL scheme"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_xp_cmdshell():
    """A call to ``xp_cmdshell`` is rejected with a ValueError."""
    payload = "xp_cmdshell('dir')"
    with pytest.raises(ValueError, match="malicious SQL procedures"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_sp_executesql():
    """A call to ``sp_executesql`` is rejected with a ValueError."""
    payload = "sp_executesql @stmt"
    with pytest.raises(ValueError, match="malicious SQL procedures"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_union_select():
    """A UNION SELECT injection is rejected with a ValueError."""
    payload = "' UNION SELECT * FROM users"
    with pytest.raises(ValueError, match="malicious SQL patterns"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_sql_comment():
    """A ``--`` SQL comment is rejected with a ValueError."""
    payload = "value -- drop"
    with pytest.raises(ValueError, match="malicious SQL patterns"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_shell_semicolon():
    """A semicolon-chained shell command is rejected with a ValueError."""
    payload = "value;rm -rf"
    with pytest.raises(ValueError, match="unsafe shell characters"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_shell_pipe():
    """A piped shell command is rejected with a ValueError."""
    payload = "value|cat /etc/passwd"
    with pytest.raises(ValueError, match="unsafe shell characters"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_backtick():
    """Backtick command substitution is rejected with a ValueError."""
    payload = "`whoami`"
    with pytest.raises(ValueError, match="unsafe shell characters"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_hex_encoding():
    """Literal ``\\xNN`` escape sequences are rejected with a ValueError."""
    payload = "\\x41\\x42"
    with pytest.raises(ValueError, match="hex encoding"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_allows_ampersand_alone():
    """An ampersand on its own is accepted in a filter value."""
    outcome = sanitize_filter_value("AT&T")
    assert outcome == "AT&T"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_removes_zero_width_chars():
    """A zero-width space inside the filter value is removed."""
    cleaned = sanitize_filter_value("hel\u200blo")
    assert cleaned == "hello"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_or_injection():
    """A quoted OR tautology is rejected with a ValueError."""
    payload = "' OR '1'='1"
    with pytest.raises(ValueError, match="malicious SQL patterns"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_and_injection():
    """A quoted AND tautology is rejected with a ValueError."""
    payload = "' AND '1'='1"
    with pytest.raises(ValueError, match="malicious SQL patterns"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_block_comment():
    """A ``/* ... */`` block comment is rejected with a ValueError."""
    payload = "value /* comment */"
    with pytest.raises(ValueError, match="malicious SQL patterns"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_semicolon_drop():
    """A semicolon followed by DROP TABLE is rejected with a ValueError."""
    payload = "; DROP TABLE users"
    with pytest.raises(ValueError, match="malicious SQL patterns"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_parentheses():
    """``$(...)`` command substitution is rejected with a ValueError."""
    payload = "$(whoami)"
    with pytest.raises(ValueError, match="unsafe shell characters"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_dollar_sign():
    """A ``$NAME`` shell variable reference is rejected with a ValueError."""
    payload = "$HOME"
    with pytest.raises(ValueError, match="unsafe shell characters"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_event_handler():
    """An inline event handler is rejected with a ValueError."""
    payload = "onerror=alert(1)"
    with pytest.raises(ValueError, match="malicious event handler"):
        sanitize_filter_value(payload)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_xss_entity_encoded():
    """Entity-encoded XSS in filter values must be neutralized."""
    # The img tag is entity-encoded so the decode-then-strip path is exercised.
    result = sanitize_filter_value("&lt;img src=x onerror=alert(1)&gt;")
    assert "<img" not in result
|
||||
|
||||
|
||||
# --- Defense-in-depth: verify html.unescape is not used after nh3 ---
|
||||
|
||||
|
||||
def test_strip_html_tags_does_not_unescape_angle_brackets():
    """Ensure nh3 entity output is not fully unescaped back to raw HTML.

    nh3.clean may pass through HTML entities (e.g. &lt;script&gt;) from
    the input without stripping them. A full html.unescape() on nh3's
    output could reintroduce raw angle brackets, creating an XSS vector.
    """
    # Plain text passes through unchanged
    result = _strip_html_tags("safe text")
    assert result == "safe text"

    # Verify ampersand preservation still works
    result = _strip_html_tags("A & B")
    assert result == "A & B"

    # Verify real tags are stripped
    result = _strip_html_tags("<script>alert(1)</script>")
    assert "<script>" not in result

    # Entity-encoded script tags must not become real tags in the output.
    # The input here must be entity-encoded, unlike the raw case above.
    result = _strip_html_tags("&lt;script&gt;alert(1)&lt;/script&gt;")
    assert "<script>" not in result
    assert "</script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_img_onerror_entity_bypass():
    """Entity-encoded img/onerror should not survive sanitization."""
    # The tag is entity-encoded; the raw form is already covered by
    # test_strip_html_tags_strips_img_onerror.
    result = _strip_html_tags("&lt;img src=x onerror=alert(1)&gt;")
    assert "<img" not in result
    assert "onerror" not in result
|
||||
2724
tests/unit_tests/semantic_layers/mapper_test.py
Normal file
2724
tests/unit_tests/semantic_layers/mapper_test.py
Normal file
File diff suppressed because it is too large
Load Diff
621
tests/unit_tests/semantic_layers/models_test.py
Normal file
621
tests/unit_tests/semantic_layers/models_test.py
Normal file
@@ -0,0 +1,621 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""Tests for semantic layer models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from superset_core.semantic_layers.types import (
|
||||
BINARY,
|
||||
BOOLEAN,
|
||||
DATE,
|
||||
DATETIME,
|
||||
DECIMAL,
|
||||
INTEGER,
|
||||
INTERVAL,
|
||||
NUMBER,
|
||||
OBJECT,
|
||||
STRING,
|
||||
TIME,
|
||||
Day,
|
||||
Dimension,
|
||||
Metric,
|
||||
Type,
|
||||
)
|
||||
|
||||
from superset.semantic_layers.models import (
|
||||
ColumnMetadata,
|
||||
MetricMetadata,
|
||||
SemanticLayer,
|
||||
SemanticView,
|
||||
get_column_type,
|
||||
)
|
||||
from superset.utils.core import GenericDataType
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# get_column_type tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_get_column_type_temporal_date() -> None:
    """DATE is classified as a temporal type."""
    generic_type = get_column_type(DATE)
    assert generic_type == GenericDataType.TEMPORAL
|
||||
|
||||
|
||||
def test_get_column_type_temporal_datetime() -> None:
    """DATETIME is classified as a temporal type."""
    generic_type = get_column_type(DATETIME)
    assert generic_type == GenericDataType.TEMPORAL
|
||||
|
||||
|
||||
def test_get_column_type_temporal_time() -> None:
    """TIME is classified as a temporal type."""
    generic_type = get_column_type(TIME)
    assert generic_type == GenericDataType.TEMPORAL
|
||||
|
||||
|
||||
def test_get_column_type_numeric_integer() -> None:
    """INTEGER is classified as a numeric type."""
    generic_type = get_column_type(INTEGER)
    assert generic_type == GenericDataType.NUMERIC
|
||||
|
||||
|
||||
def test_get_column_type_numeric_number() -> None:
    """NUMBER is classified as a numeric type."""
    generic_type = get_column_type(NUMBER)
    assert generic_type == GenericDataType.NUMERIC
|
||||
|
||||
|
||||
def test_get_column_type_numeric_decimal() -> None:
    """DECIMAL is classified as a numeric type."""
    generic_type = get_column_type(DECIMAL)
    assert generic_type == GenericDataType.NUMERIC
|
||||
|
||||
|
||||
def test_get_column_type_numeric_interval() -> None:
    """INTERVAL is classified as a numeric type."""
    generic_type = get_column_type(INTERVAL)
    assert generic_type == GenericDataType.NUMERIC
|
||||
|
||||
|
||||
def test_get_column_type_boolean() -> None:
    """BOOLEAN is classified as a boolean type."""
    generic_type = get_column_type(BOOLEAN)
    assert generic_type == GenericDataType.BOOLEAN
|
||||
|
||||
|
||||
def test_get_column_type_string() -> None:
    """STRING is classified as the string generic type."""
    resolved = get_column_type(STRING)
    assert resolved == GenericDataType.STRING
|
||||
|
||||
|
||||
def test_get_column_type_object() -> None:
    """OBJECT is classified as the string generic type."""
    resolved = get_column_type(OBJECT)
    assert resolved == GenericDataType.STRING
|
||||
|
||||
|
||||
def test_get_column_type_binary() -> None:
    """BINARY is classified as the string generic type."""
    resolved = get_column_type(BINARY)
    assert resolved == GenericDataType.STRING
|
||||
|
||||
|
||||
def test_get_column_type_unknown() -> None:
    """An ad-hoc ``Type`` subclass is expected to resolve to STRING."""

    class UnknownType(Type):
        """Throwaway subclass declared only for this test."""

    resolved = get_column_type(UnknownType)
    assert resolved == GenericDataType.STRING
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MetricMetadata tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_metric_metadata_required_fields() -> None:
    """Only name and expression are supplied; every other attribute is None."""
    metadata = MetricMetadata(metric_name="revenue", expression="SUM(amount)")

    # The two supplied values are stored unchanged.
    assert metadata.metric_name == "revenue"
    assert metadata.expression == "SUM(amount)"

    # Everything that was not supplied must come back as None.
    unset_fields = (
        "verbose_name",
        "description",
        "d3format",
        "currency",
        "warning_text",
        "certified_by",
        "certification_details",
    )
    for field_name in unset_fields:
        assert getattr(metadata, field_name) is None
|
||||
|
||||
|
||||
def test_metric_metadata_all_fields() -> None:
    """Every attribute passed to MetricMetadata is readable back unchanged."""
    supplied = {
        "metric_name": "revenue",
        "expression": "SUM(amount)",
        "verbose_name": "Total Revenue",
        "description": "Sum of all revenue",
        "d3format": "$,.2f",
        "currency": {"symbol": "$", "symbolPosition": "prefix"},
        "warning_text": "Data may be incomplete",
        "certified_by": "Data Team",
        "certification_details": "Verified Q1 2024",
    }

    metadata = MetricMetadata(**supplied)

    for field_name, expected_value in supplied.items():
        assert getattr(metadata, field_name) == expected_value
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ColumnMetadata tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_column_metadata_required_fields() -> None:
    """With only name, type and is_dttm given, check the remaining attributes."""
    metadata = ColumnMetadata(column_name="order_date", type="DATE", is_dttm=True)

    # Values passed in explicitly.
    assert metadata.column_name == "order_date"
    assert metadata.type == "DATE"
    assert metadata.is_dttm is True

    # Flags that were not passed are expected to be True.
    for flag_name in ("groupby", "filterable"):
        assert getattr(metadata, flag_name) is True

    # All remaining attributes are expected to be None.
    unset_fields = (
        "verbose_name",
        "description",
        "expression",
        "python_date_format",
        "advanced_data_type",
        "extra",
    )
    for field_name in unset_fields:
        assert getattr(metadata, field_name) is None
|
||||
|
||||
|
||||
def test_column_metadata_all_fields() -> None:
    """Every attribute passed to ColumnMetadata is readable back unchanged."""
    boolean_fields = {"is_dttm": True, "groupby": True, "filterable": True}
    value_fields = {
        "column_name": "order_date",
        "type": "DATE",
        "verbose_name": "Order Date",
        "description": "Date of the order",
        "expression": "DATE(order_timestamp)",
        "python_date_format": "%Y-%m-%d",
        "advanced_data_type": "date",
        "extra": '{"grain": "day"}',
    }

    metadata = ColumnMetadata(**boolean_fields, **value_fields)

    # Flags are checked by identity, the rest by equality.
    for flag_name in boolean_fields:
        assert getattr(metadata, flag_name) is True
    for field_name, expected_value in value_fields.items():
        assert getattr(metadata, field_name) == expected_value
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SemanticLayer tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_semantic_layer_repr_with_name() -> None:
    """repr() of a SemanticLayer that has a name is that name."""
    expected = "My Semantic Layer"

    layer = SemanticLayer()
    layer.uuid = uuid.uuid4()
    layer.name = expected

    assert repr(layer) == expected
|
||||
|
||||
|
||||
def test_semantic_layer_repr_without_name() -> None:
    """repr() of a SemanticLayer whose name is None is the stringified uuid."""
    layer_uuid = uuid.uuid4()

    layer = SemanticLayer()
    layer.uuid = layer_uuid
    layer.name = None

    assert repr(layer) == str(layer_uuid)
|
||||
|
||||
|
||||
def test_semantic_layer_implementation_not_implemented() -> None:
    """Reading ``implementation`` on a bare SemanticLayer raises."""
    bare_layer = SemanticLayer()

    with pytest.raises(NotImplementedError):
        # The attribute access itself is what is expected to raise.
        _unused = bare_layer.implementation
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SemanticView tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
def mock_dimensions() -> list[Dimension]:
    """Provide two dimensions: a DATE one with a Day grain and a STRING one."""
    order_date = Dimension(
        id="orders.order_date",
        name="order_date",
        type=DATE,
        definition="orders.order_date",
        description="Date of the order",
        grain=Day,
    )
    category = Dimension(
        id="products.category",
        name="category",
        type=STRING,
        definition="products.category",
        description="Product category",
        grain=None,
    )
    return [order_date, category]
|
||||
|
||||
|
||||
@pytest.fixture
def mock_metrics() -> list[Metric]:
    """Provide two metrics: a NUMBER revenue sum and an INTEGER row count."""
    revenue = Metric(
        id="orders.revenue",
        name="revenue",
        type=NUMBER,
        definition="SUM(orders.amount)",
        description="Total revenue",
    )
    order_count = Metric(
        id="orders.count",
        name="order_count",
        type=INTEGER,
        definition="COUNT(*)",
        description="Number of orders",
    )
    return [revenue, order_count]
|
||||
|
||||
|
||||
@pytest.fixture
def mock_implementation(
    mock_dimensions: list[Dimension],
    mock_metrics: list[Metric],
) -> MagicMock:
    """Build a MagicMock whose getters return the dimension/metric fixtures."""
    stub = MagicMock()
    # Dotted keys configure the return values of the child mocks.
    stub.configure_mock(
        **{
            "get_dimensions.return_value": mock_dimensions,
            "get_metrics.return_value": mock_metrics,
            "uid.return_value": "semantic_view_uid_123",
        }
    )
    return stub
|
||||
|
||||
|
||||
@pytest.fixture
def semantic_view(
    mock_implementation: MagicMock,
    request: pytest.FixtureRequest,
) -> SemanticView:
    """Create a SemanticView whose ``implementation`` is the mocked one.

    The previous version entered the ``patch.object`` context around a bare
    ``pass`` and left it before returning, so the view handed to the test
    never saw ``mock_implementation``. The patch is now started here and
    stopped by a finalizer, keeping it active for the whole test that uses
    this fixture.

    Note that the patch replaces the attribute on the ``SemanticView`` class,
    so every instance created during the test sees the mock as well.

    Args:
        mock_implementation: Mock returned by the patched property.
        request: Pytest request object, used to register the teardown.

    Returns:
        A populated SemanticView instance.
    """
    view = SemanticView()
    view.name = "Orders View"
    view.description = "View of order data"
    view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
    view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
    view.cache_timeout = 3600
    view.configuration = "{}"

    # Start the patch manually so it outlives this function; the finalizer
    # restores the original attribute once the test has finished.
    patcher = patch.object(
        SemanticView,
        "implementation",
        new_callable=lambda: property(lambda self: mock_implementation),
    )
    patcher.start()
    request.addfinalizer(patcher.stop)

    return view
|
||||
|
||||
|
||||
def test_semantic_view_repr_with_name() -> None:
    """repr() of a SemanticView that has a name is that name."""
    expected = "My View"

    view = SemanticView()
    view.uuid = uuid.uuid4()
    view.name = expected

    assert repr(view) == expected
|
||||
|
||||
|
||||
def test_semantic_view_repr_without_name() -> None:
    """repr() of a SemanticView whose name is None is the stringified uuid."""
    view_uuid = uuid.uuid4()

    view = SemanticView()
    view.uuid = view_uuid
    view.name = None

    assert repr(view) == str(view_uuid)
|
||||
|
||||
|
||||
def test_semantic_view_type() -> None:
    """``type`` of a fresh SemanticView is the string "semantic_view"."""
    view_type = SemanticView().type
    assert view_type == "semantic_view"
|
||||
|
||||
|
||||
def test_semantic_view_offset() -> None:
    """``offset`` of a fresh SemanticView is zero."""
    view_offset = SemanticView().offset
    assert view_offset == 0
|
||||
|
||||
|
||||
def test_semantic_view_is_rls_supported() -> None:
    """``is_rls_supported`` of a fresh SemanticView is False."""
    rls_flag = SemanticView().is_rls_supported
    assert rls_flag is False
|
||||
|
||||
|
||||
def test_semantic_view_query_language() -> None:
    """``query_language`` of a fresh SemanticView is None."""
    language = SemanticView().query_language
    assert language is None
|
||||
|
||||
|
||||
def test_semantic_view_get_query_str() -> None:
    """get_query_str() returns the fixed placeholder message."""
    empty_query: dict = {}
    query_text = SemanticView().get_query_str(empty_query)
    assert query_text == "Not implemented for semantic layers"
|
||||
|
||||
|
||||
def test_semantic_view_get_extra_cache_keys() -> None:
    """get_extra_cache_keys() returns an empty list for an empty query."""
    empty_query: dict = {}
    cache_keys = SemanticView().get_extra_cache_keys(empty_query)
    assert cache_keys == []
|
||||
|
||||
|
||||
def test_semantic_view_perm() -> None:
    """``perm`` joins the layer uuid hex and the view uuid hex with "::"."""
    view = SemanticView()
    view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
    view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")

    expected_perm = (
        "87654321432187654321876543218765::12345678123456781234567812345678"
    )
    assert view.perm == expected_perm
|
||||
|
||||
|
||||
def test_semantic_view_uid(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
    mock_metrics: list[Metric],
) -> None:
    """``uid`` yields the value produced by the implementation's ``uid()``."""
    view = SemanticView()
    view.name = "Test View"
    view.uuid = uuid.uuid4()
    view.semantic_layer_uuid = uuid.uuid4()

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        assert view.uid == "semantic_view_uid_123"
|
||||
|
||||
|
||||
def test_semantic_view_metrics(
    mock_implementation: MagicMock,
    mock_metrics: list[Metric],
) -> None:
    """``metrics`` exposes one entry per metric from the implementation."""
    view = SemanticView()

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        metrics = view.metrics
        assert len(metrics) == 2

        revenue = metrics[0]
        assert revenue.metric_name == "revenue"
        assert revenue.expression == "SUM(orders.amount)"
        assert revenue.description == "Total revenue"

        order_count = metrics[1]
        assert order_count.metric_name == "order_count"
|
||||
|
||||
|
||||
def test_semantic_view_columns(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """``columns`` exposes one entry per dimension from the implementation."""
    view = SemanticView()

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        columns = view.columns
        assert len(columns) == 2

        date_column = columns[0]
        assert date_column.column_name == "order_date"
        assert date_column.type == "DATE"
        assert date_column.is_dttm is True
        assert date_column.description == "Date of the order"

        category_column = columns[1]
        assert category_column.column_name == "category"
        assert category_column.type == "STRING"
        assert category_column.is_dttm is False
|
||||
|
||||
|
||||
def test_semantic_view_column_names(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """``column_names`` lists the dimension names in order."""
    view = SemanticView()

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        names = view.column_names
        assert names == ["order_date", "category"]
|
||||
|
||||
|
||||
def test_semantic_view_get_time_grains(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """``get_time_grains`` reports a single Day / P1D entry for the fixtures."""
    view = SemanticView()

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        grains = view.get_time_grains
        assert len(grains) == 1

        only_grain = grains[0]
        assert only_grain["name"] == "Day"
        assert only_grain["duration"] == "P1D"
|
||||
|
||||
|
||||
def test_semantic_view_has_drill_by_columns_all_exist(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """has_drill_by_columns() is True when every requested column exists."""
    view = SemanticView()
    requested = ["order_date", "category"]

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        outcome = view.has_drill_by_columns(requested)
        assert outcome is True
|
||||
|
||||
|
||||
def test_semantic_view_has_drill_by_columns_some_missing(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """has_drill_by_columns() is False when a requested column is absent."""
    view = SemanticView()
    requested = ["order_date", "nonexistent"]

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        outcome = view.has_drill_by_columns(requested)
        assert outcome is False
|
||||
|
||||
|
||||
def test_semantic_view_has_drill_by_columns_empty(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
) -> None:
    """has_drill_by_columns() is True for an empty list of columns."""
    view = SemanticView()
    requested: list[str] = []

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        outcome = view.has_drill_by_columns(requested)
        assert outcome is True
|
||||
|
||||
|
||||
def test_semantic_view_data(
    mock_implementation: MagicMock,
    mock_dimensions: list[Dimension],
    mock_metrics: list[Metric],
) -> None:
    """``data`` carries the view's core fields, columns, metrics and extras."""
    view = SemanticView()
    view.uuid = uuid.UUID("12345678-1234-5678-1234-567812345678")
    view.semantic_layer_uuid = uuid.UUID("87654321-4321-8765-4321-876543218765")
    view.name = "Orders View"
    view.description = "View of order data"
    view.cache_timeout = 3600

    # Swap the class-level attribute for a property returning the mock.
    implementation_stub = property(lambda _self: mock_implementation)
    with patch.object(SemanticView, "implementation", new=implementation_stub):
        data = view.data

        # Core scalar fields.
        assert data["id"] == "12345678123456781234567812345678"
        assert data["uid"] == "semantic_view_uid_123"
        assert data["type"] == "semantic_view"
        assert data["name"] == "Orders View"
        assert data["description"] == "View of order data"
        assert data["cache_timeout"] == 3600

        # Column payload, derived from the dimension fixtures.
        columns = data["columns"]
        assert len(columns) == 2
        date_column = columns[0]
        assert date_column["column_name"] == "order_date"
        assert date_column["type"] == "DATE"
        assert date_column["is_dttm"] is True
        assert date_column["type_generic"] == GenericDataType.TEMPORAL
        category_column = columns[1]
        assert category_column["column_name"] == "category"
        assert category_column["type"] == "STRING"
        assert category_column["type_generic"] == GenericDataType.STRING

        # Metric payload, derived from the metric fixtures.
        metrics = data["metrics"]
        assert len(metrics) == 2
        assert metrics[0]["metric_name"] == "revenue"
        assert metrics[0]["expression"] == "SUM(orders.amount)"
        assert metrics[1]["metric_name"] == "order_count"

        # Aggregated column type / name summaries.
        expected_types = [GenericDataType.TEMPORAL, GenericDataType.STRING]
        assert data["column_types"] == expected_types
        assert data["column_names"] == {"order_date", "category"}

        # Remaining fields.
        assert data["table_name"] == "Orders View"
        assert data["datasource_name"] == "Orders View"
        assert data["offset"] == 0
|
||||
|
||||
|
||||
def test_semantic_view_get_query_result(
    mock_implementation: MagicMock,
) -> None:
    """get_query_result() forwards the query object to ``get_results``."""
    view = SemanticView()
    query_object = MagicMock()
    expected_result = MagicMock()

    # Replace the module-level get_results helper with a stub.
    target = "superset.semantic_layers.models.get_results"
    with patch(target, return_value=expected_result) as get_results_stub:
        actual_result = view.get_query_result(query_object)

    get_results_stub.assert_called_once_with(query_object)
    assert actual_result == expected_result
|
||||
|
||||
|
||||
def test_semantic_view_implementation() -> None:
    """``implementation`` asks the layer's implementation for the view.

    The lookup is expected to receive the view name and the configuration
    decoded from its JSON string.
    """
    view = SemanticView()
    view.name = "Test View"
    view.configuration = '{"key": "value"}'

    view_impl_stub = MagicMock()
    layer_stub = MagicMock()
    layer_stub.implementation.get_semantic_view.return_value = view_impl_stub
    view.semantic_layer = layer_stub

    # Drop any value already stored on the instance under this name.
    view.__dict__.pop("implementation", None)

    resolved = view.implementation

    lookup = layer_stub.implementation.get_semantic_view
    lookup.assert_called_once_with("Test View", {"key": "value"})
    assert resolved == view_impl_stub
|
||||
Reference in New Issue
Block a user