mirror of
https://github.com/apache/superset.git
synced 2026-04-18 15:44:57 +00:00
fix(datasets): prevent double time filter application in virtual datasets (#35890)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -2074,6 +2074,11 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
metrics_exprs = []
|
||||
|
||||
time_filters = []
|
||||
|
||||
# Process FROM clause early to populate removed_filters from virtual dataset
|
||||
# templates before we decide whether to add time filters
|
||||
tbl, cte = self.get_from_clause(template_processor)
|
||||
|
||||
if granularity:
|
||||
if granularity not in columns_by_name or not dttm_col:
|
||||
raise QueryObjectValidationError(
|
||||
@@ -2096,6 +2101,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
self.always_filter_main_dttm
|
||||
and self.main_dttm_col in self.dttm_cols
|
||||
and self.main_dttm_col != dttm_col.column_name
|
||||
and self.main_dttm_col not in removed_filters
|
||||
):
|
||||
time_filters.append(
|
||||
self.get_time_filter(
|
||||
@@ -2106,13 +2112,21 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
)
|
||||
)
|
||||
|
||||
time_filter_column = self.get_time_filter(
|
||||
time_col=dttm_col,
|
||||
start_dttm=from_dttm,
|
||||
end_dttm=to_dttm,
|
||||
template_processor=template_processor,
|
||||
# Check if time filter should be skipped because it was handled in template.
|
||||
# Check both the actual column name and __timestamp alias
|
||||
should_skip_time_filter = (
|
||||
dttm_col.column_name in removed_filters
|
||||
or utils.DTTM_ALIAS in removed_filters
|
||||
)
|
||||
time_filters.append(time_filter_column)
|
||||
|
||||
if not should_skip_time_filter:
|
||||
time_filter_column = self.get_time_filter(
|
||||
time_col=dttm_col,
|
||||
start_dttm=from_dttm,
|
||||
end_dttm=to_dttm,
|
||||
template_processor=template_processor,
|
||||
)
|
||||
time_filters.append(time_filter_column)
|
||||
|
||||
# Always remove duplicates by column name, as sometimes `metrics_exprs`
|
||||
# can have the same name as a groupby column (e.g. when users use
|
||||
@@ -2131,8 +2145,6 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
|
||||
qry = sa.select(select_exprs)
|
||||
|
||||
tbl, cte = self.get_from_clause(template_processor)
|
||||
|
||||
if groupby_all_columns:
|
||||
qry = qry.group_by(*groupby_all_columns.values())
|
||||
|
||||
@@ -2177,7 +2189,17 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
is_metric_filter = True
|
||||
filter_grain = flt.get("grain")
|
||||
|
||||
if get_column_name(flt_col) in removed_filters:
|
||||
# Check if this filter should be skipped because it was handled in
|
||||
# template. Special handling for __timestamp alias: check both the
|
||||
# alias and the actual column name
|
||||
filter_col_name = get_column_name(flt_col)
|
||||
should_skip_filter = filter_col_name in removed_filters
|
||||
if not should_skip_filter and flt_col == utils.DTTM_ALIAS and col_obj:
|
||||
# For __timestamp, also check if the actual datetime column was
|
||||
# removed
|
||||
should_skip_filter = col_obj.column_name in removed_filters
|
||||
|
||||
if should_skip_filter:
|
||||
# Skip generating SQLA filter when the jinja template handles it.
|
||||
continue
|
||||
|
||||
|
||||
369
tests/unit_tests/models/test_time_filter_double_application.py
Normal file
369
tests/unit_tests/models/test_time_filter_double_application.py
Normal file
@@ -0,0 +1,369 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""Test for double time filter application in virtual datasets (Issue #34894)"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from flask import Flask
|
||||
from freezegun import freeze_time
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
|
||||
from superset.models.core import Database
|
||||
from superset.superset_typing import QueryObjectDict
|
||||
|
||||
|
||||
def test_time_filter_applied_once_with_remove_filter_in_virtual_dataset(
    mocker: MockerFixture,
    app: Flask,
) -> None:
    """
    Test that time filter is applied only once when using get_time_filter with
    remove_filter=True in a virtual dataset.

    This test verifies the fix for GitHub issue #34894 where time filters were
    being applied twice - once in the inner query (virtual dataset) and again in
    the outer query.

    Expected behavior: When remove_filter=True is used in the virtual dataset,
    the time filter should only be applied in the inner query, not in the outer query.
    """
    # use a sqlite database just for the dttm conversion from its DB engine spec
    database = Database(
        id=1,
        database_name="test_db",
        sqlalchemy_uri="sqlite://",
    )

    # Create a virtual dataset that uses get_time_filter with
    # remove_filter=True
    virtual_dataset_sql = """
        SELECT *
        FROM my_table
        WHERE
            dttm >= {{
                get_time_filter('dttm', remove_filter=True, target_type='DATE')
                    .from_expr
            }}
            AND dttm < {{
                get_time_filter('dttm', remove_filter=True, target_type='DATE')
                    .to_expr
            }}
    """

    columns = [
        TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="value", type="INTEGER"),
    ]

    virtual_dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=virtual_dataset_sql,
        columns=columns,
        main_dttm_col="dttm",
        database=database,
    )

    # Mock the security manager to avoid RLS checks
    mocker.patch(
        "superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
        return_value=[],
    )
    mocker.patch(
        "superset.connectors.sqla.models.security_manager.is_guest_user",
        return_value=False,
    )

    # Create a query object with a time filter
    query_obj: QueryObjectDict = {
        "granularity": "dttm",
        "from_dttm": datetime(2024, 1, 1),
        "to_dttm": datetime(2024, 1, 31),
        "is_timeseries": False,
        "filter": [
            {
                "col": "dttm",
                "op": "TEMPORAL_RANGE",
                "val": "2024-01-01 : 2024-01-31",
            }
        ],
        "metrics": [],
        "columns": ["value"],
    }

    with (
        freeze_time("2024-01-15"),
        app.test_request_context(
            json={
                "queries": [
                    {
                        "filters": [
                            {
                                "col": "dttm",
                                "op": "TEMPORAL_RANGE",
                                "val": "2024-01-01 : 2024-01-31",
                            }
                        ],
                    }
                ],
            }
        ),
    ):
        # Get the query SQL
        sqla_query = virtual_dataset.get_query_str_extended(query_obj, mutate=False)
        generated_sql = sqla_query.sql.lower()

        # Verify that the time filter appears only in the inner query from the
        # virtual dataset and NOT in the outer query WHERE clause.
        #
        # Initialize defaults up front rather than probing locals() after the
        # fact -- the `"outer_where_part" in locals()` check was fragile and
        # hid the uninitialized-variable path.
        has_outer_where = False
        outer_clause = "N/A"
        # Check if there's a WHERE clause after the subquery closes:
        # split by "as virtual_table" and inspect the outer part only.
        if "as virtual_table" in generated_sql:
            outer_where_part = generated_sql.split("as virtual_table")[1]
            # Keep only the text before GROUP BY / ORDER BY / LIMIT so those
            # trailing clauses cannot confuse the WHERE check.
            # (str.split(sep)[0] is a no-op when sep is absent, so no
            # per-clause membership test is needed.)
            for clause in ("group by", "order by", "limit"):
                outer_where_part = outer_where_part.split(clause)[0]
            has_outer_where = (
                "where" in outer_where_part and "dttm" in outer_where_part
            )
            outer_clause = outer_where_part

        # Assert that there's NO time filter in the outer query
        assert not has_outer_where, (
            f"Time filter should only be applied in the inner query "
            f"(virtual dataset), not in the outer query when using "
            f"remove_filter=True. Found outer WHERE clause: {outer_clause}"
        )
|
||||
|
||||
|
||||
def test_time_filter_removed_from_outer_query_simple_case(
    mocker: MockerFixture, app: Flask
) -> None:
    """
    Test the basic mechanism: when a Jinja template uses get_time_filter with
    remove_filter=True, the filter should be added to removed_filters list and
    not be applied in the SQLAlchemy query generation.

    This is a simpler test that verifies the core mechanism works at a single level.
    """
    # use a sqlite database just for the dttm conversion from its DB engine spec
    database = Database(
        id=1,
        database_name="test_db",
        sqlalchemy_uri="sqlite://",
    )

    columns = [
        TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="value", type="INTEGER"),
    ]

    # Simple query with Jinja template that removes the filter
    simple_sql = """
        SELECT *
        FROM my_table
        WHERE dttm >= {{
            get_time_filter('dttm', remove_filter=True, target_type='DATE').from_expr
        }}
    """

    dataset = SqlaTable(
        table_name="test_table",
        sql=simple_sql,
        columns=columns,
        main_dttm_col="dttm",
        database=database,
    )

    # Neutralize RLS / guest-user checks so query generation runs standalone.
    for patch_target, patched_value in (
        (
            "superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
            [],
        ),
        (
            "superset.connectors.sqla.models.security_manager.is_guest_user",
            False,
        ),
    ):
        mocker.patch(patch_target, return_value=patched_value)

    # One temporal filter, reused (as independent copies) in both the query
    # object and the simulated request payload.
    temporal_filter = {
        "col": "dttm",
        "op": "TEMPORAL_RANGE",
        "val": "2024-01-01 : 2024-01-31",
    }

    query_obj: QueryObjectDict = {
        "granularity": "dttm",
        "from_dttm": datetime(2024, 1, 1),
        "to_dttm": datetime(2024, 1, 31),
        "is_timeseries": False,
        "filter": [dict(temporal_filter)],
        "metrics": [],
        "columns": ["value"],
    }

    request_payload = {"queries": [{"filters": [dict(temporal_filter)]}]}

    with (
        freeze_time("2024-01-15"),
        app.test_request_context(json=request_payload),
    ):
        sqla_query = dataset.get_query_str_extended(query_obj, mutate=False)
        generated_sql = sqla_query.sql.lower()

        # In this simple case, the filter should appear exactly once
        # (in the template, not added by SQLAlchemy)
        # Count occurrences of date filter patterns
        dttm_count = generated_sql.count("dttm")

        # We expect to see dttm in the SELECT and WHERE (from template only)
        # But NOT duplicated by SQLAlchemy's filter application
        assert dttm_count <= 3, (
            f"Expected at most 3 occurrences of 'dttm' "
            f"(SELECT, WHERE from template), but found {dttm_count}. "
            f"This suggests the filter is being applied multiple times."
        )
|
||||
|
||||
|
||||
def test_time_filter_with_timestamp_alias(mocker: MockerFixture, app: Flask) -> None:
    """
    Test that remove_filter works correctly when using __timestamp alias.

    This test specifically addresses the issue where time filters use the __timestamp
    alias in timeseries queries, but get_time_filter uses the actual column name.
    The fix ensures both are recognized as the same column.
    """
    # use a sqlite database just for the dttm conversion from its DB engine spec
    database = Database(
        id=1,
        database_name="test_db",
        sqlalchemy_uri="sqlite://",
    )

    columns = [
        TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="value", type="INTEGER"),
    ]

    # Virtual dataset with Jinja template that removes the filter using
    # actual column name
    virtual_sql = """
        SELECT *
        FROM my_table
        WHERE dttm >= {{
            get_time_filter('dttm', remove_filter=True, target_type='DATE').from_expr
        }}
        AND dttm < {{
            get_time_filter('dttm', remove_filter=True, target_type='DATE').to_expr
        }}
    """

    dataset = SqlaTable(
        table_name="test_table",
        sql=virtual_sql,
        columns=columns,
        main_dttm_col="dttm",
        database=database,
        metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")],
    )

    # Neutralize RLS / guest-user checks so query generation runs standalone.
    mocker.patch(
        "superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
        return_value=[],
    )
    mocker.patch(
        "superset.connectors.sqla.models.security_manager.is_guest_user",
        return_value=False,
    )

    # Query using __timestamp alias (as timeseries charts do); independent
    # copies go into the query object and the simulated request payload.
    alias_filter = {
        "col": "__timestamp",  # Using the special alias
        "op": "TEMPORAL_RANGE",
        "val": "2024-01-01 : 2024-01-31",
    }

    query_obj: QueryObjectDict = {
        "granularity": "dttm",
        "from_dttm": datetime(2024, 1, 1),
        "to_dttm": datetime(2024, 1, 31),
        "is_timeseries": True,  # This will cause __timestamp alias to be used
        "filter": [dict(alias_filter)],
        "metrics": ["count"],
        "columns": [],
    }

    with (
        freeze_time("2024-01-15"),
        app.test_request_context(
            json={"queries": [{"filters": [dict(alias_filter)]}]}
        ),
    ):
        sqla_query = dataset.get_query_str_extended(query_obj, mutate=False)
        generated_sql = sqla_query.sql.lower()

        # The filter should only appear in the inner query (virtual dataset)
        # NOT in the outer query WHERE clause
        # Check if there's a WHERE clause after the subquery
        parts = generated_sql.split(")")
        if len(parts) > 1:
            outer_part = parts[-1]
            # The outer part should not have additional date filter conditions
            # (It may have GROUP BY, ORDER BY, LIMIT, but not WHERE with dttm)
            if "where" in outer_part:
                assert "dttm" not in outer_part or "to_date" not in outer_part, (
                    "Time filter should not be applied in outer query when using "
                    "remove_filter=True in virtual dataset. The __timestamp alias "
                    "should be recognized as equivalent to the actual column name."
                )
|
||||
Reference in New Issue
Block a user