feat(timeshift): Add support for date range timeshifts (#34375)

This commit is contained in:
Mehmet Salih Yavuz
2025-08-05 19:31:40 +03:00
committed by GitHub
parent 407fb67f1e
commit 761daec53d
6 changed files with 695 additions and 88 deletions

View File

@@ -537,13 +537,11 @@ class TestQueryContext(SupersetTestCase):
sql for sql in responses["queries"][0]["query"].split(";") if sql.strip()
]
assert len(sqls) == 3
# 1 year ago
# 1 year ago - should only contain the shifted range
assert re.search(r"1989-01-01.+1990-01-01", sqls[1], re.S)
assert re.search(r"1990-01-01.+1991-01-01", sqls[1], re.S)
# # 1 year later
# # 1 year later - should only contain the shifted range
assert re.search(r"1991-01-01.+1992-01-01", sqls[2], re.S)
assert re.search(r"1990-01-01.+1991-01-01", sqls[2], re.S)
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_processing_time_offsets_cache(self):
@@ -1182,6 +1180,273 @@ OFFSET 0
) is not None
@with_feature_flags(DATE_RANGE_TIMESHIFTS_ENABLED=True)
def test_date_range_timeshift_enabled(app_context, physical_dataset):
    """Test date range timeshift functionality when feature flag is enabled."""
    query = {
        "columns": [
            {
                "label": "col6",
                "sqlExpression": "col6",
                "columnType": "BASE_AXIS",
                "timeGrain": "P1M",
            }
        ],
        "metrics": [
            {
                "label": "SUM(col1)",
                "expressionType": "SQL",
                "sqlExpression": "SUM(col1)",
            }
        ],
        "time_offsets": ["2001-01-01 : 2001-12-31"],  # Date range timeshift
        "filters": [
            {
                "col": "col6",
                "op": "TEMPORAL_RANGE",
                "val": "2002-01-01 : 2002-12-31",
            }
        ],
    }
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[query],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    payload = query_context.get_df_payload(query_context.queries[0])
    result_df = payload["df"]

    # Both the main metric and the joined offset metric column must exist.
    assert "SUM(col1)" in result_df.columns
    assert "SUM(col1)__2001-01-01 : 2001-12-31" in result_df.columns

    # The generated SQL should contain a main query plus an offset query.
    generated_sqls = payload["query"].split(";")
    assert len(generated_sqls) >= 2  # Main query + offset query

    # Main query should filter for 2002 data.
    main_sql = generated_sqls[0]
    assert "2002-01-01" in main_sql
    assert "2002-12-31" in main_sql or "2003-01-01" in main_sql

    # Offset query should filter for 2001 data.
    offset_sql = generated_sqls[1]
    assert "2001-01-01" in offset_sql
    assert "2001-12-31" in offset_sql or "2002-01-01" in offset_sql
@with_feature_flags(DATE_RANGE_TIMESHIFTS_ENABLED=False)
def test_date_range_timeshift_disabled(app_context, physical_dataset):
    """Test that date range timeshift raises error when feature flag is disabled."""
    from superset.exceptions import QueryObjectValidationError

    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    {
                        "label": "col6",
                        "sqlExpression": "col6",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P1M",
                    }
                ],
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": ["2001-01-01 : 2001-12-31"],  # Date range timeshift
                "filters": [
                    {
                        "col": "col6",
                        "op": "TEMPORAL_RANGE",
                        "val": "2002-01-01 : 2002-12-31",
                    }
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    # With the feature flag off, executing the payload must be rejected.
    with pytest.raises(
        QueryObjectValidationError, match="Date range timeshifts are not enabled"
    ):
        query_context.get_df_payload(query_context.queries[0])
@with_feature_flags(DATE_RANGE_TIMESHIFTS_ENABLED=True)
def test_date_range_timeshift_multiple_periods(app_context, physical_dataset):
    """Test date range timeshift with multiple comparison periods."""
    offsets = [
        "2001-01-01 : 2001-12-31",  # Previous year
        "2000-01-01 : 2000-12-31",  # Two years ago
    ]
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    {
                        "label": "col6",
                        "sqlExpression": "col6",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P1M",
                    }
                ],
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": offsets,
                "filters": [
                    {
                        "col": "col6",
                        "op": "TEMPORAL_RANGE",
                        "val": "2002-01-01 : 2002-12-31",
                    }
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    payload = query_context.get_df_payload(query_context.queries[0])
    result_df = payload["df"]

    # The main metric plus one joined column per requested offset period.
    assert "SUM(col1)" in result_df.columns
    for offset in offsets:
        assert f"SUM(col1)__{offset}" in result_df.columns

    # Main query + one query per offset period.
    generated_sqls = payload["query"].split(";")
    assert len(generated_sqls) >= 3
@with_feature_flags(DATE_RANGE_TIMESHIFTS_ENABLED=True)
def test_date_range_timeshift_invalid_format(app_context, physical_dataset):
    """Test that invalid date range format raises appropriate error."""
    from superset.commands.chart.exceptions import TimeDeltaAmbiguousError

    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    {
                        "label": "col6",
                        "sqlExpression": "col6",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P1M",
                    }
                ],
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": ["invalid-date-range"],  # Invalid format
                "filters": [
                    {
                        "col": "col6",
                        "op": "TEMPORAL_RANGE",
                        "val": "2002-01-01 : 2002-12-31",
                    }
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    # An offset that is neither a relative delta nor a date range must fail.
    with pytest.raises(TimeDeltaAmbiguousError):
        query_context.get_df_payload(query_context.queries[0])
@with_feature_flags(DATE_RANGE_TIMESHIFTS_ENABLED=True)
def test_date_range_timeshift_mixed_with_relative_offsets(
    app_context, physical_dataset
):
    """Test mixing date range timeshifts with traditional relative offsets."""
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    {
                        "label": "col6",
                        "sqlExpression": "col6",
                        "columnType": "BASE_AXIS",
                        "timeGrain": "P1M",
                    }
                ],
                "metrics": [
                    {
                        "label": "SUM(col1)",
                        "expressionType": "SQL",
                        "sqlExpression": "SUM(col1)",
                    }
                ],
                "time_offsets": [
                    "2001-01-01 : 2001-12-31",  # Date range timeshift
                    "1 year ago",  # Traditional relative offset
                ],
                "filters": [
                    {
                        "col": "col6",
                        "op": "TEMPORAL_RANGE",
                        "val": "2002-01-01 : 2002-12-31",
                    }
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )

    payload = query_context.get_df_payload(query_context.queries[0])
    result_df = payload["df"]

    # Both offset flavors should produce their own joined metric column.
    assert "SUM(col1)" in result_df.columns
    assert "SUM(col1)__2001-01-01 : 2001-12-31" in result_df.columns
    assert "SUM(col1)__1 year ago" in result_df.columns

    # Check that all queries were generated.
    generated_sqls = payload["query"].split(";")
    assert len(generated_sqls) >= 3  # Main query + 2 offset queries
def test_virtual_dataset_with_comments(app_context, virtual_dataset_with_comments):
if backend() == "mysql":
return

View File

@@ -240,3 +240,134 @@ def test_get_data_xlsx_apply_column_types_error(
mock_query_context.result_format = ChartDataResultFormat.XLSX
with pytest.raises(ValueError, match="Conversion error"):
processor.get_data(df, coltypes)
def test_is_valid_date_range_format(processor):
    """Test that date range format validation works correctly."""
    valid_candidates = ["2023-01-01 : 2023-01-31", "2020-12-25 : 2020-12-31"]
    invalid_candidates = ["1 day ago", "2023-01-01", "invalid"]

    # Properly formed "<start> : <end>" ranges are accepted.
    for candidate in valid_candidates:
        assert processor.is_valid_date_range(candidate) is True

    # Relative deltas, bare dates, and garbage are rejected.
    for candidate in invalid_candidates:
        assert processor.is_valid_date_range(candidate) is False
def test_is_valid_date_range_static_format():
    """Test that static date range format validation works correctly."""
    is_range = QueryContextProcessor.is_valid_date_range_static

    # Properly formed "<start> : <end>" ranges are accepted.
    for candidate in ("2023-01-01 : 2023-01-31", "2020-12-25 : 2020-12-31"):
        assert is_range(candidate) is True

    # Relative deltas, bare dates, and garbage are rejected.
    for candidate in ("1 day ago", "2023-01-01", "invalid"):
        assert is_range(candidate) is False
def test_processing_time_offsets_date_range_logic(processor):
    """Test that date range timeshift logic works correctly with feature flag checks."""
    date_range = "2023-01-01 : 2023-01-31"
    relative_offset = "1 year ago"

    # Instance-level validation distinguishes ranges from relative offsets.
    assert processor.is_valid_date_range(date_range) is True
    assert processor.is_valid_date_range(relative_offset) is False

    # The static variant must agree with the instance method.
    assert QueryContextProcessor.is_valid_date_range_static(date_range) is True
    assert QueryContextProcessor.is_valid_date_range_static(relative_offset) is False
def test_feature_flag_validation_logic():
    """Test that feature flag validation logic works as expected."""
    from superset.extensions import feature_flag_manager

    # This tests the concept - actual feature flag value depends on config
    # The important thing is that the code checks for DATE_RANGE_TIMESHIFTS_ENABLED
    flag_name = "DATE_RANGE_TIMESHIFTS_ENABLED"

    # Exercise the lookup mechanism; the concrete value varies with config,
    # but the manager must always answer with a boolean.
    enabled = feature_flag_manager.is_feature_enabled(flag_name)
    assert isinstance(enabled, bool)
def test_join_offset_dfs_date_range_basic(processor):
    """Test basic join logic for date range offsets.

    Stubs out both the feature-flag check and the underlying dataframe join
    so the test exercises only ``join_offset_dfs``'s orchestration: it must
    invoke the join helper exactly once and return the joined frame.
    """
    # Create simple test data: a main frame and a same-shaped offset frame
    # keyed on the shared dimension "dim1".
    main_df = pd.DataFrame({"dim1": ["A", "B", "C"], "metric1": [10, 20, 30]})
    offset_df = pd.DataFrame({"dim1": ["A", "B", "C"], "metric1": [5, 10, 15]})
    # Mock query context
    mock_query = MagicMock()
    mock_query.granularity = "date_col"
    processor._query_context.queries = [mock_query]
    # Test basic join with date range offset
    offset_dfs = {"2023-01-01 : 2023-01-31": offset_df}
    join_keys = ["dim1"]
    # Force the DATE_RANGE_TIMESHIFTS_ENABLED code path regardless of the
    # environment's actual feature-flag configuration.
    with patch(
        "superset.common.query_context_processor.feature_flag_manager"
    ) as mock_ff:
        mock_ff.is_feature_enabled.return_value = True
        # Replace the real join helper so we can assert on how it is called
        # and control the frame the method hands back.
        with patch(
            "superset.common.query_context_processor.dataframe_utils.left_join_df"
        ) as mock_join:
            mock_join.return_value = pd.DataFrame(
                {
                    "dim1": ["A", "B", "C"],
                    "metric1": [10, 20, 30],
                    "metric1 2023-01-01 : 2023-01-31": [5, 10, 15],
                }
            )
            result_df = processor.join_offset_dfs(
                main_df, offset_dfs, time_grain=None, join_keys=join_keys
            )
            # Verify join was called (once per offset entry) and that the
            # stubbed 3-row result is passed through unchanged.
            mock_join.assert_called_once()
            assert len(result_df) == 3
def test_get_offset_custom_or_inherit_with_inherit(processor):
    """Test get_offset_custom_or_inherit with 'inherit' option."""
    range_start = pd.Timestamp("2024-01-01")
    range_end = pd.Timestamp("2024-01-10")

    offset = processor.get_offset_custom_or_inherit("inherit", range_start, range_end)

    # "inherit" shifts back by the length of the selected range (9 days).
    assert offset == "9 days ago"
def test_get_offset_custom_or_inherit_with_date(processor):
    """Test get_offset_custom_or_inherit with specific date."""
    range_start = pd.Timestamp("2024-01-10")
    range_end = pd.Timestamp("2024-01-20")

    offset = processor.get_offset_custom_or_inherit(
        "2024-01-05", range_start, range_end
    )

    # A concrete date yields the gap between it and the range start (5 days).
    assert offset == "5 days ago"
def test_get_offset_custom_or_inherit_with_invalid_date(processor):
    """Test get_offset_custom_or_inherit with invalid date format."""
    range_start = pd.Timestamp("2024-01-10")
    range_end = pd.Timestamp("2024-01-20")

    offset = processor.get_offset_custom_or_inherit(
        "invalid-date", range_start, range_end
    )

    # An unparseable value falls through to an empty offset string.
    assert offset == ""