fix(time-comparison): use chart row_limit instead of instance config in offset queries (#39490)

This commit is contained in:
jesperct
2026-05-01 20:24:59 -03:00
committed by GitHub
parent dbe26d81ce
commit d8dd2d99b3
3 changed files with 433 additions and 34 deletions

View File

@@ -23,7 +23,6 @@ from unittest.mock import Mock, patch
import numpy as np
import pandas as pd
import pytest
from flask import current_app
from pandas import DateOffset
from superset import db
@@ -43,6 +42,7 @@ from superset.utils.core import (
QueryStatus,
)
from superset.utils.pandas_postprocessing.utils import FLAT_COLUMN_SEPARATOR
from tests.conftest import with_config
from tests.integration_tests.base_tests import SupersetTestCase
from tests.integration_tests.conftest import (
only_postgresql,
@@ -68,6 +68,130 @@ def get_sql_text(payload: dict[str, Any]) -> str:
return response["query"]
def _time_comparison_offset_queries_payload() -> dict[str, Any]:
    """Birth-names chart payload with time comparison and x-axis suitable for tests."""
    payload = get_query_context("birth_names")
    # Work on the first (and only) query of the payload in place.
    query = payload["queries"][0]
    query["columns"] = [
        {
            "timeGrain": "P1D",
            "columnType": "BASE_AXIS",
            "sqlExpression": "ds",
            "label": "ds",
            "expressionType": "SQL",
        }
    ]
    query.update(
        {
            "metrics": ["sum__num"],
            "groupby": ["name"],
            "is_timeseries": True,
            "time_range": "1990 : 1991",
        }
    )
    return payload
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offset_comparison_queries_use_chart_row_limit(
    query_result_mock: Mock,
) -> None:
    """Comparison SQL covers the main query's window (row_limit + row_offset)."""
    payload = _time_comparison_offset_queries_payload()
    payload["queries"][0]["row_limit"] = 100
    payload["queries"][0]["row_offset"] = 10
    seed_df = pd.DataFrame(
        {
            "__timestamp": ["1990-01-01", "1990-01-01"],
            "name": ["zban", "ahwb"],
            "sum__num": [43571, 27225],
        }
    )
    fake_result = Mock()
    fake_result.df = seed_df
    query_result_mock.side_effect = [fake_result]

    # First pass: obtain the "main query" dataframe (mocked above).
    query_context = ChartDataQueryContextSchema().load(payload)
    df = query_context.get_query_result(query_context.queries[0]).df

    # Second pass: reload with time offsets so the comparison subqueries run.
    payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]

    cache_key_fn = lambda qo, time_offset, time_grain: (
        query_context._processor.query_cache_key(
            qo, time_offset=time_offset, time_grain=time_grain
        )
    )
    cache_timeout_fn = lambda: query_context._processor.get_cache_timeout()

    time_offsets_obj = query_context.datasource.processing_time_offsets(
        df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
    )
    sqls = time_offsets_obj["queries"]
    assert len(sqls) == 2
    # Each offset subquery spans its shifted window, widens the limit to
    # row_limit + row_offset (110), and drops the OFFSET clause.
    for sql, window in zip(
        sqls, (r"1989-01-01.+1990-01-01", r"1991-01-01.+1992-01-01")
    ):
        assert re.search(window, sql, re.S)
        assert re.search(r"LIMIT 110", sql, re.S)
        assert not re.search(r"OFFSET 10", sql, re.S)
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@with_config({"ROW_LIMIT": 4242})
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offset_comparison_queries_use_config_row_limit_without_chart_limit(
    query_result_mock: Mock,
) -> None:
    """Offset-only chart: subquery widens the config ROW_LIMIT by row_offset.

    When the payload omits `row_limit`, the schema defaults it to
    `app.config["ROW_LIMIT"]` (4242 here), so the comparison subquery must
    cover the paginated window via row_limit + row_offset = 4252.
    """
    payload = _time_comparison_offset_queries_payload()
    del payload["queries"][0]["row_limit"]
    payload["queries"][0]["row_offset"] = 10
    seed_df = pd.DataFrame(
        {
            "__timestamp": ["1990-01-01", "1990-01-01"],
            "name": ["zban", "ahwb"],
            "sum__num": [43571, 27225],
        }
    )
    fake_result = Mock()
    fake_result.df = seed_df
    query_result_mock.side_effect = [fake_result]

    # First pass: obtain the "main query" dataframe (mocked above).
    query_context = ChartDataQueryContextSchema().load(payload)
    df = query_context.get_query_result(query_context.queries[0]).df

    # Second pass: reload with time offsets so the comparison subqueries run.
    payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]

    cache_key_fn = lambda qo, time_offset, time_grain: (
        query_context._processor.query_cache_key(
            qo, time_offset=time_offset, time_grain=time_grain
        )
    )
    cache_timeout_fn = lambda: query_context._processor.get_cache_timeout()

    time_offsets_obj = query_context.datasource.processing_time_offsets(
        df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
    )
    sqls = time_offsets_obj["queries"]
    assert len(sqls) == 2
    # Both subqueries use config limit + offset (4242 + 10) and no OFFSET.
    limit_pattern = re.compile(r"LIMIT\s+4252\b")
    for sql in sqls:
        assert limit_pattern.search(sql)
        assert not re.search(r"OFFSET 10", sql, re.S)
@pytest.mark.skip(
reason=(
"TODO: Fix test class to work with DuckDB example data format. "
@@ -794,28 +918,17 @@ class TestQueryContext(SupersetTestCase):
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offsets_in_query_object_no_limit(self, query_result_mock):
def test_time_offsets_in_query_object_uses_chart_row_limit(self, query_result_mock):
"""
Ensure that time_offsets can generate the correct queries and
it doesnt use the row_limit nor row_offset from the original
query object
Subquery honors the chart's row_limit (widened by row_offset so the
LEFT JOIN covers the main query's paginated window) and drops
row_offset. Before this fix, row_limit was replaced with
app.config["ROW_LIMIT"], which caused the main query and offset
subquery to fetch different row counts.
"""
payload = get_query_context("birth_names")
payload["queries"][0]["columns"] = [
{
"timeGrain": "P1D",
"columnType": "BASE_AXIS",
"sqlExpression": "ds",
"label": "ds",
"expressionType": "SQL",
}
]
payload["queries"][0]["metrics"] = ["sum__num"]
payload["queries"][0]["groupby"] = ["name"]
payload["queries"][0]["is_timeseries"] = True
payload = _time_comparison_offset_queries_payload()
payload["queries"][0]["row_limit"] = 100
payload["queries"][0]["row_offset"] = 10
payload["queries"][0]["time_range"] = "1990 : 1991"
initial_data = {
"__timestamp": ["1990-01-01", "1990-01-01"],
@@ -839,33 +952,86 @@ class TestQueryContext(SupersetTestCase):
query_context = ChartDataQueryContextSchema().load(payload)
query_object = query_context.queries[0]
def cache_key_fn(qo, time_offset, time_grain):
def cache_key_fn(
qo: QueryObject, time_offset: str, time_grain: Any
) -> str | None:
return query_context._processor.query_cache_key(
qo, time_offset=time_offset, time_grain=time_grain
)
def cache_timeout_fn():
def cache_timeout_fn() -> int:
return query_context._processor.get_cache_timeout()
time_offsets_obj = query_context.datasource.processing_time_offsets(
df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
)
sqls = time_offsets_obj["queries"]
row_limit_value = current_app.config["ROW_LIMIT"]
row_limit_pattern_with_config_value = r"LIMIT " + re.escape(
str(row_limit_value)
)
assert len(sqls) == 2
# 1 year ago
# 1 year ago — subquery widens row_limit to cover main window (100 + 10)
assert re.search(r"1989-01-01.+1990-01-01", sqls[0], re.S)
assert not re.search(r"LIMIT 100", sqls[0], re.S)
assert re.search(r"LIMIT 110", sqls[0], re.S)
assert not re.search(r"OFFSET 10", sqls[0], re.S)
assert re.search(row_limit_pattern_with_config_value, sqls[0], re.S)
# 1 year later
assert re.search(r"1991-01-01.+1992-01-01", sqls[1], re.S)
assert not re.search(r"LIMIT 100", sqls[1], re.S)
assert re.search(r"LIMIT 110", sqls[1], re.S)
assert not re.search(r"OFFSET 10", sqls[1], re.S)
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@with_config({"ROW_LIMIT": 4242})
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offsets_use_config_row_limit_when_chart_has_offset_only(
self, query_result_mock
):
"""
Chart with row_offset only: subquery widens the config ROW_LIMIT by row_offset.
The schema fills row_limit with app.config["ROW_LIMIT"] (4242) when the
payload omits it, so the subquery covers the window via 4242 + 10 = 4252.
"""
payload = _time_comparison_offset_queries_payload()
del payload["queries"][0]["row_limit"]
payload["queries"][0]["row_offset"] = 10
initial_data = {
"__timestamp": ["1990-01-01", "1990-01-01"],
"name": ["zban", "ahwb"],
"sum__num": [43571, 27225],
}
initial_df = pd.DataFrame(initial_data)
mock_query_result = Mock()
mock_query_result.df = initial_df
query_result_mock.side_effect = [mock_query_result]
query_context = ChartDataQueryContextSchema().load(payload)
query_object = query_context.queries[0]
query_result = query_context.get_query_result(query_object)
df = query_result.df
payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
query_context = ChartDataQueryContextSchema().load(payload)
query_object = query_context.queries[0]
def cache_key_fn(
qo: QueryObject, time_offset: str, time_grain: Any
) -> str | None:
return query_context._processor.query_cache_key(
qo, time_offset=time_offset, time_grain=time_grain
)
def cache_timeout_fn() -> int:
return query_context._processor.get_cache_timeout()
time_offsets_obj = query_context.datasource.processing_time_offsets(
df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
)
sqls = time_offsets_obj["queries"]
limit_pattern = re.compile(r"LIMIT\s+4252\b")
assert len(sqls) == 2
assert limit_pattern.search(sqls[0])
assert not re.search(r"OFFSET 10", sqls[0], re.S)
assert limit_pattern.search(sqls[1])
assert not re.search(r"OFFSET 10", sqls[1], re.S)
assert re.search(row_limit_pattern_with_config_value, sqls[1], re.S)
def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):