mirror of
https://github.com/apache/superset.git
synced 2026-05-09 01:46:06 +00:00
fix(time-comparison): use chart row_limit instead of instance config in offset queries (#39490)
This commit is contained in:
@@ -23,7 +23,6 @@ from unittest.mock import Mock, patch
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from flask import current_app
|
||||
from pandas import DateOffset
|
||||
|
||||
from superset import db
|
||||
@@ -43,6 +42,7 @@ from superset.utils.core import (
|
||||
QueryStatus,
|
||||
)
|
||||
from superset.utils.pandas_postprocessing.utils import FLAT_COLUMN_SEPARATOR
|
||||
from tests.conftest import with_config
|
||||
from tests.integration_tests.base_tests import SupersetTestCase
|
||||
from tests.integration_tests.conftest import (
|
||||
only_postgresql,
|
||||
@@ -68,6 +68,130 @@ def get_sql_text(payload: dict[str, Any]) -> str:
|
||||
return response["query"]
|
||||
|
||||
|
||||
def _time_comparison_offset_queries_payload() -> dict[str, Any]:
    """Build a birth-names payload configured for time-comparison tests.

    The single query gets a daily (P1D) x-axis on ``ds``, one metric, one
    groupby column, timeseries mode, and a fixed 1990–1991 time range.
    """
    payload = get_query_context("birth_names")
    query = payload["queries"][0]  # alias: mutates the payload in place
    query["columns"] = [
        {
            "timeGrain": "P1D",
            "columnType": "BASE_AXIS",
            "sqlExpression": "ds",
            "label": "ds",
            "expressionType": "SQL",
        }
    ]
    query["metrics"] = ["sum__num"]
    query["groupby"] = ["name"]
    query["is_timeseries"] = True
    query["time_range"] = "1990 : 1991"
    return payload
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offset_comparison_queries_use_chart_row_limit(
    query_result_mock: Mock,
) -> None:
    """Comparison SQL covers the main query's window (row_limit + row_offset)."""
    payload = _time_comparison_offset_queries_payload()
    payload["queries"][0]["row_limit"] = 100
    payload["queries"][0]["row_offset"] = 10

    # Canned result standing in for the main (non-offset) query.
    main_result = Mock()
    main_result.df = pd.DataFrame(
        {
            "__timestamp": ["1990-01-01", "1990-01-01"],
            "name": ["zban", "ahwb"],
            "sum__num": [43571, 27225],
        }
    )
    query_result_mock.side_effect = [main_result]

    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    df = query_context.get_query_result(query_object).df

    # Reload with two offsets so the datasource builds one subquery per offset.
    payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]

    def cache_key_fn(qo: QueryObject, time_offset: str, time_grain: Any) -> str | None:
        return query_context._processor.query_cache_key(
            qo, time_offset=time_offset, time_grain=time_grain
        )

    def cache_timeout_fn() -> int:
        return query_context._processor.get_cache_timeout()

    time_offsets_obj = query_context.datasource.processing_time_offsets(
        df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
    )
    sqls = time_offsets_obj["queries"]
    assert len(sqls) == 2
    expected_windows = ("1989-01-01.+1990-01-01", "1991-01-01.+1992-01-01")
    for sql, window in zip(sqls, expected_windows):
        assert re.search(window, sql, re.S)
        # Subquery limit is widened to row_limit + row_offset (100 + 10) and
        # the OFFSET clause is dropped, so the LEFT JOIN covers the main window.
        assert re.search(r"LIMIT 110", sql, re.S)
        assert not re.search(r"OFFSET 10", sql, re.S)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@with_config({"ROW_LIMIT": 4242})
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offset_comparison_queries_use_config_row_limit_without_chart_limit(
    query_result_mock: Mock,
) -> None:
    """Chart with row_offset only: subquery widens the config ROW_LIMIT by row_offset.

    The schema fills `row_limit` with `app.config["ROW_LIMIT"]` when the payload
    omits it, so the query_object arrives with row_limit=4242. The subquery then
    covers the window via row_limit + row_offset = 4252.
    """
    payload = _time_comparison_offset_queries_payload()
    query = payload["queries"][0]  # alias: mutates the payload in place
    del query["row_limit"]
    query["row_offset"] = 10

    # Canned result standing in for the main (non-offset) query.
    main_result = Mock()
    main_result.df = pd.DataFrame(
        {
            "__timestamp": ["1990-01-01", "1990-01-01"],
            "name": ["zban", "ahwb"],
            "sum__num": [43571, 27225],
        }
    )
    query_result_mock.side_effect = [main_result]

    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    df = query_context.get_query_result(query_object).df

    # Reload with two offsets so the datasource builds one subquery per offset.
    query["time_offsets"] = ["1 year ago", "1 year later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]

    def cache_key_fn(qo: QueryObject, time_offset: str, time_grain: Any) -> str | None:
        return query_context._processor.query_cache_key(
            qo, time_offset=time_offset, time_grain=time_grain
        )

    def cache_timeout_fn() -> int:
        return query_context._processor.get_cache_timeout()

    time_offsets_obj = query_context.datasource.processing_time_offsets(
        df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
    )
    sqls = time_offsets_obj["queries"]
    assert len(sqls) == 2
    widened_limit = re.compile(r"LIMIT\s+4252\b")
    for sql in sqls:
        # Limit widened to 4242 + 10; the subquery carries no OFFSET clause.
        assert widened_limit.search(sql)
        assert not re.search(r"OFFSET 10", sql, re.S)
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason=(
|
||||
"TODO: Fix test class to work with DuckDB example data format. "
|
||||
@@ -794,28 +918,17 @@ class TestQueryContext(SupersetTestCase):
|
||||
|
||||
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
|
||||
@patch("superset.common.query_context.QueryContext.get_query_result")
|
||||
def test_time_offsets_in_query_object_no_limit(self, query_result_mock):
|
||||
def test_time_offsets_in_query_object_uses_chart_row_limit(self, query_result_mock):
|
||||
"""
|
||||
Ensure that time_offsets can generate the correct queries and
|
||||
it doesn't use the row_limit or row_offset from the original
|
||||
query object
|
||||
Subquery honors the chart's row_limit (widened by row_offset so the
|
||||
LEFT JOIN covers the main query's paginated window) and drops
|
||||
row_offset. Before this fix, row_limit was replaced with
|
||||
app.config["ROW_LIMIT"], which caused the main query and offset
|
||||
subquery to fetch different row counts.
|
||||
"""
|
||||
payload = get_query_context("birth_names")
|
||||
payload["queries"][0]["columns"] = [
|
||||
{
|
||||
"timeGrain": "P1D",
|
||||
"columnType": "BASE_AXIS",
|
||||
"sqlExpression": "ds",
|
||||
"label": "ds",
|
||||
"expressionType": "SQL",
|
||||
}
|
||||
]
|
||||
payload["queries"][0]["metrics"] = ["sum__num"]
|
||||
payload["queries"][0]["groupby"] = ["name"]
|
||||
payload["queries"][0]["is_timeseries"] = True
|
||||
payload = _time_comparison_offset_queries_payload()
|
||||
payload["queries"][0]["row_limit"] = 100
|
||||
payload["queries"][0]["row_offset"] = 10
|
||||
payload["queries"][0]["time_range"] = "1990 : 1991"
|
||||
|
||||
initial_data = {
|
||||
"__timestamp": ["1990-01-01", "1990-01-01"],
|
||||
@@ -839,33 +952,86 @@ class TestQueryContext(SupersetTestCase):
|
||||
query_context = ChartDataQueryContextSchema().load(payload)
|
||||
query_object = query_context.queries[0]
|
||||
|
||||
def cache_key_fn(qo, time_offset, time_grain):
|
||||
def cache_key_fn(
|
||||
qo: QueryObject, time_offset: str, time_grain: Any
|
||||
) -> str | None:
|
||||
return query_context._processor.query_cache_key(
|
||||
qo, time_offset=time_offset, time_grain=time_grain
|
||||
)
|
||||
|
||||
def cache_timeout_fn():
|
||||
def cache_timeout_fn() -> int:
|
||||
return query_context._processor.get_cache_timeout()
|
||||
|
||||
time_offsets_obj = query_context.datasource.processing_time_offsets(
|
||||
df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
|
||||
)
|
||||
sqls = time_offsets_obj["queries"]
|
||||
row_limit_value = current_app.config["ROW_LIMIT"]
|
||||
row_limit_pattern_with_config_value = r"LIMIT " + re.escape(
|
||||
str(row_limit_value)
|
||||
)
|
||||
assert len(sqls) == 2
|
||||
# 1 year ago
|
||||
# 1 year ago — subquery widens row_limit to cover main window (100 + 10)
|
||||
assert re.search(r"1989-01-01.+1990-01-01", sqls[0], re.S)
|
||||
assert not re.search(r"LIMIT 100", sqls[0], re.S)
|
||||
assert re.search(r"LIMIT 110", sqls[0], re.S)
|
||||
assert not re.search(r"OFFSET 10", sqls[0], re.S)
|
||||
assert re.search(row_limit_pattern_with_config_value, sqls[0], re.S)
|
||||
# 1 year later
|
||||
assert re.search(r"1991-01-01.+1992-01-01", sqls[1], re.S)
|
||||
assert not re.search(r"LIMIT 100", sqls[1], re.S)
|
||||
assert re.search(r"LIMIT 110", sqls[1], re.S)
|
||||
assert not re.search(r"OFFSET 10", sqls[1], re.S)
|
||||
|
||||
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
|
||||
@with_config({"ROW_LIMIT": 4242})
|
||||
@patch("superset.common.query_context.QueryContext.get_query_result")
|
||||
def test_time_offsets_use_config_row_limit_when_chart_has_offset_only(
|
||||
self, query_result_mock
|
||||
):
|
||||
"""
|
||||
Chart with row_offset only: subquery widens the config ROW_LIMIT by row_offset.
|
||||
|
||||
The schema fills row_limit with app.config["ROW_LIMIT"] (4242) when the
|
||||
payload omits it, so the subquery covers the window via 4242 + 10 = 4252.
|
||||
"""
|
||||
payload = _time_comparison_offset_queries_payload()
|
||||
del payload["queries"][0]["row_limit"]
|
||||
payload["queries"][0]["row_offset"] = 10
|
||||
|
||||
initial_data = {
|
||||
"__timestamp": ["1990-01-01", "1990-01-01"],
|
||||
"name": ["zban", "ahwb"],
|
||||
"sum__num": [43571, 27225],
|
||||
}
|
||||
initial_df = pd.DataFrame(initial_data)
|
||||
|
||||
mock_query_result = Mock()
|
||||
mock_query_result.df = initial_df
|
||||
query_result_mock.side_effect = [mock_query_result]
|
||||
|
||||
query_context = ChartDataQueryContextSchema().load(payload)
|
||||
query_object = query_context.queries[0]
|
||||
query_result = query_context.get_query_result(query_object)
|
||||
df = query_result.df
|
||||
|
||||
payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
|
||||
query_context = ChartDataQueryContextSchema().load(payload)
|
||||
query_object = query_context.queries[0]
|
||||
|
||||
def cache_key_fn(
|
||||
qo: QueryObject, time_offset: str, time_grain: Any
|
||||
) -> str | None:
|
||||
return query_context._processor.query_cache_key(
|
||||
qo, time_offset=time_offset, time_grain=time_grain
|
||||
)
|
||||
|
||||
def cache_timeout_fn() -> int:
|
||||
return query_context._processor.get_cache_timeout()
|
||||
|
||||
time_offsets_obj = query_context.datasource.processing_time_offsets(
|
||||
df, query_object, cache_key_fn, cache_timeout_fn, query_context.force
|
||||
)
|
||||
sqls = time_offsets_obj["queries"]
|
||||
limit_pattern = re.compile(r"LIMIT\s+4252\b")
|
||||
assert len(sqls) == 2
|
||||
assert limit_pattern.search(sqls[0])
|
||||
assert not re.search(r"OFFSET 10", sqls[0], re.S)
|
||||
assert limit_pattern.search(sqls[1])
|
||||
assert not re.search(r"OFFSET 10", sqls[1], re.S)
|
||||
assert re.search(row_limit_pattern_with_config_value, sqls[1], re.S)
|
||||
|
||||
|
||||
def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
|
||||
|
||||
Reference in New Issue
Block a user