mirror of
https://github.com/apache/superset.git
synced 2026-06-24 17:09:20 +00:00
Compare commits
3 Commits
chore/ci-f
...
msyavuz/fe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
45c8fef646 | ||
|
|
0e4e9f231c | ||
|
|
8d5a6a9918 |
@@ -132,6 +132,26 @@ export const advancedAnalyticsControls: ControlPanelSectionConfig = {
|
||||
},
|
||||
},
|
||||
],
|
||||
[
|
||||
{
|
||||
name: 'time_compare_full_range',
|
||||
config: {
|
||||
type: 'CheckboxControl',
|
||||
label: t('Show full range for time shift'),
|
||||
default: false,
|
||||
description: t(
|
||||
'Plot each time-shifted series across its full time range instead ' +
|
||||
'of truncating it to the main series. Useful for comparing a ' +
|
||||
'partial current period (e.g. today so far) against complete ' +
|
||||
'prior periods (e.g. all of yesterday).',
|
||||
),
|
||||
visibility: ({ controls }) =>
|
||||
Boolean(controls?.time_compare?.value) &&
|
||||
(!Array.isArray(controls?.time_compare?.value) ||
|
||||
controls.time_compare.value.length > 0),
|
||||
},
|
||||
},
|
||||
],
|
||||
[
|
||||
{
|
||||
name: 'comparison_type',
|
||||
|
||||
@@ -318,14 +318,25 @@ function createAdvancedAnalyticsSection(
|
||||
): ControlPanelSectionConfig {
|
||||
const aaWithSuffix = cloneDeep(sections.advancedAnalyticsControls);
|
||||
aaWithSuffix.label = label;
|
||||
// `time_compare_full_range` is only wired into the regular timeseries query
|
||||
// builder, not the mixed-timeseries one, so drop it here to avoid showing a
|
||||
// control that has no effect.
|
||||
aaWithSuffix.controlSetRows = aaWithSuffix.controlSetRows
|
||||
.map(row =>
|
||||
row.filter(
|
||||
control =>
|
||||
(control as CustomControlItem)?.name !== 'time_compare_full_range',
|
||||
),
|
||||
)
|
||||
.filter(row => row.length > 0);
|
||||
if (!controlSuffix) {
|
||||
return aaWithSuffix;
|
||||
}
|
||||
aaWithSuffix.controlSetRows.forEach(row =>
|
||||
row.forEach((control: CustomControlItem) => {
|
||||
if (control?.name) {
|
||||
// eslint-disable-next-line no-param-reassign
|
||||
control.name = `${control.name}${controlSuffix}`;
|
||||
row.forEach(control => {
|
||||
const item = control as CustomControlItem;
|
||||
if (item?.name) {
|
||||
item.name = `${item.name}${controlSuffix}`;
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -82,6 +82,11 @@ export default function buildQuery(formData: QueryFormData) {
|
||||
? formData.time_compare
|
||||
: [];
|
||||
|
||||
// When comparing against prior periods, optionally keep each shifted series at
|
||||
// its full time range instead of truncating it to the main series' range.
|
||||
const time_compare_full_range =
|
||||
time_offsets.length > 0 && Boolean(formData.time_compare_full_range);
|
||||
|
||||
return [
|
||||
{
|
||||
...baseQueryObject,
|
||||
@@ -92,6 +97,7 @@ export default function buildQuery(formData: QueryFormData) {
|
||||
// todo: move `normalizeOrderBy to extractQueryFields`
|
||||
orderby: normalizeOrderBy(baseQueryObject).orderby,
|
||||
time_offsets,
|
||||
time_compare_full_range,
|
||||
/* Note that:
|
||||
1. The resample, rolling, cum, timeCompare operators should be after pivot.
|
||||
2. Resample must come before rolling so that imputed values are
|
||||
|
||||
@@ -381,6 +381,15 @@ export default function transformProps(
|
||||
const array = ensureIsArray(chartProps.rawFormData?.time_compare);
|
||||
const inverted = invert(verboseMap);
|
||||
|
||||
// With the "full range" time-shift option, offset series are outer-joined onto
|
||||
// the main series, which inserts null rows into the main series wherever the
|
||||
// comparison period has data the current period lacks. Connect nulls so the
|
||||
// main line stays continuous (matching the default left-join appearance) rather
|
||||
// than fragmenting at every inserted gap.
|
||||
const timeCompareFullRange = Boolean(
|
||||
chartProps.rawFormData?.time_compare_full_range,
|
||||
);
|
||||
|
||||
const offsetLineWidths: { [key: string]: number } = {};
|
||||
|
||||
// For horizontal bar charts, calculate min/max from data to avoid cutting off labels
|
||||
@@ -478,7 +487,7 @@ export default function transformProps(
|
||||
colorScaleKey,
|
||||
{
|
||||
area,
|
||||
connectNulls: derivedSeries,
|
||||
connectNulls: derivedSeries || timeCompareFullRange,
|
||||
filterState,
|
||||
seriesContexts,
|
||||
markerEnabled,
|
||||
|
||||
@@ -1455,6 +1455,18 @@ class ChartDataQueryObjectSchema(Schema):
|
||||
fields.String(),
|
||||
allow_none=True,
|
||||
)
|
||||
time_compare_full_range = fields.Boolean(
|
||||
required=False,
|
||||
allow_none=True,
|
||||
metadata={
|
||||
"description": (
|
||||
"When using a time comparison (time_offsets), plot each shifted "
|
||||
"series across its full time range instead of truncating it to the "
|
||||
"main series' range. Useful for comparing a partial current period "
|
||||
"against complete prior periods."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@post_load
|
||||
def rename_deprecated_fields(
|
||||
|
||||
@@ -105,6 +105,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
||||
series_limit: int
|
||||
series_limit_metric: Metric | None
|
||||
time_offsets: list[str]
|
||||
time_compare_full_range: bool
|
||||
time_shift: str | None
|
||||
time_range: str | None
|
||||
to_dttm: datetime | None
|
||||
@@ -162,6 +163,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
||||
self.to_dttm = kwargs.get("to_dttm")
|
||||
self.result_type = kwargs.get("result_type")
|
||||
self.time_offsets = kwargs.get("time_offsets", [])
|
||||
self.time_compare_full_range = kwargs.get("time_compare_full_range", False)
|
||||
self.inner_from_dttm = kwargs.get("inner_from_dttm")
|
||||
self.inner_to_dttm = kwargs.get("inner_to_dttm")
|
||||
self._rename_deprecated_fields(kwargs)
|
||||
@@ -410,6 +412,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
|
||||
"group_others_when_limit_reached": self.group_others_when_limit_reached,
|
||||
"to_dttm": self.to_dttm,
|
||||
"time_shift": self.time_shift,
|
||||
"time_compare_full_range": self.time_compare_full_range,
|
||||
}
|
||||
return query_object_dict
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
from typing import Any, TYPE_CHECKING
|
||||
from typing import Any, Literal, TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@@ -32,9 +32,14 @@ def left_join_df(
|
||||
join_keys: list[str],
|
||||
lsuffix: str = "",
|
||||
rsuffix: str = "",
|
||||
how: Literal["left", "right", "inner", "outer", "cross"] = "left",
|
||||
) -> pd.DataFrame:
|
||||
# `how` defaults to "left" so callers that only want the left frame's rows are
|
||||
# unaffected. Passing how="outer" keeps right-only rows, which is used by the
|
||||
# time-comparison "full range" option so historical series are not truncated to
|
||||
# the main series' time range.
|
||||
df = left_df.set_index(join_keys).join(
|
||||
right_df.set_index(join_keys), lsuffix=lsuffix, rsuffix=rsuffix
|
||||
right_df.set_index(join_keys), how=how, lsuffix=lsuffix, rsuffix=rsuffix
|
||||
)
|
||||
df.reset_index(inplace=True)
|
||||
return df
|
||||
|
||||
@@ -33,6 +33,7 @@ from typing import (
|
||||
Callable,
|
||||
cast,
|
||||
ClassVar,
|
||||
Literal,
|
||||
NamedTuple,
|
||||
Optional,
|
||||
TYPE_CHECKING,
|
||||
@@ -130,7 +131,11 @@ from superset.utils.core import (
|
||||
SqlExpressionType,
|
||||
TIME_COMPARISON,
|
||||
)
|
||||
from superset.utils.date_parser import get_past_or_future, normalize_time_delta
|
||||
from superset.utils.date_parser import (
|
||||
get_past_or_future,
|
||||
normalize_time_delta,
|
||||
TimeDeltaAmbiguousError,
|
||||
)
|
||||
from superset.utils.dates import datetime_to_epoch
|
||||
from superset.utils.rls import apply_rls
|
||||
|
||||
@@ -2013,6 +2018,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
offset_dfs,
|
||||
time_grain,
|
||||
join_keys,
|
||||
full_range=getattr(query_object, "time_compare_full_range", False),
|
||||
)
|
||||
|
||||
return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys)
|
||||
@@ -2210,7 +2216,11 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
return offset_df, join_keys
|
||||
|
||||
def _perform_join(
|
||||
self, df: pd.DataFrame, offset_df: pd.DataFrame, actual_join_keys: list[str]
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
offset_df: pd.DataFrame,
|
||||
actual_join_keys: list[str],
|
||||
how: Literal["left", "right", "inner", "outer", "cross"] = "left",
|
||||
) -> pd.DataFrame:
|
||||
"""Perform the appropriate join operation."""
|
||||
if actual_join_keys:
|
||||
@@ -2219,6 +2229,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
right_df=offset_df,
|
||||
join_keys=actual_join_keys,
|
||||
rsuffix=R_SUFFIX,
|
||||
how=how,
|
||||
)
|
||||
else:
|
||||
temp_key = "__temp_join_key__"
|
||||
@@ -2230,6 +2241,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
right_df=offset_df,
|
||||
join_keys=[temp_key],
|
||||
rsuffix=R_SUFFIX,
|
||||
how=how,
|
||||
)
|
||||
|
||||
# Remove temporary join keys
|
||||
@@ -2245,6 +2257,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
offset_dfs: dict[str, pd.DataFrame],
|
||||
time_grain: str | None,
|
||||
join_keys: list[str],
|
||||
full_range: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Join offset DataFrames with the main DataFrame.
|
||||
@@ -2253,6 +2266,10 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
:param offset_dfs: A list of offset DataFrames.
|
||||
:param time_grain: The time grain used to calculate the temporal join key.
|
||||
:param join_keys: The keys to join on.
|
||||
:param full_range: When True, time-shifted (offset) series keep their full
|
||||
time range instead of being truncated to the main series' range. This
|
||||
uses an outer join so offset-only rows (e.g. the rest of a prior day when
|
||||
the current day is still in progress) are preserved.
|
||||
"""
|
||||
join_column_producer = app.config["TIME_GRAIN_JOIN_COLUMN_PRODUCERS"].get(
|
||||
time_grain
|
||||
@@ -2280,13 +2297,60 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
join_column_producer,
|
||||
)
|
||||
|
||||
df = self._perform_join(df, offset_df, actual_join_keys)
|
||||
# The full-range option is only meaningful for relative offsets aligned
|
||||
# on a temporal join column (time_grain set). Date-range offsets and the
|
||||
# grain-less path keep the existing left-join behavior.
|
||||
use_outer_join = (
|
||||
full_range
|
||||
and bool(time_grain)
|
||||
and not is_date_range_offset
|
||||
and bool(join_keys)
|
||||
)
|
||||
how: Literal["left", "outer"] = "outer" if use_outer_join else "left"
|
||||
|
||||
df = self._perform_join(df, offset_df, actual_join_keys, how=how)
|
||||
|
||||
if use_outer_join:
|
||||
df = self._coalesce_offset_index(df, offset, join_keys)
|
||||
|
||||
df = self._apply_cleanup_logic(
|
||||
df, offset, time_grain, join_keys, is_date_range_offset
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
def _coalesce_offset_index(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
offset: str,
|
||||
join_keys: list[str],
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Rebuild the temporal x-axis after an outer join with an offset DataFrame.
|
||||
|
||||
Offset-only rows (those with no matching row in the main series) have a null
|
||||
x-axis value because the join happens on the normalized offset join column,
|
||||
not the raw temporal column. Their real timestamp lives in the suffixed
|
||||
right-hand column, expressed in the offset's own time range (e.g. "yesterday
|
||||
15:00"). Shifting it forward by the offset places it on the main series'
|
||||
axis (e.g. "today 15:00") so the comparison line spans the full period.
|
||||
"""
|
||||
x_axis = join_keys[0]
|
||||
offset_x_axis = f"{x_axis}{R_SUFFIX}"
|
||||
if x_axis not in df.columns or offset_x_axis not in df.columns:
|
||||
return df
|
||||
|
||||
# normalize_time_delta returns a negative delta for "... ago" offsets, so
|
||||
# subtracting it shifts the historical timestamp forward onto the main axis.
|
||||
try:
|
||||
forward_shift = DateOffset(**normalize_time_delta(offset))
|
||||
except (ValueError, TimeDeltaAmbiguousError):
|
||||
return df
|
||||
|
||||
shifted = df[offset_x_axis] - forward_shift
|
||||
df[x_axis] = df[x_axis].fillna(shifted)
|
||||
return df
|
||||
|
||||
def add_offset_join_column(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
|
||||
@@ -221,6 +221,7 @@ class QueryObjectDict(TypedDict, total=False):
|
||||
group_others_when_limit_reached: bool
|
||||
to_dttm: datetime | None
|
||||
time_shift: str | None
|
||||
time_compare_full_range: bool
|
||||
post_processing: list[dict[str, Any]]
|
||||
|
||||
# Additional fields used throughout the codebase
|
||||
|
||||
@@ -53,6 +53,9 @@ _datasource._perform_join = ExploreMixin._perform_join.__get__(_datasource)
|
||||
_datasource._apply_cleanup_logic = ExploreMixin._apply_cleanup_logic.__get__(
|
||||
_datasource
|
||||
)
|
||||
_datasource._coalesce_offset_index = ExploreMixin._coalesce_offset_index.__get__(
|
||||
_datasource
|
||||
)
|
||||
# Static methods don't need binding - assign directly
|
||||
_datasource.generate_join_column = ExploreMixin.generate_join_column
|
||||
_datasource.is_valid_date_range_static = ExploreMixin.is_valid_date_range_static
|
||||
@@ -211,6 +214,91 @@ def test_join_offset_dfs_with_month_granularity():
|
||||
assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
def test_join_offset_dfs_full_range_keeps_historical_tail():
|
||||
"""
|
||||
With full_range=True the offset (historical) series keeps its full time range
|
||||
even when the main series ends earlier.
|
||||
|
||||
Simulates "today so far" (main, ends at 01:00) compared against "1 day ago"
|
||||
(a complete prior day, runs to 02:00). The 02:00 historical point must survive
|
||||
and be aligned onto today's axis, with the main metric left null there.
|
||||
"""
|
||||
# Main series: today, only two hours of data so far.
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [Timestamp("2021-01-02 00:00"), Timestamp("2021-01-02 01:00")],
|
||||
"V": [1.0, 2.0],
|
||||
}
|
||||
)
|
||||
# Offset series: the full prior day (already renamed metric column "B").
|
||||
offset_df = DataFrame(
|
||||
{
|
||||
"A": [
|
||||
Timestamp("2021-01-01 00:00"),
|
||||
Timestamp("2021-01-01 01:00"),
|
||||
Timestamp("2021-01-01 02:00"),
|
||||
],
|
||||
"B": [10.0, 20.0, 30.0],
|
||||
}
|
||||
)
|
||||
offset_dfs = {"1 day ago": offset_df}
|
||||
time_grain = TimeGrain.HOUR
|
||||
join_keys = ["A"]
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": [
|
||||
Timestamp("2021-01-02 00:00"),
|
||||
Timestamp("2021-01-02 01:00"),
|
||||
Timestamp("2021-01-02 02:00"),
|
||||
],
|
||||
"V": [1.0, 2.0, None],
|
||||
"B": [10.0, 20.0, 30.0],
|
||||
}
|
||||
)
|
||||
|
||||
result = query_context_processor.join_offset_dfs(
|
||||
df, offset_dfs, time_grain, join_keys, full_range=True
|
||||
)
|
||||
|
||||
assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
def test_join_offset_dfs_full_range_disabled_truncates_historical():
|
||||
"""The default (full_range=False) left join drops the historical 02:00 point."""
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [Timestamp("2021-01-02 00:00"), Timestamp("2021-01-02 01:00")],
|
||||
"V": [1.0, 2.0],
|
||||
}
|
||||
)
|
||||
offset_df = DataFrame(
|
||||
{
|
||||
"A": [
|
||||
Timestamp("2021-01-01 00:00"),
|
||||
Timestamp("2021-01-01 01:00"),
|
||||
Timestamp("2021-01-01 02:00"),
|
||||
],
|
||||
"B": [10.0, 20.0, 30.0],
|
||||
}
|
||||
)
|
||||
offset_dfs = {"1 day ago": offset_df}
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": [Timestamp("2021-01-02 00:00"), Timestamp("2021-01-02 01:00")],
|
||||
"V": [1.0, 2.0],
|
||||
"B": [10.0, 20.0],
|
||||
}
|
||||
)
|
||||
|
||||
result = query_context_processor.join_offset_dfs(
|
||||
df, offset_dfs, TimeGrain.HOUR, ["A"], full_range=False
|
||||
)
|
||||
|
||||
assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
def test_join_offset_dfs_totals_query_no_dimensions():
|
||||
"""
|
||||
Test time offset join for totals query with no dimension columns.
|
||||
|
||||
@@ -59,6 +59,7 @@ def test_default_query_object_to_dict():
|
||||
"series_limit": 0,
|
||||
"series_limit_metric": None,
|
||||
"time_shift": None,
|
||||
"time_compare_full_range": False,
|
||||
"to_dttm": None,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user