Compare commits

...

9 Commits

Author SHA1 Message Date
Maxime Beauchemin
89f9ca7364 style: apply prettier and ruff-format fixes
Auto-format renameOperator.ts (prettier) and test_time_shifts.py
(ruff-format) to pass CI pre-commit checks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-08 23:56:21 +00:00
Maxime Beauchemin
b89364531f fix(charts): add return type annotation to test function
Address CodeAnt AI review comment — add explicit -> None return type
annotation to test_extract_dataframe_dtypes_with_duplicate_columns.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:55:25 +00:00
Maxime Beauchemin
95ad8b81c7 fix: shorten docstring to pass E501 line length check
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:54:35 +00:00
Maxime Beauchemin
6846244d8c fix(charts): address code review — check all join keys and fix timeOffset.ts
1. Check any join key (not just the first) for temporal dtype when
   validating time grain requirement. Fixes edge case where temporal
   key is not the first in the join_keys list.

2. Fix substring matching bugs in timeOffset.ts:
   - getTimeOffset: use startsWith for <offset>, pattern
   - getOriginalSeries: use startsWith+slice for <offset>, pattern

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:54:35 +00:00
Maxime Beauchemin
c08706e460 fix(charts): only require time grain when join key is temporal
Refine the time grain validation to check whether the first join key
is a datetime column before raising. Table charts with time comparison
use non-temporal dimension joins that work without a time grain.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:54:35 +00:00
Maxime Beauchemin
5f40f5f1bb fix(charts): use separator-aware matching in renameOperator for time offsets
The `includes(offset)` substring match caused "11 year ago" to match
"1 year ago", producing duplicate column names that crash the backend
with "truth value of a Series is ambiguous". Switch to `endsWith` with
the `__` separator prefix for exact offset matching.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:54:35 +00:00
Maxime Beauchemin
194831d685 fix(charts): handle duplicate column names in extract_dataframe_dtypes
When a DataFrame has duplicate column names (e.g. from time comparison
post-processing), `df[column]` returns a DataFrame instead of a Series.
Calling `.isna().all()` on a DataFrame produces a Series of booleans,
which raises "truth value of a Series is ambiguous" in the `if` check.

Fix by using positional indexing (`df.iloc[:, i]`) to always retrieve a
single Series regardless of duplicate column names.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:54:35 +00:00
Maxime Beauchemin
aaad620cb3 fix(charts): raise error when time comparison is used without a time grain
Without a time grain, the offset join columns that align shifted dates
to original dates are never created. The join falls back to raw column
values, producing silently wrong results (identical values instead of
shifted data). Replace the existing dead-code validation with a check
that raises a helpful error early.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:51:54 +00:00
Maxime Beauchemin
6c00ee2eec fix(charts): use separator-aware matching for time shift column grouping
The `endswith(ts)` check in `get_column_groups` matched columns
incorrectly when time shifts shared a numeric suffix — e.g.
"metric__22 weeks ago".endswith("2 weeks ago") returned True,
causing columns to be misclassified. Use `endswith(TIME_COMPARISON + ts)`
to include the "__" separator in the match.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 19:51:54 +00:00
10 changed files with 173 additions and 12 deletions

View File

@@ -67,7 +67,7 @@ export const renameOperator: PostProcessingFactory<PostProcessingRename> = (
[...metricOffsetMap.entries()].forEach(
([metricWithOffset, metricOnly]) => {
const offsetLabel = timeOffsets.find(offset =>
metricWithOffset.includes(offset),
metricWithOffset.endsWith(`${TIME_COMPARISON_SEPARATOR}${offset}`),
);
renamePairs.push([
formData.comparison_type === ComparisonType.Values

View File

@@ -26,7 +26,7 @@ export const getTimeOffset = (
timeCompare.find(
timeOffset =>
// offset is represented as <offset>, group by list
series.name.includes(`${timeOffset},`) ||
series.name.startsWith(`${timeOffset},`) ||
// offset is represented as <metric>__<offset>
series.name.includes(`__${timeOffset}`) ||
// offset is represented as <metric>, <offset>
@@ -50,7 +50,9 @@ export const getOriginalSeries = (
// offset in the middle: <metric>, <offset>, <dimension>
result = result.replace(`, ${compare},`, ',');
// offset at start: <offset>, <dimension>
result = result.replace(`${compare},`, '');
if (result.startsWith(`${compare},`)) {
result = result.slice(`${compare},`.length);
}
// offset with double underscore: <metric>__<offset>
result = result.replace(`__${compare}`, '');
// offset at end: <metric>, <offset>

View File

@@ -303,6 +303,30 @@ test('should add renameOperator if multiple metrics exist', () => {
});
});
test('should correctly match offsets that share a numeric prefix', () => {
  // '1 year ago' is a substring of '11 year ago'; separator-aware matching
  // must map each offset column to its own label with no duplicates.
  const result = renameOperator(
    {
      ...formData,
      comparison_type: ComparisonType.Values,
      time_compare: ['1 year ago', '11 year ago'],
    },
    queryObject,
  );
  expect(result).toEqual({
    operation: 'rename',
    options: {
      columns: {
        'count(*)__1 year ago': '1 year ago',
        'count(*)__11 year ago': '11 year ago',
      },
      inplace: true,
      level: 0,
    },
  });
});
test('should remove renameOperator', () => {
expect(
renameOperator(

View File

@@ -114,3 +114,26 @@ test('hasTimeOffset returns false when series name is not a string', () => {
const timeCompare = ['1 year ago'];
expect(hasTimeOffset(series, timeCompare)).toBe(false);
});
test('getTimeOffset correctly matches offsets that share a numeric prefix', () => {
const timeCompare = ['1 year ago', '11 year ago'];
expect(
getTimeOffset({ name: '11 year ago, Alexander' }, timeCompare),
).toEqual('11 year ago');
expect(getTimeOffset({ name: '1 year ago, Alexander' }, timeCompare)).toEqual(
'1 year ago',
);
expect(getTimeOffset({ name: 'Births__11 year ago' }, timeCompare)).toEqual(
'11 year ago',
);
});
test('getOriginalSeries correctly strips offsets that share a numeric prefix', () => {
  // Stripping '1 year ago' must not mangle '11 year ago' names (or vice
  // versa); both forms must reduce cleanly to the original series name.
  const timeCompare = ['1 year ago', '11 year ago'];
  ['11 year ago', '1 year ago'].forEach(offset =>
    expect(getOriginalSeries(`${offset}, Alexander`, timeCompare)).toEqual(
      'Alexander',
    ),
  );
});

View File

@@ -1887,10 +1887,26 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
time_grain
)
if join_column_producer and not time_grain:
raise QueryObjectValidationError(
_("Time Grain must be specified when using Time Shift.")
if not time_grain:
has_temporal_join_key = any(
pd.api.types.is_datetime64_any_dtype(df[key])
for key in join_keys
if key in df.columns
)
if has_temporal_join_key:
has_relative_offset = any(
not (
self.is_valid_date_range(offset)
and feature_flag_manager.is_feature_enabled(
"DATE_RANGE_TIMESHIFTS_ENABLED"
)
)
for offset in offset_dfs
)
if has_relative_offset:
raise QueryObjectValidationError(
_("Time Grain must be specified when using Time Comparison.")
)
for offset, offset_df in offset_dfs.items():
is_date_range_offset = self.is_valid_date_range(

View File

@@ -1782,9 +1782,9 @@ def extract_dataframe_dtypes(
columns_by_name[column.column_name] = column
generic_types: list[GenericDataType] = []
for column in df.columns:
for i, column in enumerate(df.columns):
column_object = columns_by_name.get(str(column))
series = df[column]
series = df.iloc[:, i]
inferred_type: str = ""
if series.isna().all():
sql_type: Optional[str] = ""

View File

@@ -24,7 +24,7 @@ from flask_babel import gettext as _
from pandas import DataFrame, MultiIndex
from superset.exceptions import InvalidPostProcessingError
from superset.utils.core import PostProcessingContributionOrientation
from superset.utils.core import PostProcessingContributionOrientation, TIME_COMPARISON
from superset.utils.pandas_postprocessing.utils import validate_column_args
@@ -130,7 +130,7 @@ def get_column_groups(
time_shift = None
if time_shifts and isinstance(col_0, str):
for ts in time_shifts:
if col_0.endswith(ts):
if col_0.endswith(TIME_COMPARISON + ts):
time_shift = ts
break
if time_shift is not None:

View File

@@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
from pandas import DataFrame, Series, Timestamp
from pandas.testing import assert_frame_equal
from pytest import fixture, mark # noqa: PT013
@@ -23,6 +24,7 @@ from superset.common.query_context import QueryContext
from superset.common.query_context_processor import QueryContextProcessor
from superset.connectors.sqla.models import BaseDatasource
from superset.constants import TimeGrain
from superset.exceptions import QueryObjectValidationError
from superset.models.helpers import ExploreMixin
# Create processor and bind ExploreMixin methods to datasource
@@ -242,3 +244,55 @@ def test_join_offset_dfs_totals_query_no_dimensions():
)
assert_frame_equal(expected, result)
def test_join_offset_dfs_raises_without_time_grain():
    """A relative time-comparison offset joined on a temporal key must
    raise when no time grain is supplied."""
    base_df = DataFrame({"ds": [Timestamp("2021-01-01")], "D": [1]})
    shifted_df = DataFrame({"ds": [Timestamp("2021-02-01")], "B": [5]})
    with pytest.raises(
        QueryObjectValidationError, match="Time Grain must be specified"
    ):
        query_context_processor.join_offset_dfs(
            base_df,
            {"1 year ago": shifted_df},
            time_grain=None,
            join_keys=["ds"],
        )
def test_join_offset_dfs_allows_non_temporal_join_without_time_grain():
    """No time grain is required when every join key is non-temporal."""
    base_df = DataFrame({"country": ["US", "UK"], "metric": [10, 20]})
    shifted_df = DataFrame(
        {"country": ["US", "UK"], "metric__1 year ago": [8, 15]}
    )
    joined = query_context_processor.join_offset_dfs(
        base_df,
        {"1 year ago": shifted_df},
        time_grain=None,
        join_keys=["country"],
    )
    assert "metric__1 year ago" in joined.columns
def test_join_offset_dfs_raises_when_temporal_key_not_first():
    """The temporal-key check must scan every join key, not only the first."""
    base_df = DataFrame(
        {
            "country": ["US", "UK"],
            "ds": [Timestamp("2021-01-01"), Timestamp("2021-02-01")],
            "D": [1, 2],
        }
    )
    shifted_df = DataFrame(
        {
            "country": ["US", "UK"],
            "ds": [Timestamp("2021-03-01"), Timestamp("2021-04-01")],
            "B": [5, 6],
        }
    )
    with pytest.raises(
        QueryObjectValidationError, match="Time Grain must be specified"
    ):
        query_context_processor.join_offset_dfs(
            base_df,
            {"1 year ago": shifted_df},
            time_grain=None,
            join_keys=["country", "ds"],
        )

View File

@@ -124,3 +124,36 @@ def test_contribution_with_time_shift_columns():
assert_array_equal(processed_df["a__1 week ago"].tolist(), [0.5, 0.5])
assert_array_equal(processed_df["b__1 week ago"].tolist(), [0.25, 0.25])
assert_array_equal(processed_df["c__1 week ago"].tolist(), [0.25, 0.25])
def test_contribution_with_numeric_prefix_time_shifts():
    """'22 weeks ago' ends with '2 weeks ago'; columns must be grouped by
    their exact (separator-aware) offset, never by bare suffix matching."""
    frame = DataFrame(
        {
            DTTM_ALIAS: [
                datetime(2020, 7, 16, 14, 49),
                datetime(2020, 7, 16, 14, 50),
            ],
            "a": [3, 6],
            "b": [6, 3],
            "a__2 weeks ago": [1, 1],
            "b__2 weeks ago": [1, 1],
            "a__22 weeks ago": [2, 4],
            "b__22 weeks ago": [4, 2],
        }
    )
    result = contribution(
        frame,
        orientation=PostProcessingContributionOrientation.ROW,
        time_shifts=["2 weeks ago", "22 weeks ago"],
    )
    # Unshifted columns: row sums 3+6=9 and 6+3=9, normalised within group.
    assert_array_equal(result["a"].tolist(), [1 / 3, 2 / 3])
    assert_array_equal(result["b"].tolist(), [2 / 3, 1 / 3])
    # '2 weeks ago' group: 1 and 1 per row -> equal halves.
    assert_array_equal(result["a__2 weeks ago"].tolist(), [0.5, 0.5])
    assert_array_equal(result["b__2 weeks ago"].tolist(), [0.5, 0.5])
    # '22 weeks ago' group normalises independently of the '2 weeks ago' group.
    assert_array_equal(result["a__22 weeks ago"].tolist(), [1 / 3, 2 / 3])
    assert_array_equal(result["b__22 weeks ago"].tolist(), [2 / 3, 1 / 3])

View File

@@ -31,6 +31,7 @@ from superset.utils.core import (
cast_to_boolean,
check_is_safe_zip,
DateColumn,
extract_dataframe_dtypes,
FilterOperator,
generic_find_constraint_name,
generic_find_fk_constraint_name,
@@ -647,8 +648,9 @@ def test_get_user_agent(mocker: MockerFixture, app_context: None) -> None:
@with_config(
{
"USER_AGENT_FUNC": lambda database,
source: f"{database.database_name} {source.name}"
"USER_AGENT_FUNC": lambda database, source: (
f"{database.database_name} {source.name}"
)
}
)
def test_get_user_agent_custom(mocker: MockerFixture, app_context: None) -> None:
@@ -1730,3 +1732,10 @@ def test_markdown_with_markup_wrap() -> None:
assert isinstance(result, Markup)
assert "<strong>bold</strong>" in str(result)
def test_extract_dataframe_dtypes_with_duplicate_columns() -> None:
    """Duplicate column names must not crash dtype extraction; one dtype
    entry is returned per column position."""
    frame = pd.DataFrame([[1, 2, 3]], columns=["a", "b", "a"])
    assert len(extract_dataframe_dtypes(frame)) == 3