From 6c00ee2eecbdf2c7f0d64dc9227a5c8b0a476901 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 13 Apr 2026 17:19:10 +0000 Subject: [PATCH] fix(charts): use separator-aware matching for time shift column grouping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `endswith(ts)` check in `get_column_groups` matched columns incorrectly when time shifts shared a numeric suffix — e.g. "metric__22 weeks ago".endswith("2 weeks ago") returned True, causing columns to be misclassified. Use `endswith(TIME_COMPARISON + ts)` to include the "__" separator in the match. Co-Authored-By: Claude Opus 4.6 --- .../pandas_postprocessing/contribution.py | 4 +-- .../test_contribution.py | 33 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/superset/utils/pandas_postprocessing/contribution.py b/superset/utils/pandas_postprocessing/contribution.py index ec6716fcba0..a2875c5d5b9 100644 --- a/superset/utils/pandas_postprocessing/contribution.py +++ b/superset/utils/pandas_postprocessing/contribution.py @@ -24,7 +24,7 @@ from flask_babel import gettext as _ from pandas import DataFrame, MultiIndex from superset.exceptions import InvalidPostProcessingError -from superset.utils.core import PostProcessingContributionOrientation +from superset.utils.core import PostProcessingContributionOrientation, TIME_COMPARISON from superset.utils.pandas_postprocessing.utils import validate_column_args @@ -130,7 +130,7 @@ def get_column_groups( time_shift = None if time_shifts and isinstance(col_0, str): for ts in time_shifts: - if col_0.endswith(ts): + if col_0.endswith(TIME_COMPARISON + ts): time_shift = ts break if time_shift is not None: diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py index 7853fc97f29..8d45e7c8f44 100644 --- a/tests/unit_tests/pandas_postprocessing/test_contribution.py +++ b/tests/unit_tests/pandas_postprocessing/test_contribution.py @@ -124,3 +124,36 @@ def test_contribution_with_time_shift_columns(): assert_array_equal(processed_df["a__1 week ago"].tolist(), [0.5, 0.5]) assert_array_equal(processed_df["b__1 week ago"].tolist(), [0.25, 0.25]) assert_array_equal(processed_df["c__1 week ago"].tolist(), [0.25, 0.25]) + + +def test_contribution_with_numeric_prefix_time_shifts(): + """Time shifts like '2 weeks ago' and '22 weeks ago' share a numeric suffix; + columns must be grouped by their exact offset, not by suffix matching.""" + df = DataFrame( + { + DTTM_ALIAS: [ + datetime(2020, 7, 16, 14, 49), + datetime(2020, 7, 16, 14, 50), + ], + "a": [3, 6], + "b": [6, 3], + "a__2 weeks ago": [1, 1], + "b__2 weeks ago": [1, 1], + "a__22 weeks ago": [2, 4], + "b__22 weeks ago": [4, 2], + } + ) + processed_df = contribution( + df, + orientation=PostProcessingContributionOrientation.ROW, + time_shifts=["2 weeks ago", "22 weeks ago"], + ) + # Non-time-shift columns: a=3,b=6 -> a=1/3, b=2/3; a=6,b=3 -> a=2/3, b=1/3 + assert_array_equal(processed_df["a"].tolist(), [1 / 3, 2 / 3]) + assert_array_equal(processed_df["b"].tolist(), [2 / 3, 1 / 3]) + # "2 weeks ago" group: a=1,b=1 -> 0.5,0.5 each row + assert_array_equal(processed_df["a__2 weeks ago"].tolist(), [0.5, 0.5]) + assert_array_equal(processed_df["b__2 weeks ago"].tolist(), [0.5, 0.5]) + # "22 weeks ago" group: a=2,b=4 -> 1/3,2/3; a=4,b=2 -> 2/3,1/3 + assert_array_equal(processed_df["a__22 weeks ago"].tolist(), [1 / 3, 2 / 3]) + assert_array_equal(processed_df["b__22 weeks ago"].tolist(), [2 / 3, 1 / 3])