From adaae8ba150e2ef244d4237043cbdf16930fea51 Mon Sep 17 00:00:00 2001 From: Mehmet Salih Yavuz Date: Thu, 7 Aug 2025 15:20:34 +0300 Subject: [PATCH] fix(Timeshift): Determine temporal column correctly (#34582) --- superset/common/query_context_processor.py | 68 ++++- .../common/test_query_context_processor.py | 253 ++++++++++++++++++ 2 files changed, 313 insertions(+), 8 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 0d06ece2811..a0228cedeef 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -456,11 +456,17 @@ class QueryContextProcessor: return f"{(outer_from_dttm - offset_date).days} days ago" return "" - def processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements # noqa: C901 + def processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements # noqa: C901 self, df: pd.DataFrame, query_object: QueryObject, ) -> CachedTimeOffset: + """ + Process time offsets for time comparison feature. + + This method handles both relative time offsets (e.g., "1 week ago") and + absolute date range offsets (e.g., "2015-01-03 : 2015-01-04"). + """ query_context = self._query_context # ensure query_object is immutable query_object_clone = copy.copy(query_object) @@ -550,11 +556,10 @@ class QueryContextProcessor: # Get time offset index index = (get_base_axis_labels(query_object.columns) or [DTTM_ALIAS])[0] - # Handle temporal filters if is_date_range_offset and feature_flag_manager.is_feature_enabled( "DATE_RANGE_TIMESHIFTS_ENABLED" ): - # Create a completely new filter list to avoid conflicts + # Create a completely new filter list to preserve original filters query_object_clone.filter = copy.deepcopy(query_object_clone.filter) # Remove any existing temporal filters that might conflict @@ -564,8 +569,12 @@ class QueryContextProcessor: if not (flt.get("op") == FilterOperator.TEMPORAL_RANGE) ] - # Add our specific temporal filter - temporal_col = query_object_clone.granularity or x_axis_label + # Determine the temporal column with multiple fallback strategies + temporal_col = self._get_temporal_column_for_filter( + query_object_clone, x_axis_label + ) + + # Always add a temporal filter for date range offsets if temporal_col: new_temporal_filter: QueryObjectFilterClause = { "col": temporal_col, @@ -577,7 +586,17 @@ class QueryContextProcessor: } query_object_clone.filter.append(new_temporal_filter) + else: + # This should rarely happen with proper fallbacks + raise QueryObjectValidationError( + _( + "Unable to identify temporal column for date range time comparison." # noqa: E501 + "Please ensure your dataset has a properly configured time column." # noqa: E501 + ) + ) + else: + # RELATIVE OFFSET: Original logic for non-date-range offsets # The comparison is not using a temporal column so we need to modify # the temporal filter so we run the query with the correct time range if not dataframe_utils.is_datetime_series(df.get(index)): @@ -600,8 +619,7 @@ class QueryContextProcessor: ) flt["val"] = f"{new_outer_from_dttm} : {new_outer_to_dttm}" else: - # If it IS a datetime series, we still need to clear conflicting - # filters + # If it IS a datetime series, we still need to clear conflicts query_object_clone.filter = copy.deepcopy(query_object_clone.filter) # For relative offsets with datetime series, ensure the temporal @@ -629,7 +647,7 @@ class QueryContextProcessor: ) ] - # Continue with the rest of the method... + # Continue with the rest of the method (caching, execution, etc.) cached_time_offset_key = ( offset if offset == original_offset else f"{offset}_{original_offset}" ) @@ -713,6 +731,40 @@ class QueryContextProcessor: return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) + def _get_temporal_column_for_filter( # noqa: C901 + self, query_object: QueryObject, x_axis_label: str | None + ) -> str | None: + """ + Helper method to reliably determine the temporal column for filtering. + + This method tries multiple strategies to find the correct temporal column: + 1. Use explicitly set granularity + 2. Use x_axis_label if it's a temporal column + 3. Find any datetime column in the datasource + + :param query_object: The query object + :param x_axis_label: The x-axis label from the query + :return: The name of the temporal column, or None if not found + """ + # Strategy 1: Use explicitly set granularity + if query_object.granularity: + return query_object.granularity + + # Strategy 2: Use x_axis_label if it exists + if x_axis_label: + return x_axis_label + + # Strategy 3: Find any datetime column in the datasource + if hasattr(self._qc_datasource, "columns"): + for col in self._qc_datasource.columns: + if hasattr(col, "is_dttm") and col.is_dttm: + if hasattr(col, "column_name"): + return col.column_name + elif hasattr(col, "name"): + return col.name + + return None + def _process_date_range_offset( self, offset_df: pd.DataFrame, join_keys: list[str] ) -> tuple[pd.DataFrame, list[str]]: diff --git a/tests/unit_tests/common/test_query_context_processor.py b/tests/unit_tests/common/test_query_context_processor.py index 26b545feac2..dc55f8174bf 100644 --- a/tests/unit_tests/common/test_query_context_processor.py +++ b/tests/unit_tests/common/test_query_context_processor.py @@ -371,3 +371,256 @@ def test_get_offset_custom_or_inherit_with_invalid_date(processor): # Should return empty string for invalid format assert result == "" + + +def test_get_temporal_column_for_filter_with_granularity(processor): + """Test _get_temporal_column_for_filter returns granularity when available.""" + query_object = MagicMock() + query_object.granularity = "date_column" + + result = processor._get_temporal_column_for_filter(query_object, "x_axis_col") + + assert result == "date_column" + + +def test_get_temporal_column_for_filter_with_x_axis_fallback(processor): + """Test _get_temporal_column_for_filter falls back to x_axis_label.""" + query_object = MagicMock() + query_object.granularity = None + + result = processor._get_temporal_column_for_filter(query_object, "x_axis_col") + + assert result == "x_axis_col" + + +def test_get_temporal_column_for_filter_with_datasource_columns(processor): + """Test _get_temporal_column_for_filter finds datetime column from datasource.""" + query_object = MagicMock() + query_object.granularity = None + + # Mock datasource with datetime columns + mock_datetime_col = MagicMock() + mock_datetime_col.is_dttm = True + mock_datetime_col.column_name = "created_at" + + mock_regular_col = MagicMock() + mock_regular_col.is_dttm = False + mock_regular_col.column_name = "name" + + processor._qc_datasource.columns = [mock_regular_col, mock_datetime_col] + + result = processor._get_temporal_column_for_filter(query_object, None) + + assert result == "created_at" + + +def test_get_temporal_column_for_filter_with_datasource_name_attr(processor): + """Test _get_temporal_column_for_filter with columns using name attribute.""" + query_object = MagicMock() + query_object.granularity = None + + # Mock datasource with datetime column using 'name' attribute + # instead of 'column_name' + mock_datetime_col = MagicMock() + mock_datetime_col.is_dttm = True + mock_datetime_col.name = "timestamp_col" + # Remove column_name attribute to test name fallback + del mock_datetime_col.column_name + + processor._qc_datasource.columns = [mock_datetime_col] + + result = processor._get_temporal_column_for_filter(query_object, None) + + assert result == "timestamp_col" + + +def test_get_temporal_column_for_filter_no_columns_found(processor): + """Test _get_temporal_column_for_filter + returns None when no temporal column found.""" + query_object = MagicMock() + query_object.granularity = None + + # Mock datasource with no datetime columns + mock_regular_col = MagicMock() + mock_regular_col.is_dttm = False + mock_regular_col.column_name = "name" + + processor._qc_datasource.columns = [mock_regular_col] + + result = processor._get_temporal_column_for_filter(query_object, None) + + assert result is None + + +def test_get_temporal_column_for_filter_no_datasource_columns(processor): + """Test _get_temporal_column_for_filter handles datasource + without columns attribute.""" + query_object = MagicMock() + query_object.granularity = None + + # Remove columns attribute from datasource + if hasattr(processor._qc_datasource, "columns"): + delattr(processor._qc_datasource, "columns") + + result = processor._get_temporal_column_for_filter(query_object, None) + + assert result is None + + +def test_processing_time_offsets_temporal_column_error(processor): + """Test processing_time_offsets raises QueryObjectValidationError + when temporal column can't be determined.""" + from superset.common.query_object import QueryObject + from superset.exceptions import QueryObjectValidationError + + # Create a dataframe for testing + df = pd.DataFrame({"dim1": ["A", "B", "C"], "metric1": [10, 20, 30]}) + + # Create query object with date range offset and proper time range + query_object = QueryObject( + datasource=MagicMock(), + granularity=None, # No granularity set + columns=[], + is_timeseries=True, + time_offsets=["2023-01-01 : 2023-01-31"], + filter=[ + { + "col": "some_date_col", + "op": "TEMPORAL_RANGE", + "val": "2024-01-01 : 2024-01-31", + } + ], + ) + + # Mock get_since_until_from_query_object to return valid dates + with patch( + "superset.common.query_context_processor.get_since_until_from_query_object" + ) as mock_dates: + mock_dates.return_value = ( + pd.Timestamp("2024-01-01"), + pd.Timestamp("2024-01-31"), + ) + + # Mock feature flag to be enabled + with patch( + "superset.common.query_context_processor.feature_flag_manager" + ) as mock_ff: + mock_ff.is_feature_enabled.return_value = True + + # Mock _get_temporal_column_for_filter to return None + # (no temporal column found) + with patch.object( + processor, "_get_temporal_column_for_filter", return_value=None + ): + with patch( + "superset.common.query_context_processor.get_base_axis_labels", + return_value=["__timestamp"], + ): + with pytest.raises( + QueryObjectValidationError, + match="Unable to identify temporal column", + ): + processor.processing_time_offsets(df, query_object) + + +def test_processing_time_offsets_date_range_enabled(processor): + """Test processing_time_offsets correctly handles + date range offsets when enabled.""" + from superset.common.query_object import QueryObject + + # Create a dataframe for testing + df = pd.DataFrame( + { + "dim1": ["A", "B", "C"], + "metric1": [10, 20, 30], + "__timestamp": pd.date_range("2023-01-01", periods=3, freq="D"), + } + ) + + # Create a properly mocked datasource + mock_datasource = MagicMock() + mock_datasource.id = 123 + mock_datasource.uid = "abc123" + mock_datasource.cache_timeout = None + mock_datasource.changed_on = pd.Timestamp("2023-01-01") + mock_datasource.get_extra_cache_keys.return_value = {} + + # Create query object with date range offset + query_object = QueryObject( + datasource=mock_datasource, + granularity="date_col", + columns=[], + is_timeseries=True, + time_offsets=["2022-01-01 : 2022-01-31"], + filter=[], + ) + + # Mock the query context and its methods + processor._query_context.queries = [query_object] + + with patch( + "superset.common.query_context_processor.feature_flag_manager" + ) as mock_ff: + mock_ff.is_feature_enabled.return_value = True + + with patch( + "superset.common.query_context_processor.get_base_axis_labels", + return_value=["__timestamp"], + ): + with patch( + "superset.common.query_context_processor.get_since_until_from_query_object" + ) as mock_dates: + mock_dates.return_value = ( + pd.Timestamp("2023-01-01"), + pd.Timestamp("2023-01-03"), + ) + + with patch( + "superset.common.query_context_processor.get_since_until_from_time_range" + ) as mock_time_range: + mock_time_range.return_value = ( + pd.Timestamp("2022-01-01"), + pd.Timestamp("2022-01-31"), + ) + + with patch.object( + processor, "get_query_result" + ) as mock_query_result: + mock_result = MagicMock() + mock_result.df = pd.DataFrame( + { + "dim1": ["A", "B"], + "metric1": [5, 10], + "__timestamp": pd.date_range( + "2022-01-01", periods=2, freq="D" + ), + } + ) + mock_result.query = "SELECT * FROM table" + mock_result.cache_key = "offset_cache_key" + mock_query_result.return_value = mock_result + + with patch.object( + processor, + "_get_temporal_column_for_filter", + return_value="date_col", + ): + with patch.object( + processor, + "query_cache_key", + return_value="mock_cache_key", + ): + # Test the method + result = processor.processing_time_offsets( + df, query_object + ) + + # Verify that the method completes successfully + assert "df" in result + assert "queries" in result + assert "cache_keys" in result + + # Verify the result has the expected structure + assert isinstance(result["df"], pd.DataFrame) + assert isinstance(result["queries"], list) + assert isinstance(result["cache_keys"], list)