mirror of
https://github.com/apache/superset.git
synced 2026-07-02 12:55:35 +00:00
Compare commits
1 Commits
codex/fix-
...
fix/issue-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72aa495908 |
@@ -1625,6 +1625,56 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
seen.add(label)
|
||||
return tuple(labels)
|
||||
|
||||
def _offset_only_dttm_cols(
    self,
    df: pd.DataFrame,
    query_object: QueryObject,
    already_collected: set[str],
) -> list[DateColumn]:
    """Build ``DateColumn`` entries for native-datetime temporal columns.

    The entries returned here only need the dataset HOURS OFFSET (and any
    time shift) applied, for temporal columns the database already returns
    as native datetimes.

    The dataset offset must apply to *every* temporal column a query returns,
    not just the selected time column. ``_collect_dttm_labels`` only covers
    columns that need parsing (a declared ``python_date_format``) or the
    base-axis / granularity column; a second temporal column returned as a
    native datetime would otherwise keep its raw, un-offset value. Columns
    arriving as plain integers/strings without a declared format are skipped,
    since they cannot be safely interpreted as datetimes.

    See https://github.com/apache/superset/issues/23167.

    :param df: query result whose columns are inspected
    :param query_object: query whose ``time_shift`` is copied onto each entry
    :param already_collected: labels that already have a ``DateColumn``;
        this set is only read, never modified
    :returns: one ``DateColumn`` (with ``timestamp_format=None``) per
        datetime-typed column whose dataset column is flagged ``is_dttm``;
        an empty list when there is neither an offset nor a time shift, or
        when ``self`` has no ``get_column`` attribute
    """
    # Nothing to apply: neither a dataset offset nor a time shift is set.
    if not (self.offset or query_object.time_shift):
        return []
    # Without ``get_column`` the dataset column metadata cannot be looked up.
    if not hasattr(self, "get_column"):
        return []

    # Track handled labels on a private copy so the caller's set is left
    # untouched while each label is still emitted at most once.
    seen = set(already_collected)
    extra: list[DateColumn] = []
    for label in df.columns:
        if label in seen or label == DTTM_ALIAS:
            continue
        # Only columns that already hold datetimes are eligible; integer or
        # string columns are skipped.
        if not pd.api.types.is_datetime64_any_dtype(df[label]):
            continue
        column_obj = self.get_column(label)
        if not column_obj:
            continue
        # Column metadata may be a plain dict or an object with attributes.
        if isinstance(column_obj, dict):
            is_dttm = column_obj.get("is_dttm")
        else:
            is_dttm = getattr(column_obj, "is_dttm", False)
        if not is_dttm:
            continue
        extra.append(
            DateColumn(
                timestamp_format=None,
                offset=self.offset,
                time_shift=query_object.time_shift,
                col_label=label,
            )
        )
        seen.add(label)
    return extra
|
||||
|
||||
def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFrame:
|
||||
"""
|
||||
Normalize the dataframe by converting datetime columns and ensuring
|
||||
@@ -1655,6 +1705,12 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
)
|
||||
)
|
||||
|
||||
dttm_cols.extend(
|
||||
self._offset_only_dttm_cols(
|
||||
df, query_object, {col.col_label for col in dttm_cols}
|
||||
)
|
||||
)
|
||||
|
||||
# Build format map from detected datetime formats stored in dataset columns
|
||||
format_map: dict[str, str] = {}
|
||||
if hasattr(self, "columns"):
|
||||
|
||||
@@ -2626,7 +2626,12 @@ def _normalize_df_datasource(column: object) -> MagicMock:
|
||||
datasource.enforce_numerical_metrics = False
|
||||
datasource.columns = [column]
|
||||
datasource.get_column = lambda name: {"ts": column}.get(name)
|
||||
for method in ("_python_date_format", "_collect_dttm_labels", "normalize_df"):
|
||||
for method in (
|
||||
"_python_date_format",
|
||||
"_collect_dttm_labels",
|
||||
"_offset_only_dttm_cols",
|
||||
"normalize_df",
|
||||
):
|
||||
setattr(datasource, method, getattr(ExploreMixin, method).__get__(datasource))
|
||||
return datasource
|
||||
|
||||
@@ -2750,7 +2755,12 @@ def test_normalize_df_without_get_column_is_a_noop() -> None:
|
||||
columns: list[object] = []
|
||||
|
||||
datasource = _NoGetColumnDatasource()
|
||||
for method in ("_python_date_format", "_collect_dttm_labels", "normalize_df"):
|
||||
for method in (
|
||||
"_python_date_format",
|
||||
"_collect_dttm_labels",
|
||||
"_offset_only_dttm_cols",
|
||||
"normalize_df",
|
||||
):
|
||||
setattr(datasource, method, getattr(ExploreMixin, method).__get__(datasource))
|
||||
|
||||
df = pd.DataFrame({"ts": [1577836800, 1609459200]})
|
||||
@@ -2849,6 +2859,108 @@ def test_normalize_df_normalizes_legacy_time_column() -> None:
|
||||
assert result[DTTM_ALIAS][0].strftime("%Y-%m-%d") == "2020-01-01"
|
||||
|
||||
|
||||
def test_normalize_df_applies_offset_to_all_temporal_columns() -> None:
    """Regression test for issue #23167.

    The dataset HOURS OFFSET must reach every temporal column a query
    returns, not only the selected time column: two native-datetime temporal
    columns, neither declaring a ``python_date_format``, are both expected to
    come back shifted by the dataset offset.
    """
    import pandas as pd

    from superset.models.helpers import ExploreMixin

    # One temporal column mock per label, with no declared parsing format.
    columns = {
        column_name: MagicMock(
            column_name=column_name,
            is_dttm=True,
            python_date_format=None,
            datetime_format=None,
        )
        for column_name in ("created", "expired")
    }

    datasource = MagicMock(offset=4, enforce_numerical_metrics=False)
    datasource.columns = list(columns.values())
    datasource.get_column = lambda name: columns.get(name)
    # Bind the real ExploreMixin implementations onto the mock datasource.
    method_names = (
        "_python_date_format",
        "_collect_dttm_labels",
        "_offset_only_dttm_cols",
        "normalize_df",
    )
    for method_name in method_names:
        bound = getattr(ExploreMixin, method_name).__get__(datasource)
        setattr(datasource, method_name, bound)

    query_object = MagicMock(
        columns=["created", "expired"],
        granularity=None,
        time_shift=None,
    )

    created_values = pd.to_datetime(["2020-01-01 00:00:00", "2020-01-02 00:00:00"])
    expired_values = pd.to_datetime(["2020-06-01 12:00:00", "2020-06-02 12:00:00"])
    df = pd.DataFrame({"created": created_values, "expired": expired_values})

    result = datasource.normalize_df(df, query_object)

    # Each column must be shifted forward by the 4-hour dataset offset.
    expected = (
        ("created", ["2020-01-01 04:00:00", "2020-01-02 04:00:00"]),
        ("expired", ["2020-06-01 16:00:00", "2020-06-02 16:00:00"]),
    )
    for label, shifted in expected:
        assert result[label].tolist() == pd.to_datetime(shifted).tolist()
|
||||
|
||||
|
||||
def test_normalize_df_offset_skips_unconfigured_integer_temporal_columns() -> None:
    """Integer temporal columns without a declared format stay untouched.

    The offset extension for native-datetime temporal columns must not touch
    a temporal column whose values arrive as plain integers with no declared
    format: such a column cannot be safely interpreted as a datetime, so it
    is left as-is rather than reinterpreted as nanoseconds (see issue #23167).
    """
    import pandas as pd
    from pandas.api.types import is_datetime64_any_dtype

    from superset.models.helpers import ExploreMixin

    raw_values = [1577836800, 1609459200, 1640995200]

    int_col = MagicMock(
        column_name="ts",
        is_dttm=True,
        python_date_format=None,
        datetime_format=None,
    )
    columns_by_name = {"ts": int_col}

    datasource = MagicMock(offset=4, enforce_numerical_metrics=False)
    datasource.columns = [int_col]
    datasource.get_column = lambda name: columns_by_name.get(name)
    # Bind the real ExploreMixin implementations onto the mock datasource.
    method_names = (
        "_python_date_format",
        "_collect_dttm_labels",
        "_offset_only_dttm_cols",
        "normalize_df",
    )
    for method_name in method_names:
        bound = getattr(ExploreMixin, method_name).__get__(datasource)
        setattr(datasource, method_name, bound)

    query_object = MagicMock(columns=["ts"], granularity=None, time_shift=None)

    df = pd.DataFrame({"ts": list(raw_values)})

    result = datasource.normalize_df(df, query_object)

    normalized = result["ts"]
    assert not is_datetime64_any_dtype(normalized)
    assert normalized.tolist() == raw_values
|
||||
|
||||
|
||||
def test_adhoc_column_to_sqla_returns_type_from_column_metadata(
|
||||
database: Database,
|
||||
) -> None:
|
||||
|
||||
Reference in New Issue
Block a user