mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
fix(utils): Suppress pandas date parsing warnings in normalize_dttm_col (#35042)
This commit is contained in:
@@ -34,6 +34,7 @@ import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
import uuid
|
||||
import warnings
|
||||
import zlib
|
||||
from collections.abc import Iterable, Iterator, Sequence
|
||||
from contextlib import closing, contextmanager
|
||||
@@ -110,6 +111,7 @@ from superset.utils.backports import StrEnum
|
||||
from superset.utils.database import get_example_database
|
||||
from superset.utils.date_parser import parse_human_timedelta
|
||||
from superset.utils.hashing import md5_sha_from_dict, md5_sha_from_str
|
||||
from superset.utils.pandas import detect_datetime_format
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.connectors.sqla.models import BaseDatasource, TableColumn
|
||||
@@ -1858,6 +1860,62 @@ class DateColumn:
|
||||
)
|
||||
|
||||
|
||||
def _process_datetime_column(
|
||||
df: pd.DataFrame,
|
||||
col: DateColumn,
|
||||
) -> None:
|
||||
"""Process a single datetime column with format detection."""
|
||||
if col.timestamp_format in ("epoch_s", "epoch_ms"):
|
||||
dttm_series = df[col.col_label]
|
||||
if is_numeric_dtype(dttm_series):
|
||||
# Column is formatted as a numeric value
|
||||
unit = col.timestamp_format.replace("epoch_", "")
|
||||
df[col.col_label] = pd.to_datetime(
|
||||
dttm_series,
|
||||
utc=False,
|
||||
unit=unit,
|
||||
origin="unix",
|
||||
errors="coerce",
|
||||
exact=False,
|
||||
)
|
||||
else:
|
||||
# Column has already been formatted as a timestamp.
|
||||
try:
|
||||
df[col.col_label] = dttm_series.apply(
|
||||
lambda x: pd.Timestamp(x) if pd.notna(x) else pd.NaT
|
||||
)
|
||||
except ValueError:
|
||||
logger.warning(
|
||||
"Unable to convert column %s to datetime, ignoring",
|
||||
col.col_label,
|
||||
)
|
||||
else:
|
||||
# Try to detect format if not specified
|
||||
format_to_use = col.timestamp_format or detect_datetime_format(
|
||||
df[col.col_label]
|
||||
)
|
||||
|
||||
# Parse with or without format (suppress warning if no format)
|
||||
if format_to_use:
|
||||
df[col.col_label] = pd.to_datetime(
|
||||
df[col.col_label],
|
||||
utc=False,
|
||||
format=format_to_use,
|
||||
errors="coerce",
|
||||
exact=False,
|
||||
)
|
||||
else:
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings("ignore", message=".*Could not infer format.*")
|
||||
df[col.col_label] = pd.to_datetime(
|
||||
df[col.col_label],
|
||||
utc=False,
|
||||
format=None,
|
||||
errors="coerce",
|
||||
exact=False,
|
||||
)
|
||||
|
||||
|
||||
def normalize_dttm_col(
|
||||
df: pd.DataFrame,
|
||||
dttm_cols: tuple[DateColumn, ...] = tuple(), # noqa: C408
|
||||
@@ -1866,38 +1924,8 @@ def normalize_dttm_col(
|
||||
if _col.col_label not in df.columns:
|
||||
continue
|
||||
|
||||
if _col.timestamp_format in ("epoch_s", "epoch_ms"):
|
||||
dttm_series = df[_col.col_label]
|
||||
if is_numeric_dtype(dttm_series):
|
||||
# Column is formatted as a numeric value
|
||||
unit = _col.timestamp_format.replace("epoch_", "")
|
||||
df[_col.col_label] = pd.to_datetime(
|
||||
dttm_series,
|
||||
utc=False,
|
||||
unit=unit,
|
||||
origin="unix",
|
||||
errors="coerce",
|
||||
exact=False,
|
||||
)
|
||||
else:
|
||||
# Column has already been formatted as a timestamp.
|
||||
try:
|
||||
df[_col.col_label] = dttm_series.apply(
|
||||
lambda x: pd.Timestamp(x) if pd.notna(x) else pd.NaT
|
||||
)
|
||||
except ValueError:
|
||||
logger.warning(
|
||||
"Unable to convert column %s to datetime, ignoring",
|
||||
_col.col_label,
|
||||
)
|
||||
else:
|
||||
df[_col.col_label] = pd.to_datetime(
|
||||
df[_col.col_label],
|
||||
utc=False,
|
||||
format=_col.timestamp_format,
|
||||
errors="coerce",
|
||||
exact=False,
|
||||
)
|
||||
_process_datetime_column(df, _col)
|
||||
|
||||
if _col.offset:
|
||||
df[_col.col_label] += timedelta(hours=_col.offset)
|
||||
if _col.time_shift is not None:
|
||||
|
||||
Reference in New Issue
Block a user