fix: to_datetime in Pandas 2 (#24952)

This commit is contained in:
Beto Dealmeida
2023-08-10 19:32:15 -07:00
committed by GitHub
parent ce65a3b9cd
commit 41ca4a00b9
3 changed files with 41 additions and 5 deletions

View File

@@ -17,11 +17,14 @@
import os
from typing import Any, Optional
import pandas as pd
import pytest
from superset.utils.core import (
cast_to_boolean,
DateColumn,
is_test,
normalize_dttm_col,
parse_boolean_string,
QueryObjectFilterClause,
remove_extra_adhoc_filters,
@@ -171,3 +174,30 @@ def test_other_values():
assert cast_to_boolean([]) is False
assert cast_to_boolean({}) is False
assert cast_to_boolean(object()) is False
def test_normalize_dttm_col() -> None:
    """
    Tests for the ``normalize_dttm_col`` function.

    In particular, this covers a regression when Pandas was upgraded from 1.5.3 to
    2.0.3 and the behavior of ``pd.to_datetime`` changed.
    """
    df = pd.DataFrame({"__time": ["2017-07-01T00:00:00.000Z"]})

    # sanity-check the starting frame: a single row holding the raw timestamp
    # string; the cell padding below must match the widths of the separator row,
    # since ``to_markdown`` pads every cell to the column width
    assert (
        df.to_markdown()
        == """
|    | __time                   |
|---:|:-------------------------|
|  0 | 2017-07-01T00:00:00.000Z |
    """.strip()
    )

    # in 1.5.3 this would return a datetime64[ns] dtype, but in 2.0.3 we had to
    # add ``exact=False`` since there is a leftover after parsing the format
    dttm_cols = (DateColumn("__time", "%Y-%m-%d"),)

    # the function modifies the dataframe in place
    normalize_dttm_col(df, dttm_cols)

    # only the date part covered by the format should remain once the column is
    # rendered back to strings
    assert df["__time"].astype(str).tolist() == ["2017-07-01"]