Files
superset2/tests/unit_tests/common/test_dataframe_utils.py
2026-06-09 10:28:34 -07:00

76 lines
3.4 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime
from unittest.mock import MagicMock
import pandas as pd
from superset.common.utils import dataframe_utils
def test_is_datetime_series():
assert not dataframe_utils.is_datetime_series(None)
assert not dataframe_utils.is_datetime_series(pd.DataFrame({"foo": [1]}))
assert not dataframe_utils.is_datetime_series(pd.Series([1, 2, 3]))
assert not dataframe_utils.is_datetime_series(pd.Series(["1", "2", "3"]))
assert not dataframe_utils.is_datetime_series(pd.Series())
assert not dataframe_utils.is_datetime_series(pd.Series([None, None]))
assert dataframe_utils.is_datetime_series(
pd.Series([datetime.date(2018, 1, 1), datetime.date(2018, 1, 2), None])
)
assert dataframe_utils.is_datetime_series(
pd.Series([datetime.date(2018, 1, 1), datetime.date(2018, 1, 2)])
)
assert dataframe_utils.is_datetime_series(
pd.Series([datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2), None])
)
assert dataframe_utils.is_datetime_series(
pd.Series([datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2)])
)
assert dataframe_utils.is_datetime_series(
pd.date_range(datetime.date(2018, 1, 1), datetime.date(2018, 2, 1)).to_series()
)
assert dataframe_utils.is_datetime_series(
pd.date_range(
datetime.datetime(2018, 1, 1), datetime.datetime(2018, 2, 1)
).to_series()
)
def test_df_metrics_to_num_converts_string_numerics():
"""Test that string-encoded numeric columns (e.g. from ClickHouse) are converted."""
query_object = MagicMock()
query_object.metric_names = ["sum_col", "mixed_col", "text_col"]
df = pd.DataFrame(
{
"sum_col": pd.Series(["100", "200", "300"], dtype=object),
"mixed_col": pd.Series(["1", "not_a_number", "3"], dtype=object),
"text_col": pd.Series(["foo", "bar", "baz"], dtype=object),
"dim_col": pd.Series(["a", "b", "c"], dtype=object),
}
)
dataframe_utils.df_metrics_to_num(df, query_object)
# sum_col: all numeric strings -> should be converted to numeric
assert pd.api.types.is_numeric_dtype(df["sum_col"]), "sum_col should be numeric"
assert df["sum_col"].tolist() == [100.0, 200.0, 300.0]
# mixed_col: has non-numeric value -> should NOT be converted
assert df["mixed_col"].dtype == object, "mixed_col should remain object"
# text_col: all non-numeric -> should NOT be converted
assert df["text_col"].dtype == object, "text_col should remain object"
# dim_col: not in metric_names -> should NOT be touched
assert df["dim_col"].dtype == object, "dim_col should not be touched"