mirror of
https://github.com/apache/superset.git
synced 2026-06-12 19:19:20 +00:00
Co-authored-by: Emmanuela Opurum <youremail@example.com> Co-authored-by: Đỗ Trọng Hải <41283691+hainenber@users.noreply.github.com>
76 lines
3.4 KiB
Python
76 lines
3.4 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
import datetime
|
|
from unittest.mock import MagicMock
|
|
|
|
import pandas as pd
|
|
|
|
from superset.common.utils import dataframe_utils
|
|
|
|
|
|
def test_is_datetime_series():
|
|
assert not dataframe_utils.is_datetime_series(None)
|
|
assert not dataframe_utils.is_datetime_series(pd.DataFrame({"foo": [1]}))
|
|
assert not dataframe_utils.is_datetime_series(pd.Series([1, 2, 3]))
|
|
assert not dataframe_utils.is_datetime_series(pd.Series(["1", "2", "3"]))
|
|
assert not dataframe_utils.is_datetime_series(pd.Series())
|
|
assert not dataframe_utils.is_datetime_series(pd.Series([None, None]))
|
|
assert dataframe_utils.is_datetime_series(
|
|
pd.Series([datetime.date(2018, 1, 1), datetime.date(2018, 1, 2), None])
|
|
)
|
|
assert dataframe_utils.is_datetime_series(
|
|
pd.Series([datetime.date(2018, 1, 1), datetime.date(2018, 1, 2)])
|
|
)
|
|
assert dataframe_utils.is_datetime_series(
|
|
pd.Series([datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2), None])
|
|
)
|
|
assert dataframe_utils.is_datetime_series(
|
|
pd.Series([datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2)])
|
|
)
|
|
assert dataframe_utils.is_datetime_series(
|
|
pd.date_range(datetime.date(2018, 1, 1), datetime.date(2018, 2, 1)).to_series()
|
|
)
|
|
assert dataframe_utils.is_datetime_series(
|
|
pd.date_range(
|
|
datetime.datetime(2018, 1, 1), datetime.datetime(2018, 2, 1)
|
|
).to_series()
|
|
)
|
|
|
|
|
|
def test_df_metrics_to_num_converts_string_numerics():
|
|
"""Test that string-encoded numeric columns (e.g. from ClickHouse) are converted."""
|
|
query_object = MagicMock()
|
|
query_object.metric_names = ["sum_col", "mixed_col", "text_col"]
|
|
df = pd.DataFrame(
|
|
{
|
|
"sum_col": pd.Series(["100", "200", "300"], dtype=object),
|
|
"mixed_col": pd.Series(["1", "not_a_number", "3"], dtype=object),
|
|
"text_col": pd.Series(["foo", "bar", "baz"], dtype=object),
|
|
"dim_col": pd.Series(["a", "b", "c"], dtype=object),
|
|
}
|
|
)
|
|
dataframe_utils.df_metrics_to_num(df, query_object)
|
|
# sum_col: all numeric strings -> should be converted to numeric
|
|
assert pd.api.types.is_numeric_dtype(df["sum_col"]), "sum_col should be numeric"
|
|
assert df["sum_col"].tolist() == [100.0, 200.0, 300.0]
|
|
# mixed_col: has non-numeric value -> should NOT be converted
|
|
assert df["mixed_col"].dtype == object, "mixed_col should remain object"
|
|
# text_col: all non-numeric -> should NOT be converted
|
|
assert df["text_col"].dtype == object, "text_col should remain object"
|
|
# dim_col: not in metric_names -> should NOT be touched
|
|
assert df["dim_col"].dtype == object, "dim_col should not be touched"
|