refactor: remove unused flatten function (#20582)

This commit is contained in:
Yongjie Zhao
2022-07-01 19:17:55 +08:00
committed by GitHub
parent 290b89c7b4
commit b870a21eaa
17 changed files with 28 additions and 192 deletions

View File

@@ -188,8 +188,6 @@ def test_compare_after_pivot():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
count_metric sum_metric

View File

@@ -83,8 +83,6 @@ def test_cum_after_pivot_with_single_metric():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
"""
sum_metric
@@ -127,8 +125,6 @@ def test_cum_after_pivot_with_multiple_metrics():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
count_metric sum_metric

View File

@@ -17,86 +17,12 @@
import numpy as np
import pytest
from pandas import DataFrame, Timestamp, to_datetime
from pandas import DataFrame, to_datetime
from superset.exceptions import InvalidPostProcessingError
from superset.utils.pandas_postprocessing import _flatten_column_after_pivot, pivot
from tests.unit_tests.fixtures.dataframes import categories_df, single_metric_df
from tests.unit_tests.pandas_postprocessing.utils import (
AGGREGATES_MULTIPLE,
AGGREGATES_SINGLE,
)
def test_flatten_column_after_pivot():
"""
Test pivot column flattening function
"""
# single aggregate cases
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column="idx_nulls",
)
== "idx_nulls"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=1234,
)
== "1234"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=Timestamp("2020-09-29T00:00:00"),
)
== "2020-09-29 00:00:00"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column="idx_nulls",
)
== "idx_nulls"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=("idx_nulls", "col1"),
)
== "col1"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE,
column=("idx_nulls", "col1", 1234),
)
== "col1, 1234"
)
# Multiple aggregate cases
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_MULTIPLE,
column=("idx_nulls", "asc_idx", "col1"),
)
== "idx_nulls, asc_idx, col1"
)
assert (
_flatten_column_after_pivot(
aggregates=AGGREGATES_MULTIPLE,
column=("idx_nulls", "asc_idx", "col1", 1234),
)
== "idx_nulls, asc_idx, col1, 1234"
)
from superset.utils.pandas_postprocessing import flatten, pivot
from tests.unit_tests.fixtures.dataframes import categories_df
from tests.unit_tests.pandas_postprocessing.utils import AGGREGATES_SINGLE
def test_pivot_without_columns():
@@ -108,9 +34,9 @@ def test_pivot_without_columns():
index=["name"],
aggregates=AGGREGATES_SINGLE,
)
assert df.columns.tolist() == ["name", "idx_nulls"]
assert df.columns.tolist() == ["idx_nulls"]
assert len(df) == 101
assert df.sum()[1] == 1050
assert df["idx_nulls"].sum() == 1050
def test_pivot_with_single_column():
@@ -123,9 +49,13 @@ def test_pivot_with_single_column():
columns=["category"],
aggregates=AGGREGATES_SINGLE,
)
assert df.columns.tolist() == ["name", "cat0", "cat1", "cat2"]
assert df.columns.tolist() == [
("idx_nulls", "cat0"),
("idx_nulls", "cat1"),
("idx_nulls", "cat2"),
]
assert len(df) == 101
assert df.sum()[1] == 315
assert df["idx_nulls"]["cat0"].sum() == 315
df = pivot(
df=categories_df,
@@ -133,7 +63,11 @@ def test_pivot_with_single_column():
columns=["category"],
aggregates=AGGREGATES_SINGLE,
)
assert df.columns.tolist() == ["dept", "cat0", "cat1", "cat2"]
assert df.columns.tolist() == [
("idx_nulls", "cat0"),
("idx_nulls", "cat1"),
("idx_nulls", "cat2"),
]
assert len(df) == 5
@@ -147,6 +81,7 @@ def test_pivot_with_multiple_columns():
columns=["category", "dept"],
aggregates=AGGREGATES_SINGLE,
)
df = flatten(df)
assert len(df.columns) == 1 + 3 * 5 # index + possible permutations
@@ -161,7 +96,7 @@ def test_pivot_fill_values():
metric_fill_value=1,
aggregates={"idx_nulls": {"operator": "sum"}},
)
assert df.sum()[1] == 382
assert df["idx_nulls"]["cat0"].sum() == 382
def test_pivot_fill_column_values():
@@ -177,7 +112,7 @@ def test_pivot_fill_column_values():
aggregates={"idx_nulls": {"operator": "sum"}},
)
assert len(df) == 101
assert df.columns.tolist() == ["name", "<NULL>"]
assert df.columns.tolist() == [("idx_nulls", "<NULL>")]
def test_pivot_exceptions():
@@ -234,8 +169,9 @@ def test_pivot_eliminate_cartesian_product_columns():
aggregates={"metric": {"operator": "mean"}},
drop_missing_columns=False,
)
assert list(df.columns) == ["dttm", "0, 0", "1, 1"]
assert np.isnan(df["1, 1"][0])
df = flatten(df)
assert list(df.columns) == ["dttm", "metric, 0, 0", "metric, 1, 1"]
assert np.isnan(df["metric, 1, 1"][0])
# multiple metrics
mock_df = DataFrame(
@@ -258,6 +194,7 @@ def test_pivot_eliminate_cartesian_product_columns():
},
drop_missing_columns=False,
)
df = flatten(df)
assert list(df.columns) == [
"dttm",
"metric, 0, 0",
@@ -266,21 +203,3 @@ def test_pivot_eliminate_cartesian_product_columns():
"metric2, 1, 1",
]
assert np.isnan(df["metric, 1, 1"][0])
def test_pivot_without_flatten_columns_and_reset_index():
df = pivot(
df=single_metric_df,
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
# metric
# country UK US
# dttm
# 2019-01-01 5 6
# 2019-01-02 7 8
assert df.columns.to_list() == [("sum_metric", "UK"), ("sum_metric", "US")]
assert df.index.to_list() == to_datetime(["2019-01-01", "2019-01-02"]).to_list()

View File

@@ -110,8 +110,6 @@ def test_resample_after_pivot():
aggregates={
"val": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
val

View File

@@ -113,8 +113,6 @@ def test_rolling_should_empty_df():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
rolling_df = pp.rolling(
df=pivot_df,
@@ -132,8 +130,6 @@ def test_rolling_after_pivot_with_single_metric():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
flatten_columns=False,
reset_index=False,
)
"""
sum_metric
@@ -182,8 +178,6 @@ def test_rolling_after_pivot_with_multiple_metrics():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
flatten_columns=False,
reset_index=False,
)
"""
count_metric sum_metric