feat(chart-data-api): make pivoted columns flattenable (#10255)

* feat(chart-data-api): make pivoted columns flattenable

* Linting + improve tests
This commit is contained in:
Ville Brofeldt
2020-07-08 13:35:53 +03:00
committed by GitHub
parent 4252770d50
commit baeacc3c56
3 changed files with 134 additions and 21 deletions

View File

@@ -26,6 +26,12 @@ from superset.utils import pandas_postprocessing as proc
from .base_tests import SupersetTestCase
from .fixtures.dataframes import categories_df, lonlat_df, timeseries_df
AGGREGATES_SINGLE = {"idx_nulls": {"operator": "sum"}}
AGGREGATES_MULTIPLE = {
"idx_nulls": {"operator": "sum"},
"asc_idx": {"operator": "mean"},
}
def series_to_list(series: Series) -> List[Any]:
"""
@@ -57,33 +63,99 @@ def round_floats(
class TestPostProcessing(SupersetTestCase):
def test_pivot(self):
aggregates = {"idx_nulls": {"operator": "sum"}}
def test_flatten_column_after_pivot(self):
"""
Test pivot column flattening function
"""
# single aggregate cases
self.assertEqual(
proc._flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE, column="idx_nulls",
),
"idx_nulls",
)
self.assertEqual(
proc._flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE, column=("idx_nulls", "col1"),
),
"col1",
)
self.assertEqual(
proc._flatten_column_after_pivot(
aggregates=AGGREGATES_SINGLE, column=("idx_nulls", "col1", "col2"),
),
"col1, col2",
)
# regular pivot
# Multiple aggregate cases
self.assertEqual(
proc._flatten_column_after_pivot(
aggregates=AGGREGATES_MULTIPLE, column=("idx_nulls", "asc_idx", "col1"),
),
"idx_nulls, asc_idx, col1",
)
self.assertEqual(
proc._flatten_column_after_pivot(
aggregates=AGGREGATES_MULTIPLE,
column=("idx_nulls", "asc_idx", "col1", "col2"),
),
"idx_nulls, asc_idx, col1, col2",
)
def test_pivot_without_columns(self):
"""
Make sure pivot without columns returns correct DataFrame
"""
df = proc.pivot(df=categories_df, index=["name"], aggregates=AGGREGATES_SINGLE,)
self.assertListEqual(
df.columns.tolist(), ["name", "idx_nulls"],
)
self.assertEqual(len(df), 101)
self.assertEqual(df.sum()[1], 1050)
def test_pivot_with_single_column(self):
"""
Make sure pivot with single column returns correct DataFrame
"""
df = proc.pivot(
df=categories_df,
index=["name"],
columns=["category"],
aggregates=aggregates,
aggregates=AGGREGATES_SINGLE,
)
self.assertListEqual(
df.columns.tolist(),
[("idx_nulls", "cat0"), ("idx_nulls", "cat1"), ("idx_nulls", "cat2")],
df.columns.tolist(), ["name", "cat0", "cat1", "cat2"],
)
self.assertEqual(len(df), 101)
self.assertEqual(df.sum()[0], 315)
self.assertEqual(df.sum()[1], 315)
# regular pivot
df = proc.pivot(
df=categories_df,
index=["dept"],
columns=["category"],
aggregates=aggregates,
aggregates=AGGREGATES_SINGLE,
)
self.assertListEqual(
df.columns.tolist(), ["dept", "cat0", "cat1", "cat2"],
)
self.assertEqual(len(df), 5)
# fill value
def test_pivot_with_multiple_columns(self):
"""
Make sure pivot with multiple columns returns correct DataFrame
"""
df = proc.pivot(
df=categories_df,
index=["name"],
columns=["category", "dept"],
aggregates=AGGREGATES_SINGLE,
)
self.assertEqual(len(df.columns), 1 + 3 * 5) # index + possible permutations
def test_pivot_fill_values(self):
"""
Make sure pivot with fill values returns correct DataFrame
"""
df = proc.pivot(
df=categories_df,
index=["name"],
@@ -91,7 +163,20 @@ class TestPostProcessing(SupersetTestCase):
metric_fill_value=1,
aggregates={"idx_nulls": {"operator": "sum"}},
)
self.assertEqual(df.sum()[0], 382)
self.assertEqual(df.sum()[1], 382)
def test_pivot_exceptions(self):
"""
Make sure pivot raises correct Exceptions
"""
# Missing index
self.assertRaises(
TypeError,
proc.pivot,
df=categories_df,
columns=["dept"],
aggregates=AGGREGATES_SINGLE,
)
# invalid index reference
self.assertRaises(
@@ -100,7 +185,7 @@ class TestPostProcessing(SupersetTestCase):
df=categories_df,
index=["abc"],
columns=["dept"],
aggregates=aggregates,
aggregates=AGGREGATES_SINGLE,
)
# invalid column reference
@@ -110,7 +195,7 @@ class TestPostProcessing(SupersetTestCase):
df=categories_df,
index=["dept"],
columns=["abc"],
aggregates=aggregates,
aggregates=AGGREGATES_SINGLE,
)
# invalid aggregate options