feat: run extra query on QueryObject and add compare operator for post_processing (#15279)

* rebase master and resolve conflicts

* pylint to makefile

* fix crash when using the pivot operator

* fix comments

* add precision argument

* query test

* wip

* fix ut

* rename

* set time_offsets to cache key

wip

* refactor get_df_payload

wip

* extra query cache

* cache ut

* normalize df

* fix timeoffset

* fix ut

* make cache key logging make sense

* resolve conflicts

* backend follow up iteration 1

wip

* rolling window type

* rebase master

* py lint and minor follow ups

* pylintrc
This commit is contained in:
Yongjie Zhao
2021-07-28 15:34:39 +01:00
committed by GitHub
parent bdfc2dc9d5
commit 32d2aa0c40
17 changed files with 744 additions and 149 deletions

View File

@@ -38,6 +38,7 @@ from .fixtures.dataframes import (
names_df,
timeseries_df,
prophet_df,
timeseries_df2,
)
# Aggregate spec with a single entry: key "idx_nulls" mapped to the "sum" operator.
# NOTE(review): presumably passed as the `aggregates` argument in tests outside
# this excerpt - confirm against the callers.
AGGREGATES_SINGLE = {"idx_nulls": {"operator": "sum"}}
@@ -422,6 +423,64 @@ class TestPostProcessing(SupersetTestCase):
columns={"abc": "abc"},
)
# diff by columns
post_df = proc.diff(df=timeseries_df2, columns={"y": "y", "z": "z"}, axis=1)
self.assertListEqual(post_df.columns.tolist(), ["label", "y", "z"])
self.assertListEqual(series_to_list(post_df["z"]), [0.0, 2.0, 8.0, 6.0])
def test_compare(self):
    """Check the columns and values produced by ``proc.compare``.

    Every call compares source column ``y`` against compare column ``z`` of
    ``timeseries_df2``. The `absolute`, `percentage` and `ratio` compare
    types are asserted in turn, along with the ``drop_original_columns``
    flag for the `absolute` type.
    """

    def compare_y_with_z(compare_type, **options):
        # Build the argument lists anew on every call so each invocation
        # receives its own list objects, just as separate inline calls would.
        return proc.compare(
            df=timeseries_df2,
            source_columns=["y"],
            compare_columns=["z"],
            compare_type=compare_type,
            **options,
        )

    # `absolute` comparison
    absolute_df = compare_y_with_z("absolute")
    absolute_columns = absolute_df.columns.tolist()
    self.assertListEqual(absolute_columns, ["label", "y", "z", "absolute__y__z"])
    absolute_values = series_to_list(absolute_df["absolute__y__z"])
    self.assertListEqual(absolute_values, [0.0, -2.0, -8.0, -6.0])

    # drop original columns
    dropped_df = compare_y_with_z("absolute", drop_original_columns=True)
    dropped_columns = dropped_df.columns.tolist()
    self.assertListEqual(dropped_columns, ["label", "absolute__y__z"])

    # `percentage` comparison
    percentage_df = compare_y_with_z("percentage")
    percentage_columns = percentage_df.columns.tolist()
    self.assertListEqual(
        percentage_columns, ["label", "y", "z", "percentage__y__z"]
    )
    percentage_values = series_to_list(percentage_df["percentage__y__z"])
    self.assertListEqual(percentage_values, [0.0, -1.0, -4.0, -3])

    # `ratio` comparison
    ratio_df = compare_y_with_z("ratio")
    ratio_columns = ratio_df.columns.tolist()
    self.assertListEqual(ratio_columns, ["label", "y", "z", "ratio__y__z"])
    ratio_values = series_to_list(ratio_df["ratio__y__z"])
    self.assertListEqual(ratio_values, [1.0, 0.5, 0.2, 0.25])
def test_cum(self):
# create new column (cumsum)
post_df = proc.cum(df=timeseries_df, columns={"y": "y2"}, operator="sum",)