feat: add resample operator in post processing (#16607)

* feat: add resample operator in post processing

* wip

* fill zero values

* updates

* fix ut
This commit is contained in:
Yongjie Zhao
2021-09-17 09:40:59 +01:00
committed by GitHub
parent a4f60010a1
commit cc1c6c1bb6
3 changed files with 46 additions and 0 deletions

View File

@@ -747,6 +747,7 @@ class ChartDataPostProcessingOperationSchema(Schema):
"sort",
"diff",
"compare",
"resample",
)
),
example="aggregate",

View File

@@ -915,3 +915,29 @@ def boxplot(
for metric in metrics
}
return aggregate(df, groupby=groupby, aggregates=aggregates)
def resample(
    df: DataFrame,
    rule: str,
    method: str,
    time_column: str,
    fill_value: Optional[Union[float, int]] = None,
) -> DataFrame:
    """
    Resample a timeseries DataFrame to a new frequency.

    The frame is indexed by ``time_column``, resampled with ``rule``, and the
    resampler method named by ``method`` is applied. The index is then turned
    back into a regular column.

    :param df: DataFrame to resample. A new frame is returned; ``df`` itself
               is not modified.
    :param rule: Offset string describing the target frequency, passed as is
                 to ``DataFrame.resample``.
    :param method: Name of the resampler method to call without arguments,
                   e.g. ``"ffill"`` or ``"asfreq"``.
    :param time_column: Name of an existing column in ``df`` that is used as
                        the index while resampling.
    :param fill_value: Value handed to ``asfreq(fill_value=...)``. Only used
                       when ``method`` is ``"asfreq"`` and the value is not
                       ``None``; ignored for every other method.
    :return: Resampled DataFrame with ``time_column`` restored as a column.
    """
    # NOTE(review): ``method`` is resolved with a plain getattr and nothing in
    # this function validates it or ``time_column``; errors surface as raised
    # by pandas/getattr. Consider an allow-list of resampler methods.
    resampler = df.set_index(time_column).resample(rule)

    if fill_value is None or method != "asfreq":
        # Generic path: call the named resampler method with no arguments.
        resampled = getattr(resampler, method)()
    else:
        # ``asfreq`` is the only method that receives the fill value.
        resampled = resampler.asfreq(fill_value=fill_value)

    return resampled.reset_index()

View File

@@ -870,3 +870,22 @@ class TestPostProcessing(SupersetTestCase):
metrics=["cars"],
percentiles=[10, 90, 10],
)
def test_resample(self):
    """
    Check ``proc.resample`` on a daily rule for two methods: forward fill,
    and ``asfreq`` with an explicit fill value of 0.
    """
    # Turn the fixture's index into a regular "time_column" column, which is
    # the column name handed to resample below.
    source = timeseries_df.copy()
    source.index.name = "time_column"
    source = source.reset_index()

    # Forward fill: each new row repeats the previous observed row.
    forward_filled = proc.resample(
        df=source, rule="1D", method="ffill", time_column="time_column",
    )
    expected_labels = ["x", "y", "y", "y", "z", "z", "q"]
    expected_values = [1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0]
    self.assertListEqual(forward_filled["label"].tolist(), expected_labels)
    self.assertListEqual(forward_filled["y"].tolist(), expected_values)

    # asfreq with fill_value=0: each new row holds 0 in every column.
    zero_filled = proc.resample(
        df=source, rule="1D", method="asfreq", time_column="time_column", fill_value=0,
    )
    expected_labels = ["x", "y", 0, 0, "z", 0, "q"]
    expected_values = [1.0, 2.0, 0, 0, 3.0, 0, 4.0]
    self.assertListEqual(zero_filled["label"].tolist(), expected_labels)
    self.assertListEqual(zero_filled["y"].tolist(), expected_values)