mirror of
https://github.com/apache/superset.git
synced 2026-04-07 18:35:15 +00:00
feat: add resample operator in post processing (#16607)
* feat: add resample operator in post processing * wip * fill zero values * updates * fix ut
This commit is contained in:
@@ -747,6 +747,7 @@ class ChartDataPostProcessingOperationSchema(Schema):
|
||||
"sort",
|
||||
"diff",
|
||||
"compare",
|
||||
"resample",
|
||||
)
|
||||
),
|
||||
example="aggregate",
|
||||
|
||||
@@ -915,3 +915,29 @@ def boxplot(
|
||||
for metric in metrics
|
||||
}
|
||||
return aggregate(df, groupby=groupby, aggregates=aggregates)
|
||||
|
||||
|
||||
def resample(
    df: DataFrame,
    rule: str,
    method: str,
    time_column: str,
    fill_value: Optional[Union[float, int]] = None,
) -> DataFrame:
    """
    Resample a timeseries dataframe.

    :param df: DataFrame to resample.
    :param rule: The offset string representing target conversion.
    :param method: Name of the resampler method that produces the values of
           the resampled frame, e.g. ``asfreq`` or ``ffill``.
    :param time_column: Existing column in the DataFrame to resample on.
    :param fill_value: Value for the rows introduced by the resample. Only
           used when ``method`` is ``asfreq``; ignored otherwise.
    :return: DataFrame after resample, with ``time_column`` as a regular column
    :raises QueryObjectValidationError: If the request is incorrect, i.e. the
            time column is missing or ``method`` is not a public, callable
            resampler method.
    """
    if time_column not in df.columns:
        raise QueryObjectValidationError(
            "Resample time column not found in data: {}".format(time_column)
        )

    # set_index returns a new frame, so the caller's DataFrame is not modified.
    resampler = df.set_index(time_column).resample(rule)

    if method == "asfreq" and fill_value is not None:
        return resampler.asfreq(fill_value=fill_value).reset_index()

    # `method` is a free-form string (presumably taken from the chart data
    # request), so never dispatch to private/dunder attributes and turn an
    # unknown or non-callable name into a validation error instead of letting
    # AttributeError/TypeError escape.
    operation = None
    if isinstance(method, str) and not method.startswith("_"):
        operation = getattr(resampler, method, None)
    if not callable(operation):
        raise QueryObjectValidationError(
            "Invalid resample method: {}".format(method)
        )

    return operation().reset_index()
|
||||
|
||||
@@ -870,3 +870,22 @@ class TestPostProcessing(SupersetTestCase):
|
||||
metrics=["cars"],
|
||||
percentiles=[10, 90, 10],
|
||||
)
|
||||
|
||||
def test_resample(self):
    """Resample the timeseries fixture daily with ``ffill`` and with zero-filled ``asfreq``."""
    # Expose the fixture's index as a regular "time_column" column, which is
    # what proc.resample takes as its time_column argument.
    source = timeseries_df.copy()
    source.index.name = "time_column"
    source = source.reset_index()

    # Forward fill: rows added by the daily rule repeat the previous row.
    forward_filled = proc.resample(
        df=source, rule="1D", method="ffill", time_column="time_column",
    )
    self.assertListEqual(
        forward_filled["label"].tolist(), ["x", "y", "y", "y", "z", "z", "q"]
    )
    self.assertListEqual(
        forward_filled["y"].tolist(), [1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0]
    )

    # asfreq with fill_value=0: rows added by the daily rule hold 0 in every column.
    zero_filled = proc.resample(
        df=source, rule="1D", method="asfreq", time_column="time_column", fill_value=0,
    )
    self.assertListEqual(
        zero_filled["label"].tolist(), ["x", "y", 0, 0, "z", 0, "q"]
    )
    self.assertListEqual(
        zero_filled["y"].tolist(), [1.0, 2.0, 0, 0, 3.0, 0, 4.0]
    )
|
||||
|
||||
Reference in New Issue
Block a user