feat(advanced analysis): support MultiIndex column in post processing stage (#19116)

This commit is contained in:
Yongjie Zhao
2022-03-23 13:46:28 +08:00
committed by GitHub
parent 6083545e86
commit 375c03e084
55 changed files with 1267 additions and 772 deletions

View File

@@ -18,11 +18,14 @@
*/
import {
buildQueryContext,
DTTM_ALIAS,
PostProcessingResample,
QueryFormData,
} from '@superset-ui/core';
import { rollingWindowOperator } from '@superset-ui/chart-controls';
import {
flattenOperator,
rollingWindowOperator,
sortOperator,
} from '@superset-ui/chart-controls';
const TIME_GRAIN_MAP: Record<string, string> = {
PT1S: 'S',
@@ -47,12 +50,10 @@ const TIME_GRAIN_MAP: Record<string, string> = {
export default function buildQuery(formData: QueryFormData) {
return buildQueryContext(formData, baseQueryObject => {
// todo: move into full advanced analysis section here
const rollingProc = rollingWindowOperator(formData, baseQueryObject);
if (rollingProc) {
rollingProc.options = { ...rollingProc.options, is_pivot_df: false };
}
const { time_grain_sqla } = formData;
let resampleProc: PostProcessingResample | undefined;
let resampleProc: PostProcessingResample;
if (rollingProc && time_grain_sqla) {
const rule = TIME_GRAIN_MAP[time_grain_sqla];
if (rule) {
@@ -62,7 +63,6 @@ export default function buildQuery(formData: QueryFormData) {
method: 'asfreq',
rule,
fill_value: null,
time_column: DTTM_ALIAS,
},
};
}
@@ -72,16 +72,10 @@ export default function buildQuery(formData: QueryFormData) {
...baseQueryObject,
is_timeseries: true,
post_processing: [
{
operation: 'sort',
options: {
columns: {
[DTTM_ALIAS]: true,
},
},
},
sortOperator(formData, baseQueryObject),
resampleProc,
rollingProc,
flattenOperator(formData, baseQueryObject),
],
},
];

View File

@@ -22,7 +22,7 @@ import {
QueryObject,
normalizeOrderBy,
} from '@superset-ui/core';
import { pivotOperator } from '@superset-ui/chart-controls';
import { flattenOperator, pivotOperator } from '@superset-ui/chart-controls';
export default function buildQuery(formData: QueryFormData) {
const {
@@ -66,6 +66,7 @@ export default function buildQuery(formData: QueryFormData) {
is_timeseries: true,
post_processing: [
pivotOperator(formData1, { ...baseQueryObject, is_timeseries: true }),
flattenOperator(formData1, { ...baseQueryObject, is_timeseries: true }),
],
} as QueryObject;
return [normalizeOrderBy(queryObjectA)];
@@ -77,6 +78,7 @@ export default function buildQuery(formData: QueryFormData) {
is_timeseries: true,
post_processing: [
pivotOperator(formData2, { ...baseQueryObject, is_timeseries: true }),
flattenOperator(formData2, { ...baseQueryObject, is_timeseries: true }),
],
} as QueryObject;
return [normalizeOrderBy(queryObjectB)];

View File

@@ -22,42 +22,54 @@ import {
ensureIsArray,
QueryFormData,
normalizeOrderBy,
RollingType,
PostProcessingPivot,
} from '@superset-ui/core';
import {
rollingWindowOperator,
timeCompareOperator,
isValidTimeCompare,
sortOperator,
pivotOperator,
resampleOperator,
contributionOperator,
prophetOperator,
timeComparePivotOperator,
flattenOperator,
} from '@superset-ui/chart-controls';
export default function buildQuery(formData: QueryFormData) {
const { x_axis, groupby } = formData;
const is_timeseries = x_axis === DTTM_ALIAS || !x_axis;
return buildQueryContext(formData, baseQueryObject => {
const pivotOperatorInRuntime: PostProcessingPivot | undefined =
pivotOperator(formData, {
...baseQueryObject,
index: x_axis,
is_timeseries,
});
if (
pivotOperatorInRuntime &&
Object.values(RollingType).includes(formData.rolling_type)
) {
pivotOperatorInRuntime.options = {
...pivotOperatorInRuntime.options,
...{
flatten_columns: false,
reset_index: false,
},
};
}
/* The `pivotOperatorInRuntime` determines how to pivot the dataframe returned from the raw query.
1. If it's a time-compare query, it returns a pivoted dataframe with the time-compared metrics appended, for instance:
MAX(value) MAX(value)__1 year ago MIN(value) MIN(value)__1 year ago
city LA LA LA LA
__timestamp
2015-01-01 568.0 671.0 5.0 6.0
2015-02-01 407.0 649.0 4.0 3.0
2015-03-01 318.0 465.0 0.0 3.0
2. If it's a normal query, it returns a plain pivoted dataframe, for instance:
MAX(value) MIN(value)
city LA LA
__timestamp
2015-01-01 568.0 5.0
2015-02-01 407.0 4.0
2015-03-01 318.0 0.0
*/
const pivotOperatorInRuntime: PostProcessingPivot = isValidTimeCompare(
formData,
baseQueryObject,
)
? timeComparePivotOperator(formData, baseQueryObject)
: pivotOperator(formData, {
...baseQueryObject,
index: x_axis,
is_timeseries,
});
return [
{
@@ -70,13 +82,16 @@ export default function buildQuery(formData: QueryFormData) {
time_offsets: isValidTimeCompare(formData, baseQueryObject)
? formData.time_compare
: [],
/* Note that:
1. The resample, rolling, cum, timeCompare operators should be after pivot.
2. The flattenOperator turns a MultiIndex DataFrame into a flat DataFrame.
*/
post_processing: [
resampleOperator(formData, baseQueryObject),
timeCompareOperator(formData, baseQueryObject),
sortOperator(formData, { ...baseQueryObject, is_timeseries: true }),
// to apply rolling across multiple series, the pivot must run before rollingWindowOperator
pivotOperatorInRuntime,
rollingWindowOperator(formData, baseQueryObject),
timeCompareOperator(formData, baseQueryObject),
resampleOperator(formData, baseQueryObject),
flattenOperator(formData, baseQueryObject),
contributionOperator(formData, baseQueryObject),
prophetOperator(formData, baseQueryObject),
],