mirror of
https://github.com/apache/superset.git
synced 2026-04-20 08:34:37 +00:00
refactor: decouple pandas postprocessing operator (#18710)
This commit is contained in:
75
superset/utils/pandas_postprocessing/contribution.py
Normal file
75
superset/utils/pandas_postprocessing/contribution.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from decimal import Decimal
|
||||
from typing import List, Optional
|
||||
|
||||
from flask_babel import gettext as _
|
||||
from pandas import DataFrame
|
||||
|
||||
from superset.exceptions import QueryObjectValidationError
|
||||
from superset.utils.core import PostProcessingContributionOrientation
|
||||
from superset.utils.pandas_postprocessing.utils import validate_column_args
|
||||
|
||||
|
||||
@validate_column_args("columns")
|
||||
def contribution(
|
||||
df: DataFrame,
|
||||
orientation: Optional[
|
||||
PostProcessingContributionOrientation
|
||||
] = PostProcessingContributionOrientation.COLUMN,
|
||||
columns: Optional[List[str]] = None,
|
||||
rename_columns: Optional[List[str]] = None,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Calculate cell contibution to row/column total for numeric columns.
|
||||
Non-numeric columns will be kept untouched.
|
||||
|
||||
If `columns` are specified, only calculate contributions on selected columns.
|
||||
|
||||
:param df: DataFrame containing all-numeric data (temporal column ignored)
|
||||
:param columns: Columns to calculate values from.
|
||||
:param rename_columns: The new labels for the calculated contribution columns.
|
||||
The original columns will not be removed.
|
||||
:param orientation: calculate by dividing cell with row/column total
|
||||
:return: DataFrame with contributions.
|
||||
"""
|
||||
contribution_df = df.copy()
|
||||
numeric_df = contribution_df.select_dtypes(include=["number", Decimal])
|
||||
# verify column selections
|
||||
if columns:
|
||||
numeric_columns = numeric_df.columns.tolist()
|
||||
for col in columns:
|
||||
if col not in numeric_columns:
|
||||
raise QueryObjectValidationError(
|
||||
_(
|
||||
'Column "%(column)s" is not numeric or does not '
|
||||
"exists in the query results.",
|
||||
column=col,
|
||||
)
|
||||
)
|
||||
columns = columns or numeric_df.columns
|
||||
rename_columns = rename_columns or columns
|
||||
if len(rename_columns) != len(columns):
|
||||
raise QueryObjectValidationError(
|
||||
_("`rename_columns` must have the same length as `columns`.")
|
||||
)
|
||||
# limit to selected columns
|
||||
numeric_df = numeric_df[columns]
|
||||
axis = 0 if orientation == PostProcessingContributionOrientation.COLUMN else 1
|
||||
numeric_df = numeric_df / numeric_df.values.sum(axis=axis, keepdims=True)
|
||||
contribution_df[rename_columns] = numeric_df
|
||||
return contribution_df
|
||||
Reference in New Issue
Block a user