mirror of
https://github.com/apache/superset.git
synced 2026-04-09 19:35:21 +00:00
77 lines
3.1 KiB
Python
77 lines
3.1 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
from decimal import Decimal
|
|
from typing import List, Optional
|
|
|
|
from flask_babel import gettext as _
|
|
from pandas import DataFrame
|
|
|
|
from superset.exceptions import QueryObjectValidationError
|
|
from superset.utils.core import PostProcessingContributionOrientation
|
|
from superset.utils.pandas_postprocessing.utils import validate_column_args
|
|
|
|
|
|
@validate_column_args("columns")
def contribution(
    df: DataFrame,
    orientation: Optional[
        PostProcessingContributionOrientation
    ] = PostProcessingContributionOrientation.COLUMN,
    columns: Optional[List[str]] = None,
    rename_columns: Optional[List[str]] = None,
) -> DataFrame:
    """
    Express each selected cell as its share of the row or column total.

    The input frame is copied first, so the caller's ``df`` is not modified.
    Candidate columns are the ones matched by
    ``select_dtypes(include=["number", Decimal])``; missing values in those
    columns are counted as 0. Columns that are not candidates are carried over
    to the result as they are.

    :param df: DataFrame holding the query results.
    :param orientation: ``PostProcessingContributionOrientation.COLUMN`` divides
           each cell by its column total; any other value divides each cell by
           the total of its row (taken over the selected columns only).
    :param columns: Columns to calculate contributions for. When empty or
           omitted, every candidate column is used.
    :param rename_columns: Labels under which the calculated contributions are
           stored, one per entry in `columns`. When empty or omitted the
           results are stored under the source labels, replacing their values;
           with different labels the source columns are kept alongside.
    :return: Copy of `df` with the contribution columns written into it.
    :raises QueryObjectValidationError: If an entry of `columns` is not among
            the candidate columns, or if `rename_columns` and `columns` differ
            in length.
    """
    result = df.copy()
    numeric_part = result.select_dtypes(include=["number", Decimal])
    # Kept as a separate in-place call so the fill happens on exactly the
    # object returned by select_dtypes.
    numeric_part.fillna(0, inplace=True)

    if columns:
        # An explicit selection must consist of candidate columns only; the
        # first offending entry is the one reported.
        allowed = numeric_part.columns.tolist()
        rejected = [name for name in columns if name not in allowed]
        if rejected:
            raise QueryObjectValidationError(
                _(
                    'Column "%(column)s" is not numeric or does not '
                    "exists in the query results.",
                    column=rejected[0],
                )
            )
    else:
        columns = numeric_part.columns

    if not rename_columns:
        rename_columns = columns

    if len(rename_columns) != len(columns):
        raise QueryObjectValidationError(
            _("`rename_columns` must have the same length as `columns`.")
        )

    # Restrict to the selected columns before totalling, so row totals only
    # span the columns taking part in the calculation.
    selected = numeric_part[columns]

    if orientation == PostProcessingContributionOrientation.COLUMN:
        axis = 0
    else:
        axis = 1

    # keepdims=True leaves the totals two-dimensional so they broadcast
    # against `selected` along the chosen axis.
    totals = selected.values.sum(axis=axis, keepdims=True)
    result[rename_columns] = selected / totals
    return result
|