mirror of
https://github.com/apache/superset.git
synced 2026-05-28 11:15:24 +00:00
Follow-up to #40231 (merged), where a reviewer flagged a function-body `from datetime import datetime, timedelta` instead of a top-of-file import. Adds a `ruff-import-placement` pre-commit hook running `ruff check --select PLC0415 --preview --no-fix`. Per @rusackas's pushback on the first cut of this PR — which spammed 2,657 `# noqa: PLC0415` annotations across ~410 files without fixing anything — this revision is a much smaller surface area: 1. **Per-file-ignores** for whole directories where function-body imports are a deliberate pattern, not an oversight: - `superset/cli/**` and `scripts/**`: subcommand-deferred imports keep heavy modules out of the CLI startup path. - `superset/tasks/**`: Celery task bodies defer imports of the modules they orchestrate. - `superset/migrations/versions/**`: Alembic migrations interact with model state at runtime, not at module load. - `superset/mcp_service/**`: MCP tools lazy-load resources on invocation so the server can register many tools without paying their import cost at startup. - `superset/db_engine_specs/**`: engine specs defer driver imports so optional DB drivers don't have to be installed. - `superset/initialization/__init__.py`, `superset/extensions/__init__.py`, `superset/app.py`: the app-factory and extension wiring are intentionally full of circular-import workarounds. - `tests/**`: test files routinely defer imports for fixture isolation; the rule still applies to production code. 2. **Per-line `# noqa: PLC0415`** on the 259 remaining genuine circular-import sites (security/manager.py, sql/execution/executor.py, semantic_layers/labels.py, tags/core.py, core_api_injection.py, etc.). These are foundational modules where moving the imports up would actually break things. Net result: ~410 files / 2,657 grandfathered → ~73 files / 259 actual noqa annotations. The rule still catches every new function-body import outside the explicitly-allowed directories. 
Also: silences a pre-existing C901 on `mcp_service/sql_lab/tool/execute_sql.py` that fires under newer local ruff but not CI's pinned ruff 0.9.7 — blocks the local pre-commit run otherwise. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
171 lines
6.8 KiB
Python
171 lines
6.8 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
import logging
|
|
from typing import Optional, Union
|
|
|
|
import pandas as pd
|
|
from flask_babel import gettext as _
|
|
from pandas import DataFrame
|
|
|
|
from superset.exceptions import InvalidPostProcessingError
|
|
from superset.utils.core import DTTM_ALIAS
|
|
from superset.utils.decorators import suppress_logging
|
|
from superset.utils.pandas_postprocessing.utils import PROPHET_TIME_GRAIN_MAP
|
|
|
|
|
|
def _prophet_parse_seasonality(
|
|
input_value: Optional[Union[bool, int]],
|
|
) -> Union[bool, str, int]:
|
|
if input_value is None:
|
|
return "auto"
|
|
if isinstance(input_value, bool):
|
|
return input_value
|
|
try:
|
|
return int(input_value)
|
|
except ValueError:
|
|
return input_value
|
|
|
|
|
|
def _prophet_fit_and_predict(  # pylint: disable=too-many-arguments
    df: DataFrame,
    confidence_interval: float,
    yearly_seasonality: Union[bool, str, int],
    weekly_seasonality: Union[bool, str, int],
    daily_seasonality: Union[bool, str, int],
    periods: int,
    freq: str,
) -> DataFrame:
    """
    Fit a prophet model and return a DataFrame with predicted results.

    :param df: DataFrame with a datetime-like `ds` column and the series to
        fit (the caller in this module names it `y`). The frame passed in is
        not modified.
    :param confidence_interval: passed to the model as `interval_width`
    :param yearly_seasonality: yearly seasonality setting passed to the model
    :param weekly_seasonality: weekly seasonality setting passed to the model
    :param daily_seasonality: daily seasonality setting passed to the model
    :param periods: number of future periods to add to the forecast frame
    :param freq: frequency string used when generating the future periods
    :return: DataFrame indexed by `ds` holding `yhat`, `yhat_lower` and
        `yhat_upper`, followed by the remaining columns of `df` joined on `ds`
    :raises InvalidPostProcessingError: if the `prophet` package is not
        installed
    """
    # Only the import can raise `ModuleNotFoundError`, so keep the `try` tight.
    try:
        # `prophet` complains about `plotly` not being installed
        with suppress_logging("prophet.plot"):
            # pylint: disable=import-outside-toplevel
            from prophet import Prophet  # noqa: PLC0415
    except ModuleNotFoundError as ex:
        raise InvalidPostProcessingError(_("`prophet` package not installed")) from ex

    # Leave the `prophet.plot` logger at NOTSET so that it inherits its
    # effective level from its parent logger again. (The level used to be set
    # to CRITICAL right before this call, which had no lasting effect.)
    logging.getLogger("prophet.plot").setLevel(logging.NOTSET)

    model = Prophet(
        interval_width=confidence_interval,
        yearly_seasonality=yearly_seasonality,
        weekly_seasonality=weekly_seasonality,
        daily_seasonality=daily_seasonality,
    )
    if df["ds"].dt.tz:
        # Strip the timezone on a new frame rather than assigning into `df`,
        # so the caller's DataFrame is never mutated.
        df = df.assign(ds=df["ds"].dt.tz_convert(None))
    model.fit(df)
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)[["ds", "yhat", "yhat_lower", "yhat_upper"]]
    # Bring the observed values back next to the predictions, keyed by `ds`.
    return forecast.join(df.set_index("ds"), on="ds").set_index(["ds"])
|
|
|
|
|
|
def prophet(  # pylint: disable=too-many-arguments
    df: DataFrame,
    time_grain: str,
    periods: int,
    confidence_interval: float,
    yearly_seasonality: Optional[Union[bool, int]] = None,
    weekly_seasonality: Optional[Union[bool, int]] = None,
    daily_seasonality: Optional[Union[bool, int]] = None,
    index: Optional[str] = None,
) -> DataFrame:
    """
    Forecast every numeric series of a timeseries dataframe.

    Each series that can be fully converted to numbers is fitted on its own
    and contributes four columns to the result: the series itself plus three
    columns named after it with one of these suffixes:

    - `__yhat`: the forecast for the given date
    - `__yhat_lower`: the lower bound of the forecast for the given date
    - `__yhat_upper`: the upper bound of the forecast for the given date

    Columns that contain non-numeric values are skipped.

    :param df: DataFrame containing the temporal column and the series
    :param time_grain: Time grain used to specify time period increments in
        prediction; must be a key of `PROPHET_TIME_GRAIN_MAP`
    :param periods: Time periods (in units of `time_grain`) to predict into
        the future; a non-negative integer
    :param confidence_interval: Width of predicted confidence interval,
        strictly between 0 and 1
    :param yearly_seasonality: Should yearly seasonality be applied.
        An integer value will specify Fourier order of seasonality, `None`
        will automatically detect seasonality.
    :param weekly_seasonality: Should weekly seasonality be applied.
        An integer value will specify Fourier order of seasonality, `None`
        will automatically detect seasonality.
    :param daily_seasonality: Should daily seasonality be applied.
        An integer value will specify Fourier order of seasonality, `None`
        will automatically detect seasonality.
    :param index: the name of the column containing the x-axis data; falls
        back to `DTTM_ALIAS` when empty
    :return: DataFrame with the temporal column first, followed by the
        forecast and original columns of each processed series
    :raises InvalidPostProcessingError: if the time grain is missing or
        unsupported, `periods` or `confidence_interval` is invalid, the
        temporal column is absent, or the frame has fewer than two columns
    """
    index = index or DTTM_ALIAS

    # validate inputs
    if not time_grain:
        raise InvalidPostProcessingError(_("Time grain missing"))
    if time_grain not in PROPHET_TIME_GRAIN_MAP:
        raise InvalidPostProcessingError(
            _(
                "Unsupported time grain: %(time_grain)s",
                time_grain=time_grain,
            )
        )
    freq = PROPHET_TIME_GRAIN_MAP[time_grain]

    # check type at runtime due to marshmallow schema not being able to handle
    # union types
    if not isinstance(periods, int) or periods < 0:
        raise InvalidPostProcessingError(_("Periods must be a whole number"))
    if not confidence_interval or confidence_interval <= 0 or confidence_interval >= 1:
        raise InvalidPostProcessingError(
            _("Confidence interval must be between 0 and 1 (exclusive)")
        )
    if index not in df.columns:
        raise InvalidPostProcessingError(_("DataFrame must include temporal column"))
    if len(df.columns) < 2:
        raise InvalidPostProcessingError(_("DataFrame include at least one series"))

    # Only series whose every value converts to a number are forecast.
    series_columns = [
        candidate
        for candidate in df.columns
        if candidate != index
        and pd.to_numeric(df[candidate], errors="coerce").notnull().all()
    ]

    target_df = DataFrame()
    for column in series_columns:
        series_df = df[[index, column]].rename(columns={index: "ds", column: "y"})
        fit_df = _prophet_fit_and_predict(
            df=series_df,
            confidence_interval=confidence_interval,
            yearly_seasonality=_prophet_parse_seasonality(yearly_seasonality),
            weekly_seasonality=_prophet_parse_seasonality(weekly_seasonality),
            daily_seasonality=_prophet_parse_seasonality(daily_seasonality),
            periods=periods,
            freq=freq,
        )
        new_columns = [
            f"{column}__yhat",
            f"{column}__yhat_lower",
            f"{column}__yhat_upper",
            f"{column}",
        ]
        fit_df.columns = new_columns
        if target_df.empty:
            # First result becomes the base frame the others are attached to.
            target_df = fit_df
        else:
            target_df = target_df.assign(
                **{new_column: fit_df[new_column] for new_column in new_columns}
            )

    # Turn the `ds` index back into a regular column carrying the caller's name.
    return target_df.reset_index(level=0).rename(columns={"ds": index})
|