mirror of
https://github.com/apache/superset.git
synced 2026-04-12 12:47:53 +00:00
222 lines
8.5 KiB
Python
222 lines
8.5 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from operator import eq, ge, gt, le, lt, ne
|
|
from timeit import default_timer
|
|
from typing import Any
|
|
from uuid import UUID
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from celery.exceptions import SoftTimeLimitExceeded
|
|
from flask import current_app as app
|
|
from flask_babel import lazy_gettext as _
|
|
|
|
from superset import jinja_context, security_manager
|
|
from superset.commands.base import BaseCommand
|
|
from superset.commands.report.exceptions import (
|
|
AlertQueryError,
|
|
AlertQueryInvalidTypeError,
|
|
AlertQueryMultipleColumnsError,
|
|
AlertQueryMultipleRowsError,
|
|
AlertQueryTimeout,
|
|
AlertValidatorConfigError,
|
|
)
|
|
from superset.reports.models import ReportSchedule, ReportScheduleValidatorType
|
|
from superset.tasks.utils import get_executor
|
|
from superset.utils import json
|
|
from superset.utils.core import override_user
|
|
from superset.utils.decorators import logs_context
|
|
from superset.utils.retries import retry_call
|
|
|
|
# Module-level logger used by the alert command below.
logger = logging.getLogger(__name__)

# Every alert SQL statement gets this LIMIT applied, to avoid heavy loads
# caused by a user mistake.
ALERT_SQL_LIMIT = 2

# Comparison symbol (the "op" entry of the validator config) -> the function
# from the ``operator`` module that implements it.
OPERATOR_FUNCTIONS = {
    ">=": ge,
    ">": gt,
    "<=": le,
    "<": lt,
    "==": eq,
    "!=": ne,
}
|
|
|
|
|
|
class AlertCommand(BaseCommand):
    """
    Executes the SQL query of an alert and decides if the alert triggers.

    ``validate`` runs the query and stores the single resulting value in
    ``self._result``; ``run`` then evaluates that value according to the
    validator type of the report schedule (not-null or operator/threshold).
    """

    def __init__(self, report_schedule: ReportSchedule, execution_id: UUID):
        """
        :param report_schedule: The alert whose SQL query will be executed
        :param execution_id: Identifier of this execution, used for logging
        """
        self._report_schedule = report_schedule
        self._execution_id = execution_id
        self._result: float | None = None

    @staticmethod
    def _is_null_or_zero(value: Any) -> bool:
        """
        Tells if a query value counts as "nothing": ``0``, ``None`` or NaN.

        A plain ``value in (0, None, np.nan)`` test only recognizes the
        ``np.nan`` object itself, because NaN never compares equal to anything
        (itself included) and membership then falls back to identity. NaN
        values coming out of a dataframe are distinct objects, so they are
        checked explicitly with ``np.isnan``.

        :param value: The scalar value to inspect
        :return: bool, True when the value is 0, None or a float NaN
        """
        if value in (0, None):
            return True
        return isinstance(value, (float, np.floating)) and bool(np.isnan(value))

    def run(self) -> bool:
        """
        Executes an alert SQL query and validates it.
        Will set the report_schedule.last_value or last_value_row_json
        with the query result

        :return: bool, if the alert triggered or not
        :raises AlertQueryError: SQL query is not valid
        :raises AlertQueryInvalidTypeError: The output from the SQL query
        is not an allowed type
        :raises AlertQueryMultipleColumnsError: The SQL query returned multiple columns
        :raises AlertQueryMultipleRowsError: The SQL query returned multiple rows
        :raises AlertQueryTimeout: The SQL query received a celery soft timeout
        :raises AlertValidatorConfigError: The validator query data is not valid
        """
        self.validate()

        if self._is_validator_not_null:
            self._report_schedule.last_value_row_json = str(self._result)
            return not self._is_null_or_zero(self._result)

        self._report_schedule.last_value = self._result
        try:
            # Parse the validator config once and read both settings from it
            config = json.loads(self._report_schedule.validator_config_json)
            op_name = config["op"]
            threshold = config["threshold"]
            return OPERATOR_FUNCTIONS[op_name](self._result, threshold)  # type: ignore
        except (KeyError, TypeError, json.JSONDecodeError) as ex:
            # KeyError: missing "op"/"threshold" or unknown operator symbol.
            # TypeError: config is not a JSON object/string, or the threshold
            # cannot be compared with the query result.
            raise AlertValidatorConfigError() from ex

    def _validate_not_null(self, rows: np.recarray[Any, Any]) -> None:
        """
        Checks the shape of the result and stores its single value as-is.

        :param rows: Records of the query result, index column included
        :raises AlertQueryMultipleColumnsError: The SQL query returned multiple columns
        :raises AlertQueryMultipleRowsError: The SQL query returned multiple rows
        """
        self._validate_result(rows)
        # Position 0 is the pandas index, position 1 the queried column
        self._result = rows[0][1]

    @staticmethod
    def _validate_result(rows: np.recarray[Any, Any]) -> None:
        """
        Makes sure the query returned a single row with a single column.

        :param rows: Non-empty records of the query result, index column included
        :raises AlertQueryMultipleColumnsError: The SQL query returned multiple columns
        :raises AlertQueryMultipleRowsError: The SQL query returned multiple rows
        """
        # check if query return more than one row
        if len(rows) > 1:
            raise AlertQueryMultipleRowsError(
                message=_(
                    "Alert query returned more than one row. %(num_rows)s rows returned",  # noqa: E501
                    num_rows=len(rows),
                )
            )
        # check if query returned more than one column
        if len(rows[0]) > 2:
            raise AlertQueryMultipleColumnsError(
                # len is subtracted by 1 to discard pandas index column
                _(
                    "Alert query returned more than one column. "
                    "%(num_cols)s columns returned",
                    num_cols=(len(rows[0]) - 1),
                )
            )

    def _validate_operator(self, rows: np.recarray[Any, Any]) -> None:
        """
        Checks the shape of the result and stores its single value as a float.

        A value of 0, None or NaN is stored as ``0.0``.

        :param rows: Records of the query result, index column included
        :raises AlertQueryInvalidTypeError: The value can not be converted to float
        :raises AlertQueryMultipleColumnsError: The SQL query returned multiple columns
        :raises AlertQueryMultipleRowsError: The SQL query returned multiple rows
        """
        self._validate_result(rows)
        value = rows[0][1]
        if self._is_null_or_zero(value):
            self._result = 0.0
            return
        try:
            # Check if it's float or if we can convert it
            self._result = float(value)
        except (AssertionError, TypeError, ValueError) as ex:
            raise AlertQueryInvalidTypeError() from ex

    @property
    def _is_validator_not_null(self) -> bool:
        """True when the report schedule uses the NOT_NULL validator."""
        return (
            self._report_schedule.validator_type == ReportScheduleValidatorType.NOT_NULL
        )

    @property
    def _is_validator_operator(self) -> bool:
        """True when the report schedule uses the OPERATOR validator."""
        return (
            self._report_schedule.validator_type == ReportScheduleValidatorType.OPERATOR
        )

    def _get_alert_metadata_from_object(self) -> dict[str, Any]:
        """
        Builds the context passed to ``logs_context`` for query execution.

        :return: The report schedule id and the execution id
        """
        return {
            "report_schedule_id": self._report_schedule.id,
            "execution_id": self._execution_id,
        }

    @logs_context(context_func=_get_alert_metadata_from_object)
    def _execute_query(self) -> pd.DataFrame:
        """
        Executes the actual alert SQL query template

        :return: A pandas dataframe
        :raises AlertQueryError: SQL query is not valid
        :raises AlertQueryTimeout: The SQL query received a celery soft timeout
        """
        sql_template = jinja_context.get_template_processor(
            database=self._report_schedule.database
        )
        rendered_sql = sql_template.process_template(self._report_schedule.sql)
        try:
            limited_rendered_sql = self._report_schedule.database.apply_limit_to_sql(
                rendered_sql, ALERT_SQL_LIMIT
            )

            if app.config["MUTATE_ALERT_QUERY"]:
                limited_rendered_sql = (
                    self._report_schedule.database.mutate_sql_based_on_config(
                        limited_rendered_sql
                    )
                )

            # NOTE: do not unpack into ``_`` here, it is the gettext function
            executor, username = get_executor(  # pylint: disable=unused-variable
                executors=app.config["ALERT_REPORTS_EXECUTORS"],
                model=self._report_schedule,
            )
            user = security_manager.find_user(username)
            with override_user(user):
                start = default_timer()
                df = self._report_schedule.database.get_df(sql=limited_rendered_sql)
                stop = default_timer()
                logger.info(
                    "Query for %s took %.2f ms",
                    self._execution_id,
                    (stop - start) * 1000.0,
                )
                return df
        except SoftTimeLimitExceeded as ex:
            logger.warning("A timeout occurred while executing the alert query: %s", ex)
            raise AlertQueryTimeout() from ex
        except Exception as ex:
            logger.warning("An error occurred when running alert query")
            # The exception message here can reveal too much information to
            # malicious users, so we raise a generic message.
            raise AlertQueryError(
                message=_("An error occurred when running alert query")
            ) from ex

    def validate(self) -> None:
        """
        Validate the query result as a Pandas DataFrame

        Stores the validated value in ``self._result``. An empty result is
        stored as ``None`` for the not-null validator and as ``0.0`` otherwise.

        :raises AlertQueryError: SQL query is not valid
        :raises AlertQueryInvalidTypeError: The output from the SQL query
        is not an allowed type
        :raises AlertQueryMultipleColumnsError: The SQL query returned multiple columns
        :raises AlertQueryMultipleRowsError: The SQL query returned multiple rows
        :raises AlertQueryTimeout: The SQL query received a celery soft timeout
        """
        # When there are transient errors when executing queries, users will get
        # notified with the error stacktrace which can be avoided by retrying
        df = retry_call(
            self._execute_query,
            exception=AlertQueryError,
            max_tries=app.config["ALERT_REPORTS_QUERY_EXECUTION_MAX_TRIES"],
        )

        if df.empty:
            # Same rule as the dispatch below: anything that is not the
            # not-null validator is handled as an operator validator. This
            # keeps an empty result from reaching the ``rows[0]`` lookups.
            self._result = None if self._is_validator_not_null else 0.0
            return

        rows = df.to_records()
        if self._is_validator_not_null:
            self._validate_not_null(rows)
            return
        self._validate_operator(rows)
|