mirror of
https://github.com/apache/superset.git
synced 2026-04-23 18:14:56 +00:00
fix(reports): improve error handling for report schedule execution (#35800)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
3167a0dbc0
commit
c42e3c6837
@@ -177,19 +177,35 @@ class BaseReportState:
|
||||
"""
|
||||
Creates a Report execution log, uses the current computed last_value for Alerts
|
||||
"""
|
||||
log = ReportExecutionLog(
|
||||
scheduled_dttm=self._scheduled_dttm,
|
||||
start_dttm=self._start_dttm,
|
||||
end_dttm=datetime.utcnow(),
|
||||
value=self._report_schedule.last_value,
|
||||
value_row_json=self._report_schedule.last_value_row_json,
|
||||
state=self._report_schedule.last_state,
|
||||
error_message=error_message,
|
||||
report_schedule=self._report_schedule,
|
||||
uuid=self._execution_id,
|
||||
)
|
||||
db.session.add(log)
|
||||
db.session.commit() # pylint: disable=consider-using-transaction
|
||||
from sqlalchemy.orm.exc import StaleDataError
|
||||
|
||||
try:
|
||||
log = ReportExecutionLog(
|
||||
scheduled_dttm=self._scheduled_dttm,
|
||||
start_dttm=self._start_dttm,
|
||||
end_dttm=datetime.utcnow(),
|
||||
value=self._report_schedule.last_value,
|
||||
value_row_json=self._report_schedule.last_value_row_json,
|
||||
state=self._report_schedule.last_state,
|
||||
error_message=error_message,
|
||||
report_schedule=self._report_schedule,
|
||||
uuid=self._execution_id,
|
||||
)
|
||||
db.session.add(log)
|
||||
db.session.commit() # pylint: disable=consider-using-transaction
|
||||
except StaleDataError as ex:
|
||||
# Report schedule was modified or deleted by another process
|
||||
db.session.rollback()
|
||||
logger.warning(
|
||||
"Report schedule (execution %s) was modified or deleted "
|
||||
"during execution. This can occur when a report is deleted "
|
||||
"while running.",
|
||||
self._execution_id,
|
||||
)
|
||||
raise ReportScheduleUnexpectedError(
|
||||
"Report schedule was modified or deleted by another process "
|
||||
"during execution"
|
||||
) from ex
|
||||
|
||||
def _get_url(
|
||||
self,
|
||||
@@ -743,7 +759,7 @@ class ReportNotTriggeredErrorState(BaseReportState):
|
||||
current_states = [ReportState.NOOP, ReportState.ERROR]
|
||||
initial = True
|
||||
|
||||
def next(self) -> None:
|
||||
def next(self) -> None: # noqa: C901
|
||||
self.update_report_schedule_and_log(ReportState.WORKING)
|
||||
try:
|
||||
# If it's an alert check if the alert is triggered
|
||||
@@ -758,9 +774,21 @@ class ReportNotTriggeredErrorState(BaseReportState):
|
||||
if isinstance(first_ex, SupersetErrorsException):
|
||||
error_message = ";".join([error.message for error in first_ex.errors])
|
||||
|
||||
self.update_report_schedule_and_log(
|
||||
ReportState.ERROR, error_message=error_message
|
||||
)
|
||||
try:
|
||||
self.update_report_schedule_and_log(
|
||||
ReportState.ERROR, error_message=error_message
|
||||
)
|
||||
except ReportScheduleUnexpectedError as logging_ex:
|
||||
# Logging failed (likely StaleDataError), but we still want to
|
||||
# raise the original error so the root cause remains visible
|
||||
logger.warning(
|
||||
"Failed to log error for report schedule (execution %s) "
|
||||
"due to database issue",
|
||||
self._execution_id,
|
||||
exc_info=True,
|
||||
)
|
||||
# Re-raise the original exception, not the logging failure
|
||||
raise first_ex from logging_ex
|
||||
|
||||
# TODO (dpgaspar) convert this logic to a new state eg: ERROR_ON_GRACE
|
||||
if not self.is_in_error_grace_period():
|
||||
@@ -776,12 +804,26 @@ class ReportNotTriggeredErrorState(BaseReportState):
|
||||
second_error_message = ";".join(
|
||||
[error.message for error in second_ex.errors]
|
||||
)
|
||||
except ReportScheduleUnexpectedError:
|
||||
# send_error failed due to logging issue, log and continue
|
||||
# to raise the original error
|
||||
logger.warning(
|
||||
"Failed to send error notification due to database issue",
|
||||
exc_info=True,
|
||||
)
|
||||
except Exception as second_ex: # pylint: disable=broad-except
|
||||
second_error_message = str(second_ex)
|
||||
finally:
|
||||
self.update_report_schedule_and_log(
|
||||
ReportState.ERROR, error_message=second_error_message
|
||||
)
|
||||
try:
|
||||
self.update_report_schedule_and_log(
|
||||
ReportState.ERROR, error_message=second_error_message
|
||||
)
|
||||
except ReportScheduleUnexpectedError:
|
||||
# Logging failed again, log it but don't let it hide first_ex
|
||||
logger.warning(
|
||||
"Failed to log final error state due to database issue",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@@ -851,9 +893,22 @@ class ReportSuccessState(BaseReportState):
|
||||
self.send()
|
||||
self.update_report_schedule_and_log(ReportState.SUCCESS)
|
||||
except Exception as ex: # pylint: disable=broad-except
|
||||
self.update_report_schedule_and_log(
|
||||
ReportState.ERROR, error_message=str(ex)
|
||||
)
|
||||
try:
|
||||
self.update_report_schedule_and_log(
|
||||
ReportState.ERROR, error_message=str(ex)
|
||||
)
|
||||
except ReportScheduleUnexpectedError as logging_ex:
|
||||
# Logging failed (likely StaleDataError), but we still want to
|
||||
# raise the original error so the root cause remains visible
|
||||
logger.warning(
|
||||
"Failed to log error for report schedule (execution %s) "
|
||||
"due to database issue",
|
||||
self._execution_id,
|
||||
exc_info=True,
|
||||
)
|
||||
# Re-raise the original exception, not the logging failure
|
||||
raise ex from logging_ex
|
||||
raise
|
||||
|
||||
|
||||
class ReportScheduleStateMachine: # pylint: disable=too-few-public-methods
|
||||
|
||||
@@ -20,8 +20,10 @@ from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
from flask import current_app
|
||||
from sqlalchemy.orm.exc import StaleDataError
|
||||
|
||||
from superset.commands.dashboard.permalink.create import CreateDashboardPermalinkCommand
|
||||
from superset.commands.report.exceptions import ReportScheduleUnexpectedError
|
||||
from superset.commands.report.execute import AsyncExecuteReportScheduleCommand
|
||||
from superset.models.dashboard import Dashboard
|
||||
from superset.reports.models import ReportSourceFormat
|
||||
@@ -118,3 +120,40 @@ def test_report_with_header_data(
|
||||
assert header_data.get("notification_source") == ReportSourceFormat.DASHBOARD
|
||||
assert header_data.get("notification_type") == report_schedule.type
|
||||
assert len(send_email_smtp_mock.call_args.kwargs["header_data"]) == 8
|
||||
|
||||
|
||||
@patch("superset.reports.notifications.email.send_email_smtp")
|
||||
@patch("superset.commands.report.execute.DashboardScreenshot")
|
||||
@pytest.mark.usefixtures("login_as_admin")
|
||||
def test_report_schedule_stale_data_error_preserves_cause(
|
||||
dashboard_screenshot_mock: MagicMock,
|
||||
send_email_smtp_mock: MagicMock,
|
||||
tabbed_dashboard: Dashboard, # noqa: F811
|
||||
) -> None:
|
||||
"""
|
||||
Test that when db.session.commit raises StaleDataError during logging,
|
||||
we surface ReportScheduleUnexpectedError while preserving the original
|
||||
StaleDataError as the cause.
|
||||
"""
|
||||
dashboard_screenshot_mock.get_screenshot.return_value = b"test-image"
|
||||
current_app.config["ALERT_REPORTS_NOTIFICATION_DRY_RUN"] = False
|
||||
|
||||
with create_dashboard_report(
|
||||
dashboard=tabbed_dashboard,
|
||||
extra={},
|
||||
name="test stale data error",
|
||||
) as report_schedule:
|
||||
# Mock db.session.commit to raise StaleDataError during log creation
|
||||
with patch("superset.db.session.commit") as mock_commit:
|
||||
mock_commit.side_effect = StaleDataError("test stale data")
|
||||
|
||||
# Execute the report and expect ReportScheduleUnexpectedError
|
||||
with pytest.raises(ReportScheduleUnexpectedError) as exc_info:
|
||||
AsyncExecuteReportScheduleCommand(
|
||||
str(uuid4()), report_schedule.id, datetime.utcnow()
|
||||
).run()
|
||||
|
||||
# Verify the original StaleDataError is preserved as the cause
|
||||
assert exc_info.value.__cause__ is not None
|
||||
assert isinstance(exc_info.value.__cause__, StaleDataError)
|
||||
assert str(exc_info.value.__cause__) == "test stale data"
|
||||
|
||||
Reference in New Issue
Block a user