fix(sqllab): validate results backend writes and enhance 410 diagnostics (#36222)

This commit is contained in:
Daniel Vaz Gaspar
2025-11-21 15:05:48 +00:00
committed by GitHub
parent 979d385eea
commit 348b19cb4c
3 changed files with 211 additions and 22 deletions

View File

@@ -59,30 +59,18 @@ class SqlExecutionResultsCommand(BaseCommand):
)
)
read_from_results_backend_start = now_as_float()
self._blob = results_backend.get(self._key)
app.config["STATS_LOGGER"].timing(
"sqllab.query.results_backend_read",
now_as_float() - read_from_results_backend_start,
)
if not self._blob:
raise SupersetErrorException(
SupersetError(
message=__(
"Data could not be retrieved from the results backend. You "
"need to re-run the original query."
),
error_type=SupersetErrorType.RESULTS_BACKEND_ERROR,
level=ErrorLevel.ERROR,
),
status=410,
)
stats_logger = app.config["STATS_LOGGER"]
# Check that the query exists in the database first (fast, and avoids an unnecessary results backend read)
self._query = (
db.session.query(Query).filter_by(results_key=self._key).one_or_none()
)
if self._query is None:
logger.warning(
"404 Error - Query not found in database for key: %s",
self._key,
)
stats_logger.incr("sqllab.results_backend.404_query_not_found")
raise SupersetErrorException(
SupersetError(
message=__(
@@ -95,6 +83,43 @@ class SqlExecutionResultsCommand(BaseCommand):
status=404,
)
# Now fetch results from backend (query exists, so this is a valid request)
read_from_results_backend_start = now_as_float()
self._blob = results_backend.get(self._key)
stats_logger.timing(
"sqllab.query.results_backend_read",
now_as_float() - read_from_results_backend_start,
)
if not self._blob:
# Query exists in the DB but its results are missing from the results backend; log enhanced diagnostics
query_age_seconds = now_as_float() - (
self._query.end_time if self._query.end_time else now_as_float()
)
logger.warning(
"410 Error - Query exists in DB but results not in results backend"
" Query ID: %s, Status: %s, Age: %.2f seconds, "
"End time: %s, Results key: %s",
self._query.id,
self._query.status,
query_age_seconds,
self._query.end_time,
self._key,
)
stats_logger.incr("sqllab.results_backend.410_results_missing")
raise SupersetErrorException(
SupersetError(
message=__(
"Data could not be retrieved from the results backend. You "
"need to re-run the original query."
),
error_type=SupersetErrorType.RESULTS_BACKEND_ERROR,
level=ErrorLevel.ERROR,
),
status=410,
)
def run(
self,
) -> dict[str, Any]: