diff --git a/superset/commands/sql_lab/results.py b/superset/commands/sql_lab/results.py index cf89c3d237d..0ba5100cada 100644 --- a/superset/commands/sql_lab/results.py +++ b/superset/commands/sql_lab/results.py @@ -59,30 +59,18 @@ class SqlExecutionResultsCommand(BaseCommand): ) ) - read_from_results_backend_start = now_as_float() - self._blob = results_backend.get(self._key) - app.config["STATS_LOGGER"].timing( - "sqllab.query.results_backend_read", - now_as_float() - read_from_results_backend_start, - ) - - if not self._blob: - raise SupersetErrorException( - SupersetError( - message=__( - "Data could not be retrieved from the results backend. You " - "need to re-run the original query." - ), - error_type=SupersetErrorType.RESULTS_BACKEND_ERROR, - level=ErrorLevel.ERROR, - ), - status=410, - ) + stats_logger = app.config["STATS_LOGGER"] + # Check if query exists in database first (fast, avoids unnecessary S3 call) self._query = ( db.session.query(Query).filter_by(results_key=self._key).one_or_none() ) if self._query is None: + logger.warning( + "404 Error - Query not found in database for key: %s", + self._key, + ) + stats_logger.incr("sqllab.results_backend.404_query_not_found") raise SupersetErrorException( SupersetError( message=__( @@ -95,6 +83,43 @@ class SqlExecutionResultsCommand(BaseCommand): status=404, ) + # Now fetch results from backend (query exists, so this is a valid request) + read_from_results_backend_start = now_as_float() + self._blob = results_backend.get(self._key) + stats_logger.timing( + "sqllab.query.results_backend_read", + now_as_float() - read_from_results_backend_start, + ) + + if not self._blob: + # Query exists in DB but results not in S3 - enhanced diagnostics + query_age_seconds = now_as_float() - ( + self._query.end_time if self._query.end_time else now_as_float() + ) + logger.warning( + "410 Error - Query exists in DB but results not in results backend" + " Query ID: %s, Status: %s, Age: %.2f seconds, " + "End time: %s, Results key: %s", + self._query.id, + self._query.status, + query_age_seconds, + self._query.end_time, + self._key, + ) + stats_logger.incr("sqllab.results_backend.410_results_missing") + + raise SupersetErrorException( + SupersetError( + message=__( + "Data could not be retrieved from the results backend. You " + "need to re-run the original query." + ), + error_type=SupersetErrorType.RESULTS_BACKEND_ERROR, + level=ErrorLevel.ERROR, + ), + status=410, + ) + def run( self, ) -> dict[str, Any]: diff --git a/superset/sql_lab.py b/superset/sql_lab.py index 5de5164d8aa..8ce20da5bb6 100644 --- a/superset/sql_lab.py +++ b/superset/sql_lab.py @@ -582,10 +582,50 @@ def execute_sql_statements( # noqa: C901 "*** serialized payload size: %i", getsizeof(serialized_payload) ) logger.debug("*** compressed payload size: %i", getsizeof(compressed)) - results_backend.set(key, compressed, cache_timeout) - query.results_key = key - query.status = QueryStatus.SUCCESS + # Store results in backend and check if write succeeded + write_success = results_backend.set(key, compressed, cache_timeout) + if not write_success: + # Backend write failed - log error and don't set results_key + logger.error( + "Query %s: Failed to store results in backend, key: %s", + str(query_id), + key, + ) + stats_logger.incr("sqllab.results_backend.write_failure") + # Don't set results_key to prevent 410 errors when fetching + query.results_key = None + + # For async queries (not returning results inline), mark as FAILED + # because results are inaccessible to the user + if not return_results: + query.status = QueryStatus.FAILED + query.error_message = ( + "Failed to store query results in the results backend. " + "Please try again or contact your administrator." + ) + db.session.commit() + raise SupersetErrorException( + SupersetError( + message=__( + "Failed to store query results. Please try again." + ), + error_type=SupersetErrorType.RESULTS_BACKEND_ERROR, + level=ErrorLevel.ERROR, + ) + ) + else: + # Write succeeded - set results_key in database + query.results_key = key + logger.info( + "Query %s: Successfully stored results in backend, key: %s", + str(query_id), + key, + ) + + # Only set SUCCESS if we didn't already set FAILED above + if query.status != QueryStatus.FAILED: + query.status = QueryStatus.SUCCESS db.session.commit() if return_results: diff --git a/tests/integration_tests/sql_lab/test_execute_sql_statements.py b/tests/integration_tests/sql_lab/test_execute_sql_statements.py index bea0d2b1406..752ad1e674d 100644 --- a/tests/integration_tests/sql_lab/test_execute_sql_statements.py +++ b/tests/integration_tests/sql_lab/test_execute_sql_statements.py @@ -14,8 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from unittest.mock import MagicMock, patch + from flask import current_app +from superset import db from superset.common.db_query_status import QueryStatus from superset.models.core import Database from superset.models.sql_lab import Query @@ -54,3 +57,124 @@ def test_non_async_execute(non_async_example_db: Database, example_query: Query) if non_async_example_db.db_engine_spec.engine_name == "hive": assert example_query.tracking_url_raw + + +@patch("superset.sql_lab.results_backend") +def test_results_backend_write_failure( + mock_results_backend: MagicMock, + async_example_db: Database, + example_query: Query, +): + """Test async query marked FAILED when results_backend.set() False""" + import pytest + + from superset.exceptions import SupersetErrorException + + # Mock results backend to simulate write failure + mock_results_backend.set.return_value = False + + # Execute query with store_results=True, return_results=False (async mode) + # Should raise exception because results can't be stored + with pytest.raises(SupersetErrorException) as exc_info: + execute_sql_statements( + example_query.id, + "select 1 as foo;", + store_results=True, + return_results=False, + start_time=now_as_float(), + expand_data=False, + log_params=dict(), # noqa: C408 + ) + + # Verify exception message + assert "Failed to store query results" in str(exc_info.value.error.message) + + # Refresh query from database to get updated state + db.session.refresh(example_query) + + # Assert query status is FAILED (results inaccessible for async queries) + assert example_query.status == QueryStatus.FAILED + + # Assert results_key is None (because backend write failed) + assert example_query.results_key is None + + # Assert error message is set + assert "Failed to store query results" in example_query.error_message + + # Assert backend.set() was called + assert mock_results_backend.set.called + + +@patch("superset.sql_lab.results_backend") +def test_results_backend_write_success( + mock_results_backend: MagicMock, + async_example_db: Database, + example_query: Query, +): + """Test that query.results_key is set when results_backend.set() True""" + # Mock results backend to simulate successful write + mock_results_backend.set.return_value = True + + # Execute query with store_results=True (async mode) + execute_sql_statements( + example_query.id, + "select 1 as foo;", + store_results=True, + return_results=False, + start_time=now_as_float(), + expand_data=False, + log_params=dict(), # noqa: C408 + ) + + # Refresh query from database to get updated state + db.session.refresh(example_query) + + # Assert query status is SUCCESS + assert example_query.status == QueryStatus.SUCCESS + + # Assert results_key is set (UUID format) + assert example_query.results_key is not None + assert len(example_query.results_key) == 36 # UUID length with dashes + + # Assert backend.set() was called + assert mock_results_backend.set.called + + +@patch("superset.sql_lab.results_backend") +def test_results_backend_write_failure_sync_mode( + mock_results_backend: MagicMock, + non_async_example_db: Database, + example_query: Query, +): + """Test sync query SUCCESS when cache write fails (results inline)""" + # Mock results backend to simulate write failure + mock_results_backend.set.return_value = False + + # Execute query with return_results=True (sync mode - results returned inline) + result = execute_sql_statements( + example_query.id, + "select 1 as foo;", + store_results=True, + return_results=True, + start_time=now_as_float(), + expand_data=True, + log_params=dict(), # noqa: C408 + ) + + # Should return results inline even when cache write fails + assert result + assert result["query_id"] == example_query.id + assert result["status"] == QueryStatus.SUCCESS + assert result["data"] == [{"foo": 1}] + + # Refresh query from database to get updated state + db.session.refresh(example_query) + + # Assert query status is SUCCESS (results were returned inline) + assert example_query.status == QueryStatus.SUCCESS + + # Assert results_key is None (because backend write failed) + assert example_query.results_key is None + + # Assert backend.set() was called + assert mock_results_backend.set.called