mirror of
https://github.com/apache/superset.git
synced 2026-05-28 03:05:13 +00:00
fix(bigquery): limit result set size to prevent browser memory crashes
Implement memory-aware progressive fetching in BigQuery's fetch_data method. Large result sets (950+ MB) previously crashed Chrome by loading everything into memory at once. The fix samples an initial batch to estimate row size, then fetches only as many rows as fit within the BQ_FETCH_MAX_MB config limit (default 200 MB). A warning toast is shown to users when results are truncated. This is always-on with no feature flag -- operators control the budget via the BQ_FETCH_MAX_MB config constant. Originally by @ethan-l-geotab in apache#36387. Co-authored-by: ethan-l-geotab <ethanliong@geotab.com> Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1379,3 +1379,125 @@ def test_get_df_payload_invalidates_cache_missing_applied_filter_columns():
|
||||
assert mock_cache.is_loaded is False, (
|
||||
"Cache should be inv when no applied_filter_columns and query has filters"
|
||||
)
|
||||
|
||||
|
||||
def test_get_df_payload_bq_memory_limited_warning():
    """
    Check the truncation warning produced by get_df_payload.

    With ``bq_memory_limited`` and ``bq_memory_limited_row_count`` set on the
    patched Flask ``g``, the payload's ``warning`` entry is expected to name
    the chart, the formatted row count and the memory constraint.
    """
    from superset.common.query_object import QueryObject

    # The slice id in form_data is what the warning text refers to as the chart.
    query_context = MagicMock(force=False, form_data={"slice_id": 42})

    datasource = MagicMock(
        column_names=["col1"],
        uid="test_ds",
        cache_timeout=None,
        changed_on=None,
        data=MagicMock(),
    )
    datasource.get_extra_cache_keys.return_value = []
    datasource.data.get.return_value = {}

    processor = QueryContextProcessor(query_context)
    processor._qc_datasource = datasource

    query_obj = QueryObject(datasource=datasource, columns=["col1"])

    # Stand-in for an already-loaded cache entry returned by QueryCacheManager.get.
    timestamp = "2024-01-01T00:00:00"
    cache = MagicMock(
        is_loaded=True,
        df=pd.DataFrame({"col1": [1, 2, 3]}),
        query="SELECT col1 FROM table",
        error_message=None,
        status="success",
        applied_filter_columns=["col1"],
        applied_template_filters=[],
        rejected_filter_columns=[],
        annotation_data={},
        is_cached=True,
        sql_rowcount=3,
        cache_dttm=timestamp,
        queried_dttm=timestamp,
    )

    with (
        patch(
            "superset.common.query_context_processor.QueryCacheManager"
        ) as cache_manager,
        patch.object(query_obj, "validate", return_value=None),
        patch.object(processor, "query_cache_key", return_value="key"),
        patch.object(processor, "get_cache_timeout", return_value=3600),
        patch("superset.common.query_context_processor.g") as flask_g,
    ):
        cache_manager.get.return_value = cache

        # Simulate the BigQuery memory-limited flag being set on Flask g.
        flask_g.bq_memory_limited = True
        flask_g.bq_memory_limited_row_count = 5000

        result = processor.get_df_payload(query_obj, force_cached=False)

    warning = result["warning"]
    assert warning is not None
    for fragment in ("Chart 42", "5,000 rows", "memory constraints"):
        assert fragment in warning
|
||||
|
||||
|
||||
def test_get_df_payload_no_warning_when_not_memory_limited():
    """
    Check that get_df_payload leaves ``warning`` empty for untruncated results.

    With ``bq_memory_limited`` set to ``False`` on the patched Flask ``g``,
    the payload's ``warning`` entry is expected to be ``None``.
    """
    from superset.common.query_object import QueryObject

    query_context = MagicMock(force=False, form_data={})

    datasource = MagicMock(
        column_names=["col1"],
        uid="test_ds",
        cache_timeout=None,
        changed_on=None,
        data=MagicMock(),
    )
    datasource.get_extra_cache_keys.return_value = []
    datasource.data.get.return_value = {}

    processor = QueryContextProcessor(query_context)
    processor._qc_datasource = datasource

    query_obj = QueryObject(datasource=datasource, columns=["col1"])

    # Stand-in for an already-loaded cache entry returned by QueryCacheManager.get.
    timestamp = "2024-01-01T00:00:00"
    cache = MagicMock(
        is_loaded=True,
        df=pd.DataFrame({"col1": [1, 2]}),
        query="SELECT col1 FROM table",
        error_message=None,
        status="success",
        applied_filter_columns=["col1"],
        applied_template_filters=[],
        rejected_filter_columns=[],
        annotation_data={},
        is_cached=True,
        sql_rowcount=2,
        cache_dttm=timestamp,
        queried_dttm=timestamp,
    )

    with (
        patch(
            "superset.common.query_context_processor.QueryCacheManager"
        ) as cache_manager,
        patch.object(query_obj, "validate", return_value=None),
        patch.object(processor, "query_cache_key", return_value="key"),
        patch.object(processor, "get_cache_timeout", return_value=3600),
        patch("superset.common.query_context_processor.g") as flask_g,
    ):
        cache_manager.get.return_value = cache

        # Explicitly flag the result as not truncated: an unset attribute on
        # a MagicMock would itself be a (truthy) MagicMock.
        flask_g.bq_memory_limited = False

        result = processor.get_df_payload(query_obj, force_cached=False)

    assert result["warning"] is None
|
||||
|
||||
Reference in New Issue
Block a user