mirror of
https://github.com/apache/superset.git
synced 2026-05-21 15:55:10 +00:00
fix(bigquery): address review comments on memory-limited fetch
- Use has_app_context()/has_request_context() guards so fetch_data is safe to call outside a Flask request (fixes RuntimeError on g writes and current_app access in non-request paths) - Replace sys.getsizeof(str(batch)) with per-row getsizeof sum for more accurate memory estimation without the str() allocation - Fix false-positive truncation: fetch remaining+1 rows and check len > remaining to confirm more data exists beyond the cap - Reset g.bq_memory_limited/g.bq_memory_limited_row_count after reading in get_df_payload to prevent flag leaking across multiple queries in the same request - Wrap warning string in _() for i18n - Add warning field to ChartDataResponseResult Marshmallow schema - Pass noDuplicate: true to addWarningToast to suppress duplicate toasts when a multi-query chart has multiple truncated responses Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -810,7 +810,7 @@ export function exploreJSON(
|
||||
);
|
||||
(queriesResponse as QueryData[]).forEach(response => {
|
||||
if (response.warning) {
|
||||
dispatch(addWarningToast(response.warning));
|
||||
dispatch(addWarningToast(response.warning, { noDuplicate: true }));
|
||||
}
|
||||
});
|
||||
return dispatch(
|
||||
|
||||
@@ -1561,6 +1561,10 @@ class ChartDataResponseResult(Schema):
|
||||
required=False,
|
||||
allow_none=True,
|
||||
)
|
||||
warning = fields.String(
|
||||
metadata={"description": "Warning message when results were truncated"},
|
||||
allow_none=True,
|
||||
)
|
||||
|
||||
|
||||
class ChartDataResponseSchema(Schema):
|
||||
|
||||
@@ -193,11 +193,17 @@ class QueryContextProcessor:
|
||||
warning: str | None = None
|
||||
if getattr(g, "bq_memory_limited", False):
|
||||
row_count = getattr(g, "bq_memory_limited_row_count", len(cache.df))
|
||||
# Reset flags immediately so subsequent queries in the same request
|
||||
# don't inherit this warning
|
||||
g.bq_memory_limited = False
|
||||
g.bq_memory_limited_row_count = 0
|
||||
chart_id = (self._query_context.form_data or {}).get("slice_id", "")
|
||||
prefix = f"Chart {chart_id}: " if chart_id else ""
|
||||
warning = (
|
||||
f"{prefix}Results truncated to {row_count:,} rows"
|
||||
" due to memory constraints."
|
||||
warning = _(
|
||||
"%(prefix)sResults truncated to %(row_count)s rows"
|
||||
" due to memory constraints.",
|
||||
prefix=prefix,
|
||||
row_count=f"{row_count:,}",
|
||||
)
|
||||
|
||||
return {
|
||||
|
||||
@@ -28,7 +28,7 @@ from typing import Any, TYPE_CHECKING, TypedDict
|
||||
import pandas as pd
|
||||
from apispec import APISpec
|
||||
from apispec.ext.marshmallow import MarshmallowPlugin
|
||||
from flask import current_app, g
|
||||
from flask import current_app, g, has_app_context, has_request_context
|
||||
from flask_babel import gettext as __
|
||||
from marshmallow import fields, Schema
|
||||
from marshmallow.exceptions import ValidationError
|
||||
@@ -305,7 +305,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]:
|
||||
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]: # noqa: C901
|
||||
"""
|
||||
Progressive fetch for BigQuery to prevent browser memory overload.
|
||||
|
||||
@@ -314,7 +314,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
Falls back to the parent implementation on any error.
|
||||
"""
|
||||
max_mb: int = (
|
||||
current_app.config.get("BQ_FETCH_MAX_MB", 200) if current_app else 200
|
||||
current_app.config.get("BQ_FETCH_MAX_MB", 200) if has_app_context() else 200
|
||||
)
|
||||
max_bytes = max_mb * 1024 * 1024
|
||||
|
||||
@@ -323,16 +323,17 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
first_batch: list[Any] = cursor.fetchmany(initial_batch_size)
|
||||
|
||||
if not first_batch:
|
||||
g.bq_memory_limited = False
|
||||
g.bq_memory_limited_row_count = 0
|
||||
if has_request_context():
|
||||
g.bq_memory_limited = False
|
||||
g.bq_memory_limited_row_count = 0
|
||||
return []
|
||||
|
||||
# Support BigQuery Row objects (PR #4071)
|
||||
if type(first_batch[0]).__name__ == "Row":
|
||||
first_batch = [r.values() for r in first_batch]
|
||||
|
||||
# Estimate how many rows fit in the memory budget
|
||||
first_batch_bytes = sys.getsizeof(str(first_batch))
|
||||
# Estimate how many rows fit in the memory budget using per-row sizes
|
||||
first_batch_bytes = sum(sys.getsizeof(row) for row in first_batch)
|
||||
rows_fetched = len(first_batch)
|
||||
avg_bytes_per_row = first_batch_bytes / rows_fetched
|
||||
total_rows_for_target = int(max_bytes / avg_bytes_per_row)
|
||||
@@ -347,21 +348,26 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
memory_limited = (
|
||||
remaining_rows <= 0 and rows_fetched == initial_batch_size
|
||||
)
|
||||
g.bq_memory_limited = memory_limited
|
||||
g.bq_memory_limited_row_count = len(first_batch)
|
||||
if has_request_context():
|
||||
g.bq_memory_limited = memory_limited
|
||||
g.bq_memory_limited_row_count = len(first_batch)
|
||||
return first_batch
|
||||
|
||||
# Fetch the rest up to the budget
|
||||
second_batch: list[Any] = cursor.fetchmany(remaining_rows) or []
|
||||
# Fetch one extra row to confirm truncation without false positives
|
||||
second_batch: list[Any] = cursor.fetchmany(remaining_rows + 1) or []
|
||||
if second_batch and type(second_batch[0]).__name__ == "Row":
|
||||
second_batch = [r.values() for r in second_batch]
|
||||
|
||||
# Truncation is confirmed only when more rows exist beyond the budget
|
||||
memory_limited = len(second_batch) > remaining_rows
|
||||
if memory_limited:
|
||||
second_batch = second_batch[:remaining_rows]
|
||||
|
||||
data = first_batch + second_batch
|
||||
|
||||
# If we received exactly what we asked for, more rows may exist
|
||||
memory_limited = len(second_batch) == remaining_rows
|
||||
g.bq_memory_limited = memory_limited
|
||||
g.bq_memory_limited_row_count = len(data)
|
||||
if has_request_context():
|
||||
g.bq_memory_limited = memory_limited
|
||||
g.bq_memory_limited_row_count = len(data)
|
||||
return data
|
||||
|
||||
except Exception: # pylint: disable=broad-except
|
||||
@@ -369,8 +375,9 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
|
||||
data = super().fetch_data(cursor, limit)
|
||||
if data and type(data[0]).__name__ == "Row":
|
||||
data = [r.values() for r in data] # type: ignore
|
||||
g.bq_memory_limited = False
|
||||
g.bq_memory_limited_row_count = len(data) if data else 0
|
||||
if has_request_context():
|
||||
g.bq_memory_limited = False
|
||||
g.bq_memory_limited_row_count = len(data) if data else 0
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user