fix(bigquery): address review comments on memory-limited fetch

- Use has_app_context()/has_request_context() guards so fetch_data
  is safe to call outside a Flask request (fixes RuntimeError on g
  writes and current_app access in non-request paths)
- Replace sys.getsizeof(str(batch)) with per-row getsizeof sum for
  more accurate memory estimation without the str() allocation
- Fix false-positive truncation: fetch remaining+1 rows and check
  len > remaining to confirm more data exists beyond the cap
- Reset g.bq_memory_limited/g.bq_memory_limited_row_count after
  reading in get_df_payload to prevent flag leaking across multiple
  queries in the same request
- Wrap warning string in _() for i18n
- Add warning field to ChartDataResponseResult Marshmallow schema
- Pass noDuplicate: true to addWarningToast to suppress duplicate
  toasts when a multi-query chart has multiple truncated responses

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Code
2026-04-07 13:16:58 -07:00
parent ffe7addcd2
commit 1894b77b44
4 changed files with 38 additions and 21 deletions

View File

@@ -810,7 +810,7 @@ export function exploreJSON(
);
(queriesResponse as QueryData[]).forEach(response => {
if (response.warning) {
dispatch(addWarningToast(response.warning));
dispatch(addWarningToast(response.warning, { noDuplicate: true }));
}
});
return dispatch(

View File

@@ -1561,6 +1561,10 @@ class ChartDataResponseResult(Schema):
required=False,
allow_none=True,
)
warning = fields.String(
metadata={"description": "Warning message when results were truncated"},
allow_none=True,
)
class ChartDataResponseSchema(Schema):

View File

@@ -193,11 +193,17 @@ class QueryContextProcessor:
warning: str | None = None
if getattr(g, "bq_memory_limited", False):
row_count = getattr(g, "bq_memory_limited_row_count", len(cache.df))
# Reset flags immediately so subsequent queries in the same request
# don't inherit this warning
g.bq_memory_limited = False
g.bq_memory_limited_row_count = 0
chart_id = (self._query_context.form_data or {}).get("slice_id", "")
prefix = f"Chart {chart_id}: " if chart_id else ""
warning = (
f"{prefix}Results truncated to {row_count:,} rows"
" due to memory constraints."
warning = _(
"%(prefix)sResults truncated to %(row_count)s rows"
" due to memory constraints.",
prefix=prefix,
row_count=f"{row_count:,}",
)
return {

View File

@@ -28,7 +28,7 @@ from typing import Any, TYPE_CHECKING, TypedDict
import pandas as pd
from apispec import APISpec
from apispec.ext.marshmallow import MarshmallowPlugin
from flask import current_app, g
from flask import current_app, g, has_app_context, has_request_context
from flask_babel import gettext as __
from marshmallow import fields, Schema
from marshmallow.exceptions import ValidationError
@@ -305,7 +305,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
return None
@classmethod
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]:
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]: # noqa: C901
"""
Progressive fetch for BigQuery to prevent browser memory overload.
@@ -314,7 +314,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
Falls back to the parent implementation on any error.
"""
max_mb: int = (
current_app.config.get("BQ_FETCH_MAX_MB", 200) if current_app else 200
current_app.config.get("BQ_FETCH_MAX_MB", 200) if has_app_context() else 200
)
max_bytes = max_mb * 1024 * 1024
@@ -323,16 +323,17 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
first_batch: list[Any] = cursor.fetchmany(initial_batch_size)
if not first_batch:
g.bq_memory_limited = False
g.bq_memory_limited_row_count = 0
if has_request_context():
g.bq_memory_limited = False
g.bq_memory_limited_row_count = 0
return []
# Support BigQuery Row objects (PR #4071)
if type(first_batch[0]).__name__ == "Row":
first_batch = [r.values() for r in first_batch]
# Estimate how many rows fit in the memory budget
first_batch_bytes = sys.getsizeof(str(first_batch))
# Estimate how many rows fit in the memory budget using per-row sizes
first_batch_bytes = sum(sys.getsizeof(row) for row in first_batch)
rows_fetched = len(first_batch)
avg_bytes_per_row = first_batch_bytes / rows_fetched
total_rows_for_target = int(max_bytes / avg_bytes_per_row)
@@ -347,21 +348,26 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
memory_limited = (
remaining_rows <= 0 and rows_fetched == initial_batch_size
)
g.bq_memory_limited = memory_limited
g.bq_memory_limited_row_count = len(first_batch)
if has_request_context():
g.bq_memory_limited = memory_limited
g.bq_memory_limited_row_count = len(first_batch)
return first_batch
# Fetch the rest up to the budget
second_batch: list[Any] = cursor.fetchmany(remaining_rows) or []
# Fetch one extra row to confirm truncation without false positives
second_batch: list[Any] = cursor.fetchmany(remaining_rows + 1) or []
if second_batch and type(second_batch[0]).__name__ == "Row":
second_batch = [r.values() for r in second_batch]
# Truncation is confirmed only when more rows exist beyond the budget
memory_limited = len(second_batch) > remaining_rows
if memory_limited:
second_batch = second_batch[:remaining_rows]
data = first_batch + second_batch
# If we received exactly what we asked for, more rows may exist
memory_limited = len(second_batch) == remaining_rows
g.bq_memory_limited = memory_limited
g.bq_memory_limited_row_count = len(data)
if has_request_context():
g.bq_memory_limited = memory_limited
g.bq_memory_limited_row_count = len(data)
return data
except Exception: # pylint: disable=broad-except
@@ -369,8 +375,9 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
data = super().fetch_data(cursor, limit)
if data and type(data[0]).__name__ == "Row":
data = [r.values() for r in data] # type: ignore
g.bq_memory_limited = False
g.bq_memory_limited_row_count = len(data) if data else 0
if has_request_context():
g.bq_memory_limited = False
g.bq_memory_limited_row_count = len(data) if data else 0
return data
@staticmethod