diff --git a/superset-frontend/src/components/Chart/chartAction.ts b/superset-frontend/src/components/Chart/chartAction.ts index 039c3f14068..f04b6c6b7bd 100644 --- a/superset-frontend/src/components/Chart/chartAction.ts +++ b/superset-frontend/src/components/Chart/chartAction.ts @@ -810,7 +810,7 @@ export function exploreJSON( ); (queriesResponse as QueryData[]).forEach(response => { if (response.warning) { - dispatch(addWarningToast(response.warning)); + dispatch(addWarningToast(response.warning, { noDuplicate: true })); } }); return dispatch( diff --git a/superset/charts/schemas.py b/superset/charts/schemas.py index 81b9509cd0a..4c8bbbf08a0 100644 --- a/superset/charts/schemas.py +++ b/superset/charts/schemas.py @@ -1561,6 +1561,10 @@ class ChartDataResponseResult(Schema): required=False, allow_none=True, ) + warning = fields.String( + metadata={"description": "Warning message when results were truncated"}, + allow_none=True, + ) class ChartDataResponseSchema(Schema): diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 0f05c3649d9..5b39c8f95a7 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -193,11 +193,17 @@ class QueryContextProcessor: warning: str | None = None if getattr(g, "bq_memory_limited", False): row_count = getattr(g, "bq_memory_limited_row_count", len(cache.df)) + # Reset flags immediately so subsequent queries in the same request + # don't inherit this warning + g.bq_memory_limited = False + g.bq_memory_limited_row_count = 0 chart_id = (self._query_context.form_data or {}).get("slice_id", "") prefix = f"Chart {chart_id}: " if chart_id else "" - warning = ( - f"{prefix}Results truncated to {row_count:,} rows" - " due to memory constraints." + warning = _( + "%(prefix)sResults truncated to %(row_count)s rows" + " due to memory constraints.", + prefix=prefix, + row_count=f"{row_count:,}", ) return { diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 1bf91eb205c..0f3355fdfc2 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -28,7 +28,7 @@ from typing import Any, TYPE_CHECKING, TypedDict import pandas as pd from apispec import APISpec from apispec.ext.marshmallow import MarshmallowPlugin -from flask import current_app, g +from flask import current_app, g, has_app_context, has_request_context from flask_babel import gettext as __ from marshmallow import fields, Schema from marshmallow.exceptions import ValidationError @@ -305,7 +305,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met return None @classmethod - def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]: + def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]: # noqa: C901 """ Progressive fetch for BigQuery to prevent browser memory overload. @@ -314,7 +314,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met Falls back to the parent implementation on any error. """ max_mb: int = ( - current_app.config.get("BQ_FETCH_MAX_MB", 200) if current_app else 200 + current_app.config.get("BQ_FETCH_MAX_MB", 200) if has_app_context() else 200 ) max_bytes = max_mb * 1024 * 1024 @@ -323,16 +323,17 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met first_batch: list[Any] = cursor.fetchmany(initial_batch_size) if not first_batch: - g.bq_memory_limited = False - g.bq_memory_limited_row_count = 0 + if has_request_context(): + g.bq_memory_limited = False + g.bq_memory_limited_row_count = 0 return [] # Support BigQuery Row objects (PR #4071) if type(first_batch[0]).__name__ == "Row": first_batch = [r.values() for r in first_batch] - # Estimate how many rows fit in the memory budget - first_batch_bytes = sys.getsizeof(str(first_batch)) + # Estimate how many rows fit in the memory budget using per-row sizes + first_batch_bytes = sum(sys.getsizeof(row) for row in first_batch) rows_fetched = len(first_batch) avg_bytes_per_row = first_batch_bytes / rows_fetched total_rows_for_target = int(max_bytes / avg_bytes_per_row) @@ -347,21 +348,26 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met memory_limited = ( remaining_rows <= 0 and rows_fetched == initial_batch_size ) - g.bq_memory_limited = memory_limited - g.bq_memory_limited_row_count = len(first_batch) + if has_request_context(): + g.bq_memory_limited = memory_limited + g.bq_memory_limited_row_count = len(first_batch) return first_batch - # Fetch the rest up to the budget - second_batch: list[Any] = cursor.fetchmany(remaining_rows) or [] + # Fetch one extra row to confirm truncation without false positives + second_batch: list[Any] = cursor.fetchmany(remaining_rows + 1) or [] if second_batch and type(second_batch[0]).__name__ == "Row": second_batch = [r.values() for r in second_batch] + # Truncation is confirmed only when more rows exist beyond the budget + memory_limited = len(second_batch) > remaining_rows + if memory_limited: + second_batch = second_batch[:remaining_rows] + data = first_batch + second_batch - # If we received exactly what we asked for, more rows may exist - memory_limited = len(second_batch) == remaining_rows - g.bq_memory_limited = memory_limited - g.bq_memory_limited_row_count = len(data) + if has_request_context(): + g.bq_memory_limited = memory_limited + g.bq_memory_limited_row_count = len(data) return data except Exception: # pylint: disable=broad-except @@ -369,8 +375,9 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met data = super().fetch_data(cursor, limit) if data and type(data[0]).__name__ == "Row": data = [r.values() for r in data] # type: ignore - g.bq_memory_limited = False - g.bq_memory_limited_row_count = len(data) if data else 0 + if has_request_context(): + g.bq_memory_limited = False + g.bq_memory_limited_row_count = len(data) if data else 0 return data @staticmethod