Compare commits

...

3 Commits

Author SHA1 Message Date
Enzo Martellucci
b2acbd286a fix(chart): respect ALLOW_FULL_CSV_EXPORT and fix streaming CSV export 2026-05-21 15:35:49 +02:00
Enzo Martellucci
e8317b15e2 refactor(chart): rename fullCsvMaxRows to fullExportMaxRows 2026-05-21 14:38:53 +02:00
Enzo Martellucci
584917b467 fix(chart): respect ALLOW_FULL_CSV_EXPORT and fix streaming CSV export 2026-05-21 14:25:32 +02:00
10 changed files with 197 additions and 27 deletions

View File

@@ -90,7 +90,13 @@ const defaultState = {
superset_can_explore: false,
superset_can_share: false,
superset_can_csv: false,
common: { conf: { SUPERSET_WEBSERVER_TIMEOUT: 0, SQL_MAX_ROW: 666 } },
common: {
conf: {
SUPERSET_WEBSERVER_TIMEOUT: 0,
SQL_MAX_ROW: 666,
TABLE_VIZ_MAX_ROW_SERVER: 999,
},
},
},
dashboardLayout: {
present: {},
@@ -201,7 +207,7 @@ test('should call exportChart when exportCSV is clicked', async () => {
stubbedExportCSV.mockRestore();
});
test('should call exportChart with row_limit props.maxRows when exportFullCSV is clicked', async () => {
test('should call exportChart with row_limit TABLE_VIZ_MAX_ROW_SERVER when exportFullCSV is clicked', async () => {
(global as any).featureFlags = {
[FeatureFlag.AllowFullCsvExport]: true,
};
@@ -222,7 +228,8 @@ test('should call exportChart with row_limit props.maxRows when exportFullCSV is
expect(stubbedExportCSV).toHaveBeenCalledWith(
expect.objectContaining({
formData: expect.objectContaining({
row_limit: 666,
row_limit: 999,
full_export: true,
dashboardId: 111,
}),
resultType: 'full',
@@ -256,7 +263,7 @@ test('should call exportChart when exportXLSX is clicked', async () => {
stubbedExportXLSX.mockRestore();
});
test('should call exportChart with row_limit props.maxRows when exportFullXLSX is clicked', async () => {
test('should call exportChart with row_limit TABLE_VIZ_MAX_ROW_SERVER when exportFullXLSX is clicked', async () => {
(global as any).featureFlags = {
[FeatureFlag.AllowFullCsvExport]: true,
};
@@ -277,7 +284,8 @@ test('should call exportChart with row_limit props.maxRows when exportFullXLSX i
expect(stubbedExportXLSX).toHaveBeenCalledWith(
expect.objectContaining({
formData: expect.objectContaining({
row_limit: 666,
row_limit: 999,
full_export: true,
dashboardId: 111,
}),
resultType: 'full',

View File

@@ -224,8 +224,10 @@ const Chart = (props: ChartProps) => {
const emitCrossFilters = useSelector(
(state: RootState) => !!state.dashboardInfo.crossFiltersEnabled,
);
const maxRows: number = useSelector(
(state: RootState) => state.dashboardInfo.common.conf.SQL_MAX_ROW as number,
const fullExportMaxRows: number = useSelector(
(state: RootState) =>
(state.dashboardInfo.common.conf.TABLE_VIZ_MAX_ROW_SERVER as number) ||
(state.dashboardInfo.common.conf.SQL_MAX_ROW as number),
);
const streamingThreshold: number = useSelector(
(state: RootState) =>
@@ -480,7 +482,7 @@ const Chart = (props: ChartProps) => {
(formData as JsonObject).dashboardId = dashboardInfo.id;
const exportTable = useCallback(
(format: string, isFullCSV: boolean, isPivot = false) => {
(format: string, isFullExport: boolean, isPivot = false) => {
const logAction =
format === 'csv'
? LOG_ACTIONS_EXPORT_CSV_DASHBOARD_CHART
@@ -490,8 +492,11 @@ const Chart = (props: ChartProps) => {
is_cached: isCached,
});
const exportFormData = isFullCSV
? { ...formData, row_limit: maxRows }
// For a "full" export, raise the requested row_limit and flag the
// request with full_export so the backend lifts the row-limit cap to
// TABLE_VIZ_MAX_ROW_SERVER (gated by the ALLOW_FULL_CSV_EXPORT flag).
const exportFormData = isFullExport
? { ...formData, row_limit: fullExportMaxRows, full_export: true }
: formData;
const resultType = isPivot ? 'post_processed' : 'full';
@@ -579,7 +584,7 @@ const Chart = (props: ChartProps) => {
sliceVizType,
isCached,
formData,
maxRows,
fullExportMaxRows,
dataMaskOwnState,
chartState,
props.id,

View File

@@ -66,7 +66,22 @@ class StreamingCSVExportCommand(BaseStreamingCSVExportCommand):
# Note: datasource should already be attached to a session from query_context
datasource = self._query_context.datasource
query_obj = self._query_context.queries[0]
sql_query = datasource.get_query_str(query_obj.to_dict())
query_dict = query_obj.to_dict()
# Use get_query_str_extended (single, clean statement) instead of
# get_query_str, which returns a multi-statement string (prequeries +
# main SQL joined by ";" with a trailing ";"). The base command runs the
# SQL through SQLAlchemy text(), which only accepts a single statement,
# so the multi-statement form fails on engines that emit prequeries
# (e.g. PostgreSQL/Snowflake "SET search_path"). Prequeries still run
# via the connect-event listener registered in Database.get_sqla_engine.
# get_query_str_extended lives on ExploreMixin, not the Explorable
# Protocol, so guard with getattr for datasources that lack it.
get_extended = getattr(datasource, "get_query_str_extended", None)
if callable(get_extended):
sql_query = get_extended(query_dict).sql
else:
sql_query = datasource.get_query_str(query_dict)
database = getattr(datasource, "database", None)
catalog = getattr(datasource, "catalog", None)
schema = getattr(datasource, "schema", None)

View File

@@ -74,6 +74,11 @@ class QueryContextFactory: # pylint: disable=too-few-public-methods
bool(form_data.get("server_pagination")) if form_data else False
)
# A "full" CSV/Excel export raises the row-limit ceiling to
# TABLE_VIZ_MAX_ROW_SERVER (when ALLOW_FULL_CSV_EXPORT is enabled).
# The marker is set by the frontend's "Export to full ..." actions.
full_export = bool(form_data.get("full_export")) if form_data else False
queries_ = [
self._process_query_object(
datasource_model_instance,
@@ -82,6 +87,7 @@ class QueryContextFactory: # pylint: disable=too-few-public-methods
result_type,
datasource=datasource,
server_pagination=server_pagination,
full_export=full_export,
**query_obj,
),
)

View File

@@ -58,6 +58,7 @@ class QueryObjectFactory: # pylint: disable=too-few-public-methods
time_range: str | None = None,
time_shift: str | None = None,
server_pagination: bool | None = None,
full_export: bool | None = None,
**kwargs: Any,
) -> QueryObject:
datasource_model_instance = None
@@ -66,9 +67,12 @@ class QueryObjectFactory: # pylint: disable=too-few-public-methods
processed_extras = self._process_extras(extras)
result_type = kwargs.setdefault("result_type", parent_result_type)
# Process row limit taking server pagination into account
# Process row limit taking server pagination and full export into account
row_limit = self._process_row_limit(
row_limit, result_type, server_pagination=server_pagination
row_limit,
result_type,
server_pagination=server_pagination,
full_export=full_export,
)
processed_time_range = self._process_time_range(
@@ -106,12 +110,14 @@ class QueryObjectFactory: # pylint: disable=too-few-public-methods
row_limit: int | None,
result_type: ChartDataResultType,
server_pagination: bool | None = None,
full_export: bool | None = None,
) -> int:
"""Process row limit taking into account server pagination.
:param row_limit: The requested row limit
:param result_type: The type of result being processed
:param server_pagination: Whether server-side pagination is enabled
:param full_export: Whether this is a "full" CSV/Excel export request
:return: The processed row limit
"""
default_row_limit = (
@@ -122,6 +128,7 @@ class QueryObjectFactory: # pylint: disable=too-few-public-methods
return apply_max_row_limit(
row_limit or default_row_limit,
server_pagination=server_pagination,
full_export=full_export,
)
@staticmethod

View File

@@ -1317,7 +1317,9 @@ MAPBOX_API_KEY = os.environ.get("MAPBOX_API_KEY", "")
# Maximum number of rows returned for any analytical database query
SQL_MAX_ROW = 100000
# Maximum number of rows for any query with Server Pagination in Table Viz type
# Maximum number of rows for any query with Server Pagination in Table Viz type.
# This also serves as the row-count ceiling for "full" CSV/Excel exports when the
# ALLOW_FULL_CSV_EXPORT feature flag is enabled (see apply_max_row_limit).
TABLE_VIZ_MAX_ROW_SERVER = 500000

View File

@@ -2051,13 +2051,17 @@ def parse_boolean_string(bool_str: str | None) -> bool:
def apply_max_row_limit(
limit: int,
server_pagination: bool | None = None,
full_export: bool | None = None,
) -> int:
"""
Override row limit based on server pagination setting
Override row limit based on server pagination / full-export settings
:param limit: requested row limit
:param server_pagination: whether server-side pagination
is enabled, defaults to None
:param full_export: whether this is a "full" CSV/Excel export request,
which raises the ceiling to TABLE_VIZ_MAX_ROW_SERVER when the
ALLOW_FULL_CSV_EXPORT feature flag is enabled, defaults to None
:return: Capped row limit
>>> apply_max_row_limit(600000, server_pagination=True) # Server pagination
@@ -2069,13 +2073,25 @@ def apply_max_row_limit(
>>> apply_max_row_limit(0) # Zero returns default max limit
50000
"""
# Imported locally to avoid a circular import: superset.extensions pulls in
# superset.security.manager / superset.utils.cache_manager, both of which
# import superset.utils.core.
# pylint: disable=import-outside-toplevel
from superset.extensions import feature_flag_manager
max_limit = (
app.config["TABLE_VIZ_MAX_ROW_SERVER"]
if server_pagination
else app.config["SQL_MAX_ROW"]
# A "full" CSV/Excel export is allowed past the regular SQL_MAX_ROW cap, but
# only when the operator has opted in via the ALLOW_FULL_CSV_EXPORT flag.
# server_pagination is a separate, independent reason to raise the cap and is
# NOT gated by that flag.
allow_full_export = full_export and feature_flag_manager.is_feature_enabled(
"ALLOW_FULL_CSV_EXPORT"
)
# Both raised cases share the same ceiling, TABLE_VIZ_MAX_ROW_SERVER (a
# bounded, predictable maximum); see its definition in config.py.
if server_pagination or allow_full_export:
max_limit = app.config["TABLE_VIZ_MAX_ROW_SERVER"]
else:
max_limit = app.config["SQL_MAX_ROW"]
if limit != 0:
return min(max_limit, limit)
return max_limit

View File

@@ -29,6 +29,7 @@ def _setup_chart_mocks(
sql: str = "SELECT * FROM test",
catalog: str | None = None,
schema: str | None = None,
prequeries: list[str] | None = None,
) -> tuple[MockerFixture, MockerFixture, MockerFixture]:
"""Set up common mocks for chart streaming export tests."""
mock_db = mocker.patch("superset.commands.streaming_export.base.db")
@@ -37,12 +38,22 @@ def _setup_chart_mocks(
query_context = mocker.MagicMock()
datasource = mocker.MagicMock()
datasource.get_query_str.return_value = sql
# The command prefers get_query_str_extended (clean single statement);
# get_query_str returns the legacy multi-statement form.
extended = mocker.MagicMock()
extended.sql = sql
extended.prequeries = prequeries or []
datasource.get_query_str_extended.return_value = extended
datasource.get_query_str.return_value = (
";\n\n".join((prequeries or []) + [sql]) + ";"
)
datasource.database = mocker.MagicMock()
datasource.catalog = catalog
datasource.schema = schema
query_context.datasource = datasource
query_context.queries = [mocker.MagicMock()]
query_obj = mocker.MagicMock()
query_obj.to_dict.return_value = {"row_limit": 100}
query_context.queries = [query_obj]
mock_session.merge.return_value = datasource.database
return mock_db, query_context, datasource
@@ -296,3 +307,39 @@ def test_catalog_and_schema_passed_to_engine(mocker: MockerFixture) -> None:
catalog="my_catalog",
schema="my_schema",
)
def test_uses_extended_sql_single_statement(mocker: MockerFixture) -> None:
"""SQL generation uses get_query_str_extended (no prequeries, no trailing ;).
get_query_str returns a multi-statement string that SQLAlchemy text()
rejects; the command must use the clean single-statement extended form.
"""
_, query_context, datasource = _setup_chart_mocks(
mocker,
sql="SELECT * FROM test",
prequeries=["SET search_path = my_schema"],
)
command = StreamingCSVExportCommand(query_context)
sql_query, _, _, _ = command._get_sql_and_database()
assert sql_query == "SELECT * FROM test"
assert ";\n\n" not in sql_query
assert not sql_query.endswith(";")
datasource.get_query_str.assert_not_called()
def test_falls_back_to_get_query_str_without_extended(
mocker: MockerFixture,
) -> None:
"""Datasources lacking get_query_str_extended fall back to get_query_str."""
_, query_context, datasource = _setup_chart_mocks(mocker)
# SemanticView and other Explorables may not implement the extended form.
del datasource.get_query_str_extended
command = StreamingCSVExportCommand(query_context)
sql_query, _, _, _ = command._get_sql_and_database()
datasource.get_query_str.assert_called_once()
assert "SELECT * FROM test" in sql_query

View File

@@ -30,6 +30,7 @@ def create_app_config() -> dict[str, Any]:
"DEFAULT_RELATIVE_END_TIME": "today",
"SAMPLES_ROW_LIMIT": 1000,
"SQL_MAX_ROW": 100000,
"TABLE_VIZ_MAX_ROW_SERVER": 500000,
}
@@ -48,12 +49,12 @@ def connector_registry() -> Mock:
def apply_max_row_limit(
limit: int,
server_pagination: bool | None = None,
full_export: bool | None = None,
) -> int:
max_limit = (
create_app_config()["TABLE_VIZ_MAX_ROW_SERVER"]
if server_pagination
else create_app_config()["SQL_MAX_ROW"]
)
if server_pagination or full_export:
max_limit = create_app_config()["TABLE_VIZ_MAX_ROW_SERVER"]
else:
max_limit = create_app_config()["SQL_MAX_ROW"]
if limit != 0:
return min(max_limit, limit)
return max_limit
@@ -109,6 +110,36 @@ class TestQueryObjectFactory:
assert query_object.row_limit == 100
assert query_object.row_offset == 200
def test_query_context_full_export_raises_limit(
self,
query_object_factory: QueryObjectFactory,
raw_query_context: dict[str, Any],
):
"""full_export raises the row-limit ceiling to TABLE_VIZ_MAX_ROW_SERVER."""
raw_query_object = raw_query_context["queries"][0]
raw_query_object["row_limit"] = 300000
query_object = query_object_factory.create(
raw_query_context["result_type"],
full_export=True,
**raw_query_object,
)
# Without full_export this would be capped at SQL_MAX_ROW (100000).
assert query_object.row_limit == 300000
def test_query_context_limit_capped_without_full_export(
self,
query_object_factory: QueryObjectFactory,
raw_query_context: dict[str, Any],
):
"""A regular request stays capped at SQL_MAX_ROW."""
raw_query_object = raw_query_context["queries"][0]
raw_query_object["row_limit"] = 300000
query_object = query_object_factory.create(
raw_query_context["result_type"],
**raw_query_object,
)
assert query_object.row_limit == 100000
def test_query_context_null_post_processing_op(
self,
query_object_factory: QueryObjectFactory,

View File

@@ -28,6 +28,7 @@ from pytest_mock import MockerFixture
from superset.exceptions import SupersetException
from superset.utils.core import (
apply_max_row_limit,
cast_to_boolean,
check_is_safe_zip,
DateColumn,
@@ -53,6 +54,7 @@ from superset.utils.core import (
sanitize_url,
)
from tests.conftest import with_config
from tests.unit_tests.conftest import with_feature_flags
ADHOC_FILTER: QueryObjectFilterClause = {
"col": "foo",
@@ -1730,3 +1732,34 @@ def test_markdown_with_markup_wrap() -> None:
assert isinstance(result, Markup)
assert "<strong>bold</strong>" in str(result)
@with_config({"SQL_MAX_ROW": 100000, "TABLE_VIZ_MAX_ROW_SERVER": 500000})
def test_apply_max_row_limit_default_cap() -> None:
"""A regular request is capped at SQL_MAX_ROW."""
assert apply_max_row_limit(300000) == 100000
assert apply_max_row_limit(5000) == 5000
# 0 means "no explicit limit" -> default max
assert apply_max_row_limit(0) == 100000
@with_config({"SQL_MAX_ROW": 100000, "TABLE_VIZ_MAX_ROW_SERVER": 500000})
def test_apply_max_row_limit_server_pagination() -> None:
"""server_pagination raises the cap to TABLE_VIZ_MAX_ROW_SERVER."""
assert apply_max_row_limit(300000, server_pagination=True) == 300000
assert apply_max_row_limit(900000, server_pagination=True) == 500000
@with_config({"SQL_MAX_ROW": 100000, "TABLE_VIZ_MAX_ROW_SERVER": 500000})
@with_feature_flags(ALLOW_FULL_CSV_EXPORT=True)
def test_apply_max_row_limit_full_export_with_flag() -> None:
"""full_export raises the cap to TABLE_VIZ_MAX_ROW_SERVER when the flag is on."""
assert apply_max_row_limit(300000, full_export=True) == 300000
assert apply_max_row_limit(900000, full_export=True) == 500000
@with_config({"SQL_MAX_ROW": 100000, "TABLE_VIZ_MAX_ROW_SERVER": 500000})
@with_feature_flags(ALLOW_FULL_CSV_EXPORT=False)
def test_apply_max_row_limit_full_export_without_flag() -> None:
"""full_export has no effect when ALLOW_FULL_CSV_EXPORT is disabled."""
assert apply_max_row_limit(300000, full_export=True) == 100000