Compare commits

...

2 Commits

Author SHA1 Message Date
Evan Rusackas
13a4ca9699 fix(csv): handle Decimal in streaming export; tighten CSV_EXPORT tests
Addresses review feedback on #38170:

- `_format_row_values` only rewrote the decimal separator for `float`
  values, so `decimal.Decimal` (returned for NUMERIC/DECIMAL columns by
  most SQLAlchemy drivers) and numpy numeric types kept the default
  `.` separator, bypassing the CSV_EXPORT config. Broaden the check to
  cover `Decimal` and `numbers.Real`, while explicitly excluding
  `bool` (which is a numeric subtype in Python).
- `test_apply_client_processing_csv_format_custom_separator` accepted
  both `Alice;1,5` and `Alice;1.5`, so a regression that drops the
  `decimal` option would still pass. Require the comma form and
  explicitly forbid the dot form.
- `test_apply_client_processing_csv_pivot_table_custom_separator` only
  checked that `;` or `,` appeared anywhere in the output (trivially
  true for any CSV). Assert the full pivoted header/value lines and
  forbid the dotted `1234.56` form.
- Add a new regression test exercising the streaming export path with
  `decimal.Decimal` values to lock in the broadened type check.
2026-04-22 11:53:02 -07:00
Evan Rusackas
07b0be630b fix(csv): respect CSV_EXPORT config for decimal separator and delimiter
This commit fixes GitHub issue #32371 where the CSV_EXPORT configuration
(specifically the `decimal` and `sep` parameters) was not being applied
correctly in certain CSV export scenarios.

The issue had two parts:
1. Custom decimal separator (e.g., `decimal: ","`) was ignored in exports
2. Pivoted CSV exports triggered errors when using custom CSV_EXPORT settings

Changes:
- superset/charts/client_processing.py: Pass CSV_EXPORT config to to_csv()
  when exporting processed (pivoted) CSV data in apply_client_processing()
- superset/commands/streaming_export/base.py: Add support for CSV_EXPORT
  config in streaming exports by:
  - Using the `sep` parameter as the CSV delimiter
  - Formatting float values with the custom `decimal` separator

The fix ensures that CSV exports consistently respect the CSV_EXPORT
configuration across all export paths (regular, pivoted, and streaming).

Fixes #32371

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-22 12:22:57 -08:00
4 changed files with 362 additions and 4 deletions

View File

@@ -389,7 +389,9 @@ def apply_client_processing( # noqa: C901
query["data"] = processed_df.to_dict() query["data"] = processed_df.to_dict()
elif query["result_format"] == ChartDataResultFormat.CSV: elif query["result_format"] == ChartDataResultFormat.CSV:
buf = StringIO() buf = StringIO()
processed_df.to_csv(buf, index=show_default_index) # Apply CSV_EXPORT config for consistent CSV formatting
csv_export_config = current_app.config.get("CSV_EXPORT", {})
processed_df.to_csv(buf, index=show_default_index, **csv_export_config)
buf.seek(0) buf.seek(0)
query["data"] = buf.getvalue() query["data"] = buf.getvalue()

View File

@@ -24,6 +24,8 @@ import logging
import time import time
from abc import abstractmethod from abc import abstractmethod
from contextlib import contextmanager from contextlib import contextmanager
from decimal import Decimal
from numbers import Real
from typing import Any, Callable, Generator from typing import Any, Callable, Generator
from flask import current_app as app, g, has_app_context from flask import current_app as app, g, has_app_context
@@ -107,16 +109,55 @@ class BaseStreamingCSVExportCommand(BaseCommand):
buffer.truncate() buffer.truncate()
return header_data, total_bytes return header_data, total_bytes
def _format_row_values(
self, row: tuple[Any, ...], decimal_separator: str | None
) -> list[Any]:
"""
Format row values, applying custom decimal separator if specified.
Args:
row: Database row as a tuple
decimal_separator: Custom decimal separator (e.g., ",") or None
Returns:
List of formatted values
"""
if not decimal_separator or decimal_separator == ".":
return list(row)
formatted: list[Any] = []
for value in row:
# Apply the custom decimal separator to any real numeric value
# (float, decimal.Decimal, numpy numeric types, ...). Booleans are
# technically a numeric type in Python but should never be rewritten
# as numbers in CSV output.
if isinstance(value, bool):
formatted.append(value)
elif isinstance(value, (float, Decimal, Real)):
# Format numeric values with custom decimal separator
formatted.append(str(value).replace(".", decimal_separator))
else:
formatted.append(value)
return formatted
def _process_rows( def _process_rows(
self, self,
result_proxy: Any, result_proxy: Any,
csv_writer: Any, csv_writer: Any,
buffer: io.StringIO, buffer: io.StringIO,
limit: int | None, limit: int | None,
decimal_separator: str | None = None,
) -> Generator[tuple[str, int, int], None, None]: ) -> Generator[tuple[str, int, int], None, None]:
""" """
Process database rows and yield CSV data chunks. Process database rows and yield CSV data chunks.
Args:
result_proxy: SQLAlchemy result proxy
csv_writer: CSV writer instance
buffer: StringIO buffer for CSV data
limit: Maximum number of rows to process, or None for unlimited
decimal_separator: Custom decimal separator (e.g., ",") or None
Yields tuples of (data_chunk, row_count, byte_count). Yields tuples of (data_chunk, row_count, byte_count).
""" """
row_count = 0 row_count = 0
@@ -128,7 +169,9 @@ class BaseStreamingCSVExportCommand(BaseCommand):
if limit is not None and row_count >= limit: if limit is not None and row_count >= limit:
break break
csv_writer.writerow(row) # Format values with custom decimal separator if needed
formatted_row = self._format_row_values(row, decimal_separator)
csv_writer.writerow(formatted_row)
row_count += 1 row_count += 1
# Check buffer size and flush if needed # Check buffer size and flush if needed
@@ -156,6 +199,11 @@ class BaseStreamingCSVExportCommand(BaseCommand):
start_time = time.time() start_time = time.time()
total_bytes = 0 total_bytes = 0
# Get CSV export configuration
csv_export_config = app.config.get("CSV_EXPORT", {})
delimiter = csv_export_config.get("sep", ",")
decimal_separator = csv_export_config.get("decimal", ".")
with db.session() as session: with db.session() as session:
# Merge database to prevent DetachedInstanceError # Merge database to prevent DetachedInstanceError
merged_database = session.merge(database) merged_database = session.merge(database)
@@ -170,8 +218,11 @@ class BaseStreamingCSVExportCommand(BaseCommand):
columns = list(result_proxy.keys()) columns = list(result_proxy.keys())
# Use StringIO with csv.writer for proper escaping # Use StringIO with csv.writer for proper escaping
# Apply delimiter from CSV_EXPORT config
buffer = io.StringIO() buffer = io.StringIO()
csv_writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL) csv_writer = csv.writer(
buffer, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL
)
# Write CSV header # Write CSV header
header_data, header_bytes = self._write_csv_header( header_data, header_bytes = self._write_csv_header(
@@ -183,7 +234,7 @@ class BaseStreamingCSVExportCommand(BaseCommand):
# Process rows and yield chunks # Process rows and yield chunks
row_count = 0 row_count = 0
for data_chunk, rows_processed, chunk_bytes in self._process_rows( for data_chunk, rows_processed, chunk_bytes in self._process_rows(
result_proxy, csv_writer, buffer, limit result_proxy, csv_writer, buffer, limit, decimal_separator
): ):
total_bytes += chunk_bytes total_bytes += chunk_bytes
row_count = rows_processed row_count = rows_processed

View File

@@ -2788,3 +2788,114 @@ def test_apply_client_processing_csv_format_default_na_behavior():
assert ( assert (
"Alice," in lines[2] "Alice," in lines[2]
) # Second data row should have empty last_name (NA converted to null) ) # Second data row should have empty last_name (NA converted to null)
@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
def test_apply_client_processing_csv_format_custom_separator():
"""
Test that apply_client_processing respects CSV_EXPORT config
for custom separator and decimal character.
This is a regression test for GitHub issue #32371.
"""
# CSV data with numeric values
csv_data = "name,value\nAlice,1.5\nBob,2.75"
result = {
"queries": [
{
"result_format": ChartDataResultFormat.CSV,
"data": csv_data,
}
]
}
form_data = {
"datasource": "1__table",
"viz_type": "table",
"slice_id": 1,
"url_params": {},
"metrics": [],
"groupby": [],
"columns": ["name", "value"],
"extra_form_data": {},
"force": False,
"result_format": "csv",
"result_type": "results",
}
processed_result = apply_client_processing(result, form_data)
output_data = processed_result["queries"][0]["data"]
lines = output_data.strip().split("\n")
# With sep=";", columns should be separated by semicolon
assert lines[0] == "name;value"
# With decimal=",", decimal values must use comma as separator.
# Asserting the exact formatted value ensures a regression that drops
# the `decimal` option (so floats keep a dot) will be caught.
assert "Alice;1,5" in lines[1]
assert "Bob;2,75" in lines[2]
# Guard explicitly against the dot form slipping through.
assert "1.5" not in lines[1]
assert "2.75" not in lines[2]
@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
def test_apply_client_processing_csv_pivot_table_custom_separator():
"""
Test that apply_client_processing respects CSV_EXPORT config
for pivot table exports with custom separator and decimal character.
This is a regression test for GitHub issue #32371 - specifically for
pivoted CSV exports which were not respecting the CSV_EXPORT config.
"""
# CSV data with a numeric metric
csv_data = "COUNT(metric)\n1234.56"
result = {
"queries": [
{
"result_format": ChartDataResultFormat.CSV,
"data": csv_data,
}
]
}
form_data = {
"datasource": "1__table",
"viz_type": "pivot_table_v2",
"slice_id": 1,
"url_params": {},
"groupbyColumns": [],
"groupbyRows": [],
"metrics": [
{
"aggregate": "COUNT",
"column": {"column_name": "metric"},
"expressionType": "SIMPLE",
"label": "COUNT(metric)",
}
],
"metricsLayout": "COLUMNS",
"aggregateFunction": "Sum",
"extra_form_data": {},
"force": False,
"result_format": "csv",
"result_type": "results",
}
processed_result = apply_client_processing(result, form_data)
output_data = processed_result["queries"][0]["data"]
lines = output_data.strip().split("\n")
# After pivoting a single metric with no groupby rows/columns, the
# CSV for the "COUNT(metric)" column and "Total (Sum)" row should
# reflect the CSV_EXPORT config: semicolons as field separators and
# commas as the decimal separator.
assert lines[0] == ";COUNT(metric)"
assert "Total (Sum);1234,56" in lines[1]
# Guard explicitly against the dot form slipping through, which is
# what the previous (broken) implementation produced.
assert "1234.56" not in output_data

View File

@@ -16,6 +16,7 @@
# under the License. # under the License.
"""Unit tests for SQL Lab Streaming CSV Export Command.""" """Unit tests for SQL Lab Streaming CSV Export Command."""
from decimal import Decimal
from unittest.mock import MagicMock, Mock, patch from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
@@ -538,3 +539,196 @@ def test_null_values_handling(mocker, mock_query):
assert "1,,100" in csv_data assert "1,,100" in csv_data
assert "2,test," in csv_data assert "2,test," in csv_data
assert ",," in csv_data assert ",," in csv_data
def test_csv_export_config_custom_separator(mocker, mock_query):
"""
Test that streaming CSV export respects CSV_EXPORT config
for custom separator (sep).
This is a regression test for GitHub issue #32371.
"""
mock_query.select_sql = "SELECT * FROM test"
mock_result = MagicMock()
mock_result.keys.return_value = ["id", "name"]
mock_result.fetchmany.side_effect = [
[(1, "Alice"), (2, "Bob")],
[],
]
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
mock_connection = MagicMock()
mock_connection.execution_options.return_value.execute.return_value = mock_result
mock_connection.__enter__.return_value = mock_connection
mock_connection.__exit__.return_value = None
mock_engine = MagicMock()
mock_engine.connect.return_value = mock_connection
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
mock_engine
)
# Mock the app config to use semicolon separator
mocker.patch(
"superset.commands.streaming_export.base.app.config.get",
return_value={"sep": ";", "encoding": "utf-8"},
)
command = StreamingSqlResultExportCommand("test_client_123")
command.validate()
csv_generator_callable = command.run()
generator = csv_generator_callable()
csv_data = "".join(generator)
# With sep=";", columns should be separated by semicolon
assert "id;name" in csv_data
assert "1;Alice" in csv_data
assert "2;Bob" in csv_data
def test_csv_export_config_custom_decimal(mocker, mock_query):
"""
Test that streaming CSV export respects CSV_EXPORT config
for custom decimal separator.
This is a regression test for GitHub issue #32371.
"""
mock_query.select_sql = "SELECT * FROM test"
mock_result = MagicMock()
mock_result.keys.return_value = ["id", "price"]
mock_result.fetchmany.side_effect = [
[(1, 12.34), (2, 56.78)],
[],
]
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
mock_connection = MagicMock()
mock_connection.execution_options.return_value.execute.return_value = mock_result
mock_connection.__enter__.return_value = mock_connection
mock_connection.__exit__.return_value = None
mock_engine = MagicMock()
mock_engine.connect.return_value = mock_connection
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
mock_engine
)
# Mock the app config to use comma as decimal separator
mocker.patch(
"superset.commands.streaming_export.base.app.config.get",
return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
)
command = StreamingSqlResultExportCommand("test_client_123")
command.validate()
csv_generator_callable = command.run()
generator = csv_generator_callable()
csv_data = "".join(generator)
# With decimal=",", float values should use comma
assert "12,34" in csv_data
assert "56,78" in csv_data
def test_csv_export_config_combined_sep_and_decimal(mocker, mock_query):
"""
Test that streaming CSV export respects both sep and decimal from CSV_EXPORT.
This is a regression test for GitHub issue #32371.
"""
mock_query.select_sql = "SELECT * FROM test"
mock_result = MagicMock()
mock_result.keys.return_value = ["id", "name", "price"]
mock_result.fetchmany.side_effect = [
[(1, "Widget", 99.99), (2, "Gadget", 149.50)],
[],
]
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
mock_connection = MagicMock()
mock_connection.execution_options.return_value.execute.return_value = mock_result
mock_connection.__enter__.return_value = mock_connection
mock_connection.__exit__.return_value = None
mock_engine = MagicMock()
mock_engine.connect.return_value = mock_connection
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
mock_engine
)
# Mock the app config to use European format
mocker.patch(
"superset.commands.streaming_export.base.app.config.get",
return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
)
command = StreamingSqlResultExportCommand("test_client_123")
command.validate()
csv_generator_callable = command.run()
generator = csv_generator_callable()
csv_data = "".join(generator)
# Verify header uses semicolon separator
assert "id;name;price" in csv_data
# Verify data uses semicolon separator and comma decimal
assert "1;Widget;99,99" in csv_data
assert "2;Gadget;149,5" in csv_data or "2;Gadget;149,50" in csv_data
def test_csv_export_config_custom_decimal_for_decimal_type(mocker, mock_query):
"""
Streaming CSV export must respect the custom decimal separator for
``decimal.Decimal`` values too — SQLAlchemy commonly returns NUMERIC /
DECIMAL columns as ``Decimal`` rather than ``float``.
Regression test for GitHub issue #32371 / PR #38170 review feedback.
"""
mock_query.select_sql = "SELECT * FROM test"
mock_result = MagicMock()
mock_result.keys.return_value = ["id", "price"]
mock_result.fetchmany.side_effect = [
[(1, Decimal("12.34")), (2, Decimal("56.78"))],
[],
]
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
mock_connection = MagicMock()
mock_connection.execution_options.return_value.execute.return_value = mock_result
mock_connection.__enter__.return_value = mock_connection
mock_connection.__exit__.return_value = None
mock_engine = MagicMock()
mock_engine.connect.return_value = mock_connection
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
mock_engine
)
mocker.patch(
"superset.commands.streaming_export.base.app.config.get",
return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
)
command = StreamingSqlResultExportCommand("test_client_123")
command.validate()
csv_generator_callable = command.run()
generator = csv_generator_callable()
csv_data = "".join(generator)
# Decimal values must be formatted with the custom separator, not left
# with the default ``.`` which would slip through a ``float``-only check.
assert "1;12,34" in csv_data
assert "2;56,78" in csv_data
assert "12.34" not in csv_data
assert "56.78" not in csv_data