Compare commits

...

2 Commits

Author SHA1 Message Date
Evan Rusackas
13a4ca9699 fix(csv): handle Decimal in streaming export; tighten CSV_EXPORT tests
Addresses review feedback on #38170:

- `_format_row_values` only rewrote the decimal separator for `float`
  values, so `decimal.Decimal` (returned for NUMERIC/DECIMAL columns by
  most SQLAlchemy drivers) and numpy numeric types kept the default
  `.` separator, bypassing the CSV_EXPORT config. Broaden the check to
  cover `Decimal` and `numbers.Real`, while explicitly excluding
  `bool` (which is a numeric subtype in Python).
- `test_apply_client_processing_csv_format_custom_separator` accepted
  both `Alice;1,5` and `Alice;1.5`, so a regression that drops the
  `decimal` option would still pass. Require the comma form and
  explicitly forbid the dot form.
- `test_apply_client_processing_csv_pivot_table_custom_separator` only
  checked that `;` or `,` appeared anywhere in the output (trivially
  true for any CSV). Assert the full pivoted header/value lines and
  forbid the dotted `1234.56` form.
- Add a new regression test exercising the streaming export path with
  `decimal.Decimal` values to lock in the broadened type check.
2026-04-22 11:53:02 -07:00
Evan Rusackas
07b0be630b fix(csv): respect CSV_EXPORT config for decimal separator and delimiter
This commit fixes GitHub issue #32371 where the CSV_EXPORT configuration
(specifically the `decimal` and `sep` parameters) was not being applied
correctly in certain CSV export scenarios.

The issue had two parts:
1. Custom decimal separator (e.g., `decimal: ","`) was ignored in exports
2. Pivoted CSV exports triggered errors when using custom CSV_EXPORT settings

Changes:
- superset/charts/client_processing.py: Pass CSV_EXPORT config to to_csv()
  when exporting processed (pivoted) CSV data in apply_client_processing()
- superset/commands/streaming_export/base.py: Add support for CSV_EXPORT
  config in streaming exports by:
  - Using the `sep` parameter as the CSV delimiter
  - Formatting float values with the custom `decimal` separator

The fix ensures that CSV exports consistently respect the CSV_EXPORT
configuration across all export paths (regular, pivoted, and streaming).

Fixes #32371

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-22 12:22:57 -08:00
4 changed files with 362 additions and 4 deletions

View File

@@ -389,7 +389,9 @@ def apply_client_processing( # noqa: C901
query["data"] = processed_df.to_dict()
elif query["result_format"] == ChartDataResultFormat.CSV:
buf = StringIO()
processed_df.to_csv(buf, index=show_default_index)
# Apply CSV_EXPORT config for consistent CSV formatting
csv_export_config = current_app.config.get("CSV_EXPORT", {})
processed_df.to_csv(buf, index=show_default_index, **csv_export_config)
buf.seek(0)
query["data"] = buf.getvalue()

View File

@@ -24,6 +24,8 @@ import logging
import time
from abc import abstractmethod
from contextlib import contextmanager
from decimal import Decimal
from numbers import Real
from typing import Any, Callable, Generator
from flask import current_app as app, g, has_app_context
@@ -107,16 +109,55 @@ class BaseStreamingCSVExportCommand(BaseCommand):
buffer.truncate()
return header_data, total_bytes
def _format_row_values(
self, row: tuple[Any, ...], decimal_separator: str | None
) -> list[Any]:
"""
Format row values, applying custom decimal separator if specified.
Args:
row: Database row as a tuple
decimal_separator: Custom decimal separator (e.g., ",") or None
Returns:
List of formatted values
"""
if not decimal_separator or decimal_separator == ".":
return list(row)
formatted: list[Any] = []
for value in row:
# Apply the custom decimal separator to any real numeric value
# (float, decimal.Decimal, numpy numeric types, ...). Booleans are
# technically a numeric type in Python but should never be rewritten
# as numbers in CSV output.
if isinstance(value, bool):
formatted.append(value)
elif isinstance(value, (float, Decimal, Real)):
# Format numeric values with custom decimal separator
formatted.append(str(value).replace(".", decimal_separator))
else:
formatted.append(value)
return formatted
def _process_rows(
self,
result_proxy: Any,
csv_writer: Any,
buffer: io.StringIO,
limit: int | None,
decimal_separator: str | None = None,
) -> Generator[tuple[str, int, int], None, None]:
"""
Process database rows and yield CSV data chunks.
Args:
result_proxy: SQLAlchemy result proxy
csv_writer: CSV writer instance
buffer: StringIO buffer for CSV data
limit: Maximum number of rows to process, or None for unlimited
decimal_separator: Custom decimal separator (e.g., ",") or None
Yields tuples of (data_chunk, row_count, byte_count).
"""
row_count = 0
@@ -128,7 +169,9 @@ class BaseStreamingCSVExportCommand(BaseCommand):
if limit is not None and row_count >= limit:
break
csv_writer.writerow(row)
# Format values with custom decimal separator if needed
formatted_row = self._format_row_values(row, decimal_separator)
csv_writer.writerow(formatted_row)
row_count += 1
# Check buffer size and flush if needed
@@ -156,6 +199,11 @@ class BaseStreamingCSVExportCommand(BaseCommand):
start_time = time.time()
total_bytes = 0
# Get CSV export configuration
csv_export_config = app.config.get("CSV_EXPORT", {})
delimiter = csv_export_config.get("sep", ",")
decimal_separator = csv_export_config.get("decimal", ".")
with db.session() as session:
# Merge database to prevent DetachedInstanceError
merged_database = session.merge(database)
@@ -170,8 +218,11 @@ class BaseStreamingCSVExportCommand(BaseCommand):
columns = list(result_proxy.keys())
# Use StringIO with csv.writer for proper escaping
# Apply delimiter from CSV_EXPORT config
buffer = io.StringIO()
csv_writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
csv_writer = csv.writer(
buffer, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL
)
# Write CSV header
header_data, header_bytes = self._write_csv_header(
@@ -183,7 +234,7 @@ class BaseStreamingCSVExportCommand(BaseCommand):
# Process rows and yield chunks
row_count = 0
for data_chunk, rows_processed, chunk_bytes in self._process_rows(
result_proxy, csv_writer, buffer, limit
result_proxy, csv_writer, buffer, limit, decimal_separator
):
total_bytes += chunk_bytes
row_count = rows_processed

View File

@@ -2788,3 +2788,114 @@ def test_apply_client_processing_csv_format_default_na_behavior():
assert (
"Alice," in lines[2]
) # Second data row should have empty last_name (NA converted to null)
@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
def test_apply_client_processing_csv_format_custom_separator():
    """
    apply_client_processing must honour the CSV_EXPORT config (custom
    field separator and decimal character) for plain CSV results.

    This is a regression test for GitHub issue #32371.
    """
    # CSV input with numeric values that contain a decimal point.
    input_csv = "name,value\nAlice,1.5\nBob,2.75"
    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": input_csv,
            }
        ]
    }
    form_data = {
        "datasource": "1__table",
        "viz_type": "table",
        "slice_id": 1,
        "url_params": {},
        "metrics": [],
        "groupby": [],
        "columns": ["name", "value"],
        "extra_form_data": {},
        "force": False,
        "result_format": "csv",
        "result_type": "results",
    }

    output = apply_client_processing(result, form_data)["queries"][0]["data"]
    lines = output.strip().split("\n")

    # Fields must be joined with the configured ";" separator.
    assert lines[0] == "name;value"
    # Decimal values must use "," — pinning the exact formatted values
    # ensures a regression that drops the `decimal` option is caught.
    assert "Alice;1,5" in lines[1]
    assert "Bob;2,75" in lines[2]
    # The dot form must not slip through.
    assert "1.5" not in lines[1]
    assert "2.75" not in lines[2]
@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
def test_apply_client_processing_csv_pivot_table_custom_separator():
    """
    apply_client_processing must honour the CSV_EXPORT config for pivoted
    CSV exports as well.

    This is a regression test for GitHub issue #32371 — the pivot path
    previously ignored the configured separator/decimal settings.
    """
    # Single numeric metric, no groupby rows/columns.
    input_csv = "COUNT(metric)\n1234.56"
    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": input_csv,
            }
        ]
    }
    form_data = {
        "datasource": "1__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 1,
        "url_params": {},
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {"column_name": "metric"},
                "expressionType": "SIMPLE",
                "label": "COUNT(metric)",
            }
        ],
        "metricsLayout": "COLUMNS",
        "aggregateFunction": "Sum",
        "extra_form_data": {},
        "force": False,
        "result_format": "csv",
        "result_type": "results",
    }

    output = apply_client_processing(result, form_data)["queries"][0]["data"]
    lines = output.strip().split("\n")

    # Pivoting the lone metric yields a "COUNT(metric)" column and a
    # "Total (Sum)" row; both lines must reflect the CSV_EXPORT config:
    # semicolons as field separators, commas as the decimal separator.
    assert lines[0] == ";COUNT(metric)"
    assert "Total (Sum);1234,56" in lines[1]
    # The dotted form is exactly what the broken implementation produced,
    # so forbid it anywhere in the output.
    assert "1234.56" not in output

View File

@@ -16,6 +16,7 @@
# under the License.
"""Unit tests for SQL Lab Streaming CSV Export Command."""
from decimal import Decimal
from unittest.mock import MagicMock, Mock, patch
import pytest
@@ -538,3 +539,196 @@ def test_null_values_handling(mocker, mock_query):
assert "1,,100" in csv_data
assert "2,test," in csv_data
assert ",," in csv_data
def test_csv_export_config_custom_separator(mocker, mock_query):
    """
    Test that streaming CSV export respects CSV_EXPORT config
    for custom separator (sep).

    This is a regression test for GitHub issue #32371.
    """
    mock_query.select_sql = "SELECT * FROM test"
    # Fake result proxy: one batch of two rows, then an empty batch so the
    # command's fetch loop terminates.
    mock_result = MagicMock()
    mock_result.keys.return_value = ["id", "name"]
    mock_result.fetchmany.side_effect = [
        [(1, "Alice"), (2, "Bob")],
        [],
    ]
    mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
    # Wire connection.execution_options(...).execute(...) -> mock_result and
    # make the connection usable as its own context manager.
    mock_connection = MagicMock()
    mock_connection.execution_options.return_value.execute.return_value = mock_result
    mock_connection.__enter__.return_value = mock_connection
    mock_connection.__exit__.return_value = None
    mock_engine = MagicMock()
    mock_engine.connect.return_value = mock_connection
    # get_sqla_engine() is entered as a context manager; __enter__ must
    # yield the engine.
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        mock_engine
    )
    # Mock the app config to use semicolon separator
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "encoding": "utf-8"},
    )
    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()
    # run() returns a callable that produces the CSV chunk generator.
    csv_generator_callable = command.run()
    generator = csv_generator_callable()
    csv_data = "".join(generator)
    # With sep=";", columns should be separated by semicolon
    assert "id;name" in csv_data
    assert "1;Alice" in csv_data
    assert "2;Bob" in csv_data
def test_csv_export_config_custom_decimal(mocker, mock_query):
    """
    Test that streaming CSV export respects CSV_EXPORT config
    for custom decimal separator.

    This is a regression test for GitHub issue #32371.
    """
    mock_query.select_sql = "SELECT * FROM test"
    # One batch of float rows, then [] so the fetch loop ends.
    mock_result = MagicMock()
    mock_result.keys.return_value = ["id", "price"]
    mock_result.fetchmany.side_effect = [
        [(1, 12.34), (2, 56.78)],
        [],
    ]
    mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
    # connection.execution_options(...).execute(...) must return the fake
    # result proxy; the connection doubles as its own context manager.
    mock_connection = MagicMock()
    mock_connection.execution_options.return_value.execute.return_value = mock_result
    mock_connection.__enter__.return_value = mock_connection
    mock_connection.__exit__.return_value = None
    mock_engine = MagicMock()
    mock_engine.connect.return_value = mock_connection
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        mock_engine
    )
    # Mock the app config to use comma as decimal separator
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
    )
    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()
    csv_generator_callable = command.run()
    generator = csv_generator_callable()
    csv_data = "".join(generator)
    # With decimal=",", float values should use comma
    assert "12,34" in csv_data
    assert "56,78" in csv_data
def test_csv_export_config_combined_sep_and_decimal(mocker, mock_query):
    """
    Test that streaming CSV export respects both sep and decimal from CSV_EXPORT.

    This is a regression test for GitHub issue #32371.
    """
    mock_query.select_sql = "SELECT * FROM test"
    # Mixed string/float rows; trailing [] terminates the fetch loop.
    mock_result = MagicMock()
    mock_result.keys.return_value = ["id", "name", "price"]
    mock_result.fetchmany.side_effect = [
        [(1, "Widget", 99.99), (2, "Gadget", 149.50)],
        [],
    ]
    mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
    # Standard engine/connection wiring: execute() yields the fake result
    # proxy and both objects act as context managers.
    mock_connection = MagicMock()
    mock_connection.execution_options.return_value.execute.return_value = mock_result
    mock_connection.__enter__.return_value = mock_connection
    mock_connection.__exit__.return_value = None
    mock_engine = MagicMock()
    mock_engine.connect.return_value = mock_connection
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        mock_engine
    )
    # Mock the app config to use European format
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
    )
    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()
    csv_generator_callable = command.run()
    generator = csv_generator_callable()
    csv_data = "".join(generator)
    # Verify header uses semicolon separator
    assert "id;name;price" in csv_data
    # Verify data uses semicolon separator and comma decimal
    assert "1;Widget;99,99" in csv_data
    # 149.50 may be rendered as "149,5" or "149,50" depending on how the
    # float is stringified — accept either form.
    assert "2;Gadget;149,5" in csv_data or "2;Gadget;149,50" in csv_data
def test_csv_export_config_custom_decimal_for_decimal_type(mocker, mock_query):
    """
    Streaming CSV export must respect the custom decimal separator for
    ``decimal.Decimal`` values too — SQLAlchemy commonly returns NUMERIC /
    DECIMAL columns as ``Decimal`` rather than ``float``.

    Regression test for GitHub issue #32371 / PR #38170 review feedback.
    """
    mock_query.select_sql = "SELECT * FROM test"
    # Rows carry Decimal (not float) values; trailing [] ends the loop.
    mock_result = MagicMock()
    mock_result.keys.return_value = ["id", "price"]
    mock_result.fetchmany.side_effect = [
        [(1, Decimal("12.34")), (2, Decimal("56.78"))],
        [],
    ]
    mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
    # Engine/connection wiring mirrors the other CSV_EXPORT tests:
    # execute() returns the fake result proxy and both mocks behave as
    # context managers.
    mock_connection = MagicMock()
    mock_connection.execution_options.return_value.execute.return_value = mock_result
    mock_connection.__enter__.return_value = mock_connection
    mock_connection.__exit__.return_value = None
    mock_engine = MagicMock()
    mock_engine.connect.return_value = mock_connection
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        mock_engine
    )
    # Patch the app config read so CSV_EXPORT supplies ";" / ",".
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
    )
    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()
    csv_generator_callable = command.run()
    generator = csv_generator_callable()
    csv_data = "".join(generator)
    # Decimal values must be formatted with the custom separator, not left
    # with the default ``.`` which would slip through a ``float``-only check.
    assert "1;12,34" in csv_data
    assert "2;56,78" in csv_data
    assert "12.34" not in csv_data
    assert "56.78" not in csv_data