mirror of
https://github.com/apache/superset.git
synced 2026-04-30 21:44:40 +00:00
Compare commits
2 Commits
fix/check-
...
fix-32371-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
13a4ca9699 | ||
|
|
07b0be630b |
@@ -389,7 +389,9 @@ def apply_client_processing( # noqa: C901
|
||||
query["data"] = processed_df.to_dict()
|
||||
elif query["result_format"] == ChartDataResultFormat.CSV:
|
||||
buf = StringIO()
|
||||
processed_df.to_csv(buf, index=show_default_index)
|
||||
# Apply CSV_EXPORT config for consistent CSV formatting
|
||||
csv_export_config = current_app.config.get("CSV_EXPORT", {})
|
||||
processed_df.to_csv(buf, index=show_default_index, **csv_export_config)
|
||||
buf.seek(0)
|
||||
query["data"] = buf.getvalue()
|
||||
|
||||
|
||||
@@ -24,6 +24,8 @@ import logging
|
||||
import time
|
||||
from abc import abstractmethod
|
||||
from contextlib import contextmanager
|
||||
from decimal import Decimal
|
||||
from numbers import Real
|
||||
from typing import Any, Callable, Generator
|
||||
|
||||
from flask import current_app as app, g, has_app_context
|
||||
@@ -107,16 +109,55 @@ class BaseStreamingCSVExportCommand(BaseCommand):
|
||||
buffer.truncate()
|
||||
return header_data, total_bytes
|
||||
|
||||
def _format_row_values(
|
||||
self, row: tuple[Any, ...], decimal_separator: str | None
|
||||
) -> list[Any]:
|
||||
"""
|
||||
Format row values, applying custom decimal separator if specified.
|
||||
|
||||
Args:
|
||||
row: Database row as a tuple
|
||||
decimal_separator: Custom decimal separator (e.g., ",") or None
|
||||
|
||||
Returns:
|
||||
List of formatted values
|
||||
"""
|
||||
if not decimal_separator or decimal_separator == ".":
|
||||
return list(row)
|
||||
|
||||
formatted: list[Any] = []
|
||||
for value in row:
|
||||
# Apply the custom decimal separator to any real numeric value
|
||||
# (float, decimal.Decimal, numpy numeric types, ...). Booleans are
|
||||
# technically a numeric type in Python but should never be rewritten
|
||||
# as numbers in CSV output.
|
||||
if isinstance(value, bool):
|
||||
formatted.append(value)
|
||||
elif isinstance(value, (float, Decimal, Real)):
|
||||
# Format numeric values with custom decimal separator
|
||||
formatted.append(str(value).replace(".", decimal_separator))
|
||||
else:
|
||||
formatted.append(value)
|
||||
return formatted
|
||||
|
||||
def _process_rows(
|
||||
self,
|
||||
result_proxy: Any,
|
||||
csv_writer: Any,
|
||||
buffer: io.StringIO,
|
||||
limit: int | None,
|
||||
decimal_separator: str | None = None,
|
||||
) -> Generator[tuple[str, int, int], None, None]:
|
||||
"""
|
||||
Process database rows and yield CSV data chunks.
|
||||
|
||||
Args:
|
||||
result_proxy: SQLAlchemy result proxy
|
||||
csv_writer: CSV writer instance
|
||||
buffer: StringIO buffer for CSV data
|
||||
limit: Maximum number of rows to process, or None for unlimited
|
||||
decimal_separator: Custom decimal separator (e.g., ",") or None
|
||||
|
||||
Yields tuples of (data_chunk, row_count, byte_count).
|
||||
"""
|
||||
row_count = 0
|
||||
@@ -128,7 +169,9 @@ class BaseStreamingCSVExportCommand(BaseCommand):
|
||||
if limit is not None and row_count >= limit:
|
||||
break
|
||||
|
||||
csv_writer.writerow(row)
|
||||
# Format values with custom decimal separator if needed
|
||||
formatted_row = self._format_row_values(row, decimal_separator)
|
||||
csv_writer.writerow(formatted_row)
|
||||
row_count += 1
|
||||
|
||||
# Check buffer size and flush if needed
|
||||
@@ -156,6 +199,11 @@ class BaseStreamingCSVExportCommand(BaseCommand):
|
||||
start_time = time.time()
|
||||
total_bytes = 0
|
||||
|
||||
# Get CSV export configuration
|
||||
csv_export_config = app.config.get("CSV_EXPORT", {})
|
||||
delimiter = csv_export_config.get("sep", ",")
|
||||
decimal_separator = csv_export_config.get("decimal", ".")
|
||||
|
||||
with db.session() as session:
|
||||
# Merge database to prevent DetachedInstanceError
|
||||
merged_database = session.merge(database)
|
||||
@@ -170,8 +218,11 @@ class BaseStreamingCSVExportCommand(BaseCommand):
|
||||
columns = list(result_proxy.keys())
|
||||
|
||||
# Use StringIO with csv.writer for proper escaping
|
||||
# Apply delimiter from CSV_EXPORT config
|
||||
buffer = io.StringIO()
|
||||
csv_writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
|
||||
csv_writer = csv.writer(
|
||||
buffer, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL
|
||||
)
|
||||
|
||||
# Write CSV header
|
||||
header_data, header_bytes = self._write_csv_header(
|
||||
@@ -183,7 +234,7 @@ class BaseStreamingCSVExportCommand(BaseCommand):
|
||||
# Process rows and yield chunks
|
||||
row_count = 0
|
||||
for data_chunk, rows_processed, chunk_bytes in self._process_rows(
|
||||
result_proxy, csv_writer, buffer, limit
|
||||
result_proxy, csv_writer, buffer, limit, decimal_separator
|
||||
):
|
||||
total_bytes += chunk_bytes
|
||||
row_count = rows_processed
|
||||
|
||||
@@ -2788,3 +2788,114 @@ def test_apply_client_processing_csv_format_default_na_behavior():
|
||||
assert (
|
||||
"Alice," in lines[2]
|
||||
) # Second data row should have empty last_name (NA converted to null)
|
||||
|
||||
|
||||
@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
|
||||
def test_apply_client_processing_csv_format_custom_separator():
|
||||
"""
|
||||
Test that apply_client_processing respects CSV_EXPORT config
|
||||
for custom separator and decimal character.
|
||||
|
||||
This is a regression test for GitHub issue #32371.
|
||||
"""
|
||||
# CSV data with numeric values
|
||||
csv_data = "name,value\nAlice,1.5\nBob,2.75"
|
||||
|
||||
result = {
|
||||
"queries": [
|
||||
{
|
||||
"result_format": ChartDataResultFormat.CSV,
|
||||
"data": csv_data,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
form_data = {
|
||||
"datasource": "1__table",
|
||||
"viz_type": "table",
|
||||
"slice_id": 1,
|
||||
"url_params": {},
|
||||
"metrics": [],
|
||||
"groupby": [],
|
||||
"columns": ["name", "value"],
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "csv",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
processed_result = apply_client_processing(result, form_data)
|
||||
|
||||
output_data = processed_result["queries"][0]["data"]
|
||||
lines = output_data.strip().split("\n")
|
||||
|
||||
# With sep=";", columns should be separated by semicolon
|
||||
assert lines[0] == "name;value"
|
||||
# With decimal=",", decimal values must use comma as separator.
|
||||
# Asserting the exact formatted value ensures a regression that drops
|
||||
# the `decimal` option (so floats keep a dot) will be caught.
|
||||
assert "Alice;1,5" in lines[1]
|
||||
assert "Bob;2,75" in lines[2]
|
||||
# Guard explicitly against the dot form slipping through.
|
||||
assert "1.5" not in lines[1]
|
||||
assert "2.75" not in lines[2]
|
||||
|
||||
|
||||
@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
|
||||
def test_apply_client_processing_csv_pivot_table_custom_separator():
|
||||
"""
|
||||
Test that apply_client_processing respects CSV_EXPORT config
|
||||
for pivot table exports with custom separator and decimal character.
|
||||
|
||||
This is a regression test for GitHub issue #32371 - specifically for
|
||||
pivoted CSV exports which were not respecting the CSV_EXPORT config.
|
||||
"""
|
||||
# CSV data with a numeric metric
|
||||
csv_data = "COUNT(metric)\n1234.56"
|
||||
|
||||
result = {
|
||||
"queries": [
|
||||
{
|
||||
"result_format": ChartDataResultFormat.CSV,
|
||||
"data": csv_data,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
form_data = {
|
||||
"datasource": "1__table",
|
||||
"viz_type": "pivot_table_v2",
|
||||
"slice_id": 1,
|
||||
"url_params": {},
|
||||
"groupbyColumns": [],
|
||||
"groupbyRows": [],
|
||||
"metrics": [
|
||||
{
|
||||
"aggregate": "COUNT",
|
||||
"column": {"column_name": "metric"},
|
||||
"expressionType": "SIMPLE",
|
||||
"label": "COUNT(metric)",
|
||||
}
|
||||
],
|
||||
"metricsLayout": "COLUMNS",
|
||||
"aggregateFunction": "Sum",
|
||||
"extra_form_data": {},
|
||||
"force": False,
|
||||
"result_format": "csv",
|
||||
"result_type": "results",
|
||||
}
|
||||
|
||||
processed_result = apply_client_processing(result, form_data)
|
||||
|
||||
output_data = processed_result["queries"][0]["data"]
|
||||
lines = output_data.strip().split("\n")
|
||||
|
||||
# After pivoting a single metric with no groupby rows/columns, the
|
||||
# CSV for the "COUNT(metric)" column and "Total (Sum)" row should
|
||||
# reflect the CSV_EXPORT config: semicolons as field separators and
|
||||
# commas as the decimal separator.
|
||||
assert lines[0] == ";COUNT(metric)"
|
||||
assert "Total (Sum);1234,56" in lines[1]
|
||||
# Guard explicitly against the dot form slipping through, which is
|
||||
# what the previous (broken) implementation produced.
|
||||
assert "1234.56" not in output_data
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
# under the License.
|
||||
"""Unit tests for SQL Lab Streaming CSV Export Command."""
|
||||
|
||||
from decimal import Decimal
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
@@ -538,3 +539,196 @@ def test_null_values_handling(mocker, mock_query):
|
||||
assert "1,,100" in csv_data
|
||||
assert "2,test," in csv_data
|
||||
assert ",," in csv_data
|
||||
|
||||
|
||||
def test_csv_export_config_custom_separator(mocker, mock_query):
|
||||
"""
|
||||
Test that streaming CSV export respects CSV_EXPORT config
|
||||
for custom separator (sep).
|
||||
|
||||
This is a regression test for GitHub issue #32371.
|
||||
"""
|
||||
mock_query.select_sql = "SELECT * FROM test"
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.keys.return_value = ["id", "name"]
|
||||
mock_result.fetchmany.side_effect = [
|
||||
[(1, "Alice"), (2, "Bob")],
|
||||
[],
|
||||
]
|
||||
|
||||
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
|
||||
|
||||
mock_connection = MagicMock()
|
||||
mock_connection.execution_options.return_value.execute.return_value = mock_result
|
||||
mock_connection.__enter__.return_value = mock_connection
|
||||
mock_connection.__exit__.return_value = None
|
||||
|
||||
mock_engine = MagicMock()
|
||||
mock_engine.connect.return_value = mock_connection
|
||||
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
|
||||
mock_engine
|
||||
)
|
||||
|
||||
# Mock the app config to use semicolon separator
|
||||
mocker.patch(
|
||||
"superset.commands.streaming_export.base.app.config.get",
|
||||
return_value={"sep": ";", "encoding": "utf-8"},
|
||||
)
|
||||
|
||||
command = StreamingSqlResultExportCommand("test_client_123")
|
||||
command.validate()
|
||||
|
||||
csv_generator_callable = command.run()
|
||||
generator = csv_generator_callable()
|
||||
csv_data = "".join(generator)
|
||||
|
||||
# With sep=";", columns should be separated by semicolon
|
||||
assert "id;name" in csv_data
|
||||
assert "1;Alice" in csv_data
|
||||
assert "2;Bob" in csv_data
|
||||
|
||||
|
||||
def test_csv_export_config_custom_decimal(mocker, mock_query):
|
||||
"""
|
||||
Test that streaming CSV export respects CSV_EXPORT config
|
||||
for custom decimal separator.
|
||||
|
||||
This is a regression test for GitHub issue #32371.
|
||||
"""
|
||||
mock_query.select_sql = "SELECT * FROM test"
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.keys.return_value = ["id", "price"]
|
||||
mock_result.fetchmany.side_effect = [
|
||||
[(1, 12.34), (2, 56.78)],
|
||||
[],
|
||||
]
|
||||
|
||||
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
|
||||
|
||||
mock_connection = MagicMock()
|
||||
mock_connection.execution_options.return_value.execute.return_value = mock_result
|
||||
mock_connection.__enter__.return_value = mock_connection
|
||||
mock_connection.__exit__.return_value = None
|
||||
|
||||
mock_engine = MagicMock()
|
||||
mock_engine.connect.return_value = mock_connection
|
||||
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
|
||||
mock_engine
|
||||
)
|
||||
|
||||
# Mock the app config to use comma as decimal separator
|
||||
mocker.patch(
|
||||
"superset.commands.streaming_export.base.app.config.get",
|
||||
return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
|
||||
)
|
||||
|
||||
command = StreamingSqlResultExportCommand("test_client_123")
|
||||
command.validate()
|
||||
|
||||
csv_generator_callable = command.run()
|
||||
generator = csv_generator_callable()
|
||||
csv_data = "".join(generator)
|
||||
|
||||
# With decimal=",", float values should use comma
|
||||
assert "12,34" in csv_data
|
||||
assert "56,78" in csv_data
|
||||
|
||||
|
||||
def test_csv_export_config_combined_sep_and_decimal(mocker, mock_query):
|
||||
"""
|
||||
Test that streaming CSV export respects both sep and decimal from CSV_EXPORT.
|
||||
|
||||
This is a regression test for GitHub issue #32371.
|
||||
"""
|
||||
mock_query.select_sql = "SELECT * FROM test"
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.keys.return_value = ["id", "name", "price"]
|
||||
mock_result.fetchmany.side_effect = [
|
||||
[(1, "Widget", 99.99), (2, "Gadget", 149.50)],
|
||||
[],
|
||||
]
|
||||
|
||||
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
|
||||
|
||||
mock_connection = MagicMock()
|
||||
mock_connection.execution_options.return_value.execute.return_value = mock_result
|
||||
mock_connection.__enter__.return_value = mock_connection
|
||||
mock_connection.__exit__.return_value = None
|
||||
|
||||
mock_engine = MagicMock()
|
||||
mock_engine.connect.return_value = mock_connection
|
||||
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
|
||||
mock_engine
|
||||
)
|
||||
|
||||
# Mock the app config to use European format
|
||||
mocker.patch(
|
||||
"superset.commands.streaming_export.base.app.config.get",
|
||||
return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
|
||||
)
|
||||
|
||||
command = StreamingSqlResultExportCommand("test_client_123")
|
||||
command.validate()
|
||||
|
||||
csv_generator_callable = command.run()
|
||||
generator = csv_generator_callable()
|
||||
csv_data = "".join(generator)
|
||||
|
||||
# Verify header uses semicolon separator
|
||||
assert "id;name;price" in csv_data
|
||||
# Verify data uses semicolon separator and comma decimal
|
||||
assert "1;Widget;99,99" in csv_data
|
||||
assert "2;Gadget;149,5" in csv_data or "2;Gadget;149,50" in csv_data
|
||||
|
||||
|
||||
def test_csv_export_config_custom_decimal_for_decimal_type(mocker, mock_query):
|
||||
"""
|
||||
Streaming CSV export must respect the custom decimal separator for
|
||||
``decimal.Decimal`` values too — SQLAlchemy commonly returns NUMERIC /
|
||||
DECIMAL columns as ``Decimal`` rather than ``float``.
|
||||
|
||||
Regression test for GitHub issue #32371 / PR #38170 review feedback.
|
||||
"""
|
||||
mock_query.select_sql = "SELECT * FROM test"
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.keys.return_value = ["id", "price"]
|
||||
mock_result.fetchmany.side_effect = [
|
||||
[(1, Decimal("12.34")), (2, Decimal("56.78"))],
|
||||
[],
|
||||
]
|
||||
|
||||
mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
|
||||
|
||||
mock_connection = MagicMock()
|
||||
mock_connection.execution_options.return_value.execute.return_value = mock_result
|
||||
mock_connection.__enter__.return_value = mock_connection
|
||||
mock_connection.__exit__.return_value = None
|
||||
|
||||
mock_engine = MagicMock()
|
||||
mock_engine.connect.return_value = mock_connection
|
||||
mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
|
||||
mock_engine
|
||||
)
|
||||
|
||||
mocker.patch(
|
||||
"superset.commands.streaming_export.base.app.config.get",
|
||||
return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
|
||||
)
|
||||
|
||||
command = StreamingSqlResultExportCommand("test_client_123")
|
||||
command.validate()
|
||||
|
||||
csv_generator_callable = command.run()
|
||||
generator = csv_generator_callable()
|
||||
csv_data = "".join(generator)
|
||||
|
||||
# Decimal values must be formatted with the custom separator, not left
|
||||
# with the default ``.`` which would slip through a ``float``-only check.
|
||||
assert "1;12,34" in csv_data
|
||||
assert "2;56,78" in csv_data
|
||||
assert "12.34" not in csv_data
|
||||
assert "56.78" not in csv_data
|
||||
|
||||
Reference in New Issue
Block a user