mirror of
https://github.com/apache/superset.git
synced 2026-04-18 07:35:09 +00:00
fix(csv-upload): log detailed errors during chunk concatenation for debugging (#36108)
This commit is contained in:
committed by
GitHub
parent
fb7d0e0e3d
commit
9d06a5888f
@@ -1352,3 +1352,68 @@ def test_csv_reader_progressive_encoding_detection():
|
||||
|
||||
# Test that the method handles the sample sizes properly
|
||||
assert all(size > 0 for size in read_sizes), "All sample sizes should be positive"
|
||||
|
||||
|
||||
def test_csv_reader_chunk_concatenation_error_logging():
    """Verify a pd.concat failure on the chunked read path is re-raised as
    DatabaseUploadFailed with the underlying error text preserved."""
    from unittest.mock import patch

    # Build a CSV with >100k rows so the reader takes the chunked code path.
    rows = [["col1", "col2"]]
    rows.extend([f"val{idx}", str(idx)] for idx in range(100001))

    reader = CSVReader(options=CSVReaderOptions())

    # Force the concatenation of chunks to blow up.
    concat_target = "superset.commands.database.uploaders.csv_reader.pd.concat"
    with patch(concat_target) as concat_mock:
        concat_mock.side_effect = ValueError(
            "Cannot concatenate chunks with different dtypes"
        )

        with pytest.raises(DatabaseUploadFailed) as raised:
            reader.file_to_dataframe(create_csv_file(rows))

        # The wrapping exception must still carry the original concat message.
        assert "Cannot concatenate chunks with different dtypes" in str(raised.value)

        # concat being invoked proves chunking actually happened.
        assert concat_mock.called
|
||||
|
||||
|
||||
def test_csv_reader_chunk_concatenation_error_warning(caplog):
    """Verify a pd.concat failure during chunked reads emits the expected
    warning log records before the error is re-raised."""
    import logging
    from unittest.mock import patch

    # >100k rows forces the reader onto the chunked path where concat runs.
    rows = [["col1", "col2"]]
    rows.extend([f"val{idx}", str(idx)] for idx in range(100001))

    reader = CSVReader(options=CSVReaderOptions())

    concat_target = "superset.commands.database.uploaders.csv_reader.pd.concat"
    with patch(concat_target) as concat_mock:
        concat_mock.side_effect = ValueError(
            "Cannot concatenate chunks with different dtypes"
        )

        with caplog.at_level(logging.WARNING), pytest.raises(DatabaseUploadFailed):
            reader.file_to_dataframe(create_csv_file(rows))

        # Both diagnostic fragments should appear in the captured warnings.
        messages = [record.message for record in caplog.records]
        assert any("Error concatenating CSV chunks" in msg for msg in messages)
        assert any(
            "inconsistent date parsing across chunks" in msg for msg in messages
        )
|
||||
|
||||
Reference in New Issue
Block a user