fix(csv upload): Correctly cast numbers with floating-point exponents (e+) to strings (#35586)

This commit is contained in:
Luiz Otavio
2025-10-10 15:01:03 -03:00
committed by GitHub
parent de0bd37a66
commit 17ebbdd966
2 changed files with 58 additions and 1 deletion

View File

@@ -326,6 +326,26 @@ class CSVReader(BaseDataReader):
CSVReader._cast_single_column(df, column, dtype, kwargs)
return df
@staticmethod
def _split_types(types: dict[str, str]) -> tuple[dict[str, str], dict[str, str]]:
"""
Split column data types into custom and pandas-native types.
:param types: Dictionary mapping column names to data types
:return: Tuple of (custom_types, pandas_types) dictionaries
"""
pandas_types = {
col: dtype
for col, dtype in types.items()
if dtype in ("str", "object", "string")
}
custom_types = {
col: dtype
for col, dtype in types.items()
if dtype not in ("str", "object", "string")
}
return custom_types, pandas_types
@staticmethod
def _read_csv( # noqa: C901
file: FileStorage,
@@ -357,7 +377,17 @@ class CSVReader(BaseDataReader):
kwargs["low_memory"] = False
try:
types = kwargs.pop("dtype", None)
types = None
if "dtype" in kwargs and kwargs["dtype"]:
custom_types, pandas_types = CSVReader._split_types(kwargs["dtype"])
if pandas_types:
kwargs["dtype"] = pandas_types
else:
kwargs.pop("dtype", None)
# Custom types for our manual casting
types = custom_types if custom_types else None
if "chunksize" in kwargs:
chunks = []
total_rows = 0