mirror of
https://github.com/apache/superset.git
synced 2026-04-18 23:55:00 +00:00
fix(csv upload): Correctly casting to string numbers with floating points (e+) (#35586)
This commit is contained in:
@@ -326,6 +326,26 @@ class CSVReader(BaseDataReader):
|
||||
CSVReader._cast_single_column(df, column, dtype, kwargs)
|
||||
return df
|
||||
|
||||
@staticmethod
|
||||
def _split_types(types: dict[str, str]) -> tuple[dict[str, str], dict[str, str]]:
|
||||
"""
|
||||
Split column data types into custom and pandas-native types.
|
||||
|
||||
:param types: Dictionary mapping column names to data types
|
||||
:return: Tuple of (custom_types, pandas_types) dictionaries
|
||||
"""
|
||||
pandas_types = {
|
||||
col: dtype
|
||||
for col, dtype in types.items()
|
||||
if dtype in ("str", "object", "string")
|
||||
}
|
||||
custom_types = {
|
||||
col: dtype
|
||||
for col, dtype in types.items()
|
||||
if dtype not in ("str", "object", "string")
|
||||
}
|
||||
return custom_types, pandas_types
|
||||
|
||||
@staticmethod
|
||||
def _read_csv( # noqa: C901
|
||||
file: FileStorage,
|
||||
@@ -357,7 +377,17 @@ class CSVReader(BaseDataReader):
|
||||
kwargs["low_memory"] = False
|
||||
|
||||
try:
|
||||
types = kwargs.pop("dtype", None)
|
||||
types = None
|
||||
if "dtype" in kwargs and kwargs["dtype"]:
|
||||
custom_types, pandas_types = CSVReader._split_types(kwargs["dtype"])
|
||||
if pandas_types:
|
||||
kwargs["dtype"] = pandas_types
|
||||
else:
|
||||
kwargs.pop("dtype", None)
|
||||
|
||||
# Custom types for our manual casting
|
||||
types = custom_types if custom_types else None
|
||||
|
||||
if "chunksize" in kwargs:
|
||||
chunks = []
|
||||
total_rows = 0
|
||||
|
||||
Reference in New Issue
Block a user