fix(csv upload): Correctly cast numbers with floating-point exponents (e+) to strings (#35586)

This commit is contained in:
Luiz Otavio
2025-10-10 15:01:03 -03:00
committed by GitHub
parent de0bd37a66
commit 17ebbdd966
2 changed files with 58 additions and 1 deletion

View File

@@ -326,6 +326,26 @@ class CSVReader(BaseDataReader):
CSVReader._cast_single_column(df, column, dtype, kwargs)
return df
@staticmethod
def _split_types(types: dict[str, str]) -> tuple[dict[str, str], dict[str, str]]:
"""
Split column data types into custom and pandas-native types.
:param types: Dictionary mapping column names to data types
:return: Tuple of (custom_types, pandas_types) dictionaries
"""
pandas_types = {
col: dtype
for col, dtype in types.items()
if dtype in ("str", "object", "string")
}
custom_types = {
col: dtype
for col, dtype in types.items()
if dtype not in ("str", "object", "string")
}
return custom_types, pandas_types
@staticmethod
def _read_csv( # noqa: C901
file: FileStorage,
@@ -357,7 +377,17 @@ class CSVReader(BaseDataReader):
kwargs["low_memory"] = False
try:
types = kwargs.pop("dtype", None)
types = None
if "dtype" in kwargs and kwargs["dtype"]:
custom_types, pandas_types = CSVReader._split_types(kwargs["dtype"])
if pandas_types:
kwargs["dtype"] = pandas_types
else:
kwargs.pop("dtype", None)
# Custom types for our manual casting
types = custom_types if custom_types else None
if "chunksize" in kwargs:
chunks = []
total_rows = 0