diff --git a/superset/utils/excel.py b/superset/utils/excel.py index d34446832a8..46e1a1f071a 100644 --- a/superset/utils/excel.py +++ b/superset/utils/excel.py @@ -56,10 +56,24 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any: def apply_column_types( df: pd.DataFrame, column_types: list[GenericDataType] ) -> pd.DataFrame: + """ + Apply the column types to the dataframe, in place, to prepare it for an Excel export + + :param df: The dataframe to apply the column types to + :param column_types: The generic data type of each column, in column order + :return: The dataframe with the column types applied + """ for column, column_type in zip(df.columns, column_types, strict=False): if column_type == GenericDataType.NUMERIC: try: df[column] = pd.to_numeric(df[column]) + # values whose magnitude exceeds 10**15 are converted to strings: + # Excel stores at most 15 significant digits, so export them as text + df[column] = df[column].apply( + lambda x: str(x) + if isinstance(x, (int, float)) and abs(x) > 10**15 + else x + ) except ValueError: df[column] = df[column].astype(str) elif pd.api.types.is_datetime64tz_dtype(df[column]): diff --git a/tests/unit_tests/utils/excel_tests.py b/tests/unit_tests/utils/excel_tests.py index deb6d3d0b4e..b07fe7a4f0d 100644 --- a/tests/unit_tests/utils/excel_tests.py +++ b/tests/unit_tests/utils/excel_tests.py @@ -105,3 +105,27 @@ def test_column_data_types_with_failing_conversion(): assert not is_numeric_dtype(df["col1"]) assert not is_numeric_dtype(df["col2"]) assert not is_numeric_dtype(df["col3"]) + + +def test_column_data_types_with_large_numeric_values(): + df = pd.DataFrame( + { + "big_number": [ + 10**14, + 999999999999999, + 10**15 + 1, + 10**16, + 1100108628127863, + 2**54, + ], + } + ) + apply_column_types(df, [GenericDataType.NUMERIC]) + assert df["big_number"].tolist() == [ + 100000000000000, + 999999999999999, + "1000000000000001", + "10000000000000000", + "1100108628127863", + "18014398509481984", + ]