feat: make sure to quote formulas on Excel export (#31166)

This commit is contained in:
Beto Dealmeida
2024-11-26 18:16:44 -05:00
committed by GitHub
parent 529aed5da1
commit 45668e31fc
2 changed files with 34 additions and 0 deletions

View File

@@ -22,9 +22,30 @@ import pandas as pd
from superset.utils.core import GenericDataType
def quote_formulas(df: pd.DataFrame) -> pd.DataFrame:
    """
    Make sure to quote any formulas for security reasons.

    Every string cell whose first character is ``=``, ``+``, ``-`` or ``@`` is
    prefixed with a single quote so that it is exported as plain text instead
    of something a spreadsheet could evaluate. Non-string values and empty
    strings are left as they are.

    The input frame is never modified; a new DataFrame is returned. Columns
    are processed by position, so frames with duplicate column labels are
    handled too, and both ``object`` and pandas ``string`` dtype columns are
    inspected.

    :param df: the DataFrame about to be exported
    :return: a DataFrame with the same shape and labels, formulas quoted
    """
    formula_prefixes = ("=", "+", "-", "@")

    def quote(value: object) -> object:
        # ``str.startswith`` with a tuple is False for the empty string, so no
        # separate length check is needed.
        if isinstance(value, str) and value.startswith(formula_prefixes):
            return f"'{value}"
        return value

    # A shallow copy is enough: ``isetitem`` always inserts a new array rather
    # than writing into the existing one, so the caller's data stays intact
    # while untouched columns are not duplicated in memory.
    quoted = df.copy(deep=False)

    for position, dtype in enumerate(df.dtypes):
        holds_text = pd.api.types.is_object_dtype(
            dtype
        ) or pd.api.types.is_string_dtype(dtype)
        if not holds_text:
            continue
        # Positional access keeps this correct when column labels repeat.
        quoted.isetitem(position, df.iloc[:, position].apply(quote))

    return quoted
def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
output = io.BytesIO()
# make sure formulas are quoted, to prevent malicious injections
df = quote_formulas(df)
# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
df.to_excel(writer, **kwargs)

View File

@@ -34,6 +34,19 @@ def test_timezone_conversion() -> None:
assert pd.read_excel(contents)["dt"][0] == "2023-01-01 00:00:00+00:00"
def test_quote_formulas() -> None:
    """
    Check that formula-like strings come back quoted after an Excel export.
    """
    source = pd.DataFrame({"formula": ["=SUM(A1:A2)", "normal", "@SUM(A1:A2)"]})

    # Export to an in-memory workbook, then read it back to inspect the cells.
    exported = df_to_excel(source)
    roundtrip = pd.read_excel(exported)

    expected = [
        "'=SUM(A1:A2)",
        "normal",
        "'@SUM(A1:A2)",
    ]
    assert roundtrip["formula"].tolist() == expected
def test_column_data_types_with_one_numeric_column():
df = pd.DataFrame(
{