mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
feat: make sure to quote formulas on Excel export (#31166)
This commit is contained in:
@@ -22,9 +22,30 @@ import pandas as pd
|
||||
from superset.utils.core import GenericDataType
|
||||
|
||||
|
||||
def quote_formulas(df: pd.DataFrame) -> pd.DataFrame:
    """
    Make sure to quote any formulas for security reasons.

    Every string cell whose first character is one of ``=``, ``+``, ``-`` or
    ``@`` is prefixed with a single quote. Non-string cells (numbers, ``None``,
    missing values) and empty strings are passed through unchanged.

    Both ``object`` columns and columns using the dedicated pandas ``string``
    dtype are scanned; other dtypes cannot hold strings cell by cell and are
    left alone.

    :param df: the frame about to be exported; it is not modified
    :returns: a new frame with the same columns in which formula-like strings
        are quoted. Processed columns are rebuilt with ``Series.apply``, so
        pandas may re-infer their dtype.
    """
    formula_prefixes = ("=", "+", "-", "@")

    def _quote(value: object) -> object:
        # ``str.startswith`` with a tuple is False for "", so empty strings
        # need no separate length check.
        if isinstance(value, str) and value.startswith(formula_prefixes):
            return f"'{value}"
        return value

    # Work on a shallow copy: replacing whole columns below rebinds them on
    # the copy only, so the caller's frame keeps its original values.
    quoted = df.copy(deep=False)

    for col, dtype in df.dtypes.items():
        holds_text = pd.api.types.is_object_dtype(dtype) or isinstance(
            dtype, pd.StringDtype
        )
        if holds_text:
            quoted[col] = df[col].apply(_quote)

    return quoted
|
||||
|
||||
|
||||
def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
|
||||
output = io.BytesIO()
|
||||
|
||||
# make sure formulas are quoted, to prevent malicious injections
|
||||
df = quote_formulas(df)
|
||||
|
||||
# pylint: disable=abstract-class-instantiated
|
||||
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
|
||||
df.to_excel(writer, **kwargs)
|
||||
|
||||
@@ -34,6 +34,19 @@ def test_timezone_conversion() -> None:
|
||||
assert pd.read_excel(contents)["dt"][0] == "2023-01-01 00:00:00+00:00"
|
||||
|
||||
|
||||
def test_quote_formulas() -> None:
    """
    Formula-like strings must come back quoted after an Excel round trip.
    """
    source = pd.DataFrame({"formula": ["=SUM(A1:A2)", "normal", "@SUM(A1:A2)"]})
    # Cells starting with "=" or "@" gain a leading quote; plain text is kept.
    expected = ["'=SUM(A1:A2)", "normal", "'@SUM(A1:A2)"]

    exported = pd.read_excel(df_to_excel(source))

    assert exported["formula"].tolist() == expected
|
||||
|
||||
|
||||
def test_column_data_types_with_one_numeric_column():
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user