refactor: upload data unification, less permissions and less endpoints (#31959)

2026-04-20 00:24:38 +00:00 · 2025-01-28 11:09:55 +00:00
parent 09c1987de4
commit 1b375b715c
14 changed files with 312 additions and 489 deletions
--- a/superset/databases/schemas.py
+++ b/superset/databases/schemas.py
@@ -45,6 +45,7 @@ from superset.commands.database.ssh_tunnel.exceptions import (
    SSHTunnelInvalidCredentials,
    SSHTunnelMissingCredentials,
 )
+from superset.commands.database.uploaders.base import UploadFileType
 from superset.constants import PASSWORD_MASK
 from superset.databases.types import (  # pylint:disable=unused-import
    EncryptedDict,  # noqa: F401
@@ -1081,20 +1082,22 @@ class DelimitedListField(fields.List):
            ) from exc


-class BaseUploadFilePostSchema(Schema):
-    _extension_config_key = ""
-
+class BaseUploadFilePostSchemaMixin(Schema):
    @validates("file")
    def validate_file_extension(self, file: FileStorage) -> None:
-        allowed_extensions = current_app.config["ALLOWED_EXTENSIONS"].intersection(
-            current_app.config[self._extension_config_key]
-        )
+        allowed_extensions = current_app.config["ALLOWED_EXTENSIONS"]
        file_suffix = Path(file.filename).suffix
        if not file_suffix or file_suffix[1:] not in allowed_extensions:
            raise ValidationError([_("File extension is not allowed.")])


-class BaseUploadPostSchema(BaseUploadFilePostSchema):
+class UploadPostSchema(BaseUploadFilePostSchemaMixin):
+    type = fields.Enum(
+        UploadFileType,
+        required=True,
+        by_value=True,
+        metadata={"description": "File type to upload"},
+    )
    already_exists = fields.String(
        load_default="fail",
        validate=OneOf(choices=("fail", "replace", "append")),
@@ -1123,43 +1126,26 @@ class BaseUploadPostSchema(BaseUploadFilePostSchema):
        metadata={"description": "The name of the table to be created/appended"},
    )

-
-class ColumnarUploadPostSchema(BaseUploadPostSchema):
-    """
-    Schema for Columnar Upload
-    """
-
-    _extension_config_key = "COLUMNAR_EXTENSIONS"
-
+    # ------------
+    # CSV Schema
+    # ------------
    file = fields.Raw(
        required=True,
        metadata={
-            "description": "The Columnar file to upload",
-            "type": "string",
-            "format": "binary",
-        },
-    )
-
-
-class CSVUploadPostSchema(BaseUploadPostSchema):
-    """
-    Schema for CSV Upload
-    """
-
-    _extension_config_key = "CSV_EXTENSIONS"
-
-    file = fields.Raw(
-        required=True,
-        metadata={
-            "description": "The CSV file to upload",
+            "description": "The file to upload",
            "type": "string",
            "format": "text/csv",
        },
    )
-    delimiter = fields.String(metadata={"description": "The delimiter of the CSV file"})
+    delimiter = fields.String(
+        metadata={
+            "description": "[CSV only] The character used to separate values in the CSV"
+            " file (e.g., a comma, semicolon, or tab)."
+        }
+    )
    column_data_types = fields.String(
        metadata={
-            "description": "A dictionary with column names and "
+            "description": "[CSV only] A dictionary with column names and "
            "their data types if you need to change "
            "the defaults. Example: {'user_id':'int'}. "
            "Check Python Pandas library for supported data types"
@@ -1167,57 +1153,69 @@ class CSVUploadPostSchema(BaseUploadPostSchema):
    )
    day_first = fields.Boolean(
        metadata={
-            "description": "DD/MM format dates, international and European format"
+            "description": "[CSV only] DD/MM format dates, international and European"
+            " format"
        }
    )
    skip_blank_lines = fields.Boolean(
-        metadata={"description": "Skip blank lines in the CSV file."}
+        metadata={"description": "[CSV only] Skip blank lines in the CSV file."}
    )
    skip_initial_space = fields.Boolean(
-        metadata={"description": "Skip spaces after delimiter."}
+        metadata={"description": "[CSV only] Skip spaces after delimiter."}
    )
    column_dates = DelimitedListField(
        fields.String(),
        metadata={
-            "description": "A list of column names that should be "
+            "description": "[CSV and Excel only] A list of column names that should be "
            "parsed as dates. Example: date,timestamp"
        },
    )
    decimal_character = fields.String(
        metadata={
-            "description": "Character to recognize as decimal point. Default is '.'"
+            "description": "[CSV and Excel only] Character to recognize as decimal"
+            " point. Default is '.'"
        }
    )
    header_row = fields.Integer(
        metadata={
-            "description": "Row containing the headers to use as column names"
-            "(0 is first line of data). Leave empty if there is no header row."
+            "description": "[CSV and Excel only] Row containing the headers to use as"
+            " column names (0 is first line of data). Leave empty if"
+            " there is no header row."
        }
    )
    index_column = fields.String(
        metadata={
-            "description": "Column to use as the row labels of the dataframe. "
-            "Leave empty if no index column"
+            "description": "[CSV and Excel only] Column to use as the row labels of the"
+            " dataframe. Leave empty if no index column"
        }
    )
    null_values = DelimitedListField(
        fields.String(),
        metadata={
-            "description": "A list of strings that should be treated as null. "
-            "Examples: '' for empty strings, 'None', 'N/A',"
-            "Warning: Hive database supports only a single value"
+            "description": "[CSV and Excel only] A list of strings that should be "
+            "treated as null. Examples: '' for empty strings, 'None',"
+            " 'N/A', Warning: Hive database supports only a single value"
        },
    )
    rows_to_read = fields.Integer(
        metadata={
-            "description": "Number of rows to read from the file. "
+            "description": "[CSV and Excel only] Number of rows to read from the file. "
            "If None, reads all rows."
        },
        allow_none=True,
        validate=Range(min=1),
    )
    skip_rows = fields.Integer(
-        metadata={"description": "Number of rows to skip at start of file."}
+        metadata={
+            "description": "[CSV and Excel only] Number of rows to skip at start"
+            " of file."
+        }
+    )
+    sheet_name = fields.String(
+        metadata={
+            "description": "[Excel only] Strings used for sheet names "
+            "(default is the first sheet)."
+        }
     )

    @post_load
@@ -1234,79 +1232,17 @@ class CSVUploadPostSchema(BaseUploadPostSchema):
        return data


-class ExcelUploadPostSchema(BaseUploadPostSchema):
+class UploadFileMetadataPostSchema(BaseUploadFilePostSchemaMixin):
    """
-    Schema for Excel Upload
+    Schema for Upload file metadata.
    """

-    _extension_config_key = "EXCEL_EXTENSIONS"
-
-    file = fields.Raw(
+    type = fields.Enum(
+        UploadFileType,
        required=True,
-        metadata={
-            "description": "The Excel file to upload",
-            "type": "string",
-            "format": "binary",
-        },
+        by_value=True,
+        metadata={"description": "File type to upload"},
    )
-    sheet_name = fields.String(
-        metadata={
-            "description": "Strings used for sheet names "
-            "(default is the first sheet)."
-        }
-    )
-    column_dates = DelimitedListField(
-        fields.String(),
-        metadata={
-            "description": "A list of column names that should be "
-            "parsed as dates. Example: date,timestamp"
-        },
-    )
-    decimal_character = fields.String(
-        metadata={
-            "description": "Character to recognize as decimal point. Default is '.'"
-        }
-    )
-    header_row = fields.Integer(
-        metadata={
-            "description": "Row containing the headers to use as column names"
-            "(0 is first line of data). Leave empty if there is no header row."
-        }
-    )
-    index_column = fields.String(
-        metadata={
-            "description": "Column to use as the row labels of the dataframe. "
-            "Leave empty if no index column"
-        }
-    )
-    null_values = DelimitedListField(
-        fields.String(),
-        metadata={
-            "description": "A list of strings that should be treated as null. "
-            "Examples: '' for empty strings, 'None', 'N/A',"
-            "Warning: Hive database supports only a single value"
-        },
-    )
-    rows_to_read = fields.Integer(
-        metadata={
-            "description": "Number of rows to read from the file. "
-            "If None, reads all rows."
-        },
-        allow_none=True,
-        validate=Range(min=1),
-    )
-    skip_rows = fields.Integer(
-        metadata={"description": "Number of rows to skip at start of file."}
-    )
-
-
-class CSVMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
-    """
-    Schema for CSV metadata.
-    """
-
-    _extension_config_key = "CSV_EXTENSIONS"
-
    file = fields.Raw(
        required=True,
        metadata={
@@ -1315,30 +1251,12 @@ class CSVMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
            "format": "binary",
        },
    )
-    delimiter = fields.String(metadata={"description": "The delimiter of the CSV file"})
-    header_row = fields.Integer(
+    delimiter = fields.String(
        metadata={
-            "description": "Row containing the headers to use as column names"
-            "(0 is first line of data). Leave empty if there is no header row."
+            "description": "The character used to separate values in the CSV file"
+            " (e.g., a comma, semicolon, or tab)."
        }
    )
-
-
-class ExcelMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
-    """
-    Schema for CSV metadata.
-    """
-
-    _extension_config_key = "EXCEL_EXTENSIONS"
-
-    file = fields.Raw(
-        required=True,
-        metadata={
-            "description": "The file to upload",
-            "type": "string",
-            "format": "binary",
-        },
-    )
    header_row = fields.Integer(
        metadata={
            "description": "Row containing the headers to use as column names"
@@ -1347,23 +1265,6 @@ class ExcelMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
    )


-class ColumnarMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
-    """
-    Schema for CSV metadata.
-    """
-
-    _extension_config_key = "COLUMNAR_EXTENSIONS"
-
-    file = fields.Raw(
-        required=True,
-        metadata={
-            "description": "The file to upload",
-            "type": "string",
-            "format": "binary",
-        },
-    )
-
-
 class UploadFileMetadataItemSchema(Schema):
    sheet_name = fields.String(metadata={"description": "The name of the sheet"})
    column_names = fields.List(