mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
feat: Add parquet upload (#14449)
* allow csv upload to accept parquet file * fix mypy * fix if statement * add test for specifying columns in CSV upload * clean up test * change order in test * fix failures * upload parquet to separate table in test * fix error message * fix mypy again * rename other extensions to columnar * add new form for columnar upload * add support for zip files * undo csv form changes except usecols * add more tests for zip * isort & black * pylint * fix trailing space * address more review comments * pylint * black * resolve remaining issues
This commit is contained in:
committed by
GitHub
parent
ad8336a5b4
commit
d25b0967a1
@@ -21,7 +21,13 @@ from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
|
||||
from flask_appbuilder.forms import DynamicForm
|
||||
from flask_babel import lazy_gettext as _
|
||||
from flask_wtf.file import FileAllowed, FileField, FileRequired
|
||||
from wtforms import BooleanField, IntegerField, SelectField, StringField
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
IntegerField,
|
||||
MultipleFileField,
|
||||
SelectField,
|
||||
StringField,
|
||||
)
|
||||
from wtforms.ext.sqlalchemy.fields import QuerySelectField
|
||||
from wtforms.validators import DataRequired, Length, NumberRange, Optional
|
||||
|
||||
@@ -163,6 +169,15 @@ class CsvToDatabaseForm(DynamicForm):
|
||||
_("Mangle Duplicate Columns"),
|
||||
description=_('Specify duplicate columns as "X.0, X.1".'),
|
||||
)
|
||||
usecols = JsonListField(
|
||||
_("Use Columns"),
|
||||
default=None,
|
||||
description=_(
|
||||
"Json list of the column names that should be read. "
|
||||
"If not None, only these columns will be read from the file."
|
||||
),
|
||||
validators=[Optional()],
|
||||
)
|
||||
skipinitialspace = BooleanField(
|
||||
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
|
||||
)
|
||||
@@ -402,3 +417,130 @@ class ExcelToDatabaseForm(DynamicForm):
|
||||
'Use [""] for empty string.'
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class ColumnarToDatabaseForm(DynamicForm):
    """Form used to upload one or more columnar files into a database table.

    The accepted file extensions are the intersection of
    ``config["ALLOWED_EXTENSIONS"]`` and ``config["COLUMNAR_EXTENSIONS"]``.
    The target database is chosen from the databases returned by
    ``columnar_allowed_dbs``.
    """

    # Deliberately defined without ``self``: the function is referenced by
    # name further down in the class body as the ``query_factory`` of ``con``,
    # where it is still a plain function and is later called with no arguments.
    # pylint: disable=E0211
    def columnar_allowed_dbs() -> List[Database]:  # type: ignore
        """Return the upload-enabled databases the current user may target.

        A database is kept when ``allow_csv_upload`` is set on it and
        ``at_least_one_schema_is_allowed`` returns True for it.
        """
        # TODO: change allow_csv_upload to allow_file_upload
        columnar_enabled_dbs = (
            db.session.query(Database).filter_by(allow_csv_upload=True).all()
        )
        return [
            columnar_enabled_db
            for columnar_enabled_db in columnar_enabled_dbs
            if ColumnarToDatabaseForm.at_least_one_schema_is_allowed(
                columnar_enabled_db
            )
        ]

    @staticmethod
    def at_least_one_schema_is_allowed(database: Database) -> bool:
        """
        Tell whether the current user can upload to at least one schema.

        If the user has access to the database or all datasource
            1. if schemas_allowed_for_csv_upload is empty
                a) if database does not support schema
                    user is able to upload columnar without specifying schema name
                b) if database supports schema
                    user is able to upload columnar to any schema
            2. if schemas_allowed_for_csv_upload is not empty
                a) if database does not support schema
                    This situation is impossible and upload will fail
                b) if database supports schema
                    user is able to upload to schema in schemas_allowed_for_csv_upload
        elif the user does not have access to the database or all datasource
            1. if schemas_allowed_for_csv_upload is empty
                a) if database does not support schema
                    user is unable to upload columnar
                b) if database supports schema
                    user is unable to upload columnar
            2. if schemas_allowed_for_csv_upload is not empty
                a) if database does not support schema
                    This situation is impossible and user is unable to upload columnar
                b) if database supports schema
                    user is able to upload to schema in schemas_allowed_for_csv_upload

        :param database: the database to check
        :returns: True when the upload may proceed for at least one schema
        """
        # Database-level access is sufficient on its own.
        if security_manager.can_access_database(database):
            return True
        # Otherwise the user needs access to one of the schemas that are
        # explicitly opened for upload on this database.
        schemas = database.get_schema_access_for_csv_upload()
        if schemas and security_manager.schemas_accessible_by_user(
            database, schemas, False
        ):
            return True
        return False

    name = StringField(
        _("Table Name"),
        description=_("Name of table to be created from columnar data."),
        validators=[DataRequired()],
        widget=BS3TextFieldWidget(),
    )
    # NOTE(review): FileAllowed was designed around single-file fields; whether
    # it checks every entry of a MultipleFileField depends on the installed
    # Flask-WTF version -- TODO confirm, or validate extensions in the view.
    columnar_file = MultipleFileField(
        _("Columnar File"),
        description=_("Select a Columnar file to be uploaded to a database."),
        validators=[
            DataRequired(),
            FileAllowed(
                config["ALLOWED_EXTENSIONS"].intersection(
                    config["COLUMNAR_EXTENSIONS"]
                ),
                _(
                    "Only the following file extensions are allowed: "
                    "%(allowed_extensions)s",
                    # Sorted so the message lists extensions in a stable order;
                    # iterating the set directly yields an order that can
                    # change from one process to the next.
                    allowed_extensions=", ".join(
                        sorted(
                            config["ALLOWED_EXTENSIONS"].intersection(
                                config["COLUMNAR_EXTENSIONS"]
                            )
                        )
                    ),
                ),
            ),
        ],
    )

    con = QuerySelectField(
        _("Database"),
        query_factory=columnar_allowed_dbs,
        get_pk=lambda a: a.id,
        get_label=lambda a: a.database_name,
    )
    schema = StringField(
        _("Schema"),
        description=_("Specify a schema (if database flavor supports this)."),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
    if_exists = SelectField(
        _("Table Exists"),
        description=_(
            "If table exists do one of the following: "
            "Fail (do nothing), Replace (drop and recreate table) "
            "or Append (insert data)."
        ),
        choices=[
            ("fail", _("Fail")),
            ("replace", _("Replace")),
            ("append", _("Append")),
        ],
        validators=[DataRequired()],
    )
    usecols = JsonListField(
        _("Use Columns"),
        default=None,
        description=_(
            "Json list of the column names that should be read. "
            "If not None, only these columns will be read from the file."
        ),
        validators=[Optional()],
    )
    index = BooleanField(
        _("Dataframe Index"), description=_("Write dataframe index as a column.")
    )
    index_label = StringField(
        _("Column Label(s)"),
        description=_(
            "Column label for index column(s). If None is given "
            "and Dataframe Index is True, Index Names are used."
        ),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
|
||||
|
||||
Reference in New Issue
Block a user