feat: Add parquet upload (#14449)

* allow csv upload to accept parquet file

* fix mypy

* fix if statement

* add test for specifying columns in CSV upload

* clean up test

* change order in test

* fix failures

* upload parquet to separate table in test

* fix error message

* fix mypy again

* rename other extensions to columnar

* add new form for columnar upload

* add support for zip files

* undo csv form changes except usecols

* add more tests for zip

* isort & black

* pylint

* fix trailing space

* address more review comments

* pylint

* black

* resolve remaining issues
This commit is contained in:
Shiva Raisinghani
2021-08-31 00:20:25 -07:00
committed by GitHub
parent ad8336a5b4
commit d25b0967a1
10 changed files with 493 additions and 10 deletions

View File

@@ -21,7 +21,13 @@ from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
from flask_appbuilder.forms import DynamicForm
from flask_babel import lazy_gettext as _
from flask_wtf.file import FileAllowed, FileField, FileRequired
from wtforms import BooleanField, IntegerField, SelectField, StringField
from wtforms import (
BooleanField,
IntegerField,
MultipleFileField,
SelectField,
StringField,
)
from wtforms.ext.sqlalchemy.fields import QuerySelectField
from wtforms.validators import DataRequired, Length, NumberRange, Optional
@@ -163,6 +169,15 @@ class CsvToDatabaseForm(DynamicForm):
_("Mangle Duplicate Columns"),
description=_('Specify duplicate columns as "X.0, X.1".'),
)
# Optional field (validators=[Optional()]) with no default. Per its
# description it carries a JSON list of column names and, when set, only
# those columns are read from the uploaded file.
# NOTE(review): JsonListField is defined outside this view; presumably it
# parses the submitted text into a Python list -- confirm.
usecols = JsonListField(
_("Use Columns"),
default=None,
description=_(
"Json list of the column names that should be read. "
"If not None, only these columns will be read from the file."
),
validators=[Optional()],
)
# Checkbox described to the user as skipping spaces after the delimiter.
skipinitialspace = BooleanField(
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
)
@@ -402,3 +417,130 @@ class ExcelToDatabaseForm(DynamicForm):
'Use [""] for empty string.'
),
)
class ColumnarToDatabaseForm(DynamicForm):
# pylint: disable=E0211
def columnar_allowed_dbs() -> List[Database]:  # type: ignore
    """
    List the databases that can be picked as a columnar upload target.

    Queries every ``Database`` row with ``allow_csv_upload`` set and keeps
    those accepted by
    ``ColumnarToDatabaseForm.at_least_one_schema_is_allowed``.

    Deliberately takes no ``self``: it is handed to the ``con`` field as
    its ``query_factory`` and is called without arguments.
    """
    # TODO: change allow_csv_upload to allow_file_upload
    upload_enabled = db.session.query(Database).filter_by(allow_csv_upload=True)
    return list(
        filter(
            ColumnarToDatabaseForm.at_least_one_schema_is_allowed,
            upload_enabled.all(),
        )
    )
@staticmethod
def at_least_one_schema_is_allowed(database: Database) -> bool:
    """
    Decide whether ``database`` may be offered for a columnar upload.

    Returns True when ``security_manager.can_access_database`` accepts the
    database. Otherwise returns True only if the database reports upload
    schemas (``get_schema_access_for_csv_upload``) and
    ``security_manager.schemas_accessible_by_user`` yields a truthy result
    for them. Returns False in every other case.

    Intended outcomes:

    If the user has access to the database or all datasources
        1. if schemas_allowed_for_csv_upload is empty
            a) if database does not support schema
                user is able to upload columnar without specifying schema name
            b) if database supports schema
                user is able to upload columnar to any schema
        2. if schemas_allowed_for_csv_upload is not empty
            a) if database does not support schema
                This situation is impossible and upload will fail
            b) if database supports schema
                user is able to upload to schema in schemas_allowed_for_csv_upload
    Else, if the user does not have access to the database or all datasources
        1. if schemas_allowed_for_csv_upload is empty
            a) if database does not support schema
                user is unable to upload columnar
            b) if database supports schema
                user is unable to upload columnar
        2. if schemas_allowed_for_csv_upload is not empty
            a) if database does not support schema
                This situation is impossible and user is unable to upload columnar
            b) if database supports schema
                user is able to upload to schema in schemas_allowed_for_csv_upload
    """
    # Database-level access settles the question without looking at schemas.
    if security_manager.can_access_database(database):
        return True

    upload_schemas = database.get_schema_access_for_csv_upload()
    if not upload_schemas:
        return False

    accessible = security_manager.schemas_accessible_by_user(
        database, upload_schemas, False
    )
    return bool(accessible)
# Form fields. They are declared in the order shown; keep that order when
# editing, since reordering declarations may change the rendered form.

# Required name of the table that the uploaded columnar data is written to.
name = StringField(
_("Table Name"),
description=_("Name of table to be created from columnar data."),
validators=[DataRequired()],
widget=BS3TextFieldWidget(),
)
# One or more uploaded files (MultipleFileField). The accepted extensions
# are the intersection of config["ALLOWED_EXTENSIONS"] and
# config["COLUMNAR_EXTENSIONS"]; the same intersection is computed a second
# time to build the error message.
# NOTE(review): FileAllowed is attached to a multi-file field here --
# confirm that the installed Flask-WTF release applies it to every file in
# the list rather than only to a single FileStorage value.
columnar_file = MultipleFileField(
_("Columnar File"),
description=_("Select a Columnar file to be uploaded to a database."),
validators=[
DataRequired(),
FileAllowed(
config["ALLOWED_EXTENSIONS"].intersection(
config["COLUMNAR_EXTENSIONS"]
),
_(
"Only the following file extensions are allowed: "
"%(allowed_extensions)s",
allowed_extensions=", ".join(
config["ALLOWED_EXTENSIONS"].intersection(
config["COLUMNAR_EXTENSIONS"]
)
),
),
),
],
)
# Target database selector. Its options come from columnar_allowed_dbs;
# each option is keyed by the database id and labelled with database_name.
con = QuerySelectField(
_("Database"),
query_factory=columnar_allowed_dbs,
get_pk=lambda a: a.id,
get_label=lambda a: a.database_name,
)
# Optional schema name for the target table.
schema = StringField(
_("Schema"),
description=_("Specify a schema (if database flavor supports this)."),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
# Required choice of what to do when the table already exists; the submitted
# value is one of "fail", "replace" or "append".
if_exists = SelectField(
_("Table Exists"),
description=_(
"If table exists do one of the following: "
"Fail (do nothing), Replace (drop and recreate table) "
"or Append (insert data)."
),
choices=[
("fail", _("Fail")),
("replace", _("Replace")),
("append", _("Append")),
],
validators=[DataRequired()],
)
# Optional JSON list of column names, with no default; per the description,
# only the listed columns are read from the file when a list is supplied.
usecols = JsonListField(
_("Use Columns"),
default=None,
description=_(
"Json list of the column names that should be read. "
"If not None, only these columns will be read from the file."
),
validators=[Optional()],
)
# Checkbox: whether the dataframe index is written as a column.
index = BooleanField(
_("Dataframe Index"), description=_("Write dataframe index as a column.")
)
# Optional label for the index column(s); see the description for how an
# empty value is treated when "Dataframe Index" is checked.
index_label = StringField(
_("Column Label(s)"),
description=_(
"Column label for index column(s). If None is given "
"and Dataframe Index is True, Index Names are used."
),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)