Upload excel (#9825)

This commit is contained in:
Hossein Torabi
2020-07-03 09:58:30 +04:30
committed by GitHub
parent cf60f664a4
commit fdd28c1a5e
9 changed files with 465 additions and 26 deletions

View File

@@ -91,11 +91,15 @@ class CsvToDatabaseForm(DynamicForm):
validators=[
FileRequired(),
FileAllowed(
config["ALLOWED_EXTENSIONS"],
config["ALLOWED_EXTENSIONS"].intersection(config["CSV_EXTENSIONS"]),
_(
"Only the following file extensions are allowed: "
"%(allowed_extensions)s",
allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
allowed_extensions=", ".join(
config["ALLOWED_EXTENSIONS"].intersection(
config["CSV_EXTENSIONS"]
)
),
),
),
],
@@ -206,3 +210,169 @@ class CsvToDatabaseForm(DynamicForm):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
class ExcelToDatabaseForm(DynamicForm):
    """Form describing an Excel upload: the file, the target table and parsing options."""

    # pylint: disable=E0211
    def excel_allowed_dbs():  # type: ignore
        """
        Return the databases offered in the "Database" drop-down.

        Called without arguments as the ``query_factory`` of the ``con`` field,
        which is why it takes no ``self``. A database is listed when uploads are
        enabled on it and ``at_least_one_schema_is_allowed`` accepts it.
        """
        # TODO: change allow_csv_upload to allow_file_upload
        upload_enabled_dbs = (
            db.session.query(Database).filter_by(allow_csv_upload=True).all()
        )
        return [
            database
            for database in upload_enabled_dbs
            if ExcelToDatabaseForm.at_least_one_schema_is_allowed(database)
        ]

    @staticmethod
    def at_least_one_schema_is_allowed(database: Database) -> bool:
        """
        Tell whether the current user may upload to at least one schema.

        If the user has access to the database or all datasource
            1. if schemas_allowed_for_csv_upload is empty
                a) if database does not support schema
                    user is able to upload excel without specifying schema name
                b) if database supports schema
                    user is able to upload excel to any schema
            2. if schemas_allowed_for_csv_upload is not empty
                a) if database does not support schema
                    This situation is impossible and upload will fail
                b) if database supports schema
                    user is able to upload to schema in schemas_allowed_for_csv_upload
        elif the user does not have access to the database or all datasource
            1. if schemas_allowed_for_csv_upload is empty
                a) if database does not support schema
                    user is unable to upload excel
                b) if database supports schema
                    user is unable to upload excel
            2. if schemas_allowed_for_csv_upload is not empty
                a) if database does not support schema
                    This situation is impossible and user is unable to upload excel
                b) if database supports schema
                    user is able to upload to schema in schemas_allowed_for_csv_upload

        :param database: the database to check
        :return: True when the user can upload to the database
        """
        if (
            security_manager.database_access(database)
            or security_manager.all_datasource_access()
        ):
            return True
        schemas = database.get_schema_access_for_csv_upload()
        # Without database-wide access, the user needs at least one explicitly
        # allowed schema that is also accessible to them.
        return bool(
            schemas
            and security_manager.schemas_accessible_by_user(database, schemas, False)
        )

    name = StringField(
        _("Table Name"),
        description=_("Name of table to be created from excel data."),
        validators=[DataRequired()],
        widget=BS3TextFieldWidget(),
    )
    excel_file = FileField(
        _("Excel File"),
        description=_("Select a Excel file to be uploaded to a database."),
        validators=[
            FileRequired(),
            FileAllowed(
                # Only extensions that are both globally allowed and Excel ones.
                config["ALLOWED_EXTENSIONS"].intersection(config["EXCEL_EXTENSIONS"]),
                _(
                    "Only the following file extensions are allowed: "
                    "%(allowed_extensions)s",
                    allowed_extensions=", ".join(
                        config["ALLOWED_EXTENSIONS"].intersection(
                            config["EXCEL_EXTENSIONS"]
                        )
                    ),
                ),
            ),
        ],
    )
    # The description is wrapped in ``_()`` and the field uses the same widget
    # as the other text fields so it is translated and rendered consistently.
    sheet_name = StringField(
        _("Sheet Name"),
        description=_("Sheet Name"),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
    con = QuerySelectField(
        _("Database"),
        query_factory=excel_allowed_dbs,
        get_pk=lambda a: a.id,
        get_label=lambda a: a.database_name,
    )
    schema = StringField(
        _("Schema"),
        description=_("Specify a schema (if database flavor supports this)."),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
    if_exists = SelectField(
        _("Table Exists"),
        description=_(
            "If table exists do one of the following: "
            "Fail (do nothing), Replace (drop and recreate table) "
            "or Append (insert data)."
        ),
        choices=[
            ("fail", _("Fail")),
            ("replace", _("Replace")),
            ("append", _("Append")),
        ],
        validators=[DataRequired()],
    )
    header = IntegerField(
        _("Header Row"),
        description=_(
            "Row containing the headers to use as "
            "column names (0 is first line of data). "
            "Leave empty if there is no header row."
        ),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    index_col = IntegerField(
        _("Index Column"),
        description=_(
            "Column to use as the row labels of the "
            "dataframe. Leave empty if no index column."
        ),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    mangle_dupe_cols = BooleanField(
        _("Mangle Duplicate Columns"),
        description=_('Specify duplicate columns as "X.0, X.1".'),
    )
    skipinitialspace = BooleanField(
        _("Skip Initial Space"), description=_("Skip spaces after delimiter.")
    )
    skiprows = IntegerField(
        _("Skip Rows"),
        description=_("Number of rows to skip at start of file."),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    nrows = IntegerField(
        _("Rows to Read"),
        description=_("Number of rows of file to read."),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    decimal = StringField(
        _("Decimal Character"),
        default=".",
        description=_("Character to interpret as decimal point."),
        validators=[Optional(), Length(min=1, max=1)],
        widget=BS3TextFieldWidget(),
    )
    index = BooleanField(
        _("Dataframe Index"), description=_("Write dataframe index as a column.")
    )
    index_label = StringField(
        _("Column Label(s)"),
        description=_(
            "Column label for index column(s). If None is given "
            "and Dataframe Index is True, Index Names are used."
        ),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )

View File

@@ -20,9 +20,9 @@ from typing import TYPE_CHECKING
from flask import flash, g, redirect
from flask_appbuilder import SimpleFormView
from flask_appbuilder.forms import DynamicForm
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_babel import lazy_gettext as _
from werkzeug.wrappers import Response
from wtforms.fields import StringField
from wtforms.validators import ValidationError
@@ -32,12 +32,10 @@ from superset.connectors.sqla.models import SqlaTable
from superset.constants import RouteMethod
from superset.exceptions import CertificateException
from superset.sql_parse import Table
from superset.typing import FlaskResponse
from superset.utils import core as utils
from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
from superset.views.database.forms import CsvToDatabaseForm
from .forms import CsvToDatabaseForm
from .forms import CsvToDatabaseForm, ExcelToDatabaseForm
from .mixins import DatabaseMixin
from .validators import schema_allows_csv_upload, sqlalchemy_uri_validator
@@ -48,9 +46,7 @@ config = app.config
stats_logger = config["STATS_LOGGER"]
def sqlalchemy_uri_form_validator( # pylint: disable=unused-argument
form: DynamicForm, field: StringField
) -> None:
def sqlalchemy_uri_form_validator(_: _, field: StringField) -> None:
"""
Check if user has submitted a valid SQLAlchemy URI
"""
@@ -58,9 +54,7 @@ def sqlalchemy_uri_form_validator( # pylint: disable=unused-argument
sqlalchemy_uri_validator(field.data, exception=ValidationError)
def certificate_form_validator( # pylint: disable=unused-argument
form: DynamicForm, field: StringField
) -> None:
def certificate_form_validator(_: _, field: StringField) -> None:
"""
Check if user has submitted a valid SSL certificate
"""
@@ -116,7 +110,7 @@ class CsvToDatabaseView(SimpleFormView):
form.decimal.data = "."
form.if_exists.data = "fail"
def form_post(self, form: CsvToDatabaseForm) -> FlaskResponse:
def form_post(self, form: CsvToDatabaseForm) -> Response:
database = form.con.data
csv_table = Table(table=form.name.data, schema=form.schema.data)
@@ -249,3 +243,149 @@ class CsvToDatabaseView(SimpleFormView):
flash(message, "info")
stats_logger.incr("successful_csv_upload")
return redirect("/tablemodelview/list/")
class ExcelToDatabaseView(SimpleFormView):
    """Form view that loads an uploaded Excel file into a database table."""

    form = ExcelToDatabaseForm
    form_template = "superset/form_view/excel_to_database_view/edit.html"
    form_title = _("Excel to Database configuration")
    add_columns = ["database", "schema", "table_name"]

    @staticmethod
    def _remove_file_quietly(path: str) -> None:
        """Delete ``path`` as best-effort cleanup, ignoring filesystem errors."""
        try:
            os.remove(path)
        except OSError:
            # The temporary file may be missing already; cleanup must never
            # mask the outcome of the upload itself.
            pass

    def form_get(self, form: ExcelToDatabaseForm) -> None:
        """Pre-populate the form with its default values before rendering."""
        form.header.data = 0
        form.mangle_dupe_cols.data = True
        form.skipinitialspace.data = False
        form.decimal.data = "."
        form.if_exists.data = "fail"
        # Set the field's value, not the field itself: rebinding
        # ``form.sheet_name`` would replace the field object on the form.
        form.sheet_name.data = ""

    def form_post(self, form: ExcelToDatabaseForm) -> Response:
        """
        Handle a submitted upload form.

        Validates the target schema/table name, stores the uploaded file in a
        temporary file, asks the database's engine spec to create the table
        from it, and registers (or refreshes) the matching ``SqlaTable``.

        :param form: the validated upload form
        :return: a redirect back to the form on failure, or to the table list
            on success
        """
        database = form.con.data
        excel_table = Table(table=form.name.data, schema=form.schema.data)

        if not schema_allows_csv_upload(database, excel_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for excel uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=excel_table.schema,
            )
            flash(message, "danger")
            return redirect("/exceltodatabaseview/form")

        if "." in excel_table.table and excel_table.schema:
            # Placeholder names must match the keyword arguments below,
            # otherwise formatting the message fails.
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(table)s" and in the schema field: '
                '"%(schema)s". Please remove one',
                table=excel_table.table,
                schema=excel_table.schema,
            )
            flash(message, "danger")
            return redirect("/exceltodatabaseview/form")

        uploaded_tmp_file_path = None
        try:
            # The upload folder has to exist before a temporary file can be
            # created inside it, so this must come first.
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            with tempfile.NamedTemporaryFile(
                dir=config["UPLOAD_FOLDER"],
                suffix=os.path.splitext(form.excel_file.data.filename)[1].lower(),
                delete=False,
            ) as tmp_file:
                # Only the reserved name is needed; the handle is closed on
                # leaving the block and the file is kept (delete=False).
                uploaded_tmp_file_path = tmp_file.name
            upload_stream_write(form.excel_file.data, uploaded_tmp_file_path)

            con = form.data.get("con")
            database = (
                db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
            )
            excel_to_df_kwargs = {
                "header": form.header.data or 0,
                "index_col": form.index_col.data,
                "mangle_dupe_cols": form.mangle_dupe_cols.data,
                "skipinitialspace": form.skipinitialspace.data,
                "skiprows": form.skiprows.data,
                "nrows": form.nrows.data,
                # Fall back to the first sheet (index 0) when the field was
                # left empty; an empty string is not a usable sheet name.
                "sheet_name": form.sheet_name.data or 0,
                # NOTE(review): confirm the Excel reader behind
                # create_table_from_excel accepts "chunksize".
                "chunksize": 1000,
            }
            df_to_sql_kwargs = {
                "name": excel_table.table,
                "if_exists": form.if_exists.data,
                "index": form.index.data,
                "index_label": form.index_label.data,
                "chunksize": 1000,
            }
            database.db_engine_spec.create_table_from_excel(
                uploaded_tmp_file_path,
                excel_table,
                database,
                excel_to_df_kwargs,
                df_to_sql_kwargs,
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a excel, presto will be a better option
            # to explore the table.
            explore_database = database
            explore_database_id = database.get_extra().get("explore_database_id", None)
            if explore_database_id:
                explore_database = (
                    db.session.query(models.Database)
                    .filter_by(id=explore_database_id)
                    .one_or_none()
                    or database
                )

            sqla_table = (
                db.session.query(SqlaTable)
                .filter_by(
                    table_name=excel_table.table,
                    schema=excel_table.schema,
                    database_id=explore_database.id,
                )
                .one_or_none()
            )

            if sqla_table:
                sqla_table.fetch_metadata()
            else:
                sqla_table = SqlaTable(table_name=excel_table.table)
                sqla_table.database = explore_database
                # Keep the foreign key in line with the relationship above and
                # with the lookup filter, which both use the explore database.
                sqla_table.database_id = explore_database.id
                sqla_table.user_id = g.user.id
                sqla_table.schema = excel_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            if uploaded_tmp_file_path is not None:
                self._remove_file_quietly(uploaded_tmp_file_path)
            message = _(
                'Unable to upload Excel file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=form.excel_file.data.filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_excel_upload")
            return redirect("/exceltodatabaseview/form")

        # The data is committed; a failed cleanup must not turn a successful
        # upload into an error response.
        self._remove_file_quietly(uploaded_tmp_file_path)
        # Go back to welcome page / splash screen
        message = _(
            'Excel file "%(excel_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            excel_filename=form.excel_file.data.filename,
            table_name=str(excel_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_excel_upload")
        return redirect("/tablemodelview/list/")