mirror of
https://github.com/apache/superset.git
synced 2026-04-20 08:34:37 +00:00
Upload excel (#9825)
This commit is contained in:
@@ -91,11 +91,15 @@ class CsvToDatabaseForm(DynamicForm):
|
||||
validators=[
|
||||
FileRequired(),
|
||||
FileAllowed(
|
||||
config["ALLOWED_EXTENSIONS"],
|
||||
config["ALLOWED_EXTENSIONS"].intersection(config["CSV_EXTENSIONS"]),
|
||||
_(
|
||||
"Only the following file extensions are allowed: "
|
||||
"%(allowed_extensions)s",
|
||||
allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
|
||||
allowed_extensions=", ".join(
|
||||
config["ALLOWED_EXTENSIONS"].intersection(
|
||||
config["CSV_EXTENSIONS"]
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
@@ -206,3 +210,169 @@ class CsvToDatabaseForm(DynamicForm):
|
||||
validators=[Optional()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
|
||||
|
||||
class ExcelToDatabaseForm(DynamicForm):
    """Form collecting the options for loading an Excel file into a table.

    The fields cover the target (database, schema, table name), the uploaded
    file and sheet, how the sheet is parsed (header row, index column, rows
    to skip/read, decimal character) and how the resulting dataframe is
    written (behaviour when the table exists, index handling).
    """

    # pylint: disable=E0211
    def excel_allowed_dbs():  # type: ignore
        """Return the databases offered in the "Database" select field.

        Defined without ``self`` because it is passed as a plain callable to
        ``QuerySelectField(query_factory=...)`` further down in the class
        body. A database is kept when uploads are enabled on it and
        ``at_least_one_schema_is_allowed`` accepts it for the current user.
        """
        # TODO: change allow_csv_upload to allow_file_upload
        upload_enabled_dbs = (
            db.session.query(Database).filter_by(allow_csv_upload=True).all()
        )
        return [
            candidate
            for candidate in upload_enabled_dbs
            if ExcelToDatabaseForm.at_least_one_schema_is_allowed(candidate)
        ]

    @staticmethod
    def at_least_one_schema_is_allowed(database: Database) -> bool:
        """Tell whether the current user may upload to ``database`` at all.

        If the user has access to the database or all datasource
            1. if schemas_allowed_for_csv_upload is empty
                a) if database does not support schema
                    user is able to upload excel without specifying schema name
                b) if database supports schema
                    user is able to upload excel to any schema
            2. if schemas_allowed_for_csv_upload is not empty
                a) if database does not support schema
                    This situation is impossible and upload will fail
                b) if database supports schema
                    user is able to upload to schema in
                    schemas_allowed_for_csv_upload
        elif the user does not have access to the database or all datasource
            1. if schemas_allowed_for_csv_upload is empty
                a) if database does not support schema
                    user is unable to upload excel
                b) if database supports schema
                    user is unable to upload excel
            2. if schemas_allowed_for_csv_upload is not empty
                a) if database does not support schema
                    This situation is impossible and user is unable to upload
                    excel
                b) if database supports schema
                    user is able to upload to schema in
                    schemas_allowed_for_csv_upload

        :param database: the database to check
        :returns: True when at least one upload target is permitted
        """
        if (
            security_manager.database_access(database)
            or security_manager.all_datasource_access()
        ):
            return True
        schemas = database.get_schema_access_for_csv_upload()
        # Without database-level access, the user needs access to at least one
        # of the schemas explicitly opened up for uploads.
        return bool(
            schemas
            and security_manager.schemas_accessible_by_user(database, schemas, False)
        )

    name = StringField(
        _("Table Name"),
        description=_("Name of table to be created from excel data."),
        validators=[DataRequired()],
        widget=BS3TextFieldWidget(),
    )
    excel_file = FileField(
        _("Excel File"),
        description=_("Select a Excel file to be uploaded to a database."),
        validators=[
            FileRequired(),
            FileAllowed(
                config["ALLOWED_EXTENSIONS"].intersection(config["EXCEL_EXTENSIONS"]),
                _(
                    "Only the following file extensions are allowed: "
                    "%(allowed_extensions)s",
                    # Sorted so the message is stable: the intersection is a
                    # set and has no defined iteration order.
                    allowed_extensions=", ".join(
                        sorted(
                            config["ALLOWED_EXTENSIONS"].intersection(
                                config["EXCEL_EXTENSIONS"]
                            )
                        )
                    ),
                ),
            ),
        ],
    )

    sheet_name = StringField(
        _("Sheet Name"),
        # Translatable and rendered with the same widget as the other text
        # fields of this form.
        description=_("Sheet Name"),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )

    con = QuerySelectField(
        _("Database"),
        query_factory=excel_allowed_dbs,
        get_pk=lambda a: a.id,
        get_label=lambda a: a.database_name,
    )
    schema = StringField(
        _("Schema"),
        description=_("Specify a schema (if database flavor supports this)."),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
    if_exists = SelectField(
        _("Table Exists"),
        description=_(
            "If table exists do one of the following: "
            "Fail (do nothing), Replace (drop and recreate table) "
            "or Append (insert data)."
        ),
        choices=[
            ("fail", _("Fail")),
            ("replace", _("Replace")),
            ("append", _("Append")),
        ],
        validators=[DataRequired()],
    )
    header = IntegerField(
        _("Header Row"),
        description=_(
            "Row containing the headers to use as "
            "column names (0 is first line of data). "
            "Leave empty if there is no header row."
        ),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    index_col = IntegerField(
        _("Index Column"),
        description=_(
            "Column to use as the row labels of the "
            "dataframe. Leave empty if no index column."
        ),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    mangle_dupe_cols = BooleanField(
        _("Mangle Duplicate Columns"),
        description=_('Specify duplicate columns as "X.0, X.1".'),
    )
    skipinitialspace = BooleanField(
        _("Skip Initial Space"), description=_("Skip spaces after delimiter.")
    )
    skiprows = IntegerField(
        _("Skip Rows"),
        description=_("Number of rows to skip at start of file."),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    nrows = IntegerField(
        _("Rows to Read"),
        description=_("Number of rows of file to read."),
        validators=[Optional(), NumberRange(min=0)],
        widget=BS3TextFieldWidget(),
    )
    decimal = StringField(
        _("Decimal Character"),
        default=".",
        description=_("Character to interpret as decimal point."),
        validators=[Optional(), Length(min=1, max=1)],
        widget=BS3TextFieldWidget(),
    )
    index = BooleanField(
        _("Dataframe Index"), description=_("Write dataframe index as a column.")
    )
    index_label = StringField(
        _("Column Label(s)"),
        description=_(
            "Column label for index column(s). If None is given "
            "and Dataframe Index is True, Index Names are used."
        ),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
|
||||
|
||||
@@ -20,9 +20,9 @@ from typing import TYPE_CHECKING
|
||||
|
||||
from flask import flash, g, redirect
|
||||
from flask_appbuilder import SimpleFormView
|
||||
from flask_appbuilder.forms import DynamicForm
|
||||
from flask_appbuilder.models.sqla.interface import SQLAInterface
|
||||
from flask_babel import lazy_gettext as _
|
||||
from werkzeug.wrappers import Response
|
||||
from wtforms.fields import StringField
|
||||
from wtforms.validators import ValidationError
|
||||
|
||||
@@ -32,12 +32,10 @@ from superset.connectors.sqla.models import SqlaTable
|
||||
from superset.constants import RouteMethod
|
||||
from superset.exceptions import CertificateException
|
||||
from superset.sql_parse import Table
|
||||
from superset.typing import FlaskResponse
|
||||
from superset.utils import core as utils
|
||||
from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
|
||||
from superset.views.database.forms import CsvToDatabaseForm
|
||||
|
||||
from .forms import CsvToDatabaseForm
|
||||
from .forms import CsvToDatabaseForm, ExcelToDatabaseForm
|
||||
from .mixins import DatabaseMixin
|
||||
from .validators import schema_allows_csv_upload, sqlalchemy_uri_validator
|
||||
|
||||
@@ -48,9 +46,7 @@ config = app.config
|
||||
stats_logger = config["STATS_LOGGER"]
|
||||
|
||||
|
||||
def sqlalchemy_uri_form_validator( # pylint: disable=unused-argument
|
||||
form: DynamicForm, field: StringField
|
||||
) -> None:
|
||||
def sqlalchemy_uri_form_validator(_: _, field: StringField) -> None:
|
||||
"""
|
||||
Check if user has submitted a valid SQLAlchemy URI
|
||||
"""
|
||||
@@ -58,9 +54,7 @@ def sqlalchemy_uri_form_validator( # pylint: disable=unused-argument
|
||||
sqlalchemy_uri_validator(field.data, exception=ValidationError)
|
||||
|
||||
|
||||
def certificate_form_validator( # pylint: disable=unused-argument
|
||||
form: DynamicForm, field: StringField
|
||||
) -> None:
|
||||
def certificate_form_validator(_: _, field: StringField) -> None:
|
||||
"""
|
||||
Check if user has submitted a valid SSL certificate
|
||||
"""
|
||||
@@ -116,7 +110,7 @@ class CsvToDatabaseView(SimpleFormView):
|
||||
form.decimal.data = "."
|
||||
form.if_exists.data = "fail"
|
||||
|
||||
def form_post(self, form: CsvToDatabaseForm) -> FlaskResponse:
|
||||
def form_post(self, form: CsvToDatabaseForm) -> Response:
|
||||
database = form.con.data
|
||||
csv_table = Table(table=form.name.data, schema=form.schema.data)
|
||||
|
||||
@@ -249,3 +243,149 @@ class CsvToDatabaseView(SimpleFormView):
|
||||
flash(message, "info")
|
||||
stats_logger.incr("successful_csv_upload")
|
||||
return redirect("/tablemodelview/list/")
|
||||
|
||||
|
||||
class ExcelToDatabaseView(SimpleFormView):
    """Form view that loads an uploaded Excel file into a database table."""

    form = ExcelToDatabaseForm
    form_template = "superset/form_view/excel_to_database_view/edit.html"
    form_title = _("Excel to Database configuration")
    add_columns = ["database", "schema", "table_name"]

    def form_get(self, form: ExcelToDatabaseForm) -> None:
        """Pre-populate the defaults shown when the form is first displayed."""
        form.header.data = 0
        form.mangle_dupe_cols.data = True
        form.skipinitialspace.data = False
        form.decimal.data = "."
        form.if_exists.data = "fail"
        # Set the field's data; assigning to ``form.sheet_name`` itself would
        # rebind the attribute and hide the field object.
        form.sheet_name.data = ""

    def form_post(self, form: ExcelToDatabaseForm) -> Response:
        """Store the uploaded Excel file as a table and register it.

        The upload is rejected up front (flash + redirect back to the form)
        when the schema is not allowed for uploads or when a namespace is
        given both in the table name and in the schema field. Otherwise the
        file is written to a temporary file in the upload folder, handed to
        the database's engine spec to create the table, and a ``SqlaTable``
        is created (or its metadata refreshed) for exploration. Any failure
        during that stage rolls the session back, removes the temporary file
        and redirects back to the form with an error message.

        :param form: the submitted, validated upload form
        :returns: redirect to the table list on success, to the form on error
        """
        form_url = "/exceltodatabaseview/form"
        database = form.con.data
        excel_table = Table(table=form.name.data, schema=form.schema.data)

        if not schema_allows_csv_upload(database, excel_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for excel uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=excel_table.schema,
            )
            flash(message, "danger")
            return redirect(form_url)

        if "." in excel_table.table and excel_table.schema:
            # Placeholder names must match the keyword arguments below,
            # otherwise formatting the message raises KeyError.
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(table)s" and in the schema field: '
                '"%(schema)s". Please remove one',
                table=excel_table.table,
                schema=excel_table.schema,
            )
            flash(message, "danger")
            return redirect(form_url)

        # The folder has to exist before a temporary file can be created in it.
        utils.ensure_path_exists(config["UPLOAD_FOLDER"])
        # Only the name is needed: the handle is closed right away and the
        # file is kept (delete=False) so it can be written to by path.
        with tempfile.NamedTemporaryFile(
            dir=config["UPLOAD_FOLDER"],
            suffix=os.path.splitext(form.excel_file.data.filename)[1].lower(),
            delete=False,
        ) as tmp_file:
            uploaded_tmp_file_path = tmp_file.name

        try:
            upload_stream_write(form.excel_file.data, uploaded_tmp_file_path)

            con = form.data.get("con")
            database = (
                db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
            )
            excel_to_df_kwargs = {
                "header": form.header.data if form.header.data else 0,
                "index_col": form.index_col.data,
                "mangle_dupe_cols": form.mangle_dupe_cols.data,
                "skipinitialspace": form.skipinitialspace.data,
                "skiprows": form.skiprows.data,
                "nrows": form.nrows.data,
                # An empty field means "first sheet".
                # NOTE(review): assumes these kwargs reach pandas.read_excel,
                # where 0 selects the first sheet - confirm in the engine spec.
                "sheet_name": form.sheet_name.data if form.sheet_name.data else 0,
                "chunksize": 1000,
            }
            df_to_sql_kwargs = {
                "name": excel_table.table,
                "if_exists": form.if_exists.data,
                "index": form.index.data,
                "index_label": form.index_label.data,
                "chunksize": 1000,
            }
            database.db_engine_spec.create_table_from_excel(
                uploaded_tmp_file_path,
                excel_table,
                database,
                excel_to_df_kwargs,
                df_to_sql_kwargs,
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a excel, presto will be a better option
            # to explore the table.
            explore_database = database
            explore_database_id = database.get_extra().get("explore_database_id", None)
            if explore_database_id:
                explore_database = (
                    db.session.query(models.Database)
                    .filter_by(id=explore_database_id)
                    .one_or_none()
                    or database
                )

            sqla_table = (
                db.session.query(SqlaTable)
                .filter_by(
                    table_name=excel_table.table,
                    schema=excel_table.schema,
                    database_id=explore_database.id,
                )
                .one_or_none()
            )

            if sqla_table:
                sqla_table.fetch_metadata()
            else:
                sqla_table = SqlaTable(table_name=excel_table.table)
                sqla_table.database = explore_database
                # NOTE(review): the lookup above keys on explore_database.id
                # while this assigns the upload database's id - confirm which
                # one is intended when the two databases differ.
                sqla_table.database_id = database.id
                sqla_table.user_id = g.user.id
                sqla_table.schema = excel_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            # Best-effort cleanup: the temporary file may already be gone.
            try:
                os.remove(uploaded_tmp_file_path)
            except OSError:
                pass
            message = _(
                'Unable to upload Excel file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=form.excel_file.data.filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_excel_upload")
            return redirect(form_url)

        os.remove(uploaded_tmp_file_path)
        # Go back to welcome page / splash screen
        message = _(
            'Excel file "%(excel_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            excel_filename=form.excel_file.data.filename,
            table_name=str(excel_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_excel_upload")
        return redirect("/tablemodelview/list/")
|
||||
|
||||
Reference in New Issue
Block a user