mirror of
https://github.com/apache/superset.git
synced 2026-04-18 23:55:00 +00:00
feat: new CSV upload form and API (#27840)
This commit is contained in:
committed by
GitHub
parent
40e77be813
commit
54387b4589
@@ -20,7 +20,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from typing import Optional, Union
|
||||
from typing import Optional
|
||||
|
||||
from unittest import mock
|
||||
|
||||
@@ -129,31 +129,6 @@ def get_upload_db():
|
||||
return db.session.query(Database).filter_by(database_name=CSV_UPLOAD_DATABASE).one()
|
||||
|
||||
|
||||
def upload_csv(
    filename: str,
    table_name: str,
    extra: Optional[dict[str, str]] = None,
    dtype: Optional[str] = None,
):
    """Upload a CSV file through the legacy ``/csvtodatabaseview/form`` endpoint.

    :param filename: path of the CSV file to upload
    :param table_name: destination table name
    :param extra: extra form fields that override/extend the defaults below
        (e.g. ``{"if_exists": "replace", "schema": "admin_database"}``)
    :param dtype: optional JSON-encoded pandas dtype mapping,
        e.g. ``'{"a": "string", "b": "float64"}'``
    :returns: the decoded response body of the form POST
    """
    # Fix: annotation previously used ``Union[str, None]`` while the module
    # now imports only ``Optional`` from typing (see import diff above),
    # which would raise NameError; ``Optional[str]`` is equivalent.
    csv_upload_db_id = get_upload_db().id
    form_data = {
        # NOTE(review): the file handle is handed to the test client and never
        # explicitly closed — acceptable in short-lived tests, but would leak
        # a descriptor in production code; confirm the client consumes it.
        "csv_file": open(filename, "rb"),
        "delimiter": ",",
        "table_name": table_name,
        "database": csv_upload_db_id,
        "if_exists": "fail",
        "index_label": "test_label",
        "overwrite_duplicate": False,
    }
    # Only send a schema when the example database defines a default one.
    if schema := utils.get_example_default_schema():
        form_data["schema"] = schema
    # Caller-supplied fields take precedence over the defaults above.
    if extra:
        form_data.update(extra)
    if dtype:
        form_data["dtype"] = dtype
    return get_resp(test_client, "/csvtodatabaseview/form", data=form_data)
|
||||
|
||||
|
||||
def upload_excel(
|
||||
filename: str, table_name: str, extra: Optional[dict[str, str]] = None
|
||||
):
|
||||
@@ -224,205 +199,6 @@ def escaped_parquet(text):
|
||||
return escaped_double_quotes(f"['{text}']")
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_csv_upload_with_context")
@pytest.mark.usefixtures("create_csv_files")
@mock.patch(
    "superset.models.core.config",
    {**app.config, "ALLOWED_USER_CSV_SCHEMA_FUNC": lambda d, u: ["admin_database"]},
)
@mock.patch("superset.db_engine_specs.hive.upload_to_s3", mock_upload_to_s3)
@mock.patch("superset.views.database.views.event_logger.log_with_context")
def test_import_csv_enforced_schema(mock_event_logger):
    """CSV uploads must respect the ``ALLOWED_USER_CSV_SCHEMA_FUNC`` allow-list.

    With the allow-list patched to only permit ``admin_database``, verify:
    schema embedded in the table name is rejected; omitting the schema fails;
    an allowed schema succeeds (and logs a ``successful_csv_upload`` event);
    a non-allowed schema fails; and append into the allowed schema works.
    """
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    if utils.backend() == "mysql":
        pytest.skip("This test is flaky on MySQL")

    full_table_name = f"admin_database.{CSV_UPLOAD_TABLE_W_SCHEMA}"

    # Invalid table name: the schema must be passed as a separate form field,
    # not embedded in the table name itself.
    resp = upload_csv(CSV_FILENAME1, full_table_name)
    assert "Table name cannot contain a schema" in resp

    # no schema specified, fail upload
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": None})
    assert (
        f"Database {escaped_double_quotes(CSV_UPLOAD_DATABASE)} schema"
        f" {escaped_double_quotes('None')} is not allowed for csv uploads" in resp
    )

    success_msg = f"CSV file {escaped_double_quotes(CSV_FILENAME1)} uploaded to table {escaped_double_quotes(full_table_name)}"

    # Allowed schema: upload should succeed.
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "replace"},
    )

    assert success_msg in resp
    # The view must log the successful upload with full context.
    mock_event_logger.assert_called_with(
        action="successful_csv_upload",
        database=get_upload_db().name,
        schema="admin_database",
        table=CSV_UPLOAD_TABLE_W_SCHEMA,
    )

    # Verify the data actually landed in the target schema.
    with get_upload_db().get_sqla_engine() as engine:
        data = engine.execute(
            f"SELECT * from {ADMIN_SCHEMA_NAME}.{CSV_UPLOAD_TABLE_W_SCHEMA} ORDER BY b"
        ).fetchall()
        assert data == [("john", 1), ("paul", 2)]

    # user specified schema doesn't match, fail
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": "gold"}
    )
    assert (
        f'Database {escaped_double_quotes(CSV_UPLOAD_DATABASE)} schema {escaped_double_quotes("gold")} is not allowed for csv uploads'
        in resp
    )

    # user specified schema matches the expected schema, append
    if utils.backend() == "hive":
        pytest.skip("Hive database doesn't support append csv uploads.")
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "append"},
    )
    assert success_msg in resp

    # Clean up
    with get_upload_db().get_sqla_engine() as engine:
        engine.execute(f"DROP TABLE {full_table_name}")
|
||||
|
||||
|
||||
@mock.patch("superset.db_engine_specs.hive.upload_to_s3", mock_upload_to_s3)
def test_import_csv_explore_database(setup_csv_upload_with_context, create_csv_files):
    """An uploaded CSV becomes a table registered against the upload database.

    Uploads a CSV and checks that the resulting ``SqlaTable`` metadata points
    at the example database's id.
    """
    schema = utils.get_example_default_schema()
    # Success message includes the schema prefix only when one is defined.
    full_table_name = (
        f"{schema}.{CSV_UPLOAD_TABLE_W_EXPLORE}"
        if schema
        else CSV_UPLOAD_TABLE_W_EXPLORE
    )

    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_EXPLORE)
    assert (
        f"CSV file {escaped_double_quotes(CSV_FILENAME1)} uploaded to table {escaped_double_quotes(full_table_name)}"
        in resp
    )
    # The created dataset must be attached to the example database.
    table = SupersetTestCase.get_table(name=CSV_UPLOAD_TABLE_W_EXPLORE)
    assert table.database_id == superset.utils.database.get_example_database().id
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_csv_upload_with_context")
@pytest.mark.usefixtures("create_csv_files")
@mock.patch("superset.db_engine_specs.hive.upload_to_s3", mock_upload_to_s3)
@mock.patch("superset.views.database.views.event_logger.log_with_context")
def test_import_csv(mock_event_logger):
    """End-to-end behavior of CSV upload ``if_exists`` modes and options.

    Covers: initial upload (fail mode), duplicate upload rejection, append,
    replace, schema-mismatched append failure, replace with a new schema,
    owner assignment, custom and default ``null_values`` handling, and
    ``dtype`` overrides (both a compatible and an incompatible mapping).
    """
    schema = utils.get_example_default_schema()
    full_table_name = f"{schema}.{CSV_UPLOAD_TABLE}" if schema else CSV_UPLOAD_TABLE
    success_msg_f1 = f"CSV file {escaped_double_quotes(CSV_FILENAME1)} uploaded to table {escaped_double_quotes(full_table_name)}"

    test_db = get_upload_db()

    # initial upload with fail mode
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)
    assert success_msg_f1 in resp

    # upload again with fail mode; should fail
    fail_msg = f"Unable to upload CSV file {escaped_double_quotes(CSV_FILENAME1)} to table {escaped_double_quotes(CSV_UPLOAD_TABLE)}"
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)
    assert fail_msg in resp

    # Hive doesn't support append uploads, so skip that branch there.
    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_csv(
            CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg_f1 in resp
        # Successful upload must be logged with full context.
        mock_event_logger.assert_called_with(
            action="successful_csv_upload",
            database=test_db.name,
            schema=schema,
            table=CSV_UPLOAD_TABLE,
        )

    # upload again with replace mode
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    assert success_msg_f1 in resp

    # try to append to table from file with different schema
    resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "append"})
    fail_msg_f2 = f"Unable to upload CSV file {escaped_double_quotes(CSV_FILENAME2)} to table {escaped_double_quotes(CSV_UPLOAD_TABLE)}"
    assert fail_msg_f2 in resp

    # replace table from file with different schema
    resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    success_msg_f2 = f"CSV file {escaped_double_quotes(CSV_FILENAME2)} uploaded to table {escaped_double_quotes(full_table_name)}"
    assert success_msg_f2 in resp

    table = SupersetTestCase.get_table(name=CSV_UPLOAD_TABLE)
    # make sure the new column name is reflected in the table metadata
    assert "d" in table.column_names

    # ensure user is assigned as an owner
    assert security_manager.find_user("admin") in table.owners

    # null values are set
    upload_csv(
        CSV_FILENAME2,
        CSV_UPLOAD_TABLE,
        extra={"null_values": '["", "john"]', "if_exists": "replace"},
    )
    # make sure that john and empty string are replaced with None
    with test_db.get_sqla_engine() as engine:
        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY c").fetchall()
        assert data == [(None, 1, "x"), ("paul", 2, None)]
        # default null values
        upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
        # with default null values, "john" and "" survive as-is
        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY c").fetchall()
        assert data == [("john", 1, "x"), ("paul", 2, None)]

    # cleanup
    with get_upload_db().get_sqla_engine() as engine:
        engine.execute(f"DROP TABLE {full_table_name}")

    # with dtype
    upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE,
        dtype='{"a": "string", "b": "float64"}',
    )

    # you can change the type to something compatible, like an object to string
    # or an int to a float
    # file upload should work as normal
    with test_db.get_sqla_engine() as engine:
        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY b").fetchall()
        assert data == [("john", 1), ("paul", 2)]

    # cleanup
    with get_upload_db().get_sqla_engine() as engine:
        engine.execute(f"DROP TABLE {full_table_name}")

    # with dtype - wrong type
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE,
        dtype='{"a": "int"}',
    )

    # you cannot pass an incompatible dtype
    fail_msg = f"Unable to upload CSV file {escaped_double_quotes(CSV_FILENAME1)} to table {escaped_double_quotes(CSV_UPLOAD_TABLE)}"
    assert fail_msg in resp
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_csv_upload_with_context")
|
||||
@pytest.mark.usefixtures("create_excel_files")
|
||||
@mock.patch("superset.db_engine_specs.hive.upload_to_s3", mock_upload_to_s3)
|
||||
|
||||
Reference in New Issue
Block a user