mirror of
https://github.com/apache/superset.git
synced 2026-04-19 16:14:52 +00:00
feat: new CSV upload form and API (#27840)
This commit is contained in:
committed by
GitHub
parent
40e77be813
commit
54387b4589
@@ -14,14 +14,11 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# pylint: disable=unused-argument, import-outside-toplevel, line-too-long
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
from unittest.mock import Mock
|
||||
from unittest.mock import ANY, Mock
|
||||
from uuid import UUID
|
||||
|
||||
import pytest
|
||||
@@ -31,7 +28,11 @@ from pytest_mock import MockFixture
|
||||
from sqlalchemy.orm.session import Session
|
||||
|
||||
from superset import db
|
||||
from superset.commands.database.csv_import import CSVImportCommand
|
||||
from superset.db_engine_specs.sqlite import SqliteEngineSpec
|
||||
from tests.unit_tests.fixtures.common import create_csv_file
|
||||
|
||||
# pylint: disable=unused-argument, import-outside-toplevel, line-too-long
|
||||
|
||||
|
||||
def test_filter_by_uuid(
|
||||
@@ -818,3 +819,351 @@ def test_oauth2_error(
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "payload,cmd_called_with",
    [
        # Minimal payload: only the required fields, defaults filled by the schema.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
            },
            (
                1,
                "table1",
                ANY,
                {
                    "already_exists": "fail",
                    "delimiter": ",",
                    "file": ANY,
                    "table_name": "table1",
                },
            ),
        ),
        # Comma-separated ``column_dates`` is expected to arrive as a list.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table2",
                "delimiter": ";",
                "already_exists": "replace",
                "column_dates": "col1,col2",
            },
            (
                1,
                "table2",
                ANY,
                {
                    "already_exists": "replace",
                    "column_dates": ["col1", "col2"],
                    "delimiter": ";",
                    "file": ANY,
                    "table_name": "table2",
                },
            ),
        ),
        # Full set of optional fields: strings are expected to be coerced to
        # lists, integers and a parsed JSON object respectively.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table2",
                "delimiter": ";",
                "already_exists": "replace",
                "columns_read": "col1,col2",
                "day_first": True,
                "rows_to_read": "1",
                "overwrite_duplicates": True,
                "skip_blank_lines": True,
                "skip_initial_space": True,
                "skip_rows": "10",
                "null_values": "None,N/A,''",
                "column_data_types": '{"col1": "str"}',
            },
            (
                1,
                "table2",
                ANY,
                {
                    "already_exists": "replace",
                    "columns_read": ["col1", "col2"],
                    "null_values": ["None", "N/A", "''"],
                    "day_first": True,
                    "overwrite_duplicates": True,
                    "rows_to_read": 1,
                    "skip_blank_lines": True,
                    "skip_initial_space": True,
                    "skip_rows": 10,
                    "delimiter": ";",
                    "file": ANY,
                    "column_data_types": {"col1": "str"},
                    "table_name": "table2",
                },
            ),
        ),
    ],
)
def test_csv_upload(
    payload: dict[str, Any],
    cmd_called_with: tuple[int, str, Any, dict[str, Any]],
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test that a valid multipart upload succeeds.

    The endpoint must answer 200 with ``{"message": "OK"}`` and construct
    ``CSVImportCommand`` with the positional arguments in ``cmd_called_with``.
    """
    # ``__init__`` must return None, otherwise instantiation raises TypeError.
    init_mock = mocker.patch.object(CSVImportCommand, "__init__", return_value=None)
    # Stub out ``run`` so no actual import is attempted.
    mocker.patch.object(CSVImportCommand, "run")

    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data=payload,
        content_type="multipart/form-data",
    )

    assert response.status_code == 200
    assert response.json == {"message": "OK"}
    init_mock.assert_called_with(*cmd_called_with)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "payload,expected_response",
    [
        # Missing required ``table_name``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "delimiter": ",",
                "already_exists": "fail",
            },
            {"message": {"table_name": ["Missing data for required field."]}},
        ),
        # Empty ``table_name``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "",
                "delimiter": ",",
                "already_exists": "fail",
            },
            {"message": {"table_name": ["Length must be between 1 and 10000."]}},
        ),
        # No ``file`` part at all.
        (
            {"table_name": "table1", "delimiter": ",", "already_exists": "fail"},
            {"message": {"file": ["Field may not be null."]}},
        ),
        # ``file`` sent as a plain string instead of an uploaded file.
        (
            {
                "file": "xpto",
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
            },
            {"message": {"file": ["Field may not be null."]}},
        ),
        # ``already_exists`` outside the allowed choices.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "xpto",
            },
            {"message": {"already_exists": ["Must be one of: fail, replace, append."]}},
        ),
        # Non-boolean ``day_first``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "day_first": "test1",
            },
            {"message": {"day_first": ["Not a valid boolean."]}},
        ),
        # Non-integer ``header_row``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "header_row": "test1",
            },
            {"message": {"header_row": ["Not a valid integer."]}},
        ),
        # Non-boolean ``overwrite_duplicates``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "overwrite_duplicates": "test1",
            },
            {"message": {"overwrite_duplicates": ["Not a valid boolean."]}},
        ),
        # ``rows_to_read`` below the minimum of 1.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "rows_to_read": 0,
            },
            {"message": {"rows_to_read": ["Must be greater than or equal to 1."]}},
        ),
        # Non-boolean ``skip_blank_lines``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "skip_blank_lines": "test1",
            },
            {"message": {"skip_blank_lines": ["Not a valid boolean."]}},
        ),
        # Non-boolean ``skip_initial_space``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "skip_initial_space": "test1",
            },
            {"message": {"skip_initial_space": ["Not a valid boolean."]}},
        ),
        # Non-integer ``skip_rows``.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "skip_rows": "test1",
            },
            {"message": {"skip_rows": ["Not a valid integer."]}},
        ),
        # ``column_data_types`` that is not valid JSON.
        (
            {
                "file": (create_csv_file(), "out.csv"),
                "table_name": "table1",
                "delimiter": ",",
                "already_exists": "fail",
                "column_data_types": "{test:1}",
            },
            {"message": {"_schema": ["Invalid JSON format for column_data_types"]}},
        ),
    ],
)
def test_csv_upload_validation(
    payload: Any,
    expected_response: dict[str, Any],
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test that invalid CSV upload payloads are rejected.

    Each case posts a payload with exactly one problem and expects a 400 whose
    JSON body equals ``expected_response`` (a ``{"message": {field: [errors]}}``
    mapping, hence the ``dict[str, Any]`` annotation).
    """
    # Stub out ``run`` so nothing is imported if validation unexpectedly passes.
    mocker.patch.object(CSVImportCommand, "run")

    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data=payload,
        content_type="multipart/form-data",
    )

    assert response.status_code == 400
    assert response.json == expected_response
|
||||
|
||||
|
||||
def test_csv_upload_file_size_validation(
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test that an upload larger than ``CSV_UPLOAD_MAX_SIZE`` is rejected.

    The limit is lowered to 5 bytes for the duration of the test and the
    endpoint is expected to answer 400 with a file-size error on ``file``.
    """
    mocker.patch.object(CSVImportCommand, "run")
    # Use patch.dict instead of assigning to the config directly: pytest-mock
    # undoes it at teardown even when an assertion below fails, and it restores
    # the previous value rather than forcing the key to None.
    mocker.patch.dict(current_app.config, {"CSV_UPLOAD_MAX_SIZE": 5})

    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data={
            "file": (create_csv_file(), "out.csv"),
            "table_name": "table1",
            "delimiter": ",",
        },
        content_type="multipart/form-data",
    )

    assert response.status_code == 400
    assert response.json == {
        "message": {"file": ["File size exceeds the maximum allowed size."]}
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "filename",
    [
        "out.xpto",
        "out.exe",
        "out",
        "out csv",
        "",
        "out.csv.exe",
        ".csv",
        "out.",
        ".",
        "out csv a.exe",
    ],
)
def test_csv_upload_file_extension_invalid(
    filename: str,
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test that uploads with a disallowed file name are rejected.

    Every parametrized ``filename`` must produce a 400 with the
    "File extension is not allowed." error on the ``file`` field.
    """
    mocker.patch.object(CSVImportCommand, "run")

    form_data = {
        "file": (create_csv_file(), filename),
        "table_name": "table1",
        "delimiter": ",",
    }
    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data=form_data,
        content_type="multipart/form-data",
    )

    assert response.status_code == 400
    assert response.json == {"message": {"file": ["File extension is not allowed."]}}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "filename",
    [
        "out.csv",
        "out.txt",
        "out.tsv",
        "spaced name.csv",
        "spaced name.txt",
        "spaced name.tsv",
        "out.exe.csv",
        "out.csv.csv",
    ],
)
def test_csv_upload_file_extension_valid(
    filename: str,
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test that uploads with an allowed file extension are accepted.

    Every parametrized ``filename`` must pass validation and yield a 200;
    the import itself is stubbed out.
    """
    mocker.patch.object(CSVImportCommand, "run")

    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data={
            "file": (create_csv_file(), filename),
            "table_name": "table1",
            "delimiter": ",",
        },
        content_type="multipart/form-data",
    )

    assert response.status_code == 200
|
||||
|
||||
@@ -14,8 +14,9 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import csv
|
||||
from datetime import datetime
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -23,3 +24,22 @@ import pytest
|
||||
@pytest.fixture
def dttm() -> datetime:
    """Provide the fixed naive timestamp 2019-01-02 03:04:05.678900."""
    return datetime(
        year=2019,
        month=1,
        day=2,
        hour=3,
        minute=4,
        second=5,
        microsecond=678900,
    )
|
||||
|
||||
|
||||
def create_csv_file(data: list[list[str]] | None = None) -> BytesIO:
|
||||
data = (
|
||||
[
|
||||
["Name", "Age", "City"],
|
||||
["John", "30", "New York"],
|
||||
]
|
||||
if not data
|
||||
else data
|
||||
)
|
||||
|
||||
output = StringIO()
|
||||
writer = csv.writer(output)
|
||||
for row in data:
|
||||
writer.writerow(row)
|
||||
output.seek(0)
|
||||
bytes_buffer = BytesIO(output.getvalue().encode("utf-8"))
|
||||
return bytes_buffer
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import contextlib
|
||||
import tempfile
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from flask_wtf.file import FileField
|
||||
from wtforms import Form, ValidationError
|
||||
|
||||
from superset.forms import FileSizeLimit
|
||||
|
||||
|
||||
def _get_test_form(size_limit: Optional[int]) -> Form:
    """Return a form instance whose only field, ``test``, is a file field
    validated by ``FileSizeLimit(size_limit)``."""
    size_validator = FileSizeLimit(size_limit)

    class TestForm(Form):
        test = FileField("test", validators=[size_validator])

    return TestForm()
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _tempfile(contents: bytes):
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
f.write(contents)
|
||||
f.flush()
|
||||
|
||||
yield f
|
||||
|
||||
|
||||
def test_file_size_limit_pass() -> None:
    """A file of exactly the limit size must validate."""
    max_bytes = 100
    test_form = _get_test_form(max_bytes)

    # Validate while the temporary file is still open.
    with _tempfile(b"." * max_bytes) as upload:
        test_form.test.data = upload
        assert test_form.validate() is True
|
||||
|
||||
|
||||
def test_file_size_limit_fail() -> None:
    """A file one byte over the limit must fail validation."""
    max_bytes = 100
    test_form = _get_test_form(max_bytes)
    oversized = b"." * (max_bytes + 1)

    # Validate while the temporary file is still open.
    with _tempfile(oversized) as upload:
        test_form.test.data = upload
        assert test_form.validate() is False
|
||||
|
||||
|
||||
def test_file_size_limit_ignored_if_none() -> None:
    """With a limit of ``None`` a 200-byte file must validate."""
    test_form = _get_test_form(None)
    payload = b"." * 200

    # Validate while the temporary file is still open.
    with _tempfile(payload) as upload:
        test_form.test.data = upload
        assert test_form.validate() is True
|
||||
Reference in New Issue
Block a user