feat: new Columnar upload form and API (#28192)

This commit is contained in:
Daniel Vaz Gaspar
2024-05-06 15:51:42 +01:00
committed by GitHub
parent f5843fe588
commit 9a339f08a7
29 changed files with 2267 additions and 1232 deletions

View File

@@ -34,13 +34,18 @@ from sqlalchemy.orm.session import Session
from superset import db
from superset.commands.database.uploaders.base import UploadCommand
from superset.commands.database.uploaders.columnar_reader import ColumnarReader
from superset.commands.database.uploaders.csv_reader import CSVReader
from superset.commands.database.uploaders.excel_reader import ExcelReader
from superset.db_engine_specs.sqlite import SqliteEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.sql_parse import Table
from tests.unit_tests.fixtures.common import create_csv_file, create_excel_file
from tests.unit_tests.fixtures.common import (
create_columnar_file,
create_csv_file,
create_excel_file,
)
def test_filter_by_uuid(
@@ -940,7 +945,7 @@ def test_csv_upload(
data=payload,
content_type="multipart/form-data",
)
assert response.status_code == 200
assert response.status_code == 201
assert response.json == {"message": "OK"}
init_mock.assert_called_with(*upload_called_with)
reader_mock.assert_called_with(*reader_called_with)
@@ -1135,7 +1140,7 @@ def test_csv_upload_file_extension_invalid(
response = client.post(
"/api/v1/database/1/csv_upload/",
data={
"file": (create_csv_file(), filename),
"file": create_csv_file(filename=filename),
"table_name": "table1",
"delimiter": ",",
},
@@ -1171,13 +1176,13 @@ def test_csv_upload_file_extension_valid(
response = client.post(
"/api/v1/database/1/csv_upload/",
data={
"file": (create_csv_file(), filename),
"file": create_csv_file(filename=filename),
"table_name": "table1",
"delimiter": ",",
},
content_type="multipart/form-data",
)
assert response.status_code == 200
assert response.status_code == 201
@pytest.mark.parametrize(
@@ -1282,7 +1287,7 @@ def test_excel_upload(
data=payload,
content_type="multipart/form-data",
)
assert response.status_code == 200
assert response.status_code == 201
assert response.json == {"message": "OK"}
init_mock.assert_called_with(*upload_called_with)
reader_mock.assert_called_with(*reader_called_with)
@@ -1406,7 +1411,7 @@ def test_excel_upload_file_extension_invalid(
response = client.post(
"/api/v1/database/1/excel_upload/",
data={
"file": (create_excel_file(), filename),
"file": create_excel_file(filename=filename),
"table_name": "table1",
},
content_type="multipart/form-data",
@@ -1415,6 +1420,326 @@ def test_excel_upload_file_extension_invalid(
assert response.json == {"message": {"file": ["File extension is not allowed."]}}
@pytest.mark.parametrize(
"payload,upload_called_with,reader_called_with",
[
(
{
"file": (create_columnar_file(), "out.parquet"),
"table_name": "table1",
},
(
1,
"table1",
ANY,
None,
ANY,
),
(
{
"already_exists": "fail",
"file": ANY,
"table_name": "table1",
},
),
),
(
{
"file": (create_columnar_file(), "out.parquet"),
"table_name": "table2",
"already_exists": "replace",
"columns_read": "col1,col2",
"dataframe_index": True,
"index_label": "label",
},
(
1,
"table2",
ANY,
None,
ANY,
),
(
{
"already_exists": "replace",
"columns_read": ["col1", "col2"],
"file": ANY,
"table_name": "table2",
"dataframe_index": True,
"index_label": "label",
},
),
),
],
)
def test_columnar_upload(
payload: dict[str, Any],
upload_called_with: tuple[int, str, Any, dict[str, Any]],
reader_called_with: dict[str, Any],
mocker: MockFixture,
client: Any,
full_api_access: None,
) -> None:
"""
Test Excel Upload success.
"""
init_mock = mocker.patch.object(UploadCommand, "__init__")
init_mock.return_value = None
_ = mocker.patch.object(UploadCommand, "run")
reader_mock = mocker.patch.object(ColumnarReader, "__init__")
reader_mock.return_value = None
response = client.post(
"/api/v1/database/1/columnar_upload/",
data=payload,
content_type="multipart/form-data",
)
assert response.status_code == 201
assert response.json == {"message": "OK"}
init_mock.assert_called_with(*upload_called_with)
reader_mock.assert_called_with(*reader_called_with)
@pytest.mark.parametrize(
"payload,expected_response",
[
(
{
"file": (create_columnar_file(), "out.parquet"),
"already_exists": "fail",
},
{"message": {"table_name": ["Missing data for required field."]}},
),
(
{
"file": (create_columnar_file(), "out.parquet"),
"table_name": "",
"already_exists": "fail",
},
{"message": {"table_name": ["Length must be between 1 and 10000."]}},
),
(
{"table_name": "table1", "already_exists": "fail"},
{"message": {"file": ["Field may not be null."]}},
),
(
{
"file": "xpto",
"table_name": "table1",
"already_exists": "fail",
},
{"message": {"file": ["Field may not be null."]}},
),
(
{
"file": (create_columnar_file(), "out.parquet"),
"table_name": "table1",
"already_exists": "xpto",
},
{"message": {"already_exists": ["Must be one of: fail, replace, append."]}},
),
],
)
def test_columnar_upload_validation(
payload: Any,
expected_response: dict[str, str],
mocker: MockFixture,
client: Any,
full_api_access: None,
) -> None:
"""
Test Excel Upload validation fails.
"""
_ = mocker.patch.object(UploadCommand, "run")
response = client.post(
"/api/v1/database/1/columnar_upload/",
data=payload,
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == expected_response
@pytest.mark.parametrize(
"filename",
[
"out.parquet",
"out.zip",
"out.parquet.zip",
"out something.parquet",
"out something.zip",
],
)
def test_columnar_upload_file_extension_valid(
filename: str,
mocker: MockFixture,
client: Any,
full_api_access: None,
) -> None:
"""
Test Excel Upload file extension fails.
"""
_ = mocker.patch.object(UploadCommand, "run")
response = client.post(
"/api/v1/database/1/columnar_upload/",
data={
"file": (create_columnar_file(), filename),
"table_name": "table1",
},
content_type="multipart/form-data",
)
assert response.status_code == 201
@pytest.mark.parametrize(
"filename",
[
"out.xpto",
"out.exe",
"out",
"out zip",
"",
"out.parquet.exe",
".parquet",
"out.",
".",
"out parquet a.exe",
],
)
def test_columnar_upload_file_extension_invalid(
filename: str,
mocker: MockFixture,
client: Any,
full_api_access: None,
) -> None:
"""
Test Excel Upload file extension fails.
"""
_ = mocker.patch.object(UploadCommand, "run")
response = client.post(
"/api/v1/database/1/columnar_upload/",
data={
"file": create_columnar_file(filename=filename),
"table_name": "table1",
},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["File extension is not allowed."]}}
def test_csv_metadata(mocker: MockFixture, client: Any, full_api_access: None) -> None:
_ = mocker.patch.object(CSVReader, "file_metadata")
response = client.post(
"/api/v1/database/csv_metadata/",
data={"file": create_csv_file()},
content_type="multipart/form-data",
)
assert response.status_code == 200
def test_csv_metadata_bad_extension(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(CSVReader, "file_metadata")
response = client.post(
"/api/v1/database/csv_metadata/",
data={"file": create_csv_file(filename="test.out")},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["File extension is not allowed."]}}
def test_csv_metadata_validation(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(CSVReader, "file_metadata")
response = client.post(
"/api/v1/database/csv_metadata/",
data={},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["Field may not be null."]}}
def test_excel_metadata(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(ExcelReader, "file_metadata")
response = client.post(
"/api/v1/database/excel_metadata/",
data={"file": create_excel_file()},
content_type="multipart/form-data",
)
assert response.status_code == 200
def test_excel_metadata_bad_extension(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(ExcelReader, "file_metadata")
response = client.post(
"/api/v1/database/excel_metadata/",
data={"file": create_excel_file(filename="test.out")},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["File extension is not allowed."]}}
def test_excel_metadata_validation(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(ExcelReader, "file_metadata")
response = client.post(
"/api/v1/database/excel_metadata/",
data={},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["Field may not be null."]}}
def test_columnar_metadata(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(ColumnarReader, "file_metadata")
response = client.post(
"/api/v1/database/columnar_metadata/",
data={"file": create_columnar_file()},
content_type="multipart/form-data",
)
assert response.status_code == 200
def test_columnar_metadata_bad_extension(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(ColumnarReader, "file_metadata")
response = client.post(
"/api/v1/database/columnar_metadata/",
data={"file": create_columnar_file(filename="test.out")},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["File extension is not allowed."]}}
def test_columnar_metadata_validation(
mocker: MockFixture, client: Any, full_api_access: None
) -> None:
_ = mocker.patch.object(ColumnarReader, "file_metadata")
response = client.post(
"/api/v1/database/columnar_metadata/",
data={},
content_type="multipart/form-data",
)
assert response.status_code == 400
assert response.json == {"message": {"file": ["Field may not be null."]}}
def test_table_metadata_happy_path(
mocker: MockFixture,
client: Any,