feat: support nulls in the csv uploads (#10208)

* Support more table properties for the hive upload

Refactor

Add tests, and refactor them to be pytest friendly

Use lowercase table names

Ignore isort

* Use sql params

Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
This commit is contained in:
Bogdan
2020-07-06 13:26:43 -07:00
committed by GitHub
parent 318e5347bc
commit 84f8a51458
10 changed files with 325 additions and 160 deletions

View File

@@ -26,7 +26,11 @@ from wtforms.ext.sqlalchemy.fields import QuerySelectField
from wtforms.validators import DataRequired, Length, NumberRange, Optional
from superset import app, db, security_manager
from superset.forms import CommaSeparatedListField, filter_not_empty_values
from superset.forms import (
CommaSeparatedListField,
filter_not_empty_values,
JsonListField,
)
from superset.models.core import Database
config = app.config
@@ -210,6 +214,16 @@ class CsvToDatabaseForm(DynamicForm):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
null_values = JsonListField(
_("Null values"),
default=config["CSV_DEFAULT_NA_NAMES"],
description=_(
"Json list of the values that should be treated as null. "
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
"Warning: Hive database supports only single value. "
'Use [""] for empty string.'
),
)
class ExcelToDatabaseForm(DynamicForm):
@@ -376,3 +390,13 @@ class ExcelToDatabaseForm(DynamicForm):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
null_values = JsonListField(
_("Null values"),
default=config["CSV_DEFAULT_NA_NAMES"],
description=_(
"Json list of the values that should be treated as null. "
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
"Warning: Hive database supports only single value. "
'Use [""] for empty string.'
),
)

View File

@@ -149,6 +149,9 @@ class CsvToDatabaseView(SimpleFormView):
database = (
db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
)
# See the pandas.read_csv documentation for the available options:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
csv_to_df_kwargs = {
"sep": form.sep.data,
"header": form.header.data if form.header.data else 0,
@@ -162,6 +165,12 @@ class CsvToDatabaseView(SimpleFormView):
"infer_datetime_format": form.infer_datetime_format.data,
"chunksize": 1000,
}
if form.null_values.data:
csv_to_df_kwargs["na_values"] = form.null_values.data
csv_to_df_kwargs["keep_default_na"] = False
# See the pandas.DataFrame.to_sql documentation for the available options:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
df_to_sql_kwargs = {
"name": csv_table.table,
"if_exists": form.if_exists.data,