mirror of
https://github.com/apache/superset.git
synced 2026-04-20 08:34:37 +00:00
feat: support nulls in the csv uploads (#10208)
* Support more table properties for the Hive upload. Refactor. Add tests, and refactor them to be pytest-friendly. Use lowercase table names. Ignore isort. * Use SQL params. Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
This commit is contained in:
@@ -26,7 +26,11 @@ from wtforms.ext.sqlalchemy.fields import QuerySelectField
|
||||
from wtforms.validators import DataRequired, Length, NumberRange, Optional
|
||||
|
||||
from superset import app, db, security_manager
|
||||
from superset.forms import CommaSeparatedListField, filter_not_empty_values
|
||||
from superset.forms import (
|
||||
CommaSeparatedListField,
|
||||
filter_not_empty_values,
|
||||
JsonListField,
|
||||
)
|
||||
from superset.models.core import Database
|
||||
|
||||
config = app.config
|
||||
@@ -210,6 +214,16 @@ class CsvToDatabaseForm(DynamicForm):
|
||||
validators=[Optional()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
null_values = JsonListField(
|
||||
_("Null values"),
|
||||
default=config["CSV_DEFAULT_NA_NAMES"],
|
||||
description=_(
|
||||
"Json list of the values that should be treated as null. "
|
||||
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
|
||||
"Warning: Hive database supports only single value. "
|
||||
'Use [""] for empty string.'
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class ExcelToDatabaseForm(DynamicForm):
|
||||
@@ -376,3 +390,13 @@ class ExcelToDatabaseForm(DynamicForm):
|
||||
validators=[Optional()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
null_values = JsonListField(
|
||||
_("Null values"),
|
||||
default=config["CSV_DEFAULT_NA_NAMES"],
|
||||
description=_(
|
||||
"Json list of the values that should be treated as null. "
|
||||
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
|
||||
"Warning: Hive database supports only single value. "
|
||||
'Use [""] for empty string.'
|
||||
),
|
||||
)
|
||||
|
||||
@@ -149,6 +149,9 @@ class CsvToDatabaseView(SimpleFormView):
|
||||
database = (
|
||||
db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
|
||||
)
|
||||
|
||||
# More can be found here:
|
||||
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
|
||||
csv_to_df_kwargs = {
|
||||
"sep": form.sep.data,
|
||||
"header": form.header.data if form.header.data else 0,
|
||||
@@ -162,6 +165,12 @@ class CsvToDatabaseView(SimpleFormView):
|
||||
"infer_datetime_format": form.infer_datetime_format.data,
|
||||
"chunksize": 1000,
|
||||
}
|
||||
if form.null_values.data:
|
||||
csv_to_df_kwargs["na_values"] = form.null_values.data
|
||||
csv_to_df_kwargs["keep_default_na"] = False
|
||||
|
||||
# More can be found here:
|
||||
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
|
||||
df_to_sql_kwargs = {
|
||||
"name": csv_table.table,
|
||||
"if_exists": form.if_exists.data,
|
||||
|
||||
Reference in New Issue
Block a user