Sample test data (#10487)

Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
This commit is contained in:
Bogdan
2020-08-03 09:08:49 -07:00
committed by GitHub
parent 821916a681
commit ab404ea2cf
8 changed files with 46 additions and 19 deletions

View File

@@ -52,9 +52,10 @@ def gen_filter(
} }
def load_data(tbl_name: str, database: Database) -> None: def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
pdf = pd.read_json(get_example_data("birth_names.json.gz")) pdf = pd.read_json(get_example_data("birth_names.json.gz"))
pdf.ds = pd.to_datetime(pdf.ds, unit="ms") pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
pdf = pdf.head(100) if sample else pdf
pdf.to_sql( pdf.to_sql(
tbl_name, tbl_name,
database.get_sqla_engine(), database.get_sqla_engine(),
@@ -72,7 +73,9 @@ def load_data(tbl_name: str, database: Database) -> None:
print("-" * 80) print("-" * 80)
def load_birth_names(only_metadata: bool = False, force: bool = False) -> None: def load_birth_names(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loading birth name dataset from a zip file in the repo""" """Loading birth name dataset from a zip file in the repo"""
# pylint: disable=too-many-locals # pylint: disable=too-many-locals
tbl_name = "birth_names" tbl_name = "birth_names"
@@ -80,7 +83,7 @@ def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
table_exists = database.has_table_by_name(tbl_name) table_exists = database.has_table_by_name(tbl_name)
if not only_metadata and (not table_exists or force): if not only_metadata and (not table_exists or force):
load_data(tbl_name, database) load_data(tbl_name, database, sample=sample)
obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
if not obj: if not obj:

View File

@@ -29,7 +29,9 @@ from superset.utils import core as utils
from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL
def load_energy(only_metadata: bool = False, force: bool = False) -> None: def load_energy(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loads an energy related dataset to use with sankey and graphs""" """Loads an energy related dataset to use with sankey and graphs"""
tbl_name = "energy_usage" tbl_name = "energy_usage"
database = utils.get_example_database() database = utils.get_example_database()
@@ -38,6 +40,7 @@ def load_energy(only_metadata: bool = False, force: bool = False) -> None:
if not only_metadata and (not table_exists or force): if not only_metadata and (not table_exists or force):
data = get_example_data("energy.json.gz") data = get_example_data("energy.json.gz")
pdf = pd.read_json(data) pdf = pd.read_json(data)
pdf = pdf.head(100) if sample else pdf
pdf.to_sql( pdf.to_sql(
tbl_name, tbl_name,
database.get_sqla_engine(), database.get_sqla_engine(),

View File

@@ -36,7 +36,9 @@ from .helpers import (
) )
def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> None: def load_unicode_test_data(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loading unicode test dataset from a csv file in the repo""" """Loading unicode test dataset from a csv file in the repo"""
tbl_name = "unicode_test" tbl_name = "unicode_test"
database = utils.get_example_database() database = utils.get_example_database()
@@ -50,6 +52,7 @@ def load_unicode_test_data(only_metadata: bool = False, force: bool = False) ->
# generate date/numeric data # generate date/numeric data
df["dttm"] = datetime.datetime.now().date() df["dttm"] = datetime.datetime.now().date()
df["value"] = [random.randint(1, 100) for _ in range(len(df))] df["value"] = [random.randint(1, 100) for _ in range(len(df))]
df = df.head(100) if sample else df
df.to_sql( # pylint: disable=no-member df.to_sql( # pylint: disable=no-member
tbl_name, tbl_name,
database.get_sqla_engine(), database.get_sqla_engine(),

View File

@@ -41,8 +41,8 @@ from .helpers import (
) )
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-statements
only_metadata: bool = False, force: bool = False only_metadata: bool = False, force: bool = False, sample: bool = False,
) -> None: ) -> None:
"""Loads the world bank health dataset, slices and a dashboard""" """Loads the world bank health dataset, slices and a dashboard"""
tbl_name = "wb_health_population" tbl_name = "wb_health_population"
@@ -54,6 +54,7 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
pdf = pd.read_json(data) pdf = pd.read_json(data)
pdf.columns = [col.replace(".", "_") for col in pdf.columns] pdf.columns = [col.replace(".", "_") for col in pdf.columns]
pdf.year = pd.to_datetime(pdf.year) pdf.year = pd.to_datetime(pdf.year)
pdf = pdf.head(100) if sample else pdf
pdf.to_sql( pdf.to_sql(
tbl_name, tbl_name,
database.get_sqla_engine(), database.get_sqla_engine(),

View File

@@ -677,7 +677,7 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin):
rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data") rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
self.assertEqual(rv.status_code, 200) self.assertEqual(rv.status_code, 200)
data = json.loads(rv.data.decode("utf-8")) data = json.loads(rv.data.decode("utf-8"))
self.assertEqual(data["result"][0]["rowcount"], 100) self.assertEqual(data["result"][0]["rowcount"], 45)
def test_chart_data_limit_offset(self): def test_chart_data_limit_offset(self):
""" """

View File

@@ -28,13 +28,13 @@ class TestSupersetDataFrame(SupersetTestCase):
self.examples.load_css_templates() self.examples.load_css_templates()
def test_load_energy(self): def test_load_energy(self):
self.examples.load_energy() self.examples.load_energy(sample=True)
def test_load_world_bank_health_n_pop(self): def test_load_world_bank_health_n_pop(self):
self.examples.load_world_bank_health_n_pop() self.examples.load_world_bank_health_n_pop(sample=True)
def test_load_birth_names(self): def test_load_birth_names(self):
self.examples.load_birth_names() self.examples.load_birth_names(sample=True)
def test_load_test_users_run(self): def test_load_test_users_run(self):
from superset.cli import load_test_users_run from superset.cli import load_test_users_run
@@ -42,4 +42,4 @@ class TestSupersetDataFrame(SupersetTestCase):
load_test_users_run() load_test_users_run()
def test_load_unicode_test_data(self): def test_load_unicode_test_data(self):
self.examples.load_unicode_test_data() self.examples.load_unicode_test_data(sample=True)

View File

@@ -231,7 +231,7 @@ class TestSqlaTableModel(SupersetTestCase):
spec.allows_joins = inner_join spec.allows_joins = inner_join
arbitrary_gby = "state || gender || '_test'" arbitrary_gby = "state || gender || '_test'"
arbitrary_metric = dict( arbitrary_metric = dict(
label="arbitrary", expressionType="SQL", sqlExpression="COUNT(1)" label="arbitrary", expressionType="SQL", sqlExpression="SUM(sum_boys)"
) )
query_obj = dict( query_obj = dict(
groupby=[arbitrary_gby, "name"], groupby=[arbitrary_gby, "name"],
@@ -264,13 +264,30 @@ class TestSqlaTableModel(SupersetTestCase):
return ret return ret
df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True) df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True)
name_list1 = cannonicalize_df(df1).name.values.tolist()
df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False) df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False)
name_list2 = cannonicalize_df(df1).name.values.tolist()
self.assertFalse(df2.empty) self.assertFalse(df2.empty)
# df1 can be empty if the db does not support join
if not df1.empty: expected_namelist = [
pandas.testing.assert_frame_equal( "Anthony",
cannonicalize_df(df1), cannonicalize_df(df2) "Brian",
) "Christopher",
"Daniel",
"David",
"Eric",
"James",
"Jeffrey",
"John",
"Joseph",
"Kenneth",
"Kevin",
"Mark",
"Michael",
"Paul",
]
assert name_list2 == expected_namelist
assert name_list1 == expected_namelist
def test_query_with_expr_groupby(self): def test_query_with_expr_groupby(self):
self.query_with_expr_helper(is_timeseries=False) self.query_with_expr_helper(is_timeseries=False)

View File

@@ -96,7 +96,7 @@ class TestSqlLab(SupersetTestCase):
f"SELECT * FROM admin_database.{tmp_table_name}" f"SELECT * FROM admin_database.{tmp_table_name}"
).fetchall() ).fetchall()
self.assertEqual( self.assertEqual(
75691, len(data) 100, len(data)
) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True ) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True
# cleanup # cleanup