mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
Sample test data (#10487)
Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
This commit is contained in:
@@ -52,9 +52,10 @@ def gen_filter(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_data(tbl_name: str, database: Database) -> None:
|
def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
|
||||||
pdf = pd.read_json(get_example_data("birth_names.json.gz"))
|
pdf = pd.read_json(get_example_data("birth_names.json.gz"))
|
||||||
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
|
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
|
||||||
|
pdf = pdf.head(100) if sample else pdf
|
||||||
pdf.to_sql(
|
pdf.to_sql(
|
||||||
tbl_name,
|
tbl_name,
|
||||||
database.get_sqla_engine(),
|
database.get_sqla_engine(),
|
||||||
@@ -72,7 +73,9 @@ def load_data(tbl_name: str, database: Database) -> None:
|
|||||||
print("-" * 80)
|
print("-" * 80)
|
||||||
|
|
||||||
|
|
||||||
def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
|
def load_birth_names(
|
||||||
|
only_metadata: bool = False, force: bool = False, sample: bool = False
|
||||||
|
) -> None:
|
||||||
"""Loading birth name dataset from a zip file in the repo"""
|
"""Loading birth name dataset from a zip file in the repo"""
|
||||||
# pylint: disable=too-many-locals
|
# pylint: disable=too-many-locals
|
||||||
tbl_name = "birth_names"
|
tbl_name = "birth_names"
|
||||||
@@ -80,7 +83,7 @@ def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
|
|||||||
table_exists = database.has_table_by_name(tbl_name)
|
table_exists = database.has_table_by_name(tbl_name)
|
||||||
|
|
||||||
if not only_metadata and (not table_exists or force):
|
if not only_metadata and (not table_exists or force):
|
||||||
load_data(tbl_name, database)
|
load_data(tbl_name, database, sample=sample)
|
||||||
|
|
||||||
obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
|
obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
|
||||||
if not obj:
|
if not obj:
|
||||||
|
|||||||
@@ -29,7 +29,9 @@ from superset.utils import core as utils
|
|||||||
from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL
|
from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL
|
||||||
|
|
||||||
|
|
||||||
def load_energy(only_metadata: bool = False, force: bool = False) -> None:
|
def load_energy(
|
||||||
|
only_metadata: bool = False, force: bool = False, sample: bool = False
|
||||||
|
) -> None:
|
||||||
"""Loads an energy related dataset to use with sankey and graphs"""
|
"""Loads an energy related dataset to use with sankey and graphs"""
|
||||||
tbl_name = "energy_usage"
|
tbl_name = "energy_usage"
|
||||||
database = utils.get_example_database()
|
database = utils.get_example_database()
|
||||||
@@ -38,6 +40,7 @@ def load_energy(only_metadata: bool = False, force: bool = False) -> None:
|
|||||||
if not only_metadata and (not table_exists or force):
|
if not only_metadata and (not table_exists or force):
|
||||||
data = get_example_data("energy.json.gz")
|
data = get_example_data("energy.json.gz")
|
||||||
pdf = pd.read_json(data)
|
pdf = pd.read_json(data)
|
||||||
|
pdf = pdf.head(100) if sample else pdf
|
||||||
pdf.to_sql(
|
pdf.to_sql(
|
||||||
tbl_name,
|
tbl_name,
|
||||||
database.get_sqla_engine(),
|
database.get_sqla_engine(),
|
||||||
|
|||||||
@@ -36,7 +36,9 @@ from .helpers import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> None:
|
def load_unicode_test_data(
|
||||||
|
only_metadata: bool = False, force: bool = False, sample: bool = False
|
||||||
|
) -> None:
|
||||||
"""Loading unicode test dataset from a csv file in the repo"""
|
"""Loading unicode test dataset from a csv file in the repo"""
|
||||||
tbl_name = "unicode_test"
|
tbl_name = "unicode_test"
|
||||||
database = utils.get_example_database()
|
database = utils.get_example_database()
|
||||||
@@ -50,6 +52,7 @@ def load_unicode_test_data(only_metadata: bool = False, force: bool = False) ->
|
|||||||
# generate date/numeric data
|
# generate date/numeric data
|
||||||
df["dttm"] = datetime.datetime.now().date()
|
df["dttm"] = datetime.datetime.now().date()
|
||||||
df["value"] = [random.randint(1, 100) for _ in range(len(df))]
|
df["value"] = [random.randint(1, 100) for _ in range(len(df))]
|
||||||
|
df = df.head(100) if sample else df
|
||||||
df.to_sql( # pylint: disable=no-member
|
df.to_sql( # pylint: disable=no-member
|
||||||
tbl_name,
|
tbl_name,
|
||||||
database.get_sqla_engine(),
|
database.get_sqla_engine(),
|
||||||
|
|||||||
@@ -41,8 +41,8 @@ from .helpers import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
|
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-statements
|
||||||
only_metadata: bool = False, force: bool = False
|
only_metadata: bool = False, force: bool = False, sample: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Loads the world bank health dataset, slices and a dashboard"""
|
"""Loads the world bank health dataset, slices and a dashboard"""
|
||||||
tbl_name = "wb_health_population"
|
tbl_name = "wb_health_population"
|
||||||
@@ -54,6 +54,7 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
|
|||||||
pdf = pd.read_json(data)
|
pdf = pd.read_json(data)
|
||||||
pdf.columns = [col.replace(".", "_") for col in pdf.columns]
|
pdf.columns = [col.replace(".", "_") for col in pdf.columns]
|
||||||
pdf.year = pd.to_datetime(pdf.year)
|
pdf.year = pd.to_datetime(pdf.year)
|
||||||
|
pdf = pdf.head(100) if sample else pdf
|
||||||
pdf.to_sql(
|
pdf.to_sql(
|
||||||
tbl_name,
|
tbl_name,
|
||||||
database.get_sqla_engine(),
|
database.get_sqla_engine(),
|
||||||
|
|||||||
@@ -677,7 +677,7 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin):
|
|||||||
rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
|
rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
|
||||||
self.assertEqual(rv.status_code, 200)
|
self.assertEqual(rv.status_code, 200)
|
||||||
data = json.loads(rv.data.decode("utf-8"))
|
data = json.loads(rv.data.decode("utf-8"))
|
||||||
self.assertEqual(data["result"][0]["rowcount"], 100)
|
self.assertEqual(data["result"][0]["rowcount"], 45)
|
||||||
|
|
||||||
def test_chart_data_limit_offset(self):
|
def test_chart_data_limit_offset(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -28,13 +28,13 @@ class TestSupersetDataFrame(SupersetTestCase):
|
|||||||
self.examples.load_css_templates()
|
self.examples.load_css_templates()
|
||||||
|
|
||||||
def test_load_energy(self):
|
def test_load_energy(self):
|
||||||
self.examples.load_energy()
|
self.examples.load_energy(sample=True)
|
||||||
|
|
||||||
def test_load_world_bank_health_n_pop(self):
|
def test_load_world_bank_health_n_pop(self):
|
||||||
self.examples.load_world_bank_health_n_pop()
|
self.examples.load_world_bank_health_n_pop(sample=True)
|
||||||
|
|
||||||
def test_load_birth_names(self):
|
def test_load_birth_names(self):
|
||||||
self.examples.load_birth_names()
|
self.examples.load_birth_names(sample=True)
|
||||||
|
|
||||||
def test_load_test_users_run(self):
|
def test_load_test_users_run(self):
|
||||||
from superset.cli import load_test_users_run
|
from superset.cli import load_test_users_run
|
||||||
@@ -42,4 +42,4 @@ class TestSupersetDataFrame(SupersetTestCase):
|
|||||||
load_test_users_run()
|
load_test_users_run()
|
||||||
|
|
||||||
def test_load_unicode_test_data(self):
|
def test_load_unicode_test_data(self):
|
||||||
self.examples.load_unicode_test_data()
|
self.examples.load_unicode_test_data(sample=True)
|
||||||
|
|||||||
@@ -231,7 +231,7 @@ class TestSqlaTableModel(SupersetTestCase):
|
|||||||
spec.allows_joins = inner_join
|
spec.allows_joins = inner_join
|
||||||
arbitrary_gby = "state || gender || '_test'"
|
arbitrary_gby = "state || gender || '_test'"
|
||||||
arbitrary_metric = dict(
|
arbitrary_metric = dict(
|
||||||
label="arbitrary", expressionType="SQL", sqlExpression="COUNT(1)"
|
label="arbitrary", expressionType="SQL", sqlExpression="SUM(sum_boys)"
|
||||||
)
|
)
|
||||||
query_obj = dict(
|
query_obj = dict(
|
||||||
groupby=[arbitrary_gby, "name"],
|
groupby=[arbitrary_gby, "name"],
|
||||||
@@ -264,13 +264,30 @@ class TestSqlaTableModel(SupersetTestCase):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True)
|
df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True)
|
||||||
|
name_list1 = cannonicalize_df(df1).name.values.tolist()
|
||||||
df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False)
|
df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False)
|
||||||
|
name_list2 = cannonicalize_df(df1).name.values.tolist()
|
||||||
self.assertFalse(df2.empty)
|
self.assertFalse(df2.empty)
|
||||||
# df1 can be empty if the db does not support join
|
|
||||||
if not df1.empty:
|
expected_namelist = [
|
||||||
pandas.testing.assert_frame_equal(
|
"Anthony",
|
||||||
cannonicalize_df(df1), cannonicalize_df(df2)
|
"Brian",
|
||||||
)
|
"Christopher",
|
||||||
|
"Daniel",
|
||||||
|
"David",
|
||||||
|
"Eric",
|
||||||
|
"James",
|
||||||
|
"Jeffrey",
|
||||||
|
"John",
|
||||||
|
"Joseph",
|
||||||
|
"Kenneth",
|
||||||
|
"Kevin",
|
||||||
|
"Mark",
|
||||||
|
"Michael",
|
||||||
|
"Paul",
|
||||||
|
]
|
||||||
|
assert name_list2 == expected_namelist
|
||||||
|
assert name_list1 == expected_namelist
|
||||||
|
|
||||||
def test_query_with_expr_groupby(self):
|
def test_query_with_expr_groupby(self):
|
||||||
self.query_with_expr_helper(is_timeseries=False)
|
self.query_with_expr_helper(is_timeseries=False)
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ class TestSqlLab(SupersetTestCase):
|
|||||||
f"SELECT * FROM admin_database.{tmp_table_name}"
|
f"SELECT * FROM admin_database.{tmp_table_name}"
|
||||||
).fetchall()
|
).fetchall()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
75691, len(data)
|
100, len(data)
|
||||||
) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True
|
) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True
|
||||||
|
|
||||||
# cleanup
|
# cleanup
|
||||||
|
|||||||
Reference in New Issue
Block a user