mirror of
https://github.com/apache/superset.git
synced 2026-04-07 18:35:15 +00:00
480 lines
15 KiB
Python
480 lines
15 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
import logging
|
|
import os
|
|
|
|
import pandas as pd
|
|
from flask import current_app
|
|
from sqlalchemy import DateTime, inspect, String
|
|
from sqlalchemy.sql import column
|
|
|
|
import superset.utils.database
|
|
from superset import db
|
|
from superset.connectors.sqla.models import BaseDatasource, SqlMetric
|
|
from superset.examples.helpers import (
|
|
get_examples_folder,
|
|
get_slice_json,
|
|
get_table_connector_registry,
|
|
merge_slice,
|
|
misc_dash_slices,
|
|
read_example_data,
|
|
update_slice_ids,
|
|
)
|
|
from superset.models.dashboard import Dashboard
|
|
from superset.models.slice import Slice
|
|
from superset.sql.parse import Table
|
|
from superset.utils import core as utils, json
|
|
from superset.utils.core import DatasourceType
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def load_world_bank_health_n_pop(  # pylint: disable=too-many-locals
    only_metadata: bool = False,
    force: bool = False,
    sample: bool = False,
) -> None:
    """Load the World Bank health dataset, its slices and a dashboard.

    :param only_metadata: when true, do not write any rows to the example
        database; only the dataset, slice and dashboard records are updated
    :param force: rewrite the table even when it already exists
    :param sample: keep only the first 100 rows of the dataset
    """
    tbl_name = "wb_health_population"
    database = superset.utils.database.get_example_database()
    with database.get_sqla_engine() as engine:
        schema = inspect(engine).default_schema_name
        table_exists = database.has_table(Table(tbl_name, schema))

        if not only_metadata and (not table_exists or force):
            is_presto = database.backend == "presto"
            pdf = read_example_data("examples://countries.json.gz", compression="gzip")
            pdf.columns = [name.replace(".", "_") for name in pdf.columns]
            pdf.year = pd.to_datetime(pdf.year)
            if is_presto:
                # The column is written as String(255) for presto (see the
                # dtype mapping below), so render the timestamp as text.
                pdf.year = pdf.year.dt.strftime("%Y-%m-%d %H:%M:%S")
            if sample:
                pdf = pdf.head(100)

            pdf.to_sql(
                tbl_name,
                engine,
                schema=schema,
                if_exists="replace",
                chunksize=50,
                dtype={
                    # TODO(bkyryliuk): use TIMESTAMP type for presto
                    "year": String(255) if is_presto else DateTime,
                    "country_code": String(3),
                    "country_name": String(255),
                    "region": String(255),
                },
                method="multi",
                index=False,
            )

    logger.debug("Creating table [wb_health_population] reference")
    table = get_table_connector_registry()
    tbl = db.session.query(table).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = table(table_name=tbl_name, schema=schema)
        db.session.add(tbl)
    tbl.description = utils.readfile(
        os.path.join(get_examples_folder(), "countries.md")
    )
    tbl.main_dttm_col = "year"
    tbl.database = database
    tbl.filter_select_enabled = True

    # Each name is "<aggregate>__<column>": the first three characters are the
    # aggregate function and everything after the double underscore is the
    # column it is applied to.
    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for metric in metrics:
        if any(existing.metric_name == metric for existing in tbl.metrics):
            continue
        aggr_func = metric[:3]
        compiled_col = str(column(metric[5:]).compile(db.engine))
        tbl.metrics.append(
            SqlMetric(metric_name=metric, expression=f"{aggr_func}({compiled_col})")
        )

    tbl.fetch_metadata()

    slices = create_slices(tbl)
    # The last slice is registered with the misc dashboard slice names.
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    logger.debug("Creating a World's Health Bank dashboard")
    dash_name = "World Bank's Data"
    slug = "world_health"
    dash = db.session.query(Dashboard).filter_by(slug=slug).first()

    if not dash:
        dash = Dashboard()
        db.session.add(dash)
    dash.published = True
    pos = dashboard_positions
    # update_slice_ids is called before the layout is serialized below.
    dash_slices = update_slice_ids(pos)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug
    dash.slices = dash_slices
|
|
|
|
|
|
def create_slices(tbl: BaseDatasource) -> list[Slice]:
    """Build the example World Bank charts bound to the datasource ``tbl``.

    Every chart shares one set of default form data; the keyword arguments
    given per chart are passed to ``get_slice_json`` together with those
    defaults.

    :param tbl: datasource whose ``id`` each slice references
    :returns: the slices, in the order they are declared below
    """
    population_metric = "sum__SP_POP_TOTL"
    population_metrics = ["sum__SP_POP_TOTL"]
    rural_population_metric = {
        "aggregate": "SUM",
        "column": {
            "column_name": "SP_RUR_TOTL",
            "optionName": "_col_SP_RUR_TOTL",
            "type": "DOUBLE",
        },
        "expressionType": "SIMPLE",
        "hasCustomLabel": True,
        "label": "Rural Population",
    }
    base_form_data = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "year",
        "groupby": [],
        "row_limit": current_app.config["ROW_LIMIT"],
        "since": "2014-01-01",
        "until": "2014-01-02",
        "time_range": "2014-01-01 : 2014-01-02",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "entity": "country_code",
        "show_bubbles": True,
    }
    # Country codes left out of the bubble chart.
    excluded_country_filter = {
        "clause": "WHERE",
        "expressionType": "SIMPLE",
        "filterOptionName": "2745eae5",
        "comparator": [
            "TCA",
            "MNP",
            "DMA",
            "MHL",
            "MCO",
            "SXM",
            "CYM",
            "TUV",
            "IMY",
            "KNA",
            "ASM",
            "ADO",
            "AMA",
            "PLW",
        ],
        "operator": "NOT IN",
        "subject": "country_code",
    }

    def chart(title: str, /, **form_data) -> Slice:
        # The slice's viz_type always mirrors the one in its form data.
        return Slice(
            slice_name=title,
            viz_type=form_data["viz_type"],
            datasource_type=DatasourceType.TABLE,
            datasource_id=tbl.id,
            params=get_slice_json(base_form_data, **form_data),
        )

    return [
        chart(
            "World's Population",
            since="2000",
            viz_type="big_number",
            compare_lag="10",
            metric=population_metric,
            compare_suffix="over 10Y",
        ),
        chart(
            "Most Populated Countries",
            viz_type="table",
            metrics=[population_metric],
            groupby=["country_name"],
        ),
        chart(
            "Growth Rate",
            viz_type="echarts_timeseries_line",
            since="1960-01-01",
            metrics=[population_metric],
            num_period_compare="10",
            groupby=["country_name"],
        ),
        chart(
            "% Rural",
            viz_type="world_map",
            metric="sum__SP_RUR_TOTL_ZS",
            num_period_compare="10",
            secondary_metric=rural_population_metric,
        ),
        chart(
            "Life Expectancy VS Rural %",
            viz_type="bubble",
            since="2011-01-01",
            until="2011-01-02",
            series="region",
            limit=0,
            entity="country_name",
            x="sum__SP_RUR_TOTL_ZS",
            y="sum__SP_DYN_LE00_IN",
            size="sum__SP_POP_TOTL",
            max_bubble_size="50",
            adhoc_filters=[excluded_country_filter],
        ),
        chart(
            "Rural Breakdown",
            viz_type="sunburst_v2",
            columns=["region", "country_name"],
            since="2011-01-01",
            until="2011-01-02",
            metric=population_metric,
            secondary_metric=rural_population_metric,
        ),
        chart(
            "World's Pop Growth",
            since="1960-01-01",
            until="now",
            viz_type="echarts_area",
            groupby=["region"],
            metrics=population_metrics,
        ),
        chart(
            "Box plot",
            since="1960-01-01",
            until="now",
            whisker_options="Min/max (no outliers)",
            x_ticks_layout="staggered",
            viz_type="box_plot",
            groupby=["region"],
            metrics=population_metrics,
        ),
        chart(
            "Treemap",
            since="1960-01-01",
            until="now",
            viz_type="treemap_v2",
            metric=population_metric,
            groupby=["region", "country_code"],
        ),
        chart(
            "Parallel Coordinates",
            since="2011-01-01",
            until="2012-01-01",
            viz_type="para",
            limit=100,
            metrics=[population_metric, "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS"],
            secondary_metric=population_metric,
            series="country_name",
        ),
    ]
|
|
|
|
|
|
def _chart_node(node_id, chart_id, height, slice_name, width):
    """Return a CHART layout node with no children."""
    return {
        "children": [],
        "id": node_id,
        "meta": {
            "chartId": chart_id,
            "height": height,
            "sliceName": slice_name,
            "width": width,
        },
        "type": "CHART",
    }


def _column_node(node_id, children, width):
    """Return a transparent COLUMN layout node of the given width."""
    return {
        "children": children,
        "id": node_id,
        "meta": {"background": "BACKGROUND_TRANSPARENT", "width": width},
        "type": "COLUMN",
    }


def _row_node(node_id, children):
    """Return a transparent ROW layout node."""
    return {
        "children": children,
        "id": node_id,
        "meta": {"background": "BACKGROUND_TRANSPARENT"},
        "type": "ROW",
    }


# Layout nodes in the order they appear in the mapping; each is keyed by its
# own "id" below.
_layout_nodes = [
    _chart_node("CHART-37982887", 41, 52, "World's Population", 2),
    _chart_node("CHART-17e0f8d8", 42, 92, "Most Populated Countries", 3),
    _chart_node("CHART-2ee52f30", 43, 38, "Growth Rate", 6),
    _chart_node("CHART-2d5b6871", 44, 52, "% Rural", 7),
    _chart_node("CHART-0fd0d252", 45, 50, "Life Expectancy VS Rural %", 8),
    _chart_node("CHART-97f4cb48", 46, 38, "Rural Breakdown", 3),
    _chart_node("CHART-b5e05d6f", 47, 50, "World's Pop Growth", 4),
    _chart_node("CHART-e76e9f5f", 48, 50, "Box plot", 4),
    _chart_node("CHART-a4808bba", 49, 50, "Treemap", 8),
    _column_node("COLUMN-071bbbad", ["ROW-1e064e3c", "ROW-afdefba9"], 9),
    _column_node("COLUMN-fe3914b8", ["CHART-37982887"], 2),
    {
        "children": ["ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13"],
        "id": "GRID_ID",
        "type": "GRID",
    },
    {
        "id": "HEADER_ID",
        "meta": {"text": "World's Bank Data"},
        "type": "HEADER",
    },
    {"children": ["GRID_ID"], "id": "ROOT_ID", "type": "ROOT"},
    _row_node("ROW-1e064e3c", ["COLUMN-fe3914b8", "CHART-2d5b6871"]),
    _row_node("ROW-3fa26c5d", ["CHART-b5e05d6f", "CHART-0fd0d252"]),
    _row_node("ROW-46632bc2", ["COLUMN-071bbbad", "CHART-17e0f8d8"]),
    _row_node("ROW-812b3f13", ["CHART-a4808bba", "CHART-e76e9f5f"]),
    _row_node("ROW-afdefba9", ["CHART-2ee52f30", "CHART-97f4cb48"]),
]

# Dashboard layout: node id -> node, followed by the layout version marker.
dashboard_positions = {node["id"]: node for node in _layout_nodes}
dashboard_positions["DASHBOARD_VERSION_KEY"] = "v2"
|