Reduce dashboard bootstrap payload (#9284)

This commit is contained in:
Erik Ritter
2020-03-13 12:18:22 -07:00
committed by GitHub
parent e9b0095754
commit f80fadff0e
6 changed files with 140 additions and 4 deletions

View File

@@ -282,6 +282,7 @@ DEFAULT_FEATURE_FLAGS = {
"ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False,
"KV_STORE": False,
"PRESTO_EXPAND_DATA": False,
"REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD": False,
"SHARE_QUERIES_VIA_KV_STORE": False,
"TAGGING_SYSTEM": False,
}

View File

@@ -27,6 +27,28 @@ from superset.models.helpers import AuditMixinNullable, ImportMixin, QueryResult
from superset.models.slice import Slice
from superset.utils import core as utils
# form_data keys that may reference metrics. ``data_for_slices`` reads each of
# these keys from a slice's form_data to collect the metric names it must keep.
METRIC_FORM_DATA_PARAMS = [
    "metric",
    "metrics",
    "metric_2",
    "percent_metrics",
    "secondary_metric",
    "size",
    "timeseries_limit_metric",
    "x",
    "y",
]
# form_data keys that may reference columns. ``data_for_slices`` reads each of
# these keys from a slice's form_data to collect the column names it must keep.
COLUMN_FORM_DATA_PARAMS = [
    "all_columns",
    "all_columns_x",
    "columns",
    "entity",
    "groupby",
    "order_by_cols",
    "series",
]
class BaseDatasource(
AuditMixinNullable, ImportMixin
@@ -213,6 +235,70 @@ class BaseDatasource(
"select_star": self.select_star,
}
def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
    """
    The representation of the datasource containing only the required data
    to render the provided slices.

    Used to reduce the payload when loading a dashboard.

    :param slices: The slices whose form_data decides which metrics and
        columns are kept
    :returns: The datasource payload without its ``description``, and with
        ``metrics``, ``columns`` and ``verbose_map`` restricted to the
        entries referenced by the slices
    """
    # Work on a shallow copy so the key removal/replacement below cannot
    # leak into whatever object ``self.data`` handed back.
    data = dict(self.data)
    metric_names = set()
    column_names = set()

    for slc in slices:
        form_data = slc.form_data

        # pull out all required metrics from the form_data
        for param in METRIC_FORM_DATA_PARAMS:
            for metric in utils.get_iterable(form_data.get(param) or []):
                metric_names.add(utils.get_metric_name(metric))

                if utils.is_adhoc_metric(metric):
                    # An adhoc metric may carry no column at all; do not
                    # record ``None`` as a required column name.
                    column_name = (metric.get("column") or {}).get("column_name")
                    if column_name is not None:
                        column_names.add(column_name)

        # pull out all required columns from the form_data
        for filter_ in form_data.get("adhoc_filters") or []:
            # ``.get`` keeps a filter without a "clause" key from aborting
            # the whole dashboard load with a KeyError.
            subject = filter_.get("subject")
            if filter_.get("clause") == "WHERE" and subject:
                column_names.add(subject)

        for param in COLUMN_FORM_DATA_PARAMS:
            for column in utils.get_iterable(form_data.get(param) or []):
                column_names.add(column)

    filtered_metrics = [
        metric
        for metric in data["metrics"]
        if metric["metric_name"] in metric_names
    ]
    filtered_columns = [
        column
        for column in data["columns"]
        if column["column_name"] in column_names
    ]

    # The description is dropped from the reduced payload; tolerate payloads
    # that never had one.
    data.pop("description", None)
    data["metrics"] = filtered_metrics
    data["columns"] = filtered_columns

    # Rebuild the verbose map from the surviving metrics and columns only,
    # falling back to the raw name when no verbose name is set.
    verbose_map = {"__timestamp": "Time"}
    verbose_map.update(
        {
            metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
            for metric in filtered_metrics
        }
    )
    verbose_map.update(
        {
            column["column_name"]: column["verbose_name"] or column["column_name"]
            for column in filtered_columns
        }
    )
    data["verbose_map"] = verbose_map

    return data
@staticmethod
def filter_values_handler(
values, target_column_is_numeric=False, is_list_target=False
@@ -353,6 +439,14 @@ class BaseDatasource(
"""
return []
def __hash__(self) -> int:
    """Hash the datasource by its ``uid``, the same key ``__eq__`` compares."""
    uid = self.uid
    return hash(uid)
def __eq__(self, other: object) -> bool:
    """
    Compare two datasources by ``uid``.

    Returns ``NotImplemented`` when ``other`` is not a ``BaseDatasource`` so
    Python can fall back to the other operand's comparison.
    """
    if isinstance(other, BaseDatasource):
        return self.uid == other.uid
    return NotImplemented
class BaseColumn(AuditMixinNullable, ImportMixin):
"""Interface for column"""

View File

@@ -1211,6 +1211,17 @@ def split(
yield s[i:]
def get_iterable(x: Any) -> List:
    """
    Wrap a value in a list unless it already is one.

    :param x: The object
    :returns: ``x`` itself when it is a list, otherwise the one-element list ``[x]``
    """
    if isinstance(x, list):
        return x
    return [x]
class TimeRangeEndpoint(str, Enum):
"""
The time range endpoint types which represent inclusive, exclusive, or unknown.

View File

@@ -17,6 +17,7 @@
# pylint: disable=C,R,W
import logging
import re
from collections import defaultdict
from contextlib import closing
from datetime import datetime, timedelta
from typing import Any, Callable, cast, Dict, List, Optional, Union
@@ -1791,11 +1792,12 @@ class Superset(BaseSupersetView):
dash = qry.one_or_none()
if not dash:
abort(404)
datasources = set()
datasources = defaultdict(list)
for slc in dash.slices:
datasource = slc.datasource
if datasource:
datasources.add(datasource)
datasources[datasource].append(slc)
if config["ENABLE_ACCESS_REQUEST"]:
for datasource in datasources:
@@ -1810,6 +1812,14 @@ class Superset(BaseSupersetView):
"superset/request_access/?" f"dashboard_id={dash.id}&"
)
# Filter out unneeded fields from the datasource payload
datasources_payload = {
datasource.uid: datasource.data_for_slices(slices)
if is_feature_enabled("REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD")
else datasource.data
for datasource, slices in datasources.items()
}
dash_edit_perm = check_ownership(
dash, raise_if_false=False
) and security_manager.can_access("can_save_dash", "Superset")
@@ -1857,7 +1867,7 @@ class Superset(BaseSupersetView):
bootstrap_data = {
"user_id": g.user.get_id(),
"dashboard_data": dashboard_data,
"datasources": {ds.uid: ds.data for ds in datasources},
"datasources": datasources_payload,
"common": common_bootstrap_payload(),
"editMode": edit_mode,
"urlParams": url_params,

View File

@@ -22,8 +22,9 @@ import pandas
from sqlalchemy.engine.url import make_url
import tests.test_app
from superset import app
from superset import app, db as metadata_db
from superset.models.core import Database
from superset.models.slice import Slice
from superset.utils.core import get_example_database, QueryStatus
from .base_tests import SupersetTestCase
@@ -318,3 +319,16 @@ class SqlaTableModelTestCase(SupersetTestCase):
tbl.get_query_str(query_obj)
self.assertTrue("Metric 'invalid' does not exist", context.exception)
def test_data_for_slices(self):
    """
    data_for_slices should trim the datasource payload to what one slice needs.

    Uses the first slice found for the ``birth_names`` table and checks the
    sizes of the reduced ``columns``, ``metrics`` and ``verbose_map``.
    """
    tbl = self.get_table_by_name("birth_names")
    slc = (
        metadata_db.session.query(Slice)
        .filter_by(datasource_id=tbl.id, datasource_type=tbl.type)
        .first()
    )

    data_for_slices = tbl.data_for_slices([slc])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(data_for_slices["columns"]), 0)
    self.assertEqual(len(data_for_slices["metrics"]), 1)
    self.assertEqual(len(data_for_slices["verbose_map"]), 2)

View File

@@ -36,6 +36,7 @@ from superset.utils.core import (
convert_legacy_filters_into_adhoc,
datetime_f,
format_timedelta,
get_iterable,
get_or_create_db,
get_since_until,
get_stacktrace,
@@ -950,3 +951,8 @@ class UtilsTestCase(SupersetTestCase):
get_time_range_endpoints(form_data={"datasource": "1__table"}, slc=slc),
(TimeRangeEndpoint.INCLUSIVE, TimeRangeEndpoint.EXCLUSIVE),
)
def test_get_iterable(self):
    """Non-list values get wrapped in a list; a list compares equal to itself."""
    cases = [
        (123, [123]),
        ([123], [123]),
        ("foo", ["foo"]),
    ]
    for value, expected in cases:
        self.assertListEqual(get_iterable(value), expected)