mirror of
https://github.com/apache/superset.git
synced 2026-04-13 13:18:25 +00:00
* Sparkline dates aren't formatting in Time Series Table (#6976) * Exclude venv for python linter to ignore * Fix NaN error * Fix the white background shown in SQL editor on drag (#7021) This PR sets the background-color css property on `.ace_scroller` instead of `.ace_content` to prevent the white background shown during resizing of the SQL editor before drag ends. * Show tooltip with time frame (#6979) * Fix time filter control (#6978) * Enhancement of query context and object. (#6962) * added more functionalities for query context and object. * fixed cache logic * added default value for groupby * updated comments and removed print (cherry picked from commitd5b9795f87) * [fix] /superset/slice/id url is too long (#6989) (cherry picked from commit6a4d507ab6) * [WIP] fix user specified JSON metadata not updating dashboard on refresh (#7027) (cherry picked from commitcc58f0e661) * feat: add ability to change font size in big number (#7003) * Add ability to change font sizes in Big Number * rename big number to header * Add comment to clarify font size values * Allow LIMIT to be specified in parameters (#7052) * [fix] Cursor jumping when editing chart and dashboard titles (#7038) (cherry picked from commitfc1770f7b7) * Changing time table viz to pass formatTime a date (#7020) (cherry picked from commit7f3c145b1f) * [db-engine-spec] Aligning Hive/Presto partition logic (#7007) (cherry picked from commit05be866117) * [fix] explore chart from dashboard missed slice title (#7046) (cherry picked from commita6d48d4052) * fix inaccurate data calculation with adata rolling and contribution (#7035) (cherry picked from commit0782e831cd) * Adding warning message for sqllab save query (#7028) (cherry picked from commitead3d48133) * [datasource] Ensuring consistent behavior of datasource editing/saving. 
(#7037) * Update datasource.py * Update datasource.py (cherry picked from commitc771625f10) * [csv-upload] Fixing message encoding (#6971) (cherry picked from commit48431ab5b9) * [sql-parse] Fixing LIMIT exceptions (#6963) (cherry picked from commit3e076cb60b) * Adding custom control overrides (#6956) * Adding extraOverrides to line chart * Updating extraOverrides to fit with more cases * Moving extraOverrides to index.js * Removing webpack-merge in package.json * Fixing metrics control clearing metric (cherry picked from commite6194051f4) * [sqlparse] Fixing table name extraction for ill-defined query (#7029) (cherry picked from commit07c340cf82) * [missing values] Removing replacing missing values (#4905) (cherry picked from commit61add606ca) * [SQL Lab] Improved query and results tabs rendering reliability (#7082) closes #7080 (cherry picked from commit9b58e9f492) * Fix filter_box migration PR #6523 (#7066) * Fix filter_box migration PR #6523 * Fix druid-related bug (cherry picked from commitb210742ad2) * SQL editor layout makeover (#7102) This PR includes the following layout and css tweaks: - Using flex to layout the north and south sub panes of query pane so resizing works properly in both Chrome and Firefox - Removal of necessary wrapper divs and tweaking of css in sql lab so we can scroll to the bottom of both the table list and the results pane - Make sql lab's content not overflow vertically and layout the query result area to eliminate double scroll bars - css tweaks on the basic.html page so the loading animation appears in the center of the page across the board (cherry picked from commit71f1bbd2ec) * [forms] Fix handling of NULLs (cherry picked from commite83a07d3df) * handle null column_name in sqla and druid models (cherry picked from commit2ff721ae07) * Use metric name instead of metric in filter box (#7106) (cherry picked from commit003364e74e) * Bump python lib croniter to an existing version (#7132) Package maintainers should really never delete 
packages, but it appears this happened with croniter and resulted in breaking our builds. This PR bumps to a more recent existing version of the library (cherry picked from commit215ed392a1) * Revert PR #6933 (#7162) * Celery worker for warming up cache * Remove testing changes * Add documentation * Fix lint * WIP dashboard filters * Use new cache so it works with dashboards * Add more unit tests, fix old ones * Fix flake8 and docs * Sparkline dates aren't formatting in Time Series Table (#6976) * Exclude venv for python linter to ignore * Fix NaN error * Changing time table viz to pass formatTime a date (#7020) (cherry picked from commit7f3c145b1f) * SQL editor layout makeover (#7102) This PR includes the following layout and css tweaks: - Using flex to layout the north and south sub panes of query pane so resizing works properly in both Chrome and Firefox - Removal of necessary wrapper divs and tweaking of css in sql lab so we can scroll to the bottom of both the table list and the results pane - Make sql lab's content not overflow vertically and layout the query result area to eliminate double scroll bars - css tweaks on the basic.html page so the loading animation appears in the center of the page across the board (cherry picked from commit71f1bbd2ec) * Celery worker for warming up cache * Remove testing changes * Add documentation * Fix lint * WIP dashboard filters * Use new cache so it works with dashboards * Add more unit tests, fix old ones * Fix flake8 and docs * Fix bad merge and pylint
317 lines
9.1 KiB
Python
317 lines
9.1 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
# pylint: disable=too-few-public-methods
|
|
|
|
import json
|
|
import logging
|
|
import urllib.parse
|
|
|
|
from celery.utils.log import get_task_logger
|
|
from flask import url_for
|
|
import requests
|
|
from requests.exceptions import RequestException
|
|
from sqlalchemy import and_, func
|
|
|
|
from superset import app, db
|
|
from superset.models.core import Dashboard, Log, Slice
|
|
from superset.models.tags import Tag, TaggedObject
|
|
from superset.tasks.celery_app import app as celery_app
|
|
from superset.utils.core import parse_human_datetime
|
|
|
|
|
|
# Module-level logger obtained from Celery's task logging helper.
logger = get_task_logger(__name__)
|
|
# Emit messages at INFO level and above from this module's logger.
logger.setLevel(logging.INFO)
|
|
|
|
|
|
def get_form_data(chart_id, dashboard=None):
    """
    Build `form_data` for chart GET request from dashboard's `default_filters`.

    When a dashboard has `default_filters` they need to be added as extra
    filters in the GET request for charts.

    Metadata keys that are missing, or present with an explicit JSON `null`,
    are treated as "not set" instead of raising.

    :param chart_id: ID of the chart; always returned under `slice_id`
    :param dashboard: optional object exposing a `json_metadata` JSON string
    :returns: dict with `slice_id` and, when at least one default filter
        applies to the chart, an `extra_filters` list of
        `{'col': ..., 'op': 'in', 'val': ...}` dicts
    """
    form_data = {'slice_id': chart_id}

    if dashboard is None or not dashboard.json_metadata:
        return form_data

    # a metadata document that is literally `null` carries no settings
    json_metadata = json.loads(dashboard.json_metadata) or {}

    # do not apply filters if chart is immune to them
    if chart_id in (json_metadata.get('filter_immune_slices') or []):
        return form_data

    # `default_filters` is itself a JSON-encoded string inside the metadata;
    # skip decoding when it is absent, null or empty
    raw_default_filters = json_metadata.get('default_filters')
    default_filters = (
        json.loads(raw_default_filters) if raw_default_filters else None
    )
    if not default_filters:
        return form_data

    # are some of the fields in the chart immune to filters?
    filter_immune_slice_fields = (
        json_metadata.get('filter_immune_slice_fields') or {}
    )
    # the per-chart mapping is keyed by the chart ID as a string
    immune_fields = filter_immune_slice_fields.get(str(chart_id)) or []

    extra_filters = []
    for filters in default_filters.values():
        for col, val in filters.items():
            if col not in immune_fields:
                extra_filters.append({'col': col, 'op': 'in', 'val': val})
    if extra_filters:
        form_data['extra_filters'] = extra_filters

    return form_data
|
|
|
|
|
|
def get_url(params):
    """
    Build the absolute URL used to warm up the cache for one chart.

    `params` is passed as keyword arguments to `url_for` for the
    `Superset.explore_json` endpoint. The resulting path is joined onto a
    base URL formatted from the `SUPERSET_WEBSERVER_ADDRESS` and
    `SUPERSET_WEBSERVER_PORT` entries of `app.config`.
    """
    template = 'http://{SUPERSET_WEBSERVER_ADDRESS}:{SUPERSET_WEBSERVER_PORT}/'
    baseurl = template.format(**app.config)

    # the endpoint path is resolved inside a test request context
    with app.test_request_context():
        endpoint_path = url_for('Superset.explore_json', **params)
        return urllib.parse.urljoin(baseurl, endpoint_path)
|
|
|
|
|
|
class Strategy:
    """
    Base class for cache warm up strategies.

    A concrete strategy overrides `get_urls` and returns the list of URLs
    that the `cache-warmup` task should request.

    Strategies are selected and parametrized through the Celery beat
    schedule in `superset/config.py`, for example:

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {
                    'strategy_name': 'top_n_dashboards',
                    'top_n': 10,
                    'since': '7 days ago',
                },
            },
        }

    """

    def __init__(self):
        """Take no arguments and set up no state."""

    def get_urls(self):
        """Return the URLs to warm up; the base class has no implementation."""
        raise NotImplementedError('Subclasses must implement get_urls!')
|
|
|
|
|
|
class DummyStrategy(Strategy):
    """
    Warm up every chart.

    This dummy strategy produces one URL per `Slice` row, without any
    dashboard filters applied. Can be configured by:

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {'strategy_name': 'dummy'},
            },
        }

    """

    name = 'dummy'

    def get_urls(self):
        """Return a warm up URL for each chart returned by the Slice query."""
        session = db.create_scoped_session()

        urls = []
        for chart in session.query(Slice).all():
            form_data = get_form_data(chart.id)
            urls.append(get_url({'form_data': form_data}))
        return urls
|
|
|
|
|
|
class TopNDashboardsStrategy(Strategy):
    """
    Warm up the charts of the `top_n` dashboards with the most log entries.

    Only `Log` rows with a dashboard ID and a `dttm` at or after `since`
    are counted.

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {
                    'strategy_name': 'top_n_dashboards',
                    'top_n': 5,
                    'since': '7 days ago',
                },
            },
        }

    """

    name = 'top_n_dashboards'

    def __init__(self, top_n=5, since='7 days ago'):
        """Store `top_n` and the result of `parse_human_datetime(since)`."""
        super().__init__()
        self.top_n = top_n
        self.since = parse_human_datetime(since)

    def get_urls(self):
        """Return warm up URLs for every chart in the selected dashboards."""
        session = db.create_scoped_session()

        # dashboards ranked by number of log entries since the cutoff
        ranked = (
            session
            .query(Log.dashboard_id, func.count(Log.dashboard_id))
            .filter(and_(
                Log.dashboard_id.isnot(None),
                Log.dttm >= self.since,
            ))
            .group_by(Log.dashboard_id)
            .order_by(func.count(Log.dashboard_id).desc())
            .limit(self.top_n)
            .all()
        )
        top_ids = [row.dashboard_id for row in ranked]

        top_dashboards = (
            session
            .query(Dashboard)
            .filter(Dashboard.id.in_(top_ids))
            .all()
        )

        # dashboard filters are folded into each chart's form data
        return [
            get_url({'form_data': get_form_data(chart.id, dashboard)})
            for dashboard in top_dashboards
            for chart in dashboard.slices
        ]
|
|
|
|
|
|
class DashboardTagsStrategy(Strategy):
    """
    Warm up charts that carry one of the configured tags, either directly
    or through a tagged dashboard.

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {
                    'strategy_name': 'dashboard_tags',
                    'tags': ['core', 'warmup'],
                },
            },
        }
    """

    name = 'dashboard_tags'

    def __init__(self, tags=None):
        """Store the tag names to match; `None` becomes an empty list."""
        super().__init__()
        self.tags = tags or []

    @staticmethod
    def _tagged_object_ids(session, tag_ids, object_type):
        """Return `object_id`s of TaggedObject rows of one type for the tags."""
        tagged_objects = (
            session
            .query(TaggedObject)
            .filter(and_(
                TaggedObject.object_type == object_type,
                TaggedObject.tag_id.in_(tag_ids),
            ))
            .all()
        )
        return [tagged_object.object_id for tagged_object in tagged_objects]

    def get_urls(self):
        """Return URLs for charts of tagged dashboards, then tagged charts."""
        session = db.create_scoped_session()

        matching_tags = (
            session
            .query(Tag)
            .filter(Tag.name.in_(self.tags))
            .all()
        )
        tag_ids = [tag.id for tag in matching_tags]

        urls = []

        # add dashboards that are tagged
        dash_ids = self._tagged_object_ids(session, tag_ids, 'dashboard')
        tagged_dashboards = (
            session
            .query(Dashboard)
            .filter(Dashboard.id.in_(dash_ids))
        )
        for dashboard in tagged_dashboards:
            urls.extend(
                get_url({'form_data': get_form_data(chart.id, dashboard)})
                for chart in dashboard.slices
            )

        # add charts that are tagged
        chart_ids = self._tagged_object_ids(session, tag_ids, 'chart')
        tagged_charts = (
            session
            .query(Slice)
            .filter(Slice.id.in_(chart_ids))
        )
        urls.extend(
            get_url({'form_data': get_form_data(chart.id)})
            for chart in tagged_charts
        )

        return urls
|
|
|
|
|
|
# Strategy classes that `cache_warmup` can select by their `name` attribute.
strategies = [DummyStrategy, TopNDashboardsStrategy, DashboardTagsStrategy]
|
|
|
|
|
|
@celery_app.task(name='cache-warmup')
def cache_warmup(strategy_name, *args, **kwargs):
    """
    Warm up cache.

    Looks up the strategy class whose `name` equals `strategy_name`,
    instantiates it with the remaining arguments and issues a GET request
    for every URL the strategy returns.

    Returns an error message string when the strategy is unknown or cannot
    be instantiated (TypeError); otherwise a dict with the lists of URLs
    under `success` and `errors`.
    """
    logger.info('Loading strategy')
    class_ = next(
        (
            candidate for candidate in strategies
            if candidate.name == strategy_name
        ),
        None,
    )
    if class_ is None:
        message = f'No strategy {strategy_name} found!'
        logger.error(message)
        return message

    logger.info(f'Loading {class_.__name__}')
    try:
        strategy = class_(*args, **kwargs)
        logger.info('Success!')
    except TypeError:
        # e.g. keyword arguments the strategy's constructor does not accept
        message = 'Error loading strategy!'
        logger.exception(message)
        return message

    fetched = []
    failed = []
    for url in strategy.get_urls():
        logger.info(f'Fetching {url}')
        try:
            requests.get(url)
        except RequestException:
            logger.exception('Error warming up cache!')
            failed.append(url)
        else:
            fetched.append(url)

    return {'success': fetched, 'errors': failed}
|